# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Generates test Recall@K statistics on labeled classification problems."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from collections import defaultdict
import os
import numpy as np
from sklearn.metrics.pairwise import pairwise_distances
from six.moves import xrange
import data_providers
from estimators.get_estimator import get_estimator
from utils import util
import tensorflow as tf

tf.logging.set_verbosity(tf.logging.INFO)

tf.flags.DEFINE_string(
    'config_paths', '',
    """
    Path to a YAML configuration file defining FLAG values. Multiple files
    can be separated by the `#` symbol. Files are merged recursively. Setting
    a key in these files is equivalent to setting the FLAG value with the
    same name.
    """)
tf.flags.DEFINE_string(
    'model_params', '{}',
    'YAML configuration string for the model parameters.')
tf.app.flags.DEFINE_string(
    'mode', 'validation', 'Which dataset to evaluate: `validation` | `test`.')
tf.app.flags.DEFINE_string(
    'master', 'local', 'BNS name of the TensorFlow master to use.')
tf.app.flags.DEFINE_string(
    'checkpoint_iter', '', 'Evaluate this specific checkpoint.')
tf.app.flags.DEFINE_string(
    'checkpointdir', '/tmp/tcn', 'Path to model checkpoints.')
tf.app.flags.DEFINE_string('outdir', '/tmp/tcn', 'Path to write summaries to.')
FLAGS = tf.app.flags.FLAGS


def nearest_cross_sequence_neighbors(data, tasks, n_neighbors=1):
  """Computes the n_neighbors nearest neighbors for every row in data.

  Args:
    data: A np.float32 array of shape [num_data, embedding_size] holding an
      embedded validation / test dataset.
    tasks: A list of strings of size [num_data] holding the task or sequence
      name that each row belongs to.
    n_neighbors: The number of knn indices to return for each row.
  Returns:
    indices: An np.int32 array of size [num_data, n_neighbors] holding the
      n_neighbors nearest indices for every row in data. These are restricted
      to come from different named sequences (as defined in `tasks`).
  """
  # Compute the pairwise sequence adjacency matrix from `tasks`.
  num_data = data.shape[0]
  tasks = np.array(tasks)
  tasks = np.reshape(tasks, (num_data, 1))
  assert len(tasks.shape) == 2
  not_adjacent = (tasks != tasks.T)
  # Compute the symmetric pairwise distance matrix.
  pdist = pairwise_distances(data, metric='sqeuclidean')
  # For every row in the pairwise distance matrix, only consider
  # cross-sequence columns.
  indices = np.zeros((num_data, n_neighbors), dtype=np.int32)
  for idx in range(num_data):
    # Restrict to cross-sequence neighbors.
    distances = [(pdist[idx][i], i) for i in xrange(num_data)
                 if not_adjacent[idx][i]]
    _, nearest_indices = zip(*sorted(
        distances, key=lambda x: x[0])[:n_neighbors])
    indices[idx] = nearest_indices
  return indices
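# A minimal sketch of the cross-sequence restriction above (toy values, not
# from the codebase): with two 2-frame sequences 'a' and 'b', each query's
# nearest neighbor must come from the other sequence.
#
#   data = np.array([[0.0], [1.0], [0.1], [2.0]], dtype=np.float32)
#   tasks = ['a', 'a', 'b', 'b']
#   nearest_cross_sequence_neighbors(data, tasks, n_neighbors=1)
#   # => array([[2], [2], [0], [1]], dtype=int32)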
def compute_cross_sequence_recall_at_k(retrieved_labels, labels, k_list):
  """Computes recall@k for a given list of k values.

  Recall is one if an example of the same class is retrieved among the top k
  nearest neighbors of a query example, and zero otherwise. Averaging these
  counts over all examples gives the recall@k score.

  Args:
    retrieved_labels: 2-D Numpy array of KNN labels for every embedding.
    labels: 1-D Numpy array of shape [number of data].
    k_list: List of k values to evaluate recall@k.
  Returns:
    recall_list: List of recall@k values.
  """
  kvalue_to_recall = dict(zip(k_list, np.zeros(len(k_list))))
  # For each value of K.
  for k in k_list:
    matches = defaultdict(float)
    counts = defaultdict(float)
    # For each (row index, label value) in the query labels.
    for i, label_value in enumerate(labels):
      # Check whether the query label appears among its k nearest
      # retrieved labels.
      if label_value in retrieved_labels[i][:k]:
        matches[label_value] += 1.
      # Increment the denominator.
      counts[label_value] += 1.
    # Average the per-class recalls over all observed classes (classes with
    # zero matches contribute zero).
    kvalue_to_recall[k] = np.mean(
        [matches[l] / counts[l] for l in counts])
  return [kvalue_to_recall[i] for i in k_list]


def compute_cross_sequence_recalls_at_k(
    embeddings, labels, label_attr_keys, tasks, k_list,
    summary_writer, training_step):
  """Computes and reports the recall@k for each classification problem.

  This takes an embedding matrix and an array of multiclass labels of size
  [num_data, number of classification problems], then computes the average
  recall@k for each classification problem as well as the average across
  problems.

  Args:
    embeddings: A np.float32 array of size [num_data, embedding_size]
      representing the embedded validation or test dataset.
    labels: A np.int32 array of size [num_data, num_classification_problems]
      holding multiclass labels for each embedding for each problem.
    label_attr_keys: List of strings, holds the names of the classification
      problems.
    tasks: A list of strings describing the video sequence each row belongs
      to. This is used to restrict the recall@k computation to cross-sequence
      examples.
    k_list: A list of ints, the k values to evaluate recall@k.
    summary_writer: A tf.summary.FileWriter.
    training_step: Int, the current training step we're evaluating.
  """
  num_data = embeddings.shape[0]
  assert labels.shape[0] == num_data
  # Compute knn indices.
  indices = nearest_cross_sequence_neighbors(
      embeddings, tasks, n_neighbors=max(k_list))
  retrieved_labels = labels[indices]
  # Compute the recall@k for each classification problem.
  recall_lists = []
  for idx, label_attr in enumerate(label_attr_keys):
    problem_labels = labels[:, idx]
    # Take all indices, all k labels for the problem indexed by idx.
    problem_retrieved = retrieved_labels[:, :, idx]
    recall_list = compute_cross_sequence_recall_at_k(
        retrieved_labels=problem_retrieved,
        labels=problem_labels,
        k_list=k_list)
    recall_lists.append(recall_list)
    for (k, recall) in zip(k_list, recall_list):
      recall_error = 1 - recall
      summ = tf.Summary(value=[tf.Summary.Value(
          tag='validation/classification/%s error@top%d' % (label_attr, k),
          simple_value=recall_error)])
      print('%s error@K=%d' % (label_attr, k), recall_error)
      summary_writer.add_summary(summ, int(training_step))
  # Report an average error@k across problems.
  recall_lists = np.array(recall_lists)
  for i in range(recall_lists.shape[1]):
    average_recall = np.mean(recall_lists[:, i])
    recall_error = 1 - average_recall
    summ = tf.Summary(value=[tf.Summary.Value(
        tag='validation/classification/average error@top%d' % k_list[i],
        simple_value=recall_error)])
    print('Average error@K=%d' % k_list[i], recall_error)
    summary_writer.add_summary(summ, int(training_step))
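# A minimal sketch of the recall@k computation above (toy values, not from
# the codebase): with query labels [0, 1] and retrieved neighbor labels
# [[1, 0], [1, 1]], class 0 is only matched at k=2 while class 1 is matched
# at both k values.
#
#   compute_cross_sequence_recall_at_k(
#       retrieved_labels=np.array([[1, 0], [1, 1]]),
#       labels=np.array([0, 1]),
#       k_list=[1, 2])
#   # => [0.5, 1.0]  (per-class recalls averaged: (0 + 1)/2, (1 + 1)/2)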
def evaluate_once(
    estimator, input_fn_by_view, batch_size, checkpoint_path,
    label_attr_keys, embedding_size, num_views, k_list):
  """Computes the recall@k for a given checkpoint path.

  Args:
    estimator: An `Estimator` object to evaluate.
    input_fn_by_view: An input_fn for an `Estimator's` predict method. Takes
      a view index and returns a dict holding ops for getting raw images for
      the view.
    batch_size: Int, size of the labeled eval batch.
    checkpoint_path: String, path to the specific checkpoint being evaluated.
    label_attr_keys: A list of strings, holding each attribute name.
    embedding_size: Int, the size of the embedding.
    num_views: Int, number of views in the dataset.
    k_list: List of ints, the k values to compute recall@k for.
  """
  feat_matrix = np.zeros((0, embedding_size))
  label_vect = np.zeros((0, len(label_attr_keys)))
  tasks = []
  eval_tensor_keys = ['embeddings', 'tasks', 'classification_labels']
  # Iterate over all views in the dataset.
  for view_index in range(num_views):
    # Set up a graph for embedding the entire dataset.
    predictions = estimator.inference(
        input_fn_by_view(view_index), checkpoint_path, batch_size,
        predict_keys=eval_tensor_keys)
    # Enumerate predictions.
    for i, p in enumerate(predictions):
      if i % 100 == 0:
        tf.logging.info('Embedded %d images for view %d' % (i, view_index))
      label = p['classification_labels']
      task = p['tasks']
      embedding = p['embeddings']
      # Collect (embedding, label, task) data.
      feat_matrix = np.append(feat_matrix, [embedding], axis=0)
      label_vect = np.append(label_vect, [label], axis=0)
      tasks.append(task)
  # Compute recall statistics.
  ckpt_step = int(checkpoint_path.split('-')[-1])
  summary_dir = os.path.join(FLAGS.outdir, 'labeled_eval_summaries')
  summary_writer = tf.summary.FileWriter(summary_dir)
  compute_cross_sequence_recalls_at_k(
      feat_matrix, label_vect, label_attr_keys, tasks, k_list,
      summary_writer, ckpt_step)


def get_labeled_tables(config):
  """Gets either labeled test or validation tables, based on flags."""
  # Get a list of filenames corresponding to labeled data.
  mode = FLAGS.mode
  if mode == 'validation':
    labeled_tables = util.GetFilesRecursively(config.data.labeled.validation)
  elif mode == 'test':
    labeled_tables = util.GetFilesRecursively(config.data.labeled.test)
  else:
    raise ValueError('Unknown dataset: %s' % mode)
  return labeled_tables
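# An illustrative sketch (values are placeholders, not canonical defaults) of
# the config keys read by get_labeled_tables() and main(), in YAML form:
#
#   embedding_size: 32
#   val:
#     recall_at_k_list: [1, 2, 4, 8]
#   data:
#     batch_size: 32
#     num_views: 2
#     labeled:
#       validation: /path/to/labeled/validation
#       test: /path/to/labeled/test
#       image_attr_keys: [...]
#       label_attr_keys: [...]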
def main(_):
  """Runs the main labeled eval loop."""
  # Parse the config dict from YAML config files / command line flags.
  config = util.ParseConfigsToLuaTable(FLAGS.config_paths, FLAGS.model_params)

  # Choose an estimator based on the training strategy.
  checkpointdir = FLAGS.checkpointdir
  estimator = get_estimator(config, checkpointdir)

  # Get data configs.
  image_attr_keys = config.data.labeled.image_attr_keys
  label_attr_keys = config.data.labeled.label_attr_keys
  embedding_size = config.embedding_size
  num_views = config.data.num_views
  k_list = config.val.recall_at_k_list
  batch_size = config.data.batch_size

  # Get either labeled validation or test tables.
  labeled_tables = get_labeled_tables(config)

  def input_fn_by_view(view_index):
    """Returns an input_fn for use with a tf.Estimator, by view."""
    def input_fn():
      # Get raw labeled images.
      (preprocessed_images,
       labels,
       tasks) = data_providers.labeled_data_provider(
           labeled_tables,
           estimator.preprocess_data,
           view_index,
           image_attr_keys,
           label_attr_keys,
           batch_size=batch_size)
      return {
          'batch_preprocessed': preprocessed_images,
          'tasks': tasks,
          'classification_labels': labels,
      }, None
    return input_fn

  # If evaluating a specific checkpoint, do that once; otherwise evaluate
  # every new checkpoint as it is written.
  if FLAGS.checkpoint_iter:
    checkpoint_path = os.path.join(
        checkpointdir, 'model.ckpt-%s' % FLAGS.checkpoint_iter)
    evaluate_once(
        estimator, input_fn_by_view, batch_size, checkpoint_path,
        label_attr_keys, embedding_size, num_views, k_list)
  else:
    for checkpoint_path in tf.contrib.training.checkpoints_iterator(
        checkpointdir):
      evaluate_once(
          estimator, input_fn_by_view, batch_size, checkpoint_path,
          label_attr_keys, embedding_size, num_views, k_list)


if __name__ == '__main__':
  tf.app.run()
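# Example invocation (a sketch; the script name, paths, and flag values are
# hypothetical):
#
#   python labeled_eval.py \
#     --config_paths=/path/to/config.yml \
#     --checkpointdir=/tmp/tcn \
#     --outdir=/tmp/tcn \
#     --mode=validation \
#     --checkpoint_iter=20000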