Commit cbd571f2 authored by Corey Lynch

Adding TCN.

parent 69cf6fca
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""SVTCN estimator implementation."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import data_providers
import model as model_module
from estimators import base_estimator
from estimators import svtcn_loss
import tensorflow as tf
class SVTCNEstimator(base_estimator.BaseEstimator):
"""Single-view TCN Estimator base class."""
def __init__(self, config, logdir):
super(SVTCNEstimator, self).__init__(config, logdir)
def construct_input_fn(self, records, is_training):
"""See base class."""
config = self._config
num_views = config.data.num_views
num_parallel_calls = config.data.num_parallel_calls
sequence_prefetch_size = config.data.sequence_prefetch_size
batch_prefetch_size = config.data.batch_prefetch_size
def input_fn():
"""Provides input to SVTCN models."""
(images_preprocessed,
images_raw,
timesteps) = data_providers.singleview_tcn_provider(
file_list=records,
preprocess_fn=self.preprocess_data,
num_views=num_views,
is_training=is_training,
batch_size=self._batch_size,
num_parallel_calls=num_parallel_calls,
sequence_prefetch_size=sequence_prefetch_size,
batch_prefetch_size=batch_prefetch_size)
if config.logging.summary.image_summaries and is_training:
tf.summary.image('training/svtcn_images', images_raw)
features = {'batch_preprocessed': images_preprocessed}
return (features, timesteps)
return input_fn
def forward(self, images, is_training, reuse=False):
"""See base class."""
embedder_strategy = self._config.embedder_strategy
embedder = model_module.get_embedder(
embedder_strategy,
self._config,
images,
is_training=is_training, reuse=reuse)
embeddings = embedder.construct_embedding()
if is_training:
self.variables_to_train = embedder.get_trainable_variables()
self.pretrained_init_fn = embedder.init_fn
return embeddings
class SVTCNTripletEstimator(SVTCNEstimator):
"""Single-View TCN with semihard triplet loss."""
def __init__(self, config, logdir):
super(SVTCNTripletEstimator, self).__init__(config, logdir)
def define_loss(self, embeddings, timesteps, is_training):
"""See base class."""
pos_radius = self._config.svtcn.pos_radius
neg_radius = self._config.svtcn.neg_radius
margin = self._config.triplet_semihard.margin
loss = svtcn_loss.singleview_tcn_loss(
embeddings, timesteps, pos_radius, neg_radius, margin=margin)
self._loss = loss
if is_training:
tf.summary.scalar('training/svtcn_loss', loss)
return loss
def define_eval_metric_ops(self):
"""See base class."""
return {'validation/svtcn_loss': tf.metrics.mean(self._loss)}
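# Hedged sketch, not part of the original commit: base_estimator.BaseEstimator
# is defined elsewhere in the repo, and the assumption here is only that it
# wires the overridden pieces together roughly like this under TF 1.x:
# construct_input_fn() yields (features, timesteps), forward() embeds the
# preprocessed batch, and define_loss() reduces embeddings plus timesteps to a
# scalar training loss. `estimator` and `records` are placeholders.
def _sketch_svtcn_training_graph(estimator, records):
  """Illustrates how the overridden methods above compose into a loss."""
  input_fn = estimator.construct_input_fn(records, is_training=True)
  features, timesteps = input_fn()
  embeddings = estimator.forward(
      features['batch_preprocessed'], is_training=True)
  return estimator.define_loss(embeddings, timesteps, is_training=True)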
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""This implements single view TCN triplet loss."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
def pairwise_squared_distance(feature):
"""Computes the squared pairwise distance matrix.
output[i, j] = || feature[i, :] - feature[j, :] ||_2^2
Args:
feature: 2-D Tensor of size [number of data, feature dimension]
Returns:
pairwise_squared_distances: 2-D Tensor of size
[number of data, number of data]
"""
pairwise_squared_distances = tf.add(
tf.reduce_sum(
tf.square(feature), axis=1, keep_dims=True),
tf.reduce_sum(
tf.square(tf.transpose(feature)), axis=0,
keep_dims=True)) - 2.0 * tf.matmul(feature, tf.transpose(feature))
# Deal with numerical inaccuracies. Set small negatives to zero.
pairwise_squared_distances = tf.maximum(pairwise_squared_distances, 0.0)
return pairwise_squared_distances
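# Hedged reference check (not part of the original file): the graph op above
# relies on the identity ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b applied to all
# pairs at once. The NumPy mirror below assumes only numpy and is meant for
# sanity-checking small inputs against pairwise_squared_distance.
def _pairwise_squared_distance_np(feature):
  """NumPy reference for pairwise_squared_distance on small arrays."""
  import numpy as np
  sq_norms = np.sum(np.square(feature), axis=1)
  distances = sq_norms[:, None] + sq_norms[None, :] - 2.0 * feature.dot(
      feature.T)
  # Clamp tiny negatives introduced by floating point round-off.
  return np.maximum(distances, 0.0)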
def masked_maximum(data, mask, dim=1):
"""Computes the axis wise maximum over chosen elements.
Args:
data: N-D Tensor.
mask: N-D Tensor of zeros or ones.
dim: The dimension over which to compute the maximum.
Returns:
masked_maximums: N-D Tensor.
The maximized dimension is of size 1 after the operation.
"""
axis_minimums = tf.reduce_min(data, dim, keep_dims=True)
masked_maximums = tf.reduce_max(
tf.multiply(
data - axis_minimums, mask), dim, keep_dims=True) + axis_minimums
return masked_maximums
def masked_minimum(data, mask, dim=1):
"""Computes the axis wise minimum over chosen elements.
Args:
data: 2-D Tensor of size [n, m].
mask: 2-D Boolean Tensor of size [n, m].
dim: The dimension over which to compute the minimum.
Returns:
masked_minimums: N-D Tensor.
The minimized dimension is of size 1 after the operation.
"""
axis_maximums = tf.reduce_max(data, dim, keep_dims=True)
masked_minimums = tf.reduce_min(
tf.multiply(
data - axis_maximums, mask), dim, keep_dims=True) + axis_maximums
return masked_minimums
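# Hedged NumPy mirror (illustrative only, numpy assumed) of masked_minimum,
# showing why the functions above shift by the axis max/min before masking:
# multiplying by the mask zeroes out excluded entries, and the shift guarantees
# that a zeroed-out entry can never win against a genuinely selected value,
# even when every selected value is positive (for the min) or negative (for
# the max).
def _masked_minimum_np(data, mask, dim=1):
  """NumPy reference for masked_minimum on small arrays."""
  import numpy as np
  axis_maximums = np.max(data, axis=dim, keepdims=True)
  # Kept entries become <= 0, excluded entries become exactly 0.
  shifted = (data - axis_maximums) * mask
  return np.min(shifted, axis=dim, keepdims=True) + axis_maximums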
def singleview_tcn_loss(
embeddings, timesteps, pos_radius, neg_radius, margin=1.0,
sequence_ids=None, multiseq=False):
"""Computes the single view triplet loss with semi-hard negative mining.
The loss encourages the positive distances (between a pair of embeddings
within the positive range of each other) to be smaller than the minimum
negative distance among those negatives that are at least greater than the
positive distance plus the margin constant (the semi-hard negatives) in the
mini-batch. If no such negative exists, the largest negative distance is used
instead.
Anchor, positive, negative selection is as follows:
Anchors: We consider every embedding timestep as an anchor.
Positives: pos_radius defines a radius (in timesteps) around each anchor from
which positives can be drawn. E.g. An anchor with t=10 and a pos_radius of
2 produces a set of 4 (anchor,pos) pairs [(a=10, p=8), ... (a=10, p=12)].
Negatives: neg_radius defines a boundary (in timesteps) around each anchor,
outside of which negatives can be drawn. E.g. An anchor with t=10 and a
neg_radius of 4 means negatives can be any t_neg where t_neg < 6 or
t_neg > 14.
Args:
embeddings: 2-D Tensor of embedding vectors.
timesteps: Tensor of sequence timesteps, shape [batch_size] or [batch_size, 1].
pos_radius: int32; the size of the window (in timesteps) around each anchor
timestep that a positive can be drawn from.
neg_radius: int32; the size of the window (in timesteps) around each anchor
timestep that defines a negative boundary. Negatives can only be chosen
where negative timestep t is < negative boundary min or > negative
boundary max.
margin: Float; the triplet loss margin hyperparameter.
sequence_ids: (Optional) Tensor of sequence ids, shape [batch_size] or
[batch_size, 1]. Together (sequence_id, sequence_timestep) give us a unique index for
each image if we have multiple sequences in a batch.
multiseq: Boolean, whether or not the batch is composed of multiple
sequences (with possibly colliding timesteps).
Returns:
triplet_loss: tf.float32 scalar.
"""
assert neg_radius > pos_radius
# If timesteps shape isn't [batch_size, 1], reshape to [batch_size, 1].
tshape = tf.shape(timesteps)
assert tshape.shape == 2 or tshape.shape == 1
if tshape.shape == 1:
timesteps = tf.reshape(timesteps, [tshape[0], 1])
# Build pairwise squared distance matrix.
pdist_matrix = pairwise_squared_distance(embeddings)
# Build pairwise binary adjacency matrix, where adjacency[i,j] is True
# if timestep j is inside the positive range for timestep i and both
# timesteps come from the same sequence.
pos_radius = tf.cast(pos_radius, tf.int32)
if multiseq:
# If sequence_ids shape isn't [batch_size, 1], reshape to [batch_size, 1].
tshape = tf.shape(sequence_ids)
assert tshape.shape == 2 or tshape.shape == 1
if tshape.shape == 1:
sequence_ids = tf.reshape(sequence_ids, [tshape[0], 1])
# Build pairwise binary adjacency matrix based on sequence_ids
sequence_adjacency = tf.equal(sequence_ids, tf.transpose(sequence_ids))
# Invert so we can select negatives only.
sequence_adjacency_not = tf.logical_not(sequence_adjacency)
in_pos_range = tf.logical_and(
tf.less_equal(
tf.abs(timesteps - tf.transpose(timesteps)), pos_radius),
sequence_adjacency)
# Build pairwise binary discordance matrix, where discordance[i,j] is True
# if timestep j is inside the negative range for timestep i or if the
# timesteps come from different sequences.
in_neg_range = tf.logical_or(
tf.greater(tf.abs(timesteps - tf.transpose(timesteps)), neg_radius),
sequence_adjacency_not
)
else:
in_pos_range = tf.less_equal(
tf.abs(timesteps - tf.transpose(timesteps)), pos_radius)
in_neg_range = tf.greater(tf.abs(timesteps - tf.transpose(timesteps)),
neg_radius)
batch_size = tf.size(timesteps)
# Compute the semi-hard negative mask: for each (anchor, positive) pair, mark
# negatives that are inside the negative range and farther from the anchor
# than the positive is.
pdist_matrix_tile = tf.tile(pdist_matrix, [batch_size, 1])
mask = tf.logical_and(
tf.tile(in_neg_range, [batch_size, 1]),
tf.greater(pdist_matrix_tile,
tf.reshape(tf.transpose(pdist_matrix), [-1, 1])))
mask_final = tf.reshape(
tf.greater(
tf.reduce_sum(
tf.cast(
mask, dtype=tf.float32), 1, keep_dims=True),
0.0), [batch_size, batch_size])
mask_final = tf.transpose(mask_final)
in_neg_range = tf.cast(in_neg_range, dtype=tf.float32)
mask = tf.cast(mask, dtype=tf.float32)
# negatives_outside: smallest D_an where D_an > D_ap
negatives_outside = tf.reshape(
masked_minimum(pdist_matrix_tile, mask), [batch_size, batch_size])
negatives_outside = tf.transpose(negatives_outside)
# negatives_inside: largest D_an
negatives_inside = tf.tile(
masked_maximum(pdist_matrix, in_neg_range), [1, batch_size])
semi_hard_negatives = tf.where(
mask_final, negatives_outside, negatives_inside)
loss_mat = tf.add(margin, pdist_matrix - semi_hard_negatives)
mask_positives = tf.cast(
in_pos_range, dtype=tf.float32) - tf.diag(tf.ones([batch_size]))
# In lifted-struct, the authors multiply the loss by 0.5 for upper-triangular
# pairs; in semihard, all positive pairs except the diagonal are used.
num_positives = tf.reduce_sum(mask_positives)
triplet_loss = tf.truediv(
tf.reduce_sum(tf.maximum(tf.multiply(loss_mat, mask_positives), 0.0)),
num_positives,
name='triplet_svtcn_loss')
return triplet_loss
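# Hedged usage sketch (not in the original commit): wiring the loss above to
# toy data under TF 1.x. Shapes follow the docstring (embeddings [batch, dim],
# timesteps [batch, 1]); the radii and margin values below are made up purely
# for illustration.
def _demo_singleview_tcn_loss():
  import numpy as np
  embeddings = tf.constant(np.random.rand(32, 8).astype(np.float32))
  timesteps = tf.constant(np.arange(32, dtype=np.int32).reshape(32, 1))
  loss = singleview_tcn_loss(
      embeddings, timesteps, pos_radius=2, neg_radius=6, margin=1.0)
  with tf.Session() as sess:
    print('toy svtcn loss: %f' % sess.run(loss))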
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for svtcn_loss.py."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from sklearn.metrics.pairwise import euclidean_distances
from estimators import svtcn_loss
import tensorflow as tf
class SVTCNLoss(tf.test.TestCase):
def testSVTCNLoss(self):
with self.test_session():
num_data = 64
num_sequences = 2
num_data_per_seq = num_data // num_sequences
feat_dim = 6
margin = 1.0
times = np.tile(np.arange(num_data_per_seq, dtype=np.int32),
num_sequences)
times = np.reshape(times, [times.shape[0], 1])
sequence_ids = np.concatenate(
[np.ones(num_data_per_seq)*i for i in range(num_sequences)])
sequence_ids = np.reshape(sequence_ids, [sequence_ids.shape[0], 1])
pos_radius = 6
neg_radius = 12
embedding = np.random.rand(num_data, feat_dim).astype(np.float32)
# Compute the loss in NP
# Get a positive mask, i.e. indices for each time index
# that are inside the positive range.
in_pos_range = np.less_equal(
np.abs(times - times.transpose()), pos_radius)
# Get a negative mask, i.e. indices for each time index
# that are inside the negative range (> t + neg_radius
# or < t - neg_radius).
in_neg_range = np.greater(np.abs(times - times.transpose()), neg_radius)
sequence_adjacency = sequence_ids == sequence_ids.T
sequence_adjacency_not = np.logical_not(sequence_adjacency)
pdist_matrix = euclidean_distances(embedding, squared=True)
loss_np = 0.0
num_positives = 0.0
for i in range(num_data):
for j in range(num_data):
if in_pos_range[i, j] and i != j and sequence_adjacency[i, j]:
num_positives += 1.0
pos_distance = pdist_matrix[i][j]
neg_distances = []
for k in range(num_data):
if in_neg_range[i, k] or sequence_adjacency_not[i, k]:
neg_distances.append(pdist_matrix[i][k])
neg_distances.sort() # sort by distance
chosen_neg_distance = neg_distances[0]
for l in range(len(neg_distances)):
chosen_neg_distance = neg_distances[l]
if chosen_neg_distance > pos_distance:
break
loss_np += np.maximum(
0.0, margin - chosen_neg_distance + pos_distance)
loss_np /= num_positives
# Compute the loss in TF
loss_tf = svtcn_loss.singleview_tcn_loss(
embeddings=tf.convert_to_tensor(embedding),
timesteps=tf.convert_to_tensor(times),
pos_radius=pos_radius,
neg_radius=neg_radius,
margin=margin,
sequence_ids=tf.convert_to_tensor(sequence_ids),
multiseq=True
)
loss_tf = loss_tf.eval()
self.assertAllClose(loss_np, loss_tf)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Calculates running validation of TCN models (and baseline comparisons)."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import time
from estimators.get_estimator import get_estimator
from utils import util
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.INFO)
tf.flags.DEFINE_string(
'config_paths', '',
"""
Paths to YAML configuration files defining FLAG values. Multiple files
can be separated by the `#` symbol. Files are merged recursively. Setting
a key in these files is equivalent to setting the FLAG value with
the same name.
""")
tf.flags.DEFINE_string(
'model_params', '{}', 'YAML configuration string for the model parameters.')
tf.app.flags.DEFINE_string('master', 'local',
'BNS name of the TensorFlow master to use')
tf.app.flags.DEFINE_string(
'logdir', '/tmp/tcn', 'Directory where to write event logs.')
FLAGS = tf.app.flags.FLAGS
def main(_):
"""Runs main eval loop."""
# Parse config dict from yaml config files / command line flags.
logdir = FLAGS.logdir
config = util.ParseConfigsToLuaTable(FLAGS.config_paths, FLAGS.model_params)
# Choose an estimator based on training strategy.
estimator = get_estimator(config, logdir)
# Wait for the first checkpoint file to be written.
while not tf.train.latest_checkpoint(logdir):
tf.logging.info('Waiting for a checkpoint file...')
time.sleep(10)
# Run validation.
while True:
estimator.evaluate()
if __name__ == '__main__':
tf.app.run()
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Generates imitation videos.
Generate single pairwise imitation videos:
blaze build -c opt --config=cuda --copt=-mavx \
learning/brain/research/tcn/generate_videos && \
blaze-bin/learning/brain/research/tcn/generate_videos \
--logtostderr \
--config_paths $config_paths \
--checkpointdir $checkpointdir \
--checkpoint_iter $checkpoint_iter \
--query_records_dir $query_records_dir \
--target_records_dir $target_records_dir \
--outdir $outdir \
--mode single \
--num_query_sequences 1 \
--num_target_sequences -1
Generate imitation videos with multiple sequences in the target set:
blaze build -c opt --config=cuda --copt=-mavx \
learning/brain/research/tcn/generate_videos && \
blaze-bin/learning/brain/research/tcn/generate_videos \
--logtostderr \
--config_paths $config_paths \
--checkpointdir $checkpointdir \
--checkpoint_iter $checkpoint_iter \
--query_records_dir $query_records_dir \
--target_records_dir $target_records_dir \
--outdir $outdir \
--mode multi \
--num_multi_targets 1
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import tensorflow as tf
import os
import matplotlib
matplotlib.use("pdf")
import matplotlib.animation as animation
import matplotlib.pyplot as plt
import numpy as np
from estimators.get_estimator import get_estimator
from utils import util
tf.logging.set_verbosity(tf.logging.INFO)
tf.flags.DEFINE_string(
'config_paths', '',
"""
Paths to YAML configuration files defining FLAG values. Multiple files
can be separated by the `#` symbol. Files are merged recursively. Setting
a key in these files is equivalent to setting the FLAG value with
the same name.
""")
tf.flags.DEFINE_string(
'model_params', '{}', 'YAML configuration string for the model parameters.')
tf.app.flags.DEFINE_string(
'checkpointdir', '/tmp/tcn', 'Path to model checkpoints.')
tf.app.flags.DEFINE_string(
'checkpoint_iter', '', 'Checkpoint iter to use.')
tf.app.flags.DEFINE_integer(
'num_multi_targets', -1,
'Number of imitation vids in the target set per imitation video.')
tf.app.flags.DEFINE_string(
'outdir', '/tmp/tcn', 'Path to write embeddings to.')
tf.app.flags.DEFINE_string(
'mode', 'single', 'single | multi | same. Single means generate imitation '
'vids where the query is imitated by a single target sequence. Multi '
'means the query is imitated by a set of multiple target sequences. '
'Same means the query is imitated by itself from a different view.')
tf.app.flags.DEFINE_string('query_records_dir', '',
'Directory of image tfrecords.')
tf.app.flags.DEFINE_string('target_records_dir', '',
'Directory of image tfrecords.')
tf.app.flags.DEFINE_integer('query_view', 1,
'Viewpoint of the query video.')
tf.app.flags.DEFINE_integer('target_view', 0,
'Viewpoint of the imitation video.')
tf.app.flags.DEFINE_integer('smoothing_window', 5,
'Number of frames to smooth over.')
tf.app.flags.DEFINE_integer('num_query_sequences', -1,
'Number of query sequences to embed.')
tf.app.flags.DEFINE_integer('num_target_sequences', -1,
'Number of target sequences to embed.')
FLAGS = tf.app.flags.FLAGS
def SmoothEmbeddings(embs):
"""Temporally smoothes a sequence of embeddings."""
new_embs = []
window = int(FLAGS.smoothing_window)
for i in range(len(embs)):
min_i = max(i-window, 0)
max_i = min(i+window, len(embs))
new_embs.append(np.mean(embs[min_i:max_i, :], axis=0))
return np.array(new_embs)
def MakeImitationVideo(
outdir, vidname, query_im_strs, knn_im_strs, height=640, width=360):
"""Creates a KNN imitation video.
For each frame in vid0, pair with the frame at index in knn_indices in
vids1. Write video to disk.
Args:
outdir: String, directory to write videos.
vidname: String, name of video.
query_im_strs: Numpy array holding query image strings.
knn_im_strs: Numpy array holding knn image strings.
height: Int, height of raw images.
width: Int, width of raw images.
"""
if not tf.gfile.Exists(outdir):
tf.gfile.MakeDirs(outdir)
vid_path = os.path.join(outdir, vidname)
combined = zip(query_im_strs, knn_im_strs)
# Create and write the video.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.set_aspect('equal')
ax.get_xaxis().set_visible(False)
ax.get_yaxis().set_visible(False)
im = ax.imshow(
np.zeros((height, width*2, 3)), cmap='gray', interpolation='nearest')
im.set_clim([0, 1])
plt.tight_layout(pad=0, w_pad=0, h_pad=0)
# pylint: disable=invalid-name
def update_img(pair):
"""Decode pairs of image strings, update a video."""
im_i, im_j = pair
nparr_i = np.fromstring(str(im_i), np.uint8)
img_np_i = cv2.imdecode(nparr_i, 1)
img_np_i = img_np_i[..., [2, 1, 0]]
nparr_j = np.fromstring(str(im_j), np.uint8)
img_np_j = cv2.imdecode(nparr_j, 1)
img_np_j = img_np_j[..., [2, 1, 0]]
# Optionally reshape the images to be same size.
frame = np.concatenate([img_np_i, img_np_j], axis=1)
im.set_data(frame)
return im
ani = animation.FuncAnimation(fig, update_img, combined, interval=15)
writer = animation.writers['ffmpeg'](fps=15)
dpi = 100
tf.logging.info('Writing video to:\n %s \n' % vid_path)
ani.save('%s.mp4' % vid_path, writer=writer, dpi=dpi)
def GenerateImitationVideo(
vid_name, query_ims, query_embs, target_ims, target_embs, height, width):
"""Generates a single cross-sequence imitation video.
For each frame in some query sequence, find the nearest neighbor from
some target sequence in embedding space.
Args:
vid_name: String, the name of the video.
query_ims: Numpy array of shape [query sequence length, height, width, 3].
query_embs: Numpy array of shape [query sequence length, embedding size].
target_ims: Numpy array of shape [target sequence length, height, width,
3].
target_embs: Numpy array of shape [target sequence length, embedding
size].
height: Int, height of the raw image.
width: Int, width of the raw image.
"""
# For each query frame, find the index of the nearest neighbor in the
# target video.
knn_indices = [util.KNNIds(q, target_embs, k=1)[0] for q in query_embs]
# Create and write out the video.
assert knn_indices
knn_ims = np.array([target_ims[k] for k in knn_indices])
MakeImitationVideo(FLAGS.outdir, vid_name, query_ims, knn_ims, height, width)
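# util.KNNIds is defined elsewhere in the repo; the hedged NumPy sketch below
# captures only the behavior GenerateImitationVideo relies on (an assumption,
# not the repo's implementation): given one query embedding, return the
# indices of the k closest target embeddings by Euclidean distance, nearest
# first.
def _knn_ids_np(query_emb, target_embs, k=1):
  dists = np.linalg.norm(target_embs - query_emb[None, :], axis=1)
  return list(np.argsort(dists)[:k])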
def SingleImitationVideos(
query_records, target_records, config, height, width):
"""Generates pairwise imitation videos.
This creates all pairs of target imitating query videos, where each frame
on the left is matched to a nearest neighbor coming from a single
embedded target video.
Args:
query_records: List of Strings, paths to tfrecord datasets to use as
queries.
target_records: List of Strings, paths to tfrecord datasets to use as
targets.
config: A T object describing training config.
height: Int, height of the raw image.
width: Int, width of the raw image.
"""
# Embed query and target data.
(query_sequences_to_data,
target_sequences_to_data) = EmbedQueryTargetData(
query_records, target_records, config)
qview = FLAGS.query_view
tview = FLAGS.target_view
# Loop over query videos.
for task_i, data_i in query_sequences_to_data.iteritems():
for task_j, data_j in target_sequences_to_data.iteritems():
i_ims = data_i['images']
i_embs = data_i['embeddings']
query_embs = SmoothEmbeddings(i_embs[qview])
query_ims = i_ims[qview]
j_ims = data_j['images']
j_embs = data_j['embeddings']
target_embs = SmoothEmbeddings(j_embs[tview])
target_ims = j_ims[tview]
tf.logging.info('Generating %s imitating %s video.' % (task_j, task_i))
vid_name = 'q%sv%s_im%sv%s' % (task_i, qview, task_j, tview)
vid_name = vid_name.replace('/', '_')
GenerateImitationVideo(vid_name, query_ims, query_embs,
target_ims, target_embs, height, width)
def MultiImitationVideos(
query_records, target_records, config, height, width):
"""Creates multi-imitation videos.
This creates videos where every frame on the left is matched to a nearest
neighbor coming from a set of multiple embedded target videos.
Args:
query_records: List of Strings, paths to tfrecord datasets to use as
queries.
target_records: List of Strings, paths to tfrecord datasets to use as
targets.
config: A T object describing training config.
height: Int, height of the raw image.
width: Int, width of the raw image.
"""
# Embed query and target data.
(query_sequences_to_data,
target_sequences_to_data) = EmbedQueryTargetData(
query_records, target_records, config)
qview = FLAGS.query_view
tview = FLAGS.target_view
# Loop over query videos.
for task_i, data_i in query_sequences_to_data.iteritems():
i_ims = data_i['images']
i_embs = data_i['embeddings']
query_embs = SmoothEmbeddings(i_embs[qview])
query_ims = i_ims[qview]
all_target_embs = []
all_target_ims = []
# If num_multi_targets is -1, add all sequence embeddings to the target set.
if FLAGS.num_multi_targets == -1:
num_multi_targets = len(target_sequences_to_data)
else:
# Else, add some specified number of seq embeddings to the target set.
num_multi_targets = FLAGS.num_multi_targets
for j in range(num_multi_targets):
task_j = target_sequences_to_data.keys()[j]
data_j = target_sequences_to_data[task_j]
print('Adding %s to target set' % task_j)
j_ims = data_j['images']
j_embs = data_j['embeddings']
target_embs = SmoothEmbeddings(j_embs[tview])
target_ims = j_ims[tview]
all_target_embs.extend(target_embs)
all_target_ims.extend(target_ims)
# Generate a "j imitating i" video.
tf.logging.info('Generating all imitating %s video.' % task_i)
vid_name = 'q%sv%s_multiv%s' % (task_i, qview, tview)
vid_name = vid_name.replace('/', '_')
GenerateImitationVideo(vid_name, query_ims, query_embs,
all_target_ims, all_target_embs, height, width)
def SameSequenceVideos(query_records, config, height, width):
"""Generate same sequence, cross-view imitation videos."""
batch_size = config.data.embed_batch_size
# Choose an estimator based on training strategy.
estimator = get_estimator(config, FLAGS.checkpointdir)
# Choose a checkpoint path to restore.
checkpointdir = FLAGS.checkpointdir
checkpoint_path = os.path.join(checkpointdir,
'model.ckpt-%s' % FLAGS.checkpoint_iter)
# Embed num_sequences query sequences, store embeddings and image strings in
# query_sequences_to_data.
sequences_to_data = {}
for (view_embeddings, view_raw_image_strings, seqname) in estimator.inference(
query_records, checkpoint_path, batch_size,
num_sequences=FLAGS.num_query_sequences):
sequences_to_data[seqname] = {
'embeddings': view_embeddings,
'images': view_raw_image_strings,
}
# Loop over query videos.
qview = FLAGS.query_view
tview = FLAGS.target_view
for task_i, data_i in sequences_to_data.iteritems():
ims = data_i['images']
embs = data_i['embeddings']
query_embs = SmoothEmbeddings(embs[qview])
query_ims = ims[qview]
target_embs = SmoothEmbeddings(embs[tview])
target_ims = ims[tview]
tf.logging.info('Generating %s imitating %s video.' % (task_i, task_i))
vid_name = 'q%sv%s_im%sv%s' % (task_i, qview, task_i, tview)
vid_name = vid_name.replace('/', '_')
GenerateImitationVideo(vid_name, query_ims, query_embs,
target_ims, target_embs, height, width)
def EmbedQueryTargetData(query_records, target_records, config):
"""Embeds the full set of query_records and target_records.
Args:
query_records: List of Strings, paths to tfrecord datasets to use as
queries.
target_records: List of Strings, paths to tfrecord datasets to use as
targets.
config: A T object describing training config.
Returns:
query_sequences_to_data: A dict holding 'embeddings' and 'images'
target_sequences_to_data: A dict holding 'embeddings' and 'images'
"""
batch_size = config.data.embed_batch_size
# Choose an estimator based on training strategy.
estimator = get_estimator(config, FLAGS.checkpointdir)
# Choose a checkpoint path to restore.
checkpointdir = FLAGS.checkpointdir
checkpoint_path = os.path.join(checkpointdir,
'model.ckpt-%s' % FLAGS.checkpoint_iter)
# Embed num_sequences query sequences, store embeddings and image strings in
# query_sequences_to_data.
num_query_sequences = FLAGS.num_query_sequences
num_target_sequences = FLAGS.num_target_sequences
query_sequences_to_data = {}
for (view_embeddings, view_raw_image_strings, seqname) in estimator.inference(
query_records, checkpoint_path, batch_size,
num_sequences=num_query_sequences):
query_sequences_to_data[seqname] = {
'embeddings': view_embeddings,
'images': view_raw_image_strings,
}
if (query_records == target_records) and (
num_query_sequences == num_target_sequences):
target_sequences_to_data = query_sequences_to_data
else:
# Embed num_sequences target sequences, store embeddings and image strings
# in sequences_to_data.
target_sequences_to_data = {}
for (view_embeddings, view_raw_image_strings,
seqname) in estimator.inference(
target_records, checkpoint_path, batch_size,
num_sequences=num_target_sequences):
target_sequences_to_data[seqname] = {
'embeddings': view_embeddings,
'images': view_raw_image_strings,
}
return query_sequences_to_data, target_sequences_to_data
def main(_):
# Parse config dict from yaml config files / command line flags.
config = util.ParseConfigsToLuaTable(FLAGS.config_paths, FLAGS.model_params)
# Get tables to embed.
query_records_dir = FLAGS.query_records_dir
query_records = util.GetFilesRecursively(query_records_dir)
target_records_dir = FLAGS.target_records_dir
target_records = util.GetFilesRecursively(target_records_dir)
height = config.data.raw_height
width = config.data.raw_width
mode = FLAGS.mode
if mode == 'multi':
# Generate videos where target set is composed of multiple videos.
MultiImitationVideos(query_records, target_records, config,
height, width)
elif mode == 'single':
# Generate videos where target set is a single video.
SingleImitationVideos(query_records, target_records, config,
height, width)
elif mode == 'same':
# Generate videos where target set is the same as query, but diff view.
SameSequenceVideos(query_records, config, height, width)
else:
raise ValueError('Unknown mode %s' % mode)
if __name__ == '__main__':
tf.app.run()
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for tcn.labeled_eval."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import labeled_eval
import tensorflow as tf
class LabeledEvalTest(tf.test.TestCase):
def testNearestCrossSequenceNeighbors(self):
# Generate embeddings.
num_data = 64
embedding_size = 4
num_tasks = 8
n_neighbors = 2
data = np.random.randn(num_data, embedding_size)
tasks = np.repeat(range(num_tasks), num_data // num_tasks)
# Get nearest cross-sequence indices.
indices = labeled_eval.nearest_cross_sequence_neighbors(
data, tasks, n_neighbors=n_neighbors)
# Assert that no nearest neighbor indices come from the same task.
repeated_tasks = np.tile(np.reshape(tasks, (num_data, 1)), n_neighbors)
self.assertTrue(np.all(np.not_equal(repeated_tasks, tasks[indices])))
def testPerfectCrossSequenceRecall(self):
# Make sure cross-sequence recall@k returns 1.0 for near-duplicate features.
embeddings = np.random.randn(10, 2)
embeddings[5:, :] = 0.00001 + embeddings[:5, :]
tasks = np.repeat([0, 1], 5)
labels = np.array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4])
# find k=1, k=2 nearest neighbors.
k_list = [1, 2]
# Compute knn indices.
indices = labeled_eval.nearest_cross_sequence_neighbors(
embeddings, tasks, n_neighbors=max(k_list))
retrieved_labels = labels[indices]
recall_list = labeled_eval.compute_cross_sequence_recall_at_k(
retrieved_labels=retrieved_labels,
labels=labels,
k_list=k_list)
self.assertTrue(np.allclose(
np.array(recall_list), np.array([1.0, 1.0])))
def testRelativeRecall(self):
# Make sure cross-sequence recall@k is non-decreasing over k.
num_data = 100
num_tasks = 10
embeddings = np.random.randn(100, 5)
tasks = np.repeat(range(num_tasks), num_data // num_tasks)
labels = np.random.randint(0, 5, 100)
k_list = [1, 2, 4, 8, 16, 32, 64]
indices = labeled_eval.nearest_cross_sequence_neighbors(
embeddings, tasks, n_neighbors=max(k_list))
retrieved_labels = labels[indices]
recall_list = labeled_eval.compute_cross_sequence_recall_at_k(
retrieved_labels=retrieved_labels,
labels=labels,
k_list=k_list)
recall_list_sorted = sorted(recall_list)
self.assertTrue(np.allclose(
np.array(recall_list), np.array(recall_list_sorted)))
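# labeled_eval itself is not part of this diff; as a hedged reference for the
# quantity exercised above (illustrative NumPy sketch, not necessarily the
# repo's implementation), recall@k counts a query as a hit if its own label
# appears among the labels of its k nearest cross-sequence neighbors.
def _recall_at_k_np(retrieved_labels, labels, k_list):
  recalls = []
  for k in k_list:
    hits = [labels[i] in retrieved_labels[i, :k] for i in range(len(labels))]
    recalls.append(np.mean(hits))
  return recalls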
if __name__ == "__main__":
tf.test.main()
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A utility class for reporting processing progress."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import datetime
class Progress(object):
"""A utility class for reporting processing progress."""
def __init__(self, target_size):
self.target_size = target_size
self.current_size = 0
self.start_time = datetime.datetime.now()
def Update(self, current_size):
"""Replaces internal current_size with current_size."""
self.current_size = current_size
def Add(self, size):
"""Increments internal current_size by size."""
self.current_size += size
def __str__(self):
processed = 1e-5 + self.current_size / float(self.target_size)
current_time = datetime.datetime.now()
elapsed = current_time - self.start_time
eta = datetime.timedelta(
seconds=elapsed.total_seconds() / processed - elapsed.total_seconds())
return "%d / %d (elapsed %s eta %s)" % (
self.current_size, self.target_size,
str(elapsed).split(".")[0],
str(eta).split(".")[0])
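# Brief usage sketch (not part of the original file): Progress is meant to be
# polled inside a loop over a known amount of work; the printed string is
# illustrative.
def _demo_progress():
  progress = Progress(target_size=1000)
  for _ in range(10):
    progress.Add(100)      # Account for another 100 processed items.
    print(str(progress))   # e.g. "300 / 1000 (elapsed 0:00:03 eta 0:00:07)"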