Unverified Commit 78ddf6eb authored by cclauss's avatar cclauss Committed by GitHub

Merge branch 'master' into patch-6

parents 50cb0365 1f34fcaf
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Downloads pretrained InceptionV3 and ResnetV2-50 checkpoints."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import tarfile
try:
  from urllib.request import urlretrieve  # Python 3.
except ImportError:
  from urllib import urlretrieve  # Python 2.
INCEPTION_URL = 'http://download.tensorflow.org/models/inception_v3_2016_08_28.tar.gz'
RESNET_URL = 'http://download.tensorflow.org/models/resnet_v2_50_2017_04_14.tar.gz'
def DownloadWeights(model_dir, url):
os.makedirs(model_dir)
tar_path = os.path.join(model_dir, 'ckpt.tar.gz')
urlretrieve(url, tar_path)
tar = tarfile.open(tar_path)
tar.extractall(model_dir)
tar.close()
if __name__ == '__main__':
# Create a directory for all pretrained checkpoints.
ckpt_dir = 'pretrained_checkpoints'
if not os.path.exists(ckpt_dir):
os.makedirs(ckpt_dir)
# Download inception.
print('Downloading inception pretrained weights...')
inception_dir = os.path.join(ckpt_dir, 'inception')
DownloadWeights(inception_dir, INCEPTION_URL)
print('Done downloading inception pretrained weights.')
print('Downloading resnet pretrained weights...')
resnet_dir = os.path.join(ckpt_dir, 'resnet')
DownloadWeights(resnet_dir, RESNET_URL)
print('Done downloading resnet pretrained weights.')
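# After this script finishes, the pretrained weights live under
# pretrained_checkpoints/inception and pretrained_checkpoints/resnet. The
# exact checkpoint file names inside those directories (e.g.
# inception_v3.ckpt) come from the TF-Slim tarballs, not from this script.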
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Base estimator defining TCN training, test, and inference."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from abc import ABCMeta
from abc import abstractmethod
import os
import numpy as np
import data_providers
import preprocessing
from utils import util
import tensorflow as tf
import tensorflow.contrib.slim as slim
from tensorflow.contrib.tpu.python.tpu import tpu_config
from tensorflow.contrib.tpu.python.tpu import tpu_estimator
from tensorflow.contrib.tpu.python.tpu import tpu_optimizer
from tensorflow.python.training import session_run_hook
tf.app.flags.DEFINE_integer(
'tf_random_seed', 0, 'Random seed.')
FLAGS = tf.app.flags.FLAGS
class InitFromPretrainedCheckpointHook(session_run_hook.SessionRunHook):
"""Hook that can init graph from a pretrained checkpoint."""
def __init__(self, pretrained_checkpoint_dir):
"""Initializes a `InitFromPretrainedCheckpointHook`.
Args:
pretrained_checkpoint_dir: The dir of pretrained checkpoint.
Raises:
ValueError: If pretrained_checkpoint_dir is invalid.
"""
if pretrained_checkpoint_dir is None:
raise ValueError('pretrained_checkpoint_dir must be specified.')
self._pretrained_checkpoint_dir = pretrained_checkpoint_dir
def begin(self):
checkpoint_reader = tf.contrib.framework.load_checkpoint(
self._pretrained_checkpoint_dir)
variable_shape_map = checkpoint_reader.get_variable_to_shape_map()
exclude_scopes = 'logits/,final_layer/,aux_'
# Skip restoring global_step so that fine-tuning starts from step=0.
exclusions = ['global_step']
if exclude_scopes:
exclusions.extend([scope.strip() for scope in exclude_scopes.split(',')])
variable_to_restore = tf.contrib.framework.get_model_variables()
# Variable filtering by given exclude_scopes.
filtered_variables_to_restore = {}
for v in variable_to_restore:
excluded = False
for exclusion in exclusions:
if v.name.startswith(exclusion):
excluded = True
break
if not excluded:
var_name = v.name.split(':')[0]
filtered_variables_to_restore[var_name] = v
# Final filter by checking shape matching and skipping variables that
# are not in the checkpoint.
final_variables_to_restore = {}
for var_name, var_tensor in filtered_variables_to_restore.items():
if var_name not in variable_shape_map:
# Try moving average version of variable.
var_name = os.path.join(var_name, 'ExponentialMovingAverage')
if var_name not in variable_shape_map:
tf.logging.info(
'Skip init [%s] because it is not in ckpt.', var_name)
# Skip variables not in the checkpoint.
continue
if not var_tensor.get_shape().is_compatible_with(
variable_shape_map[var_name]):
# Skip initializing a variable from the ckpt if the shapes mismatch.
tf.logging.info(
'Skip init [%s] from [%s] in ckpt because of shape mismatch: %s vs %s',
var_tensor.name, var_name,
var_tensor.get_shape(), variable_shape_map[var_name])
continue
tf.logging.info('Init %s from %s in ckpt' % (var_tensor, var_name))
final_variables_to_restore[var_name] = var_tensor
self._init_fn = tf.contrib.framework.assign_from_checkpoint_fn(
self._pretrained_checkpoint_dir,
final_variables_to_restore)
def after_create_session(self, session, coord):
tf.logging.info('Restoring InceptionV3 weights.')
self._init_fn(session)
tf.logging.info('Done restoring InceptionV3 weights.')
class BaseEstimator(object):
"""Abstract TCN base estimator class."""
__metaclass__ = ABCMeta
def __init__(self, config, logdir):
"""Constructor.
Args:
config: A Luatable-like T object holding training config.
logdir: String, a directory where checkpoints and summaries are written.
"""
self._config = config
self._logdir = logdir
@abstractmethod
def construct_input_fn(self, records, is_training):
"""Builds an estimator input_fn.
The input_fn is used to pass feature and target data to the train,
evaluate, and predict methods of the Estimator.
Method to be overridden by implementations.
Args:
records: A list of Strings, paths to TFRecords with image data.
is_training: Boolean, whether or not we're training.
Returns:
Function, that has signature of ()->(dict of features, target).
features is a dict mapping feature names to `Tensors`
containing the corresponding feature data (typically, just a single
key/value pair 'raw_data' -> image `Tensor`) for TCN.
labels is a 1-D int32 `Tensor` holding labels.
"""
pass
def preprocess_data(self, images, is_training):
"""Preprocesses raw images for either training or inference.
Args:
images: A 4-D float32 `Tensor` holding images to preprocess.
is_training: Boolean, whether or not we're in training.
Returns:
data_preprocessed: data after the preprocessor.
"""
config = self._config
height = config.data.height
width = config.data.width
min_scale = config.data.augmentation.minscale
max_scale = config.data.augmentation.maxscale
p_scale_up = config.data.augmentation.proportion_scaled_up
aug_color = config.data.augmentation.color
fast_mode = config.data.augmentation.fast_mode
crop_strategy = config.data.preprocessing.eval_cropping
preprocessed_images = preprocessing.preprocess_images(
images, is_training, height, width,
min_scale, max_scale, p_scale_up,
aug_color=aug_color, fast_mode=fast_mode,
crop_strategy=crop_strategy)
return preprocessed_images
@abstractmethod
def forward(self, images, is_training, reuse=False):
"""Defines the forward pass that converts batch images to embeddings.
Method to be overridden by implementations.
Args:
images: A 4-D float32 `Tensor` holding images to be embedded.
is_training: Boolean, whether or not we're in training mode.
reuse: Boolean, whether or not to reuse embedder.
Returns:
embeddings: A 2-D float32 `Tensor` holding embedded images.
"""
pass
@abstractmethod
def define_loss(self, embeddings, labels, is_training):
"""Defines the loss function on the embedding vectors.
Method to be overridden by implementations.
Args:
embeddings: A 2-D float32 `Tensor` holding embedded images.
labels: A 1-D int32 `Tensor` holding problem labels.
is_training: Boolean, whether or not we're in training mode.
Returns:
loss: tf.float32 scalar.
"""
pass
@abstractmethod
def define_eval_metric_ops(self):
"""Defines the dictionary of eval metric tensors.
Method to be overridden by implementations.
Returns:
eval_metric_ops: A dict of name/value pairs specifying the
metrics that will be calculated when the model runs in EVAL mode.
"""
pass
def get_train_op(self, loss):
"""Creates a training op.
Args:
loss: A float32 `Tensor` representing the total training loss.
Returns:
train_op: A slim.learning.create_train_op train_op.
Raises:
ValueError: If specified optimizer isn't supported.
"""
# Get variables to train (defined in subclass).
assert self.variables_to_train
# Read learning rate schedule parameters from the config.
decay_steps = self._config.learning.decay_steps
decay_factor = self._config.learning.decay_factor
learning_rate = float(self._config.learning.learning_rate)
# Define a learning rate schedule.
global_step = slim.get_or_create_global_step()
learning_rate = tf.train.exponential_decay(
learning_rate,
global_step,
decay_steps,
decay_factor,
staircase=True)
# Create an optimizer.
opt_type = self._config.learning.optimizer
if opt_type == 'adam':
opt = tf.train.AdamOptimizer(learning_rate)
elif opt_type == 'momentum':
opt = tf.train.MomentumOptimizer(learning_rate, 0.9)
elif opt_type == 'rmsprop':
opt = tf.train.RMSPropOptimizer(learning_rate, momentum=0.9,
epsilon=1.0, decay=0.9)
else:
raise ValueError('Unsupported optimizer %s' % opt_type)
if self._config.use_tpu:
opt = tpu_optimizer.CrossShardOptimizer(opt)
# Create a training op.
train_op = slim.learning.create_train_op(
loss,
optimizer=opt,
variables_to_train=self.variables_to_train,
update_ops=tf.get_collection(tf.GraphKeys.UPDATE_OPS))
return train_op
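# Worked example of the exponential decay schedule above (illustrative
# values, not from any shipped config): with learning_rate=0.001,
# decay_steps=10000 and decay_factor=0.94, staircase=True yields
#   lr(step) = 0.001 * 0.94 ** floor(step / 10000)
# i.e. the rate drops in discrete jumps every decay_steps training steps.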
def _get_model_fn(self):
"""Defines behavior for training, evaluation, and inference (prediction).
Returns:
`model_fn` for `Estimator`.
"""
# pylint: disable=unused-argument
def model_fn(features, labels, mode, params):
"""Build the model based on features, labels, and mode.
Args:
features: Dict, strings to `Tensor` input data, returned by the
input_fn.
labels: The labels Tensor returned by the input_fn.
mode: A string indicating the mode. This will be either
tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.PREDICT,
or tf.estimator.ModeKeys.EVAL.
params: A dict holding training parameters, passed in during TPU
training.
Returns:
A tf.estimator.EstimatorSpec specifying train/test/inference behavior.
"""
is_training = mode == tf.estimator.ModeKeys.TRAIN
# Get preprocessed images from the features dict.
batch_preprocessed = features['batch_preprocessed']
# Do a forward pass to embed data.
batch_encoded = self.forward(batch_preprocessed, is_training)
# Optionally set the pretrained initialization function.
initializer_fn = None
if mode == tf.estimator.ModeKeys.TRAIN:
initializer_fn = self.pretrained_init_fn
# If we're training or evaluating, define total loss.
total_loss = None
if mode in (tf.estimator.ModeKeys.TRAIN, tf.estimator.ModeKeys.EVAL):
loss = self.define_loss(batch_encoded, labels, is_training)
tf.losses.add_loss(loss)
total_loss = tf.losses.get_total_loss()
# If we're training, define a train op.
train_op = None
if mode == tf.estimator.ModeKeys.TRAIN:
train_op = self.get_train_op(total_loss)
# If we're doing inference, set the output to be the embedded images.
predictions_dict = None
if mode == tf.estimator.ModeKeys.PREDICT:
predictions_dict = {'embeddings': batch_encoded}
# Pass through additional metadata stored in features.
for k, v in features.items():
predictions_dict[k] = v
# If we're evaluating, define some eval metrics.
eval_metric_ops = None
if mode == tf.estimator.ModeKeys.EVAL:
eval_metric_ops = self.define_eval_metric_ops()
# Define training scaffold to load pretrained weights.
num_checkpoint_to_keep = self._config.logging.checkpoint.num_to_keep
saver = tf.train.Saver(
max_to_keep=num_checkpoint_to_keep)
if is_training and self._config.use_tpu:
# TPU doesn't have a scaffold option at the moment, so initialize
# pretrained weights using a custom train_hook instead.
return tpu_estimator.TPUEstimatorSpec(
mode,
loss=total_loss,
eval_metrics=None,
train_op=train_op,
predictions=predictions_dict)
else:
# Build a scaffold to initialize pretrained weights.
scaffold = tf.train.Scaffold(
init_fn=initializer_fn,
saver=saver,
summary_op=None)
return tf.estimator.EstimatorSpec(
mode=mode,
predictions=predictions_dict,
loss=total_loss,
train_op=train_op,
eval_metric_ops=eval_metric_ops,
scaffold=scaffold)
return model_fn
def train(self):
"""Runs training."""
# Get a list of training tfrecords.
config = self._config
training_dir = config.data.training
training_records = util.GetFilesRecursively(training_dir)
# Define batch size.
self._batch_size = config.data.batch_size
# Create a subclass-defined training input function.
train_input_fn = self.construct_input_fn(
training_records, is_training=True)
# Create the estimator.
estimator = self._build_estimator(is_training=True)
train_hooks = None
if config.use_tpu:
# TPU training initializes pretrained weights using a custom train hook.
train_hooks = []
if tf.train.latest_checkpoint(self._logdir) is None:
train_hooks.append(
InitFromPretrainedCheckpointHook(
config[config.embedder_strategy].pretrained_checkpoint))
# Run training.
estimator.train(input_fn=train_input_fn, hooks=train_hooks,
steps=config.learning.max_step)
def _build_estimator(self, is_training):
"""Returns an Estimator object.
Args:
is_training: Boolean, whether or not we're in training mode.
Returns:
A tf.estimator.Estimator.
"""
config = self._config
save_checkpoints_steps = config.logging.checkpoint.save_checkpoints_steps
keep_checkpoint_max = self._config.logging.checkpoint.num_to_keep
if is_training and config.use_tpu:
iterations = config.tpu.iterations
num_shards = config.tpu.num_shards
run_config = tpu_config.RunConfig(
save_checkpoints_secs=None,
save_checkpoints_steps=save_checkpoints_steps,
keep_checkpoint_max=keep_checkpoint_max,
master=FLAGS.master,
evaluation_master=FLAGS.master,
model_dir=self._logdir,
tpu_config=tpu_config.TPUConfig(
iterations_per_loop=iterations,
num_shards=num_shards,
per_host_input_for_training=num_shards <= 8),
tf_random_seed=FLAGS.tf_random_seed)
batch_size = config.data.batch_size
return tpu_estimator.TPUEstimator(
model_fn=self._get_model_fn(),
config=run_config,
use_tpu=True,
train_batch_size=batch_size,
eval_batch_size=batch_size)
else:
run_config = tf.estimator.RunConfig().replace(
model_dir=self._logdir,
save_checkpoints_steps=save_checkpoints_steps,
keep_checkpoint_max=keep_checkpoint_max,
tf_random_seed=FLAGS.tf_random_seed)
return tf.estimator.Estimator(
model_fn=self._get_model_fn(),
config=run_config)
def evaluate(self):
"""Runs `Estimator` validation.
"""
config = self._config
# Get a list of validation tfrecords.
validation_dir = config.data.validation
validation_records = util.GetFilesRecursively(validation_dir)
# Define batch size.
self._batch_size = config.data.batch_size
# Create a subclass-defined validation input function.
validation_input_fn = self.construct_input_fn(
validation_records, False)
# Create the estimator.
estimator = self._build_estimator(is_training=False)
# Run validation.
eval_batch_size = config.data.batch_size
num_eval_samples = config.val.num_eval_samples
num_eval_batches = int(num_eval_samples / eval_batch_size)
estimator.evaluate(input_fn=validation_input_fn, steps=num_eval_batches)
def inference(
self, inference_input, checkpoint_path, batch_size=None, **kwargs):
"""Defines 3 of modes of inference.
Inputs:
* Mode 1: Input is an input_fn.
* Mode 2: Input is a TFRecord (or list of TFRecords).
* Mode 3: Input is a numpy array holding an image (or array of images).
Outputs:
* Mode 1: this returns an iterator over embeddings and additional
metadata. See
https://www.tensorflow.org/api_docs/python/tf/estimator/Estimator#predict
for details.
* Mode 2: Returns an iterator over tuples of
(embeddings, raw_image_strings, sequence_name), where embeddings is a
2-D float32 numpy array holding [sequence_size, embedding_size] image
embeddings, raw_image_strings is a 1-D string numpy array holding
[sequence_size] jpeg-encoded image strings, and sequence_name is a
string holding the name of the embedded sequence.
* Mode 3: Returns a tuple of (embeddings, raw_image_strings), where
embeddings is a 2-D float32 numpy array holding
[batch_size, embedding_size] image embeddings, raw_image_strings is a
1-D string numpy array holding [batch_size] jpeg-encoded image strings.
Args:
inference_input: This can be a tf.Estimator input_fn, a TFRecord path,
a list of TFRecord paths, a numpy image, or an array of numpy images.
checkpoint_path: String, path to the checkpoint to restore for inference.
batch_size: Int, the size of the batch to use for inference.
**kwargs: Additional keyword arguments, depending on the mode.
See _input_fn_inference, _tfrecord_inference, and _np_inference.
Returns:
inference_output: Inference output depending on mode, see above for
details.
Raises:
ValueError: If inference_input isn't a tf.Estimator input_fn,
a TFRecord path, a list of TFRecord paths, or a numpy array.
"""
# Mode 1: input is a callable tf.Estimator input_fn.
if callable(inference_input):
return self._input_fn_inference(
input_fn=inference_input, checkpoint_path=checkpoint_path, **kwargs)
# Mode 2: Input is a TFRecord path (or list of TFRecord paths).
elif util.is_tfrecord_input(inference_input):
return self._tfrecord_inference(
records=inference_input, checkpoint_path=checkpoint_path,
batch_size=batch_size, **kwargs)
# Mode 3: Input is a numpy array of raw images.
elif util.is_np_array(inference_input):
return self._np_inference(
np_images=inference_input, checkpoint_path=checkpoint_path, **kwargs)
else:
raise ValueError(
'inference input must be a tf.Estimator input_fn, a TFRecord path, '
'a list of TFRecord paths, or a numpy array. Got: %s' % str(type(
inference_input)))
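# Hedged usage sketch for Mode 3 above (the estimator instance, the image
# variable and the checkpoint path are placeholders, not part of this module):
#   embeddings, raw_jpegs = estimator.inference(
#       frame,  # float32 numpy image in [0, 1], shape [height, width, 3].
#       checkpoint_path='/tmp/tcn/model.ckpt-100000')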
def _input_fn_inference(self, input_fn, checkpoint_path, predict_keys=None):
"""Mode 1: tf.Estimator inference.
Args:
input_fn: Function, that has signature of ()->(dict of features, None).
This is a function called by the estimator to get input tensors (stored
in the features dict) to do inference over.
checkpoint_path: String, path to a specific checkpoint to restore.
predict_keys: List of strings, the keys of the `Tensors` in the features
dict (returned by the input_fn) to evaluate during inference.
Returns:
predictions: An Iterator, yielding evaluated values of `Tensors`
specified in `predict_keys`.
"""
# Create the estimator.
estimator = self._build_estimator(is_training=False)
# Create an iterator of predicted embeddings.
predictions = estimator.predict(input_fn=input_fn,
checkpoint_path=checkpoint_path,
predict_keys=predict_keys)
return predictions
def _tfrecord_inference(self, records, checkpoint_path, batch_size,
num_sequences=-1, reuse=False):
"""Mode 2: TFRecord inference.
Args:
records: List of strings, paths to TFRecords.
checkpoint_path: String, path to a specific checkpoint to restore.
batch_size: Int, size of inference batch.
num_sequences: Int, number of sequences to embed. If -1,
embed everything.
reuse: Boolean, whether or not to reuse embedder weights.
Yields:
(embeddings, raw_image_strings, sequence_name):
embeddings is a 2-D float32 numpy array holding
[sequence_size, embedding_size] image embeddings.
raw_image_strings is a 1-D string numpy array holding
[sequence_size] jpeg-encoded image strings.
sequence_name is a string holding the name of the embedded sequence.
"""
tf.reset_default_graph()
if not isinstance(records, list):
records = [records]
# Map the list of tfrecords to a dataset of preprocessed images.
num_views = self._config.data.num_views
(views, task, seq_len) = data_providers.full_sequence_provider(
records, num_views)
tensor_dict = {
'raw_image_strings': views,
'task': task,
'seq_len': seq_len
}
# Create a preprocess function over raw image string placeholders.
image_str_placeholder = tf.placeholder(tf.string, shape=[None])
decoded = preprocessing.decode_images(image_str_placeholder)
decoded.set_shape([batch_size, None, None, 3])
preprocessed = self.preprocess_data(decoded, is_training=False)
# Create an inference graph over preprocessed images.
embeddings = self.forward(preprocessed, is_training=False, reuse=reuse)
# Create a saver to restore model variables.
tf.train.get_or_create_global_step()
saver = tf.train.Saver(tf.all_variables())
# Create a session and restore model variables.
with tf.train.MonitoredSession() as sess:
saver.restore(sess, checkpoint_path)
cnt = 0
# If num_sequences is specified, embed that many sequences, else embed
# everything.
try:
while cnt < num_sequences if num_sequences != -1 else True:
# Get a preprocessed image sequence.
np_data = sess.run(tensor_dict)
np_raw_images = np_data['raw_image_strings']
np_seq_len = np_data['seq_len']
np_task = np_data['task']
# Embed each view.
embedding_size = self._config.embedding_size
view_embeddings = [
np.zeros((0, embedding_size)) for _ in range(num_views)]
for view_index in range(num_views):
view_raw = np_raw_images[view_index]
# Embed the full sequence.
t = 0
while t < np_seq_len:
# Decode and preprocess the batch of image strings.
embeddings_np = sess.run(
embeddings, feed_dict={
image_str_placeholder: view_raw[t:t+batch_size]})
view_embeddings[view_index] = np.append(
view_embeddings[view_index], embeddings_np, axis=0)
tf.logging.info('Embedded %d images for task %s' % (t, np_task))
t += batch_size
# Done embedding for all views.
view_raw_images = np_data['raw_image_strings']
yield (view_embeddings, view_raw_images, np_task)
cnt += 1
except tf.errors.OutOfRangeError:
tf.logging.info('Done embedding entire dataset.')
def _np_inference(self, np_images, checkpoint_path):
"""Mode 3: Call this repeatedly to do inference over numpy images.
This mode is for when we want to do real-time inference over
some stream of images (represented as numpy arrays).
Args:
np_images: A float32 numpy array holding images to embed.
checkpoint_path: String, path to a specific checkpoint to restore.
Returns:
(embeddings, raw_image_strings):
embeddings is a 2-D float32 numpy array holding
[inferred batch_size, embedding_size] image embeddings.
raw_image_strings is a 1-D string numpy array holding
[inferred batch_size] jpeg-encoded image strings.
"""
if isinstance(np_images, list):
np_images = np.asarray(np_images)
# Add a batch dimension if only 3-dimensional.
if len(np_images.shape) == 3:
np_images = np.expand_dims(np_images, axis=0)
# If np_images are in the range [0, 255], rescale to [0, 1].
assert np.min(np_images) >= 0.
if np.max(np_images) > 1.0:
np_images = np_images.astype(np.float32) / 255.
assert np.max(np_images) <= 1.0
# If this is the first pass, set up inference graph.
if not hasattr(self, '_np_inf_tensor_dict'):
self._setup_np_inference(np_images, checkpoint_path)
# Convert np_images to embeddings.
np_tensor_dict = self._sess.run(self._np_inf_tensor_dict, feed_dict={
self._image_placeholder: np_images
})
return np_tensor_dict['embeddings'], np_tensor_dict['raw_image_strings']
def _setup_np_inference(self, np_images, checkpoint_path):
"""Sets up and restores inference graph, creates and caches a Session."""
tf.logging.info('Restoring model weights.')
# Define inference over an image placeholder.
_, height, width, _ = np.shape(np_images)
image_placeholder = tf.placeholder(
tf.float32, shape=(None, height, width, 3))
# Preprocess batch.
preprocessed = self.preprocess_data(image_placeholder, is_training=False)
# Unscale and jpeg encode preprocessed images for display purposes.
im_strings = preprocessing.unscale_jpeg_encode(preprocessed)
# Do forward pass to get embeddings.
embeddings = self.forward(preprocessed, is_training=False)
# Create a saver to restore model variables.
tf.train.get_or_create_global_step()
saver = tf.train.Saver(tf.all_variables())
self._image_placeholder = image_placeholder
self._batch_encoded = embeddings
self._np_inf_tensor_dict = {
'embeddings': embeddings,
'raw_image_strings': im_strings,
}
# Create a session and restore model variables.
self._sess = tf.Session()
saver.restore(self._sess, checkpoint_path)
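# Note: _setup_np_inference caches the session, placeholder and output
# tensors on the estimator, so repeated _np_inference calls reuse one graph.
# The placeholder height/width are fixed by the first batch of images passed
# in, so later calls must use images of the same spatial size.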
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Get a configured estimator."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from estimators import mvtcn_estimator as mvtcn_estimators
from estimators import svtcn_estimator
def get_mvtcn_estimator(loss_strategy, config, logdir):
"""Returns a configured MVTCN estimator."""
loss_to_trainer = {
'triplet_semihard': mvtcn_estimators.MVTCNTripletEstimator,
'npairs': mvtcn_estimators.MVTCNNpairsEstimator,
}
if loss_strategy not in loss_to_trainer:
raise ValueError('Unknown loss for MVTCN: %s' % loss_strategy)
estimator = loss_to_trainer[loss_strategy](config, logdir)
return estimator
def get_estimator(config, logdir):
"""Returns an unsupervised model trainer based on config.
Args:
config: A T object holding training configs.
logdir: String, path to directory where model checkpoints and summaries
are saved.
Returns:
estimator: A configured `TCNEstimator` object.
Raises:
ValueError: If unknown training strategy is specified.
"""
# Get the training strategy.
training_strategy = config.training_strategy
if training_strategy == 'mvtcn':
loss_strategy = config.loss_strategy
estimator = get_mvtcn_estimator(
loss_strategy, config, logdir)
elif training_strategy == 'svtcn':
estimator = svtcn_estimator.SVTCNTripletEstimator(config, logdir)
else:
raise ValueError('Unknown training strategy: %s' % training_strategy)
return estimator
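# Hedged usage sketch (mirrors how the eval binary below calls this module;
# the logdir value is a placeholder):
#   config = util.ParseConfigsToLuaTable(FLAGS.config_paths, FLAGS.model_params)
#   estimator = get_estimator(config, logdir='/tmp/tcn')
#   estimator.train()  # or estimator.evaluate()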
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""MVTCN trainer implementations with various metric learning losses."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
import data_providers
import model as model_module
from estimators import base_estimator
import tensorflow as tf
class MVTCNEstimator(base_estimator.BaseEstimator):
"""Multi-view TCN base class."""
def __init__(self, config, logdir):
super(MVTCNEstimator, self).__init__(config, logdir)
def _pairs_provider(self, records, is_training):
config = self._config
num_views = config.data.num_views
window = config.mvtcn.window
num_parallel_calls = config.data.num_parallel_calls
sequence_prefetch_size = config.data.sequence_prefetch_size
batch_prefetch_size = config.data.batch_prefetch_size
examples_per_seq = config.data.examples_per_sequence
return functools.partial(
data_providers.multiview_pairs_provider,
file_list=records,
preprocess_fn=self.preprocess_data,
num_views=num_views,
window=window,
is_training=is_training,
examples_per_seq=examples_per_seq,
num_parallel_calls=num_parallel_calls,
sequence_prefetch_size=sequence_prefetch_size,
batch_prefetch_size=batch_prefetch_size)
def forward(self, images_concat, is_training, reuse=False):
"""See base class."""
embedder_strategy = self._config.embedder_strategy
loss_strategy = self._config.loss_strategy
l2_normalize_embedding = self._config[loss_strategy].embedding_l2
embedder = model_module.get_embedder(
embedder_strategy,
self._config,
images_concat,
is_training=is_training,
l2_normalize_embedding=l2_normalize_embedding, reuse=reuse)
embeddings_concat = embedder.construct_embedding()
variables_to_train = embedder.get_trainable_variables()
self.variables_to_train = variables_to_train
self.pretrained_init_fn = embedder.init_fn
return embeddings_concat
def _collect_image_summaries(self, anchor_images, positive_images,
images_concat):
image_summaries = self._config.logging.summary.image_summaries
if image_summaries and not self._config.use_tpu:
batch_pairs_summary = tf.concat(
[anchor_images, positive_images], axis=2)
tf.summary.image('training/mvtcn_pairs', batch_pairs_summary)
tf.summary.image('training/images_preprocessed_concat', images_concat)
class MVTCNTripletEstimator(MVTCNEstimator):
"""Multi-View TCN with semihard triplet loss."""
def __init__(self, config, logdir):
super(MVTCNTripletEstimator, self).__init__(config, logdir)
def construct_input_fn(self, records, is_training):
"""See base class."""
def input_fn(params):
"""Provides input to MVTCN models."""
if is_training and self._config.use_tpu:
batch_size = params['batch_size']
else:
batch_size = self._batch_size
(images_concat,
anchor_labels,
positive_labels,
anchor_images,
positive_images) = self._pairs_provider(
records, is_training)(batch_size=batch_size)
if is_training:
self._collect_image_summaries(anchor_images, positive_images,
images_concat)
labels = tf.concat([anchor_labels, positive_labels], axis=0)
features = {'batch_preprocessed': images_concat}
return (features, labels)
return input_fn
def define_loss(self, embeddings, labels, is_training):
"""See base class."""
margin = self._config.triplet_semihard.margin
loss = tf.contrib.losses.metric_learning.triplet_semihard_loss(
labels=labels, embeddings=embeddings, margin=margin)
self._loss = loss
if is_training and not self._config.use_tpu:
tf.summary.scalar('training/triplet_semihard', loss)
return loss
def define_eval_metric_ops(self):
"""See base class."""
return {'validation/triplet_semihard': tf.metrics.mean(self._loss)}
class MVTCNNpairsEstimator(MVTCNEstimator):
"""Multi-View TCN with npairs loss."""
def __init__(self, config, logdir):
super(MVTCNNpairsEstimator, self).__init__(config, logdir)
def construct_input_fn(self, records, is_training):
"""See base class."""
def input_fn(params):
"""Provides input to MVTCN models."""
if is_training and self._config.use_tpu:
batch_size = params['batch_size']
else:
batch_size = self._batch_size
(images_concat,
npairs_labels,
_,
anchor_images,
positive_images) = self._pairs_provider(
records, is_training)(batch_size=batch_size)
if is_training:
self._collect_image_summaries(anchor_images, positive_images,
images_concat)
features = {'batch_preprocessed': images_concat}
return (features, npairs_labels)
return input_fn
def define_loss(self, embeddings, labels, is_training):
"""See base class."""
embeddings_anchor, embeddings_positive = tf.split(embeddings, 2, axis=0)
loss = tf.contrib.losses.metric_learning.npairs_loss(
labels=labels, embeddings_anchor=embeddings_anchor,
embeddings_positive=embeddings_positive)
self._loss = loss
if is_training and not self._config.use_tpu:
tf.summary.scalar('training/npairs', loss)
return loss
def define_eval_metric_ops(self):
"""See base class."""
return {'validation/npairs': tf.metrics.mean(self._loss)}
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""SVTCN estimator implementation."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import data_providers
import model as model_module
from estimators import base_estimator
from estimators import svtcn_loss
import tensorflow as tf
class SVTCNEstimator(base_estimator.BaseEstimator):
"""Single-view TCN Estimator base class."""
def __init__(self, config, logdir):
super(SVTCNEstimator, self).__init__(config, logdir)
def construct_input_fn(self, records, is_training):
"""See base class."""
config = self._config
num_views = config.data.num_views
num_parallel_calls = config.data.num_parallel_calls
sequence_prefetch_size = config.data.sequence_prefetch_size
batch_prefetch_size = config.data.batch_prefetch_size
def input_fn():
"""Provides input to SVTCN models."""
(images_preprocessed,
images_raw,
timesteps) = data_providers.singleview_tcn_provider(
file_list=records,
preprocess_fn=self.preprocess_data,
num_views=num_views,
is_training=is_training,
batch_size=self._batch_size,
num_parallel_calls=num_parallel_calls,
sequence_prefetch_size=sequence_prefetch_size,
batch_prefetch_size=batch_prefetch_size)
if config.logging.summary.image_summaries and is_training:
tf.summary.image('training/svtcn_images', images_raw)
features = {'batch_preprocessed': images_preprocessed}
return (features, timesteps)
return input_fn
def forward(self, images, is_training, reuse=False):
"""See base class."""
embedder_strategy = self._config.embedder_strategy
embedder = model_module.get_embedder(
embedder_strategy,
self._config,
images,
is_training=is_training, reuse=reuse)
embeddings = embedder.construct_embedding()
if is_training:
self.variables_to_train = embedder.get_trainable_variables()
self.pretrained_init_fn = embedder.init_fn
return embeddings
class SVTCNTripletEstimator(SVTCNEstimator):
"""Single-View TCN with semihard triplet loss."""
def __init__(self, config, logdir):
super(SVTCNTripletEstimator, self).__init__(config, logdir)
def define_loss(self, embeddings, timesteps, is_training):
"""See base class."""
pos_radius = self._config.svtcn.pos_radius
neg_radius = self._config.svtcn.neg_radius
margin = self._config.triplet_semihard.margin
loss = svtcn_loss.singleview_tcn_loss(
embeddings, timesteps, pos_radius, neg_radius, margin=margin)
self._loss = loss
if is_training:
tf.summary.scalar('training/svtcn_loss', loss)
return loss
def define_eval_metric_ops(self):
"""See base class."""
return {'validation/svtcn_loss': tf.metrics.mean(self._loss)}
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""This implements single view TCN triplet loss."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
def pairwise_squared_distance(feature):
"""Computes the squared pairwise distance matrix.
output[i, j] = || feature[i, :] - feature[j, :] ||_2^2
Args:
feature: 2-D Tensor of size [number of data, feature dimension]
Returns:
pairwise_squared_distances: 2-D Tensor of size
[number of data, number of data]
"""
pairwise_squared_distances = tf.add(
tf.reduce_sum(
tf.square(feature), axis=1, keep_dims=True),
tf.reduce_sum(
tf.square(tf.transpose(feature)), axis=0,
keep_dims=True)) - 2.0 * tf.matmul(feature, tf.transpose(feature))
# Deal with numerical inaccuracies. Set small negatives to zero.
pairwise_squared_distances = tf.maximum(pairwise_squared_distances, 0.0)
return pairwise_squared_distances
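# A minimal NumPy cross-check of the identity used above,
#   ||x_i - x_j||^2 = ||x_i||^2 + ||x_j||^2 - 2 * x_i . x_j.
# This helper is purely illustrative and is not called anywhere in the
# library.
def _np_pairwise_squared_distance_example(feature):
  """Computes the same matrix as pairwise_squared_distance, with NumPy."""
  import numpy as np
  sq_norms = np.sum(np.square(feature), axis=1)
  pdist = sq_norms[:, None] + sq_norms[None, :] - 2.0 * np.dot(
      feature, feature.T)
  # Clamp small negatives caused by floating point error, as above.
  return np.maximum(pdist, 0.0)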
def masked_maximum(data, mask, dim=1):
"""Computes the axis wise maximum over chosen elements.
Args:
data: N-D Tensor.
mask: N-D Tensor of zeros or ones.
dim: The dimension over which to compute the maximum.
Returns:
masked_maximums: N-D Tensor.
The maximized dimension is of size 1 after the operation.
"""
axis_minimums = tf.reduce_min(data, dim, keep_dims=True)
masked_maximums = tf.reduce_max(
tf.multiply(
data - axis_minimums, mask), dim, keep_dims=True) + axis_minimums
return masked_maximums
def masked_minimum(data, mask, dim=1):
"""Computes the axis wise minimum over chosen elements.
Args:
data: 2-D Tensor of size [n, m].
mask: 2-D Boolean Tensor of size [n, m].
dim: The dimension over which to compute the minimum.
Returns:
masked_minimums: N-D Tensor.
The minimized dimension is of size 1 after the operation.
"""
axis_maximums = tf.reduce_max(data, dim, keep_dims=True)
masked_minimums = tf.reduce_min(
tf.multiply(
data - axis_maximums, mask), dim, keep_dims=True) + axis_maximums
return masked_minimums
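# Note on the trick used in masked_maximum/masked_minimum above: shifting the
# data by the per-row minimum (resp. maximum) before multiplying by the mask
# makes every unmasked entry non-negative (resp. non-positive), so the
# masked-out positions, which become exactly 0, can never win the reduction;
# adding the shift back afterwards restores the original scale.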
def singleview_tcn_loss(
embeddings, timesteps, pos_radius, neg_radius, margin=1.0,
sequence_ids=None, multiseq=False):
"""Computes the single view triplet loss with semi-hard negative mining.
The loss encourages the positive distances (between a pair of embeddings with
the same labels) to be smaller than the minimum negative distance that is
greater than the positive distance plus the margin (the "semi-hard" negative)
within the mini-batch. If no such negative exists, the largest negative
distance is used instead.
Anchor, positive, negative selection is as follows:
Anchors: We consider every embedding timestep as an anchor.
Positives: pos_radius defines a radius (in timesteps) around each anchor from
which positives can be drawn. E.g. An anchor with t=10 and a pos_radius of
2 produces a set of 4 (anchor,pos) pairs [(a=10, p=8), ... (a=10, p=12)].
Negatives: neg_radius defines a boundary (in timesteps) around each anchor,
outside of which negatives can be drawn. E.g. An anchor with t=10 and a
neg_radius of 4 means negatives can be any t_neg where t_neg < 6 or
t_neg > 14.
Args:
embeddings: 2-D Tensor of embedding vectors.
timesteps: Tensor with shape [batch_size] or [batch_size, 1] of sequence timesteps.
pos_radius: int32; the size of the window (in timesteps) around each anchor
timestep that a positive can be drawn from.
neg_radius: int32; the size of the window (in timesteps) around each anchor
timestep that defines a negative boundary. Negatives can only be chosen
where negative timestep t is < negative boundary min or > negative
boundary max.
margin: Float; the triplet loss margin hyperparameter.
sequence_ids: (Optional) Tensor with shape [batch_size] or [batch_size, 1] of sequence
ids. Together (sequence_id, sequence_timestep) give us a unique index for
each image if we have multiple sequences in a batch.
multiseq: Boolean, whether or not the batch is composed of multiple
sequences (with possibly colliding timesteps).
Returns:
triplet_loss: tf.float32 scalar.
"""
assert neg_radius > pos_radius
# If timesteps shape isn't [batchsize, 1], reshape to [batch_size, 1].
tshape = tf.shape(timesteps)
assert tshape.shape == 2 or tshape.shape == 1
if tshape.shape == 1:
timesteps = tf.reshape(timesteps, [tshape[0], 1])
# Build pairwise squared distance matrix.
pdist_matrix = pairwise_squared_distance(embeddings)
# Build pairwise binary adjacency matrix, where adjacency[i,j] is True
# if timestep j is inside the positive range for timestep i and both
# timesteps come from the same sequence.
pos_radius = tf.cast(pos_radius, tf.int32)
if multiseq:
# If sequence_ids shape isn't [batchsize, 1], reshape to [batch_size, 1].
tshape = tf.shape(sequence_ids)
assert tshape.shape == 2 or tshape.shape == 1
if tshape.shape == 1:
sequence_ids = tf.reshape(sequence_ids, [tshape[0], 1])
# Build pairwise binary adjacency matrix based on sequence_ids
sequence_adjacency = tf.equal(sequence_ids, tf.transpose(sequence_ids))
# Invert so we can select negatives only.
sequence_adjacency_not = tf.logical_not(sequence_adjacency)
in_pos_range = tf.logical_and(
tf.less_equal(
tf.abs(timesteps - tf.transpose(timesteps)), pos_radius),
sequence_adjacency)
# Build pairwise binary discordance matrix, where discordance[i,j] is True
# if timestep j is inside the negative range for timestep i or if the
# timesteps come from different sequences.
in_neg_range = tf.logical_or(
tf.greater(tf.abs(timesteps - tf.transpose(timesteps)), neg_radius),
sequence_adjacency_not
)
else:
in_pos_range = tf.less_equal(
tf.abs(timesteps - tf.transpose(timesteps)), pos_radius)
in_neg_range = tf.greater(tf.abs(timesteps - tf.transpose(timesteps)),
neg_radius)
batch_size = tf.size(timesteps)
# compute the mask
pdist_matrix_tile = tf.tile(pdist_matrix, [batch_size, 1])
mask = tf.logical_and(
tf.tile(in_neg_range, [batch_size, 1]),
tf.greater(pdist_matrix_tile,
tf.reshape(tf.transpose(pdist_matrix), [-1, 1])))
mask_final = tf.reshape(
tf.greater(
tf.reduce_sum(
tf.cast(
mask, dtype=tf.float32), 1, keep_dims=True),
0.0), [batch_size, batch_size])
mask_final = tf.transpose(mask_final)
in_neg_range = tf.cast(in_neg_range, dtype=tf.float32)
mask = tf.cast(mask, dtype=tf.float32)
# negatives_outside: smallest D_an where D_an > D_ap
negatives_outside = tf.reshape(
masked_minimum(pdist_matrix_tile, mask), [batch_size, batch_size])
negatives_outside = tf.transpose(negatives_outside)
# negatives_inside: largest D_an
negatives_inside = tf.tile(
masked_maximum(pdist_matrix, in_neg_range), [1, batch_size])
semi_hard_negatives = tf.where(
mask_final, negatives_outside, negatives_inside)
loss_mat = tf.add(margin, pdist_matrix - semi_hard_negatives)
mask_positives = tf.cast(
in_pos_range, dtype=tf.float32) - tf.diag(tf.ones([batch_size]))
# In lifted-struct, the authors multiply the loss by 0.5 for the upper
# triangular part; in semihard, all positive pairs except the diagonal are
# used.
num_positives = tf.reduce_sum(mask_positives)
triplet_loss = tf.truediv(
tf.reduce_sum(tf.maximum(tf.multiply(loss_mat, mask_positives), 0.0)),
num_positives,
name='triplet_svtcn_loss')
return triplet_loss
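# Illustrative helper (not used by the library): builds the positive/negative
# range masks described in the singleview_tcn_loss docstring with NumPy.
# For timesteps [0..9] with pos_radius=2 and neg_radius=4, row i of
# in_pos_range marks timesteps within 2 steps of i, and row i of in_neg_range
# marks timesteps more than 4 steps away from i.
def _np_svtcn_range_masks_example(timesteps, pos_radius, neg_radius):
  import numpy as np
  t = np.asarray(timesteps).reshape([-1, 1])
  abs_dt = np.abs(t - t.T)
  in_pos_range = abs_dt <= pos_radius  # Candidate positives per anchor row.
  in_neg_range = abs_dt > neg_radius  # Candidate negatives per anchor row.
  return in_pos_range, in_neg_range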
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for svtcn_loss.py."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from sklearn.metrics.pairwise import euclidean_distances
from estimators import svtcn_loss
import tensorflow as tf
class SVTCNLoss(tf.test.TestCase):
def testSVTCNLoss(self):
with self.test_session():
num_data = 64
num_sequences = 2
num_data_per_seq = num_data // num_sequences
feat_dim = 6
margin = 1.0
times = np.tile(np.arange(num_data_per_seq, dtype=np.int32),
num_sequences)
times = np.reshape(times, [times.shape[0], 1])
sequence_ids = np.concatenate(
[np.ones(num_data_per_seq)*i for i in range(num_sequences)])
sequence_ids = np.reshape(sequence_ids, [sequence_ids.shape[0], 1])
pos_radius = 6
neg_radius = 12
embedding = np.random.rand(num_data, feat_dim).astype(np.float32)
# Compute the loss in NP
# Get a positive mask, i.e. indices for each time index
# that are inside the positive range.
in_pos_range = np.less_equal(
np.abs(times - times.transpose()), pos_radius)
# Get a negative mask, i.e. indices for each time index
# that are inside the negative range (i.e. > t + neg_radius or
# < t - neg_radius).
in_neg_range = np.greater(np.abs(times - times.transpose()), neg_radius)
sequence_adjacency = sequence_ids == sequence_ids.T
sequence_adjacency_not = np.logical_not(sequence_adjacency)
pdist_matrix = euclidean_distances(embedding, squared=True)
loss_np = 0.0
num_positives = 0.0
for i in range(num_data):
for j in range(num_data):
if in_pos_range[i, j] and i != j and sequence_adjacency[i, j]:
num_positives += 1.0
pos_distance = pdist_matrix[i][j]
neg_distances = []
for k in range(num_data):
if in_neg_range[i, k] or sequence_adjacency_not[i, k]:
neg_distances.append(pdist_matrix[i][k])
neg_distances.sort() # sort by distance
chosen_neg_distance = neg_distances[0]
for l in range(len(neg_distances)):
chosen_neg_distance = neg_distances[l]
if chosen_neg_distance > pos_distance:
break
loss_np += np.maximum(
0.0, margin - chosen_neg_distance + pos_distance)
loss_np /= num_positives
# Compute the loss in TF
loss_tf = svtcn_loss.singleview_tcn_loss(
embeddings=tf.convert_to_tensor(embedding),
timesteps=tf.convert_to_tensor(times),
pos_radius=pos_radius,
neg_radius=neg_radius,
margin=margin,
sequence_ids=tf.convert_to_tensor(sequence_ids),
multiseq=True
)
loss_tf = loss_tf.eval()
self.assertAllClose(loss_np, loss_tf)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Calculates running validation of TCN models (and baseline comparisons)."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import time
from estimators.get_estimator import get_estimator
from utils import util
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.INFO)
tf.flags.DEFINE_string(
'config_paths', '',
"""
Path to YAML configuration files defining FLAG values. Multiple files
can be separated by the `#` symbol. Files are merged recursively. Setting
a key in these files is equivalent to setting the FLAG value with
the same name.
""")
tf.flags.DEFINE_string(
'model_params', '{}', 'YAML configuration string for the model parameters.')
tf.app.flags.DEFINE_string('master', 'local',
'BNS name of the TensorFlow master to use')
tf.app.flags.DEFINE_string(
'logdir', '/tmp/tcn', 'Directory where to write event logs.')
FLAGS = tf.app.flags.FLAGS
def main(_):
"""Runs main eval loop."""
# Parse config dict from yaml config files / command line flags.
logdir = FLAGS.logdir
config = util.ParseConfigsToLuaTable(FLAGS.config_paths, FLAGS.model_params)
# Choose an estimator based on training strategy.
estimator = get_estimator(config, logdir)
# Wait for the first checkpoint file to be written.
while not tf.train.latest_checkpoint(logdir):
tf.logging.info('Waiting for a checkpoint file...')
time.sleep(10)
# Run validation.
while True:
estimator.evaluate()
if __name__ == '__main__':
tf.app.run()
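# Note: the evaluation loop in main() re-runs estimator.evaluate() on the
# latest checkpoint indefinitely; it is meant to run alongside training and
# has to be stopped manually.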
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Generates imitation videos.
Generate single pairwise imitation videos:
blaze build -c opt --config=cuda --copt=-mavx \
learning/brain/research/tcn/generate_videos && \
blaze-bin/learning/brain/research/tcn/generate_videos \
--logtostderr \
--config_paths $config_paths \
--checkpointdir $checkpointdir \
--checkpoint_iter $checkpoint_iter \
--query_records_dir $query_records_dir \
--target_records_dir $target_records_dir \
--outdir $outdir \
--mode single \
--num_query_sequences 1 \
--num_target_sequences -1
Generate imitation videos with multiple sequences in the target set:
blaze build -c opt --config=cuda --copt=-mavx \
learning/brain/research/tcn/generate_videos && \
blaze-bin/learning/brain/research/tcn/generate_videos \
--logtostderr \
--config_paths $config_paths \
--checkpointdir $checkpointdir \
--checkpoint_iter $checkpoint_iter \
--query_records_dir $query_records_dir \
--target_records_dir $target_records_dir \
--outdir $outdir \
--mode multi \
--num_multi_targets 1
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
import tensorflow as tf
import os
import matplotlib
matplotlib.use("pdf")
import matplotlib.animation as animation
import matplotlib.pyplot as plt
import numpy as np
from estimators.get_estimator import get_estimator
from utils import util
tf.logging.set_verbosity(tf.logging.INFO)
tf.flags.DEFINE_string(
'config_paths', '',
"""
Path to YAML configuration files defining FLAG values. Multiple files
can be separated by the `#` symbol. Files are merged recursively. Setting
a key in these files is equivalent to setting the FLAG value with
the same name.
""")
tf.flags.DEFINE_string(
'model_params', '{}', 'YAML configuration string for the model parameters.')
tf.app.flags.DEFINE_string(
'checkpointdir', '/tmp/tcn', 'Path to model checkpoints.')
tf.app.flags.DEFINE_string(
'checkpoint_iter', '', 'Checkpoint iter to use.')
tf.app.flags.DEFINE_integer(
'num_multi_targets', -1,
'Number of imitation vids in the target set per imitation video.')
tf.app.flags.DEFINE_string(
'outdir', '/tmp/tcn', 'Path to write embeddings to.')
tf.app.flags.DEFINE_string(
'mode', 'single', 'single | multi. Single means generate imitation vids '
'where the query is being imitated by a single sequence. Multi '
'means generate imitation vids where the query is being '
'imitated by multiple sequences.')
tf.app.flags.DEFINE_string('query_records_dir', '',
'Directory of image tfrecords.')
tf.app.flags.DEFINE_string('target_records_dir', '',
'Directory of image tfrecords.')
tf.app.flags.DEFINE_integer('query_view', 1,
'Viewpoint of the query video.')
tf.app.flags.DEFINE_integer('target_view', 0,
'Viewpoint of the imitation video.')
tf.app.flags.DEFINE_integer('smoothing_window', 5,
'Number of frames to smooth over.')
tf.app.flags.DEFINE_integer('num_query_sequences', -1,
'Number of query sequences to embed.')
tf.app.flags.DEFINE_integer('num_target_sequences', -1,
'Number of target sequences to embed.')
FLAGS = tf.app.flags.FLAGS
def SmoothEmbeddings(embs):
"""Temporally smoothes a sequence of embeddings."""
new_embs = []
window = int(FLAGS.smoothing_window)
for i in range(len(embs)):
min_i = max(i-window, 0)
max_i = min(i+window, len(embs))
new_embs.append(np.mean(embs[min_i:max_i, :], axis=0))
return np.array(new_embs)
def MakeImitationVideo(
outdir, vidname, query_im_strs, knn_im_strs, height=640, width=360):
"""Creates a KNN imitation video.
For each frame in vid0, pair with the frame at index in knn_indices in
vids1. Write video to disk.
Args:
outdir: String, directory to write videos.
vidname: String, name of video.
query_im_strs: Numpy array holding query image strings.
knn_im_strs: Numpy array holding knn image strings.
height: Int, height of raw images.
width: Int, width of raw images.
"""
if not tf.gfile.Exists(outdir):
tf.gfile.MakeDirs(outdir)
vid_path = os.path.join(outdir, vidname)
combined = zip(query_im_strs, knn_im_strs)
# Create and write the video.
fig = plt.figure()
ax = fig.add_subplot(111)
ax.set_aspect('equal')
ax.get_xaxis().set_visible(False)
ax.get_yaxis().set_visible(False)
im = ax.imshow(
np.zeros((height, width*2, 3)), cmap='gray', interpolation='nearest')
im.set_clim([0, 1])
plt.tight_layout(pad=0, w_pad=0, h_pad=0)
# pylint: disable=invalid-name
def update_img(pair):
"""Decode pairs of image strings, update a video."""
im_i, im_j = pair
nparr_i = np.fromstring(str(im_i), np.uint8)
img_np_i = cv2.imdecode(nparr_i, 1)
img_np_i = img_np_i[..., [2, 1, 0]]
nparr_j = np.fromstring(str(im_j), np.uint8)
img_np_j = cv2.imdecode(nparr_j, 1)
img_np_j = img_np_j[..., [2, 1, 0]]
# Optionally reshape the images to be same size.
frame = np.concatenate([img_np_i, img_np_j], axis=1)
im.set_data(frame)
return im
ani = animation.FuncAnimation(fig, update_img, combined, interval=15)
writer = animation.writers['ffmpeg'](fps=15)
dpi = 100
tf.logging.info('Writing video to:\n %s \n' % vid_path)
ani.save('%s.mp4' % vid_path, writer=writer, dpi=dpi)
def GenerateImitationVideo(
vid_name, query_ims, query_embs, target_ims, target_embs, height, width):
"""Generates a single cross-sequence imitation video.
For each frame in some query sequence, find the nearest neighbor from
some target sequence in embedding space.
Args:
vid_name: String, the name of the video.
query_ims: Numpy array of shape [query sequence length, height, width, 3].
query_embs: Numpy array of shape [query sequence length, embedding size].
target_ims: Numpy array of shape [target sequence length, height, width,
3].
target_embs: Numpy array of shape [target sequence length, embedding
size].
height: Int, height of the raw image.
width: Int, width of the raw image.
"""
# For each query frame, find the index of the nearest neighbor in the
# target video.
knn_indices = [util.KNNIds(q, target_embs, k=1)[0] for q in query_embs]
# Create and write out the video.
assert knn_indices
knn_ims = np.array([target_ims[k] for k in knn_indices])
MakeImitationVideo(FLAGS.outdir, vid_name, query_ims, knn_ims, height, width)
def SingleImitationVideos(
query_records, target_records, config, height, width):
"""Generates pairwise imitation videos.
This creates all pairs of target imitating query videos, where each frame
on the left is matched to a nearest neighbor coming from a single
embedded target video.
Args:
query_records: List of Strings, paths to tfrecord datasets to use as
queries.
target_records: List of Strings, paths to tfrecord datasets to use as
targets.
config: A T object describing training config.
height: Int, height of the raw image.
width: Int, width of the raw image.
"""
# Embed query and target data.
(query_sequences_to_data,
target_sequences_to_data) = EmbedQueryTargetData(
query_records, target_records, config)
qview = FLAGS.query_view
tview = FLAGS.target_view
# Loop over query videos.
for task_i, data_i in query_sequences_to_data.items():
for task_j, data_j in target_sequences_to_data.items():
i_ims = data_i['images']
i_embs = data_i['embeddings']
query_embs = SmoothEmbeddings(i_embs[qview])
query_ims = i_ims[qview]
j_ims = data_j['images']
j_embs = data_j['embeddings']
target_embs = SmoothEmbeddings(j_embs[tview])
target_ims = j_ims[tview]
tf.logging.info('Generating %s imitating %s video.' % (task_j, task_i))
vid_name = 'q%sv%s_im%sv%s' % (task_i, qview, task_j, tview)
vid_name = vid_name.replace('/', '_')
GenerateImitationVideo(vid_name, query_ims, query_embs,
target_ims, target_embs, height, width)
def MultiImitationVideos(
query_records, target_records, config, height, width):
"""Creates multi-imitation videos.
This creates videos where every frame on the left is matched to a nearest
neighbor coming from a set of multiple embedded target videos.
Args:
query_records: List of Strings, paths to tfrecord datasets to use as
queries.
target_records: List of Strings, paths to tfrecord datasets to use as
targets.
config: A T object describing training config.
height: Int, height of the raw image.
width: Int, width of the raw image.
"""
# Embed query and target data.
(query_sequences_to_data,
target_sequences_to_data) = EmbedQueryTargetData(
query_records, target_records, config)
qview = FLAGS.query_view
tview = FLAGS.target_view
# Loop over query videos.
for task_i, data_i in query_sequences_to_data.iteritems():
i_ims = data_i['images']
i_embs = data_i['embeddings']
query_embs = SmoothEmbeddings(i_embs[qview])
query_ims = i_ims[qview]
all_target_embs = []
all_target_ims = []
    # If num_multi_targets is -1, add all seq embeddings to the target set.
if FLAGS.num_multi_targets == -1:
num_multi_targets = len(target_sequences_to_data)
else:
# Else, add some specified number of seq embeddings to the target set.
num_multi_targets = FLAGS.num_multi_targets
for j in range(num_multi_targets):
task_j = target_sequences_to_data.keys()[j]
data_j = target_sequences_to_data[task_j]
print('Adding %s to target set' % task_j)
j_ims = data_j['images']
j_embs = data_j['embeddings']
target_embs = SmoothEmbeddings(j_embs[tview])
target_ims = j_ims[tview]
all_target_embs.extend(target_embs)
all_target_ims.extend(target_ims)
# Generate a "j imitating i" video.
tf.logging.info('Generating all imitating %s video.' % task_i)
vid_name = 'q%sv%s_multiv%s' % (task_i, qview, tview)
vid_name = vid_name.replace('/', '_')
GenerateImitationVideo(vid_name, query_ims, query_embs,
all_target_ims, all_target_embs, height, width)
def SameSequenceVideos(query_records, config, height, width):
"""Generate same sequence, cross-view imitation videos."""
batch_size = config.data.embed_batch_size
# Choose an estimator based on training strategy.
estimator = get_estimator(config, FLAGS.checkpointdir)
# Choose a checkpoint path to restore.
checkpointdir = FLAGS.checkpointdir
checkpoint_path = os.path.join(checkpointdir,
'model.ckpt-%s' % FLAGS.checkpoint_iter)
# Embed num_sequences query sequences, store embeddings and image strings in
# query_sequences_to_data.
sequences_to_data = {}
for (view_embeddings, view_raw_image_strings, seqname) in estimator.inference(
query_records, checkpoint_path, batch_size,
num_sequences=FLAGS.num_query_sequences):
sequences_to_data[seqname] = {
'embeddings': view_embeddings,
'images': view_raw_image_strings,
}
# Loop over query videos.
qview = FLAGS.query_view
tview = FLAGS.target_view
for task_i, data_i in sequences_to_data.iteritems():
ims = data_i['images']
embs = data_i['embeddings']
query_embs = SmoothEmbeddings(embs[qview])
query_ims = ims[qview]
target_embs = SmoothEmbeddings(embs[tview])
target_ims = ims[tview]
tf.logging.info('Generating %s imitating %s video.' % (task_i, task_i))
vid_name = 'q%sv%s_im%sv%s' % (task_i, qview, task_i, tview)
vid_name = vid_name.replace('/', '_')
GenerateImitationVideo(vid_name, query_ims, query_embs,
target_ims, target_embs, height, width)
def EmbedQueryTargetData(query_records, target_records, config):
"""Embeds the full set of query_records and target_records.
Args:
query_records: List of Strings, paths to tfrecord datasets to use as
queries.
target_records: List of Strings, paths to tfrecord datasets to use as
targets.
config: A T object describing training config.
Returns:
    query_sequences_to_data: A dict mapping sequence name to a dict holding
      per-view 'embeddings' and per-view 'images' (raw image strings).
    target_sequences_to_data: A dict with the same structure, built from the
      target records.
"""
batch_size = config.data.embed_batch_size
# Choose an estimator based on training strategy.
estimator = get_estimator(config, FLAGS.checkpointdir)
# Choose a checkpoint path to restore.
checkpointdir = FLAGS.checkpointdir
checkpoint_path = os.path.join(checkpointdir,
'model.ckpt-%s' % FLAGS.checkpoint_iter)
# Embed num_sequences query sequences, store embeddings and image strings in
# query_sequences_to_data.
num_query_sequences = FLAGS.num_query_sequences
num_target_sequences = FLAGS.num_target_sequences
query_sequences_to_data = {}
for (view_embeddings, view_raw_image_strings, seqname) in estimator.inference(
query_records, checkpoint_path, batch_size,
num_sequences=num_query_sequences):
query_sequences_to_data[seqname] = {
'embeddings': view_embeddings,
'images': view_raw_image_strings,
}
if (query_records == target_records) and (
num_query_sequences == num_target_sequences):
target_sequences_to_data = query_sequences_to_data
else:
# Embed num_sequences target sequences, store embeddings and image strings
# in sequences_to_data.
target_sequences_to_data = {}
for (view_embeddings, view_raw_image_strings,
seqname) in estimator.inference(
target_records, checkpoint_path, batch_size,
num_sequences=num_target_sequences):
target_sequences_to_data[seqname] = {
'embeddings': view_embeddings,
'images': view_raw_image_strings,
}
return query_sequences_to_data, target_sequences_to_data
def main(_):
# Parse config dict from yaml config files / command line flags.
config = util.ParseConfigsToLuaTable(FLAGS.config_paths, FLAGS.model_params)
# Get tables to embed.
query_records_dir = FLAGS.query_records_dir
query_records = util.GetFilesRecursively(query_records_dir)
target_records_dir = FLAGS.target_records_dir
target_records = util.GetFilesRecursively(target_records_dir)
height = config.data.raw_height
width = config.data.raw_width
mode = FLAGS.mode
if mode == 'multi':
# Generate videos where target set is composed of multiple videos.
MultiImitationVideos(query_records, target_records, config,
height, width)
elif mode == 'single':
# Generate videos where target set is a single video.
SingleImitationVideos(query_records, target_records, config,
height, width)
elif mode == 'same':
# Generate videos where target set is the same as query, but diff view.
SameSequenceVideos(query_records, config, height, width)
else:
raise ValueError('Unknown mode %s' % mode)
if __name__ == '__main__':
tf.app.run()
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Generates test Recall@K statistics on labeled classification problems."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from collections import defaultdict
import os
import numpy as np
from sklearn.metrics.pairwise import pairwise_distances
from six.moves import xrange
import data_providers
from estimators.get_estimator import get_estimator
from utils import util
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.INFO)
tf.flags.DEFINE_string(
'config_paths', '',
"""
    Path to YAML configuration files defining FLAG values. Multiple files
can be separated by the `#` symbol. Files are merged recursively. Setting
a key in these files is equivalent to setting the FLAG value with
the same name.
""")
tf.flags.DEFINE_string(
'model_params', '{}', 'YAML configuration string for the model parameters.')
tf.app.flags.DEFINE_string(
'mode', 'validation',
'Which dataset to evaluate: `validation` | `test`.')
tf.app.flags.DEFINE_string('master', 'local',
'BNS name of the TensorFlow master to use')
tf.app.flags.DEFINE_string(
'checkpoint_iter', '', 'Evaluate this specific checkpoint.')
tf.app.flags.DEFINE_string(
'checkpointdir', '/tmp/tcn', 'Path to model checkpoints.')
tf.app.flags.DEFINE_string('outdir', '/tmp/tcn', 'Path to write summaries to.')
FLAGS = tf.app.flags.FLAGS
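# Example invocation (config path and iteration number are illustrative):
#   python labeled_eval.py \
#     --config_paths=configs/tcn_default.yml \
#     --checkpointdir=/tmp/tcn \
#     --checkpoint_iter=100000 \
#     --mode=validation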
def nearest_cross_sequence_neighbors(data, tasks, n_neighbors=1):
"""Computes the n_neighbors nearest neighbors for every row in data.
Args:
data: A np.float32 array of shape [num_data, embedding size] holding
an embedded validation / test dataset.
tasks: A list of strings of size [num_data] holding the task or sequence
name that each row belongs to.
n_neighbors: The number of knn indices to return for each row.
Returns:
indices: an np.int32 array of size [num_data, n_neighbors] holding the
n_neighbors nearest indices for every row in data. These are
restricted to be from different named sequences (as defined in `tasks`).
"""
# Compute the pairwise sequence adjacency matrix from `tasks`.
num_data = data.shape[0]
tasks = np.array(tasks)
tasks = np.reshape(tasks, (num_data, 1))
assert len(tasks.shape) == 2
not_adjacent = (tasks != tasks.T)
# Compute the symmetric pairwise distance matrix.
pdist = pairwise_distances(data, metric='sqeuclidean')
# For every row in the pairwise distance matrix, only consider
# cross-sequence columns.
indices = np.zeros((num_data, n_neighbors), dtype=np.int32)
for idx in range(num_data):
# Restrict to cross_sequence neighbors.
distances = [(
pdist[idx][i], i) for i in xrange(num_data) if not_adjacent[idx][i]]
_, nearest_indices = zip(*sorted(
distances, key=lambda x: x[0])[:n_neighbors])
indices[idx] = nearest_indices
return indices
def compute_cross_sequence_recall_at_k(retrieved_labels, labels, k_list):
"""Compute recall@k for a given list of k values.
  Recall is one if an example of the same class is retrieved among the
  top k nearest neighbors of a query example, and zero otherwise.
  Averaging these counts per class, then across classes, gives the
  recall@k score.
Args:
retrieved_labels: 2-D Numpy array of KNN labels for every embedding.
labels: 1-D Numpy array of shape [number of data].
k_list: List of k values to evaluate recall@k.
Returns:
recall_list: List of recall@k values.
"""
kvalue_to_recall = dict(zip(k_list, np.zeros(len(k_list))))
# For each value of K.
for k in k_list:
matches = defaultdict(float)
counts = defaultdict(float)
# For each (row index, label value) in the query labels.
for i, label_value in enumerate(labels):
# Loop over the K nearest retrieved labels.
if label_value in retrieved_labels[i][:k]:
matches[label_value] += 1.
# Increment the denominator.
counts[label_value] += 1.
kvalue_to_recall[k] = np.mean(
[matches[l]/counts[l] for l in matches])
return [kvalue_to_recall[i] for i in k_list]
def compute_cross_sequence_recalls_at_k(
embeddings, labels, label_attr_keys, tasks, k_list, summary_writer,
training_step):
"""Computes and reports the recall@k for each classification problem.
This takes an embedding matrix and an array of multiclass labels
with size [num_data, number of classification problems], then
computes the average recall@k for each classification problem
as well as the average across problems.
Args:
embeddings: A np.float32 array of size [num_data, embedding_size]
representing the embedded validation or test dataset.
labels: A np.int32 array of size [num_data, num_classification_problems]
holding multiclass labels for each embedding for each problem.
label_attr_keys: List of strings, holds the names of the classification
problems.
tasks: A list of strings describing the video sequence each row
belongs to. This is used to restrict the recall@k computation
to cross-sequence examples.
k_list: A list of ints, the k values to evaluate recall@k.
summary_writer: A tf.summary.FileWriter.
training_step: Int, the current training step we're evaluating.
"""
num_data = float(embeddings.shape[0])
assert labels.shape[0] == num_data
# Compute knn indices.
indices = nearest_cross_sequence_neighbors(
embeddings, tasks, n_neighbors=max(k_list))
retrieved_labels = labels[indices]
# Compute the recall@k for each classification problem.
recall_lists = []
for idx, label_attr in enumerate(label_attr_keys):
problem_labels = labels[:, idx]
# Take all indices, all k labels for the problem indexed by idx.
problem_retrieved = retrieved_labels[:, :, idx]
recall_list = compute_cross_sequence_recall_at_k(
retrieved_labels=problem_retrieved,
labels=problem_labels,
k_list=k_list)
recall_lists.append(recall_list)
for (k, recall) in zip(k_list, recall_list):
recall_error = 1-recall
summ = tf.Summary(value=[tf.Summary.Value(
tag='validation/classification/%s error@top%d' % (
label_attr, k),
simple_value=recall_error)])
      print('%s error@k=%d' % (label_attr, k), recall_error)
summary_writer.add_summary(summ, int(training_step))
# Report an average recall@k across problems.
recall_lists = np.array(recall_lists)
for i in range(recall_lists.shape[1]):
average_recall = np.mean(recall_lists[:, i])
recall_error = 1 - average_recall
summ = tf.Summary(value=[tf.Summary.Value(
tag='validation/classification/average error@top%d' % k_list[i],
simple_value=recall_error)])
    print('Average error@k=%d' % k_list[i], recall_error)
summary_writer.add_summary(summ, int(training_step))
def evaluate_once(
estimator, input_fn_by_view, batch_size, checkpoint_path,
label_attr_keys, embedding_size, num_views, k_list):
"""Compute the recall@k for a given checkpoint path.
Args:
estimator: an `Estimator` object to evaluate.
input_fn_by_view: An input_fn to an `Estimator's` predict method. Takes
a view index and returns a dict holding ops for getting raw images for
the view.
batch_size: Int, size of the labeled eval batch.
checkpoint_path: String, path to the specific checkpoint being evaluated.
label_attr_keys: A list of Strings, holding each attribute name.
embedding_size: Int, the size of the embedding.
num_views: Int, number of views in the dataset.
k_list: List of ints, list of K values to compute recall at K for.
"""
feat_matrix = np.zeros((0, embedding_size))
label_vect = np.zeros((0, len(label_attr_keys)))
tasks = []
eval_tensor_keys = ['embeddings', 'tasks', 'classification_labels']
# Iterate all views in the dataset.
for view_index in range(num_views):
# Set up a graph for embedding entire dataset.
predictions = estimator.inference(
input_fn_by_view(view_index), checkpoint_path,
batch_size, predict_keys=eval_tensor_keys)
# Enumerate predictions.
for i, p in enumerate(predictions):
if i % 100 == 0:
tf.logging.info('Embedded %d images for view %d' % (i, view_index))
label = p['classification_labels']
task = p['tasks']
embedding = p['embeddings']
# Collect (embedding, label, task) data.
feat_matrix = np.append(feat_matrix, [embedding], axis=0)
label_vect = np.append(label_vect, [label], axis=0)
tasks.append(task)
# Compute recall statistics.
ckpt_step = int(checkpoint_path.split('-')[-1])
summary_dir = os.path.join(FLAGS.outdir, 'labeled_eval_summaries')
summary_writer = tf.summary.FileWriter(summary_dir)
compute_cross_sequence_recalls_at_k(
feat_matrix, label_vect, label_attr_keys, tasks, k_list,
summary_writer, ckpt_step)
def get_labeled_tables(config):
"""Gets either labeled test or validation tables, based on flags."""
# Get a list of filenames corresponding to labeled data.
mode = FLAGS.mode
if mode == 'validation':
labeled_tables = util.GetFilesRecursively(config.data.labeled.validation)
elif mode == 'test':
labeled_tables = util.GetFilesRecursively(config.data.labeled.test)
else:
raise ValueError('Unknown dataset: %s' % mode)
return labeled_tables
def main(_):
"""Runs main labeled eval loop."""
# Parse config dict from yaml config files / command line flags.
config = util.ParseConfigsToLuaTable(FLAGS.config_paths, FLAGS.model_params)
# Choose an estimator based on training strategy.
checkpointdir = FLAGS.checkpointdir
estimator = get_estimator(config, checkpointdir)
# Get data configs.
image_attr_keys = config.data.labeled.image_attr_keys
label_attr_keys = config.data.labeled.label_attr_keys
embedding_size = config.embedding_size
num_views = config.data.num_views
k_list = config.val.recall_at_k_list
batch_size = config.data.batch_size
# Get either labeled validation or test tables.
labeled_tables = get_labeled_tables(config)
def input_fn_by_view(view_index):
"""Returns an input_fn for use with a tf.Estimator by view."""
def input_fn():
# Get raw labeled images.
(preprocessed_images, labels,
tasks) = data_providers.labeled_data_provider(
labeled_tables,
estimator.preprocess_data, view_index, image_attr_keys,
label_attr_keys, batch_size=batch_size)
return {
'batch_preprocessed': preprocessed_images,
'tasks': tasks,
'classification_labels': labels,
}, None
return input_fn
# If evaluating a specific checkpoint, do that.
if FLAGS.checkpoint_iter:
    checkpoint_path = os.path.join(
        checkpointdir, 'model.ckpt-%s' % FLAGS.checkpoint_iter)
evaluate_once(
estimator, input_fn_by_view, batch_size, checkpoint_path,
label_attr_keys, embedding_size, num_views, k_list)
else:
for checkpoint_path in tf.contrib.training.checkpoints_iterator(
checkpointdir):
evaluate_once(
estimator, input_fn_by_view, batch_size, checkpoint_path,
label_attr_keys, embedding_size, num_views, k_list)
if __name__ == '__main__':
tf.app.run()
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for tcn.labeled_eval."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import labeled_eval
import tensorflow as tf
class LabeledEvalTest(tf.test.TestCase):
def testNearestCrossSequenceNeighbors(self):
# Generate embeddings.
num_data = 64
embedding_size = 4
num_tasks = 8
n_neighbors = 2
data = np.random.randn(num_data, embedding_size)
tasks = np.repeat(range(num_tasks), num_data // num_tasks)
# Get nearest cross-sequence indices.
indices = labeled_eval.nearest_cross_sequence_neighbors(
data, tasks, n_neighbors=n_neighbors)
# Assert that no nearest neighbor indices come from the same task.
repeated_tasks = np.tile(np.reshape(tasks, (num_data, 1)), n_neighbors)
self.assertTrue(np.all(np.not_equal(repeated_tasks, tasks[indices])))
def testPerfectCrossSequenceRecall(self):
# Make sure cross-sequence recall@k returns 1.0 for near-duplicate features.
embeddings = np.random.randn(10, 2)
embeddings[5:, :] = 0.00001 + embeddings[:5, :]
tasks = np.repeat([0, 1], 5)
labels = np.array([0, 1, 2, 3, 4, 0, 1, 2, 3, 4])
# find k=1, k=2 nearest neighbors.
k_list = [1, 2]
# Compute knn indices.
indices = labeled_eval.nearest_cross_sequence_neighbors(
embeddings, tasks, n_neighbors=max(k_list))
retrieved_labels = labels[indices]
recall_list = labeled_eval.compute_cross_sequence_recall_at_k(
retrieved_labels=retrieved_labels,
labels=labels,
k_list=k_list)
self.assertTrue(np.allclose(
np.array(recall_list), np.array([1.0, 1.0])))
def testRelativeRecall(self):
    # Make sure cross-sequence recall@k is non-decreasing in k.
num_data = 100
num_tasks = 10
embeddings = np.random.randn(100, 5)
tasks = np.repeat(range(num_tasks), num_data // num_tasks)
labels = np.random.randint(0, 5, 100)
k_list = [1, 2, 4, 8, 16, 32, 64]
indices = labeled_eval.nearest_cross_sequence_neighbors(
embeddings, tasks, n_neighbors=max(k_list))
retrieved_labels = labels[indices]
recall_list = labeled_eval.compute_cross_sequence_recall_at_k(
retrieved_labels=retrieved_labels,
labels=labels,
k_list=k_list)
recall_list_sorted = sorted(recall_list)
self.assertTrue(np.allclose(
np.array(recall_list), np.array(recall_list_sorted)))
if __name__ == "__main__":
tf.test.main()
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Model implementations."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from abc import ABCMeta
from abc import abstractmethod
import tensorflow as tf
import tensorflow.contrib.slim as slim
from tensorflow.contrib.slim.python.slim.nets import inception
from tensorflow.contrib.slim.python.slim.nets import resnet_v2 as resnet_v2
from tensorflow.contrib.slim.python.slim.nets import resnet_utils as resnet_utils
def get_embedder(
embedder_strategy, config, images, is_training, reuse=False,
l2_normalize_embedding=True):
"""Returns an embedder based on config.
Args:
embedder_strategy: String, name of embedder version to return.
config: LuaTable object, training config.
images: 4-D float `Tensor` containing batch images.
is_training: Boolean or placeholder for boolean,
indicator for whether or not we're training.
reuse: Boolean: Reuse embedder variable scope.
l2_normalize_embedding: Boolean, whether or not to l2 normalize the
embedding.
Returns:
embedder: An `Embedder` object.
Raises:
ValueError: if unknown embedder_strategy specified.
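  Example (illustrative; `config` is a parsed training config):
    embedder = get_embedder('resnet', config, images, is_training=True)
    embeddings = embedder.construct_embedding()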
"""
if embedder_strategy == 'inception_baseline':
pretrained_ckpt = config.inception_conv_ss_fc.pretrained_checkpoint
    # Pass projection options by keyword so they are not bound to `reuse`.
    return InceptionBaselineEmbedder(
        images,
        pretrained_ckpt,
        random_projection=config.random_projection,
        random_projection_dim=config.random_projection_dim)
strategy_to_embedder = {
'inception_conv_ss_fc': InceptionConvSSFCEmbedder,
'resnet': ResnetEmbedder,
}
if embedder_strategy not in strategy_to_embedder:
raise ValueError('unknown embedder_strategy', embedder_strategy)
embedding_size = config.embedding_size
l2_reg_weight = config.learning.l2_reg_weight
embedder = strategy_to_embedder[embedder_strategy](
config[embedder_strategy], images, embedding_size,
is_training, embedding_l2=l2_normalize_embedding,
l2_reg_weight=l2_reg_weight, reuse=reuse)
return embedder
def build_inceptionv3_graph(images, endpoint, is_training, checkpoint,
reuse=False):
"""Builds an InceptionV3 model graph.
Args:
images: A 4-D float32 `Tensor` of batch images.
endpoint: String, name of the InceptionV3 endpoint.
is_training: Boolean, whether or not to build a training or inference graph.
checkpoint: String, path to the pretrained model checkpoint.
reuse: Boolean, whether or not we are reusing the embedder.
Returns:
inception_output: `Tensor` holding the InceptionV3 output.
inception_variables: List of inception variables.
init_fn: Function to initialize the weights (if not reusing, then None).
"""
with slim.arg_scope(inception.inception_v3_arg_scope()):
_, endpoints = inception.inception_v3(
images, num_classes=1001, is_training=is_training)
inception_output = endpoints[endpoint]
inception_variables = slim.get_variables_to_restore()
inception_variables = [
i for i in inception_variables if 'global_step' not in i.name]
if is_training and not reuse:
init_saver = tf.train.Saver(inception_variables)
def init_fn(scaffold, sess):
del scaffold
init_saver.restore(sess, checkpoint)
else:
init_fn = None
return inception_output, inception_variables, init_fn
class InceptionBaselineEmbedder(object):
"""Produces pre-trained InceptionV3 embeddings."""
def __init__(self, images, pretrained_ckpt, reuse=False,
random_projection=False, random_projection_dim=32):
# Build InceptionV3 graph.
(inception_output,
self.inception_variables,
self.init_fn) = build_inceptionv3_graph(
images, 'Mixed_7c', False, pretrained_ckpt, reuse)
# Pool 8x8x2048 -> 1x1x2048.
embedding = slim.avg_pool2d(inception_output, [8, 8], stride=1)
embedding = tf.squeeze(embedding, [1, 2])
if random_projection:
embedding = tf.matmul(
embedding, tf.random_normal(
shape=[2048, random_projection_dim], seed=123))
self.embedding = embedding
class PretrainedEmbedder(object):
"""Base class for embedders that take pre-trained networks as input."""
__metaclass__ = ABCMeta
def __init__(self, config, images, embedding_size, is_training,
embedding_l2=True, l2_reg_weight=1e-6, reuse=False):
"""Constructor.
Args:
config: A T object holding training config.
images: A 4-D float32 `Tensor` holding images to embed.
embedding_size: Int, the size of the embedding.
is_training: Boolean, whether or not this is a training or inference-time
graph.
embedding_l2: Boolean, whether or not to l2 normalize the embedding.
l2_reg_weight: Float, weight applied to l2 weight regularization.
reuse: Boolean, whether or not we're reusing this graph.
"""
# Pull out all the embedder hyperparameters.
self._config = config
self._embedding_size = embedding_size
self._l2_reg_weight = l2_reg_weight
self._embedding_l2 = embedding_l2
self._is_training = is_training
self._reuse = reuse
# Pull out pretrained hparams.
pretrained_checkpoint = config.pretrained_checkpoint
pretrained_layer = config.pretrained_layer
pretrained_keep_prob = config.dropout.keep_pretrained
# Build pretrained graph.
(pretrained_output,
self._pretrained_variables,
self.init_fn) = self.build_pretrained_graph(
images, pretrained_layer, pretrained_checkpoint, is_training, reuse)
# Optionally drop out the activations.
pretrained_output = slim.dropout(
pretrained_output, keep_prob=pretrained_keep_prob,
is_training=is_training)
self._pretrained_output = pretrained_output
@abstractmethod
def build_pretrained_graph(self, images, layer, pretrained_checkpoint,
is_training, reuse):
"""Builds the graph for the pre-trained network.
Method to be overridden by implementations.
Args:
images: A 4-D tf.float32 `Tensor` holding images to embed.
layer: String, defining which pretrained layer to take as input
to adaptation layers.
pretrained_checkpoint: String, path to a checkpoint used to load
pretrained weights.
is_training: Boolean, whether or not we're in training mode.
reuse: Boolean, whether or not to reuse embedder weights.
Returns:
pretrained_output: A 2 or 3-d tf.float32 `Tensor` holding pretrained
activations.
"""
pass
@abstractmethod
def construct_embedding(self):
"""Builds an embedding function on top of images.
Method to be overridden by implementations.
Returns:
embeddings: A 2-d float32 `Tensor` of shape [batch_size, embedding_size]
holding the embedded images.
"""
pass
def get_trainable_variables(self):
"""Gets a list of variables to optimize."""
if self._config.finetune:
return tf.trainable_variables()
else:
adaptation_only_vars = tf.get_collection(
tf.GraphKeys.TRAINABLE_VARIABLES, scope=self._adaptation_scope)
return adaptation_only_vars
class ResnetEmbedder(PretrainedEmbedder):
"""Resnet TCN.
ResnetV2 -> resnet adaptation layers -> optional l2 normalize -> embedding.
"""
def __init__(self, config, images, embedding_size, is_training,
embedding_l2=True, l2_reg_weight=1e-6, reuse=False):
super(ResnetEmbedder, self).__init__(
config, images, embedding_size, is_training, embedding_l2,
l2_reg_weight, reuse)
def build_pretrained_graph(
self, images, resnet_layer, checkpoint, is_training, reuse=False):
"""See baseclass."""
with slim.arg_scope(resnet_v2.resnet_arg_scope()):
_, endpoints = resnet_v2.resnet_v2_50(
images, is_training=is_training, reuse=reuse)
resnet_layer = 'resnet_v2_50/block%d' % resnet_layer
resnet_output = endpoints[resnet_layer]
resnet_variables = slim.get_variables_to_restore()
resnet_variables = [
i for i in resnet_variables if 'global_step' not in i.name]
if is_training and not reuse:
init_saver = tf.train.Saver(resnet_variables)
def init_fn(scaffold, sess):
del scaffold
init_saver.restore(sess, checkpoint)
else:
init_fn = None
return resnet_output, resnet_variables, init_fn
def construct_embedding(self):
"""Builds an embedding function on top of images.
Method to be overridden by implementations.
Returns:
embeddings: A 2-d float32 `Tensor` of shape [batch_size, embedding_size]
holding the embedded images.
"""
with tf.variable_scope('tcn_net', reuse=self._reuse) as vs:
self._adaptation_scope = vs.name
net = self._pretrained_output
# Define some adaptation blocks on top of the pre-trained resnet output.
adaptation_blocks = []
adaptation_block_params = [map(
int, i.split('_')) for i in self._config.adaptation_blocks.split('-')]
for i, (depth, num_units) in enumerate(adaptation_block_params):
block = resnet_v2.resnet_v2_block(
'adaptation_block_%d' % i, base_depth=depth, num_units=num_units,
stride=1)
adaptation_blocks.append(block)
      # Stack them on top of the resnet output.
net = resnet_utils.stack_blocks_dense(
net, adaptation_blocks, output_stride=None)
# Average pool the output.
net = tf.reduce_mean(net, [1, 2], name='adaptation_pool', keep_dims=True)
if self._config.emb_connection == 'fc':
# Use fully connected layer to project to embedding layer.
fc_hidden_sizes = self._config.fc_hidden_sizes
if fc_hidden_sizes == 'None':
fc_hidden_sizes = []
else:
fc_hidden_sizes = map(int, fc_hidden_sizes.split('_'))
fc_hidden_keep_prob = self._config.dropout.keep_fc
net = tf.squeeze(net)
for fc_hidden_size in fc_hidden_sizes:
net = slim.layers.fully_connected(net, fc_hidden_size)
if fc_hidden_keep_prob < 1.0:
net = slim.dropout(net, keep_prob=fc_hidden_keep_prob,
is_training=self._is_training)
# Connect last FC layer to embedding.
embedding = slim.layers.fully_connected(net, self._embedding_size,
activation_fn=None)
else:
# Use 1x1 conv layer to project to embedding layer.
embedding = slim.conv2d(
net, self._embedding_size, [1, 1], activation_fn=None,
normalizer_fn=None, scope='embedding')
embedding = tf.squeeze(embedding)
# Optionally L2 normalize the embedding.
if self._embedding_l2:
embedding = tf.nn.l2_normalize(embedding, dim=1)
return embedding
def get_trainable_variables(self):
"""Gets a list of variables to optimize."""
if self._config.finetune:
return tf.trainable_variables()
else:
adaptation_only_vars = tf.get_collection(
tf.GraphKeys.TRAINABLE_VARIABLES, scope=self._adaptation_scope)
return adaptation_only_vars
class InceptionEmbedderBase(PretrainedEmbedder):
"""Base class for embedders that take pre-trained InceptionV3 activations."""
def __init__(self, config, images, embedding_size, is_training,
embedding_l2=True, l2_reg_weight=1e-6, reuse=False):
super(InceptionEmbedderBase, self).__init__(
config, images, embedding_size, is_training, embedding_l2,
l2_reg_weight, reuse)
def build_pretrained_graph(
self, images, inception_layer, checkpoint, is_training, reuse=False):
"""See baseclass."""
# Build InceptionV3 graph.
inception_output, inception_variables, init_fn = build_inceptionv3_graph(
images, inception_layer, is_training, checkpoint, reuse)
return inception_output, inception_variables, init_fn
class InceptionConvSSFCEmbedder(InceptionEmbedderBase):
"""TCN Embedder V1.
InceptionV3 (mixed_5d) -> conv layers -> spatial softmax ->
fully connected -> optional l2 normalize -> embedding.
"""
def __init__(self, config, images, embedding_size, is_training,
embedding_l2=True, l2_reg_weight=1e-6, reuse=False):
super(InceptionConvSSFCEmbedder, self).__init__(
config, images, embedding_size, is_training, embedding_l2,
l2_reg_weight, reuse)
# Pull out all the hyperparameters specific to this embedder.
self._additional_conv_sizes = config.additional_conv_sizes
self._conv_hidden_keep_prob = config.dropout.keep_conv
self._fc_hidden_sizes = config.fc_hidden_sizes
self._fc_hidden_keep_prob = config.dropout.keep_fc
def construct_embedding(self):
"""Builds a conv -> spatial softmax -> FC adaptation network."""
is_training = self._is_training
normalizer_params = {'is_training': is_training}
with tf.variable_scope('tcn_net', reuse=self._reuse) as vs:
self._adaptation_scope = vs.name
with slim.arg_scope(
[slim.layers.conv2d],
activation_fn=tf.nn.relu,
normalizer_fn=slim.batch_norm, normalizer_params=normalizer_params,
weights_regularizer=slim.regularizers.l2_regularizer(
self._l2_reg_weight),
biases_regularizer=slim.regularizers.l2_regularizer(
self._l2_reg_weight)):
with slim.arg_scope(
[slim.layers.fully_connected],
activation_fn=tf.nn.relu,
normalizer_fn=slim.batch_norm, normalizer_params=normalizer_params,
weights_regularizer=slim.regularizers.l2_regularizer(
self._l2_reg_weight),
biases_regularizer=slim.regularizers.l2_regularizer(
self._l2_reg_weight)):
# Input to embedder is pre-trained inception output.
net = self._pretrained_output
# Optionally add more conv layers.
for num_filters in self._additional_conv_sizes:
net = slim.layers.conv2d(
net, num_filters, kernel_size=[3, 3], stride=[1, 1])
net = slim.dropout(net, keep_prob=self._conv_hidden_keep_prob,
is_training=is_training)
# Take the spatial soft arg-max of the last convolutional layer.
# This is a form of spatial attention over the activations.
# See more here: http://arxiv.org/abs/1509.06113.
net = tf.contrib.layers.spatial_softmax(net)
self.spatial_features = net
# Add fully connected layers.
net = slim.layers.flatten(net)
for fc_hidden_size in self._fc_hidden_sizes:
net = slim.layers.fully_connected(net, fc_hidden_size)
if self._fc_hidden_keep_prob < 1.0:
net = slim.dropout(net, keep_prob=self._fc_hidden_keep_prob,
is_training=is_training)
# Connect last FC layer to embedding.
net = slim.layers.fully_connected(net, self._embedding_size,
activation_fn=None)
# Optionally L2 normalize the embedding.
if self._embedding_l2:
net = tf.nn.l2_normalize(net, dim=1)
return net
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Image preprocessing helpers."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import cv2
from scipy import ndimage
import tensorflow as tf
from tensorflow.python.ops import control_flow_ops
def apply_with_random_selector(x, func, num_cases):
"""Computes func(x, sel), with sel sampled from [0...num_cases-1].
TODO(coreylynch): add as a dependency, when slim or tensorflow/models are
pipfied.
Source:
https://raw.githubusercontent.com/tensorflow/models/a9d0e6e8923a4/slim/preprocessing/inception_preprocessing.py
Args:
x: input Tensor.
func: Python function to apply.
num_cases: Python int32, number of cases to sample sel from.
Returns:
The result of func(x, sel), where func receives the value of the
selector as a python integer, but sel is sampled dynamically.
"""
sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32)
# Pass the real x only to one of the func calls.
return control_flow_ops.merge([
func(control_flow_ops.switch(x, tf.equal(sel, case))[1], case)
for case in range(num_cases)])[0]
def distorted_bounding_box_crop(image,
bbox,
min_object_covered=0.1,
aspect_ratio_range=(0.75, 1.33),
area_range=(0.05, 1.0),
max_attempts=100,
scope=None):
"""Generates cropped_image using a one of the bboxes randomly distorted.
TODO(coreylynch): add as a dependency, when slim or tensorflow/models are
pipfied.
Source:
https://raw.githubusercontent.com/tensorflow/models/a9d0e6e8923a4/slim/preprocessing/inception_preprocessing.py
See `tf.image.sample_distorted_bounding_box` for more documentation.
Args:
image: 3-D Tensor of image (it will be converted to floats in [0, 1]).
bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
where each coordinate is [0, 1) and the coordinates are arranged
as [ymin, xmin, ymax, xmax]. If num_boxes is 0 then it would use the whole
image.
min_object_covered: An optional `float`. Defaults to `0.1`. The cropped
area of the image must contain at least this fraction of any bounding box
supplied.
aspect_ratio_range: An optional list of `floats`. The cropped area of the
image must have an aspect ratio = width / height within this range.
area_range: An optional list of `floats`. The cropped area of the image
      must contain a fraction of the supplied image within this range.
max_attempts: An optional `int`. Number of attempts at generating a cropped
region of the image of the specified constraints. After `max_attempts`
failures, return the entire image.
scope: Optional scope for name_scope.
Returns:
A tuple, a 3-D Tensor cropped_image and the distorted bbox
"""
with tf.name_scope(scope, 'distorted_bounding_box_crop', [image, bbox]):
# Each bounding box has shape [1, num_boxes, box coords] and
# the coordinates are ordered [ymin, xmin, ymax, xmax].
# A large fraction of image datasets contain a human-annotated bounding
# box delineating the region of the image containing the object of interest.
# We choose to create a new bounding box for the object which is a randomly
# distorted version of the human-annotated bounding box that obeys an
# allowed range of aspect ratios, sizes and overlap with the human-annotated
# bounding box. If no box is supplied, then we assume the bounding box is
# the entire image.
sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box(
tf.shape(image),
bounding_boxes=bbox,
min_object_covered=min_object_covered,
aspect_ratio_range=aspect_ratio_range,
area_range=area_range,
max_attempts=max_attempts,
use_image_if_no_bounding_boxes=True)
bbox_begin, bbox_size, distort_bbox = sample_distorted_bounding_box
# Crop the image to the specified bounding box.
cropped_image = tf.slice(image, bbox_begin, bbox_size)
return cropped_image, distort_bbox
def distort_color(image, color_ordering=0, fast_mode=True, scope=None):
"""Distort the color of a Tensor image.
TODO(coreylynch): add as a dependency, when slim or tensorflow/models are
pipfied.
Source:
https://raw.githubusercontent.com/tensorflow/models/a9d0e6e8923a4/slim/preprocessing/inception_preprocessing.py
Each color distortion is non-commutative and thus ordering of the color ops
matters. Ideally we would randomly permute the ordering of the color ops.
  Rather than adding that level of complication, we select a distinct ordering
of color ops for each preprocessing thread.
Args:
image: 3-D Tensor containing single image in [0, 1].
color_ordering: Python int, a type of distortion (valid values: 0-3).
fast_mode: Avoids slower ops (random_hue and random_contrast)
scope: Optional scope for name_scope.
Returns:
3-D Tensor color-distorted image on range [0, 1]
Raises:
ValueError: if color_ordering not in [0, 3]
"""
with tf.name_scope(scope, 'distort_color', [image]):
if fast_mode:
if color_ordering == 0:
image = tf.image.random_brightness(image, max_delta=32. / 255.)
image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
else:
image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
image = tf.image.random_brightness(image, max_delta=32. / 255.)
else:
if color_ordering == 0:
image = tf.image.random_brightness(image, max_delta=32. / 255.)
image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
image = tf.image.random_hue(image, max_delta=0.2)
image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
elif color_ordering == 1:
image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
image = tf.image.random_brightness(image, max_delta=32. / 255.)
image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
image = tf.image.random_hue(image, max_delta=0.2)
elif color_ordering == 2:
image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
image = tf.image.random_hue(image, max_delta=0.2)
image = tf.image.random_brightness(image, max_delta=32. / 255.)
image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
elif color_ordering == 3:
image = tf.image.random_hue(image, max_delta=0.2)
image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
image = tf.image.random_brightness(image, max_delta=32. / 255.)
else:
raise ValueError('color_ordering must be in [0, 3]')
# The random_* ops do not necessarily clamp.
return tf.clip_by_value(image, 0.0, 1.0)
def crop_center(image):
"""Returns a cropped square image."""
shape = tf.shape(image)
new_shape = tf.minimum(shape[0], shape[1])
offset_y = tf.maximum(shape[0] - shape[1], 0) // 2
offset_x = tf.maximum(shape[1] - shape[0], 0) // 2
image = tf.image.crop_to_bounding_box(
image, offset_y, offset_x, new_shape, new_shape)
return image
def pad(image):
"""Returns an image padded to be square."""
shape = tf.shape(image)
new_shape = tf.maximum(shape[0], shape[1])
height = shape[0]
width = shape[1]
offset_x = tf.maximum((height-width), 0) // 2
offset_y = tf.maximum((width-height), 0) // 2
image = tf.image.pad_to_bounding_box(
image, offset_y, offset_x, new_shape, new_shape)
return image
def pad_200(image):
"""Returns an image padded width-padded with 200 pixels."""
shape = tf.shape(image)
image = tf.image.pad_to_bounding_box(
image, 0, 200, shape[0], shape[1]+400)
shape = tf.shape(image)
new_shape = tf.minimum(shape[0], shape[1])
offset_y = tf.maximum(shape[0] - shape[1], 0) // 2
offset_x = tf.maximum(shape[1] - shape[0], 0) // 2
image = tf.image.crop_to_bounding_box(
image, offset_y, offset_x, new_shape, new_shape)
return image
def pad_crop_central(image, central_fraction=0.875):
"""Pads the image to the maximum length, crops the central fraction."""
# Pad the image to be square.
image = pad(image)
# Crop the central region of the image with an area containing 87.5% of
# the original image.
image = tf.image.central_crop(image, central_fraction=central_fraction)
return image
def crop_image_by_strategy(image, cropping):
"""Crops an image according to a strategy defined in config.
Args:
image: 3-d image tensor.
cropping: str, name of cropping strategy.
Returns:
image: cropped image.
Raises:
ValueError: When unknown cropping strategy is specified.
"""
strategy_to_method = {
'crop_center': crop_center,
'pad': pad,
'pad200': pad_200,
'pad_crop_central': pad_crop_central
}
tf.logging.info('Cropping strategy: %s.' % cropping)
if cropping not in strategy_to_method:
raise ValueError('Unknown cropping strategy: %s' % cropping)
return strategy_to_method[cropping](image)
def scale_augment_crop(image, central_bbox, area_range, min_object_covered):
"""Training time scale augmentation.
Args:
image: 3-d float tensor.
central_bbox: Bounding box defining the central region of interest.
area_range: Range of allowed areas for the augmented bounding box.
min_object_covered: Constraint for the fraction of original image in
augmented bounding box.
Returns:
distort_image: The scaled, cropped image.
"""
(distorted_image, _) = distorted_bounding_box_crop(
image, central_bbox, area_range=area_range,
aspect_ratio_range=(1.0, 1.0),
min_object_covered=min_object_covered)
# Restore the shape since the dynamic slice based upon the bbox_size loses
# the third dimension.
distorted_image.set_shape([None, None, 3])
return distorted_image
def scale_to_inception_range(image):
"""Scales an image in the range [0,1] to [-1,1] as expected by inception."""
# Assert that incoming images have been properly scaled to [0,1].
with tf.control_dependencies(
[tf.assert_less_equal(tf.reduce_max(image), 1.),
tf.assert_greater_equal(tf.reduce_min(image), 0.)]):
image = tf.subtract(image, 0.5)
image = tf.multiply(image, 2.0)
return image
def resize_image(image, height, width):
"""Resizes an image to a target height and width."""
image = tf.expand_dims(image, 0)
image = tf.image.resize_bilinear(image, [height, width], align_corners=False)
image = tf.squeeze(image, [0])
return image
def crop_or_pad(image, curr_height, curr_width, new, height=True, crop=True):
"""Crops or pads an image.
Args:
image: 3-D float32 `Tensor` image.
curr_height: Int, current height.
curr_width: Int, current width.
new: Int, new width or height.
height: Boolean, cropping or padding for height.
crop: Boolean, True if we're cropping, False if we're padding.
Returns:
image: 3-D float32 `Tensor` image.
"""
# Crop the image to fit the new shape.
abs_diff = tf.abs(new-curr_height)//2 if height else tf.abs(new-curr_width)//2
offset_x = 0 if height else abs_diff
offset_y = abs_diff if height else 0
# We process height first, so always pad/crop to new height.
target_height = new
# We process height first, so pad/crop to new width only if not doing height.
target_width = curr_width if height else new
if crop:
image = tf.image.crop_to_bounding_box(
image, offset_y, offset_x, target_height, target_width)
else:
image = tf.image.pad_to_bounding_box(
image, offset_y, offset_x, target_height, target_width)
return image
def get_central_bbox(min_side, new_size):
"""Gets the central bounding box for an image.
If image is square, returns bounding box [0,0,1,1].
Otherwise, returns the bounding box containing the central
smallest side x smallest side square.
Args:
min_side: Int, size of smallest side in pixels.
new_size: Int, resize image to a square of new_size x new_size pixels.
Returns:
bbox: A 4-D Int `Tensor`, holding the coordinates of the central bounding
box.
"""
max_shape = tf.cast(new_size, tf.float32)
min_shape = tf.cast(min_side, tf.float32)
top_xy = ((max_shape-min_shape)/2)/max_shape
bottom_xy = (min_shape+(max_shape-min_shape)/2)/max_shape
# Create a bbox for the center region of interest.
bbox = tf.stack([[[top_xy, top_xy, bottom_xy, bottom_xy]]])
bbox.set_shape([1, 1, 4])
return bbox
def pad_to_max(image, max_scale):
"""Pads an image to max_scale times the current center crop size.
E.g.: For an image with dimensions 1920x1080 and a max_scale of 1.5,
returns an image that is 1.5 * (1080x1080).
Args:
image: 3-D float32 `Tensor` image.
max_scale: Float, maximum scale of the image, as a multiplier on the
central bounding box.
Returns:
image: 3-D float32 `Tensor` image.
"""
orig_shape = tf.shape(image)
orig_height = orig_shape[0]
orig_width = orig_shape[1]
# Find the smallest side and corresponding new size.
min_side = tf.cast(tf.minimum(orig_height, orig_width), tf.float32)
new_shape = tf.cast(tf.sqrt(max_scale*min_side*min_side), tf.int32)
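  # E.g. a 1920x1080 input with max_scale=1.5 gives min_side=1080 and
  # new_shape = int(1080 * sqrt(1.5)) = 1322, a square with roughly 1.5x the
  # area of the central 1080x1080 crop.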
# Crop or pad height.
# pylint: disable=g-long-lambda
image = tf.cond(
orig_height >= new_shape,
lambda: crop_or_pad(
image, orig_height, orig_width, new_shape, height=True, crop=True),
lambda: crop_or_pad(
image, orig_height, orig_width, new_shape, height=True, crop=False))
# Crop or pad width.
image = tf.cond(
orig_width >= new_shape,
lambda: crop_or_pad(
image, orig_height, orig_width, new_shape, height=False, crop=True),
lambda: crop_or_pad(
image, orig_height, orig_width, new_shape, height=False, crop=False))
# Get the bounding box of the original centered box in the new resized image.
original_bounding_box = get_central_bbox(min_side, new_shape)
return image, original_bounding_box
def scale_up_augmentation(image, max_scale):
"""Scales an image randomly >100% up to some max scale."""
# Pad to max size.
image, original_central_bbox = pad_to_max(image, max_scale)
# Determine area range of the augmented crop, as a percentage of the
# new max area.
# aug_max == 100% of new max area.
aug_max = 1.0
# aug_min == original_area/new_area == original_area/(max_scale*original_area)
# == 1/max_scale.
aug_min = 1.0/max_scale
area_range = (aug_min, aug_max)
# Since we're doing >100% scale, always have the full original crop in frame.
min_object_covered = 1.0
# Get a random scaled, cropped image.
image = scale_augment_crop(image, original_central_bbox, area_range,
min_object_covered)
return image
def scale_down_augmentation(image, min_scale):
"""Scales an image randomly <100% down to some min scale."""
# Crop the center, and consider the whole image the bounding box ROI.
image = crop_center(image)
bbox = tf.constant([0.0, 0.0, 1.0, 1.0], dtype=tf.float32, shape=[1, 1, 4])
# Determine area range of the augmented crop, as a percentage of the
# original crop center area.
# aug_max == 100% of original area.
area_range = (min_scale, 1.0)
# Get a random scaled, cropped image.
image = scale_augment_crop(image, bbox, area_range, min_scale)
return image
def augment_image_scale(image, min_scale, max_scale, p_scale_up):
"""Training time scale augmentation.
Args:
image: 3-d float tensor representing image.
min_scale: minimum scale augmentation allowed, as a fraction of the
central min_side * min_side area of the original image.
max_scale: maximum scale augmentation allowed, as a fraction of the
central min_side * min_side area of the original image.
p_scale_up: Fraction of images scaled up.
Returns:
image: The scale-augmented image.
"""
assert max_scale >= 1.0
assert min_scale <= 1.0
if min_scale == max_scale == 1.0:
    tf.logging.info('Min and max scale are 1.0, skipping scale augmentation.')
# Do no augmentation, just crop the center.
return crop_center(image)
elif (max_scale == 1.0) and (min_scale < 1.0):
tf.logging.info('Max scale is 1.0, only scale down augment.')
# Always do <100% augmentation.
return scale_down_augmentation(image, min_scale)
elif (min_scale == 1.0) and (max_scale > 1.0):
tf.logging.info('Min scale is 1.0, only scale up augment.')
# Always do >100% augmentation.
return scale_up_augmentation(image, max_scale)
else:
tf.logging.info('Sample both augmentations.')
# Choose to scale image up or down.
rn = tf.random_uniform([], minval=0., maxval=1., dtype=tf.float32)
image = tf.cond(rn >= p_scale_up,
lambda: scale_up_augmentation(image, max_scale),
lambda: scale_down_augmentation(image, min_scale))
return image
def decode_image(image_str):
"""Decodes a jpeg-encoded image string into a image in range [0,1]."""
# Decode jpeg string into np.uint8 tensor.
image = tf.image.decode_jpeg(image_str, channels=3)
# Convert the image to range [0,1].
if image.dtype != tf.float32:
image = tf.image.convert_image_dtype(image, dtype=tf.float32)
return image
def decode_images(image_strs):
"""Decodes a tensor of image strings."""
return tf.map_fn(decode_image, image_strs, dtype=tf.float32)
def preprocess_training_images(images, height, width, min_scale, max_scale,
p_scale_up, aug_color=True, fast_mode=True):
"""Preprocesses a batch of images for training.
This applies training-time scale and color augmentation, crops/resizes,
and scales images to the [-1,1] range expected by pre-trained Inception nets.
Args:
images: A 4-D float32 `Tensor` holding raw images to be preprocessed.
height: Int, height in pixels to resize image to.
width: Int, width in pixels to resize image to.
min_scale: Float, minimum scale augmentation allowed, as a fraction of the
central min_side * min_side area of the original image.
max_scale: Float, maximum scale augmentation allowed, as a fraction of the
central min_side * min_side area of the original image.
p_scale_up: Float, fraction of images scaled up.
aug_color: Whether or not to do color augmentation.
fast_mode: Boolean, avoids slower ops (random_hue and random_contrast).
Returns:
preprocessed_images: A 4-D float32 `Tensor` holding preprocessed images.
"""
def _prepro_train(im):
"""Map this preprocessing function over each image in the batch."""
return preprocess_training_image(
im, height, width, min_scale, max_scale, p_scale_up,
aug_color=aug_color, fast_mode=fast_mode)
return tf.map_fn(_prepro_train, images)
def preprocess_training_image(
image, height, width, min_scale, max_scale, p_scale_up,
aug_color=True, fast_mode=True):
"""Preprocesses an image for training.
Args:
image: A 3-d float tensor representing the image.
height: Target image height.
width: Target image width.
min_scale: Minimum scale of bounding box (as a percentage of full
bounding box) used to crop image during scale augmentation.
    max_scale: Maximum scale of bounding box (as a percentage of full
bounding box) used to crop image during scale augmentation.
p_scale_up: Fraction of images to scale >100%.
aug_color: Whether or not to do color augmentation.
fast_mode: Avoids slower ops (random_hue and random_contrast).
Returns:
    scaled_image: A scaled image tensor in the range [-1,1].
"""
# Get a random scaled, cropped image.
image = augment_image_scale(image, min_scale, max_scale, p_scale_up)
# Resize image to desired height, width.
image = tf.expand_dims(image, 0)
image = tf.image.resize_bilinear(image, [height, width], align_corners=False)
image = tf.squeeze(image, [0])
# Optionally augment the color.
# pylint: disable=g-long-lambda
if aug_color:
image = apply_with_random_selector(
image,
lambda x, ordering: distort_color(
x, ordering, fast_mode=fast_mode), num_cases=4)
# Scale to [-1,1] range as expected by inception.
scaled_image = scale_to_inception_range(image)
return scaled_image
def preprocess_test_image(image, height, width, crop_strategy):
"""Preprocesses an image for test/inference.
Args:
image: A 3-d float tensor representing the image.
height: Target image height.
width: Target image width.
crop_strategy: String, name of the strategy used to crop test-time images.
Can be: 'crop_center', 'pad', 'pad_200', 'pad_crop_central'.
Returns:
    scaled_image: A scaled image tensor in the range [-1,1].
"""
image = crop_image_by_strategy(image, crop_strategy)
# Resize.
image = resize_image(image, height, width)
# Scale the input range to [-1,1] as expected by inception.
image = scale_to_inception_range(image)
return image
def preprocess_test_images(images, height, width, crop_strategy):
"""Apply test-time preprocessing to a batch of images.
This crops images (given a named strategy for doing so), resizes them,
and scales them to the [-1,1] range expected by pre-trained Inception nets.
Args:
images: A 4-D float32 `Tensor` holding raw images to be preprocessed.
height: Int, height in pixels to resize image to.
width: Int, width in pixels to resize image to.
crop_strategy: String, name of the strategy used to crop test-time images.
Can be: 'crop_center', 'pad', 'pad_200', 'pad_crop_central'.
Returns:
preprocessed_images: A 4-D float32 `Tensor` holding preprocessed images.
"""
def _prepro_test(im):
"""Map this preprocessing function over each image in the batch."""
return preprocess_test_image(im, height, width, crop_strategy)
if len(images.shape) == 3:
return _prepro_test(images)
else:
return tf.map_fn(_prepro_test, images)
def preprocess_images(
images, is_training, height, width,
min_scale=1.0, max_scale=1.0, p_scale_up=0.0,
aug_color=True, fast_mode=True,
crop_strategy='pad_crop_central'):
"""Preprocess a batch of images.
Args:
images: A 4-D float32 `Tensor` holding raw images to be preprocessed.
is_training: Boolean, whether to preprocess them for training or test.
height: Int, height in pixels to resize image to.
width: Int, width in pixels to resize image to.
min_scale: Float, minimum scale augmentation allowed, as a fraction of the
central min_side * min_side area of the original image.
max_scale: Float, maximum scale augmentation allowed, as a fraction of the
central min_side * min_side area of the original image.
p_scale_up: Float, fraction of images scaled up.
aug_color: Whether or not to do color augmentation.
fast_mode: Boolean, avoids slower ops (random_hue and random_contrast).
crop_strategy: String, name of the strategy used to crop test-time images.
Can be: 'crop_center', 'pad', 'pad_200', 'pad_crop_central'.
Returns:
preprocessed_images: A 4-D float32 `Tensor` holding preprocessed images.
"""
if is_training:
return preprocess_training_images(
images, height, width, min_scale, max_scale,
p_scale_up, aug_color, fast_mode)
else:
return preprocess_test_images(
images, height, width, crop_strategy)
def cv2rotateimage(image, angle):
"""Efficient rotation if 90 degrees rotations, slow otherwise.
Not a tensorflow function, using cv2 and scipy on numpy arrays.
Args:
image: a numpy array with shape [height, width, channels].
angle: the rotation angle in degrees in the range [-180, 180].
Returns:
The rotated image.
"""
# Limit angle to [-180, 180] degrees.
assert angle <= 180 and angle >= -180
if angle == 0:
return image
# Efficient rotations.
if angle == -90:
image = cv2.transpose(image)
image = cv2.flip(image, 0)
elif angle == 90:
image = cv2.transpose(image)
image = cv2.flip(image, 1)
elif angle == 180 or angle == -180:
image = cv2.flip(image, 0)
image = cv2.flip(image, 1)
else: # Slow rotation.
    image = ndimage.interpolation.rotate(image, angle)
return image
def cv2resizeminedge(image, min_edge_size):
"""Resize smallest edge of image to min_edge_size."""
assert min_edge_size >= 0
height, width = (image.shape[0], image.shape[1])
new_height, new_width = (0, 0)
if height > width:
new_width = min_edge_size
new_height = int(height * new_width / float(width))
else:
new_height = min_edge_size
new_width = int(width * new_height / float(height))
return cv2.resize(image, (new_width, new_height),
interpolation=cv2.INTER_AREA)
def shapestring(array):
"""Returns a compact string describing shape of an array."""
shape = array.shape
s = str(shape[0])
for i in range(1, len(shape)):
s += 'x' + str(shape[i])
return s
def unscale_jpeg_encode(ims):
"""Unscales pixel values and jpeg encodes preprocessed image.
Args:
ims: A 4-D float32 `Tensor` holding preprocessed images.
Returns:
im_strings: A 1-D string `Tensor` holding images that have been unscaled
(reversing the inception [-1,1] scaling), and jpeg encoded.
"""
ims /= 2.0
ims += 0.5
ims *= 255.0
ims = tf.clip_by_value(ims, 0, 255)
ims = tf.cast(ims, tf.uint8)
im_strings = tf.map_fn(
lambda x: tf.image.encode_jpeg(x, format='rgb', quality=100),
ims, dtype=tf.string)
return im_strings