Unverified Commit 00fa8b12 authored by cclauss, committed by GitHub

Merge branch 'master' into patch-13

parents 6d257a4f 1f34fcaf
......@@ -45,7 +45,7 @@ class DragnnModelSaverLibTest(test_util.TensorFlowTestCase):
master_spec = spec_pb2.MasterSpec()
root_dir = os.path.join(FLAGS.test_srcdir,
'dragnn/python')
with file(os.path.join(root_dir, 'testdata', spec_path), 'r') as fin:
with open(os.path.join(root_dir, 'testdata', spec_path), 'r') as fin:
text_format.Parse(fin.read().replace('TOPDIR', root_dir), master_spec)
return master_spec
......
......@@ -28,7 +28,7 @@ from syntaxnet.util import check
try:
tf.NotDifferentiable('ExtractFixedFeatures')
except KeyError, e:
except KeyError as e:
logging.info(str(e))
......
......@@ -20,6 +20,7 @@ import os.path
import numpy as np
from six.moves import xrange
import tensorflow as tf
from google.protobuf import text_format
......@@ -245,7 +246,7 @@ class GraphBuilderTest(test_util.TensorFlowTestCase):
master_spec = spec_pb2.MasterSpec()
testdata = os.path.join(FLAGS.test_srcdir,
'dragnn/core/testdata')
with file(os.path.join(testdata, spec_path), 'r') as fin:
with open(os.path.join(testdata, spec_path), 'r') as fin:
text_format.Parse(fin.read().replace('TESTDATA', testdata), master_spec)
return master_spec
......
......@@ -22,6 +22,7 @@ import abc
import numpy as np
from six.moves import xrange
import tensorflow as tf
from tensorflow.python.ops import nn
from tensorflow.python.ops import tensor_array_ops as ta
......
......@@ -15,6 +15,7 @@
"""Utils for building DRAGNN specs."""
from six.moves import xrange
import tensorflow as tf
from dragnn.protos import spec_pb2
......
......@@ -23,6 +23,7 @@ import random
import tensorflow as tf
from six.moves import xrange
from tensorflow.core.framework.summary_pb2 import Summary
from tensorflow.python.framework import errors
from tensorflow.python.platform import gfile
......
......@@ -88,12 +88,12 @@ def main(unused_argv):
sentence.ParseFromString(d)
tr = asciitree.LeftAligned()
d = to_dict(sentence)
print 'Input: %s' % sentence.text
print 'Parse:'
print('Input: %s' % sentence.text)
print('Parse:')
tr_str = tr(d)
pat = re.compile(r'\s*@\d+$')
for tr_ln in tr_str.splitlines():
print pat.sub('', tr_ln)
print(pat.sub('', tr_ln))
if finished:
break
......
......@@ -140,7 +140,7 @@ class LexiconBuilderTest(test_util.TensorFlowTestCase):
self.assertTrue(last)
def ValidateTagToCategoryMap(self):
with file(os.path.join(FLAGS.test_tmpdir, 'tag-to-category'), 'r') as f:
with open(os.path.join(FLAGS.test_tmpdir, 'tag-to-category'), 'r') as f:
entries = [line.strip().split('\t') for line in f.readlines()]
for tag, category in entries:
self.assertIn(tag, TAGS)
......@@ -148,7 +148,7 @@ class LexiconBuilderTest(test_util.TensorFlowTestCase):
def LoadMap(self, map_name):
loaded_map = {}
with file(os.path.join(FLAGS.test_tmpdir, map_name), 'r') as f:
with open(os.path.join(FLAGS.test_tmpdir, map_name), 'r') as f:
for line in f:
entries = line.strip().split(' ')
if len(entries) >= 2:
......
package(default_visibility = [":internal"])
licenses(["notice"]) # Apache 2.0
exports_files(["LICENSE"])
package_group(
name = "internal",
packages = [
"//tcn/...",
],
)
py_binary(
name = "download_pretrained",
srcs = [
"download_pretrained.py",
],
)
py_binary(
name = "generate_videos",
srcs = [
"generate_videos.py",
],
main = "generate_videos.py",
deps = [
":data_providers",
":get_estimator",
":util",
],
)
py_test(
name = "svtcn_loss_test",
size = "medium",
srcs = [
"estimators/svtcn_loss.py",
"estimators/svtcn_loss_test.py",
],
deps = [
":util",
],
)
py_library(
name = "data_providers",
srcs = [
"data_providers.py",
],
deps = [
":preprocessing",
],
)
py_test(
name = "data_providers_test",
size = "large",
srcs = ["data_providers_test.py"],
deps = [
":data_providers",
],
)
py_library(
name = "preprocessing",
srcs = [
"preprocessing.py",
],
)
py_binary(
name = "get_estimator",
srcs = [
"estimators/get_estimator.py",
],
deps = [
":mvtcn_estimator",
":svtcn_estimator",
],
)
py_binary(
name = "base_estimator",
srcs = [
"estimators/base_estimator.py",
"model.py",
],
deps = [
":data_providers",
":util",
],
)
py_library(
name = "util",
srcs = [
"utils/luatables.py",
"utils/progress.py",
"utils/util.py",
],
)
py_binary(
name = "mvtcn_estimator",
srcs = [
"estimators/mvtcn_estimator.py",
],
deps = [
":base_estimator",
],
)
py_binary(
name = "svtcn_estimator",
srcs = [
"estimators/svtcn_estimator.py",
"estimators/svtcn_loss.py",
],
deps = [
":base_estimator",
],
)
py_binary(
name = "train",
srcs = [
"train.py",
],
deps = [
":data_providers",
":get_estimator",
":util",
],
)
py_binary(
name = "labeled_eval",
srcs = [
"labeled_eval.py",
],
deps = [
":get_estimator",
],
)
py_test(
name = "labeled_eval_test",
size = "small",
srcs = ["labeled_eval_test.py"],
deps = [
":labeled_eval",
],
)
py_binary(
name = "eval",
srcs = [
"eval.py",
],
deps = [
":get_estimator",
],
)
py_binary(
name = "alignment",
srcs = [
"alignment.py",
],
deps = [
":get_estimator",
],
)
py_binary(
name = "visualize_embeddings",
srcs = [
"visualize_embeddings.py",
],
deps = [
":data_providers",
":get_estimator",
":util",
],
)
py_binary(
name = "webcam",
srcs = [
"dataset/webcam.py",
],
main = "dataset/webcam.py",
)
py_binary(
name = "images_to_videos",
srcs = [
"dataset/images_to_videos.py",
],
main = "dataset/images_to_videos.py",
)
py_binary(
name = "videos_to_tfrecords",
srcs = [
"dataset/videos_to_tfrecords.py",
],
main = "dataset/videos_to_tfrecords.py",
deps = [
":preprocessing",
],
)
# Time Contrastive Networks
This implements ["Time Contrastive Networks"](https://arxiv.org/abs/1704.06888),
which is part of the larger [Self-Supervised Imitation
Learning](https://sermanet.github.io/imitation/) project.
![](https://sermanet.github.io/tcn/docs/figs/mvTCN.png)
## Contacts
Maintainers of TCN:
* Corey Lynch: [github](https://github.com/coreylynch),
[twitter](https://twitter.com/coreylynch)
* Pierre Sermanet: [github](https://github.com/sermanet),
[twitter](https://twitter.com/psermanet)
## Contents
* [Getting Started](#getting-started)
* [Install Dependencies](#install-dependencies)
* [Download the Inception v3
Checkpoint](#download-pretrained-inceptionv3-checkpoint)
* [Run all the tests](#run-all-the-tests)
* [Concepts](#concepts)
* [Multi-view Webcam Video](#multi-view-webcam-video)
* [Data Pipelines](#data-pipelines)
* [Estimators](#estimators)
* [Models](#models)
* [Losses](#losses)
* [Inference](#inference)
* [Configuration](#configuration)
* [Monitoring Training](#monitoring-training)
* [KNN Classification Error](#knn-classification-error)
* [Multi-view Alignment](#multi-view-alignment)
* [Visualization](#visualization)
* [Nearest Neighbor Imitation
Videos](#nearest-neighbor-imitation-videos)
* [PCA & T-SNE Visualization](#pca-t-sne-visualization)
* [Tutorial Part I: Collecting Multi-View Webcam
Videos](#tutorial-part-i-collecting-multi-view-webcam-videos)
* [Collect Webcam Videos](#collect-webcam-videos)
* [Create TFRecords](#create-tfrecords)
* [Tutorial Part II: Training, Evaluation, and
Visualization](#tutorial-part-ii-training-evaluation-and-visualization)
* [Download Data](#download-data)
* [Download the Inception v3
Checkpoint](#download-pretrained-inceptionv3-checkpoint)
* [Define a Config](#define-a-config)
* [Train](#train)
* [Evaluate](#evaluate)
* [Monitor training](#monitor-training)
* [Visualize](#visualize)
* [Generate Imitation Videos](#generate-imitation-videos)
* [Run PCA & T-SNE Visualization](#t-sne-pca-visualization)
## Getting started
### Install Dependencies
* [Tensorflow nightly build](https://pypi.python.org/pypi/tf-nightly-gpu) or
via `pip install tf-nightly-gpu`.
* [Bazel](http://bazel.io/docs/install.html)
* matplotlib
* sklearn
* opencv
### Download Pretrained InceptionV3 Checkpoint
Run the script that downloads the pretrained InceptionV3 checkpoint:
```bash
cd tensorflow-models/tcn
python download_pretrained.py
```
### Run all the tests
```bash
bazel test :all
```
## Concepts
### Multi-View Webcam Video
We provide utilities to collect your own multi-view videos in `dataset/webcam.py`.
See the [webcam tutorial](#tutorial-part-i-collecting-multi-view-webcam-videos)
for an end to end example of how to collect multi-view webcam data and convert
it to the TFRecord format expected by this library.
## Data Pipelines
We use the [tf.data.Dataset
API](https://www.tensorflow.org/programmers_guide/datasets) to construct input
pipelines that feed training, evaluation, and visualization. These pipelines are
defined in `data_providers.py`.
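A minimal sketch of the pattern (not the exact pipeline in `data_providers.py`,
which also parses `SequenceExample`s and applies preprocessing):
```python
import tensorflow as tf

# Sketch only: a TF 1.x tf.data pipeline over TFRecord files. The paths and the
# identity map below are placeholders for the real parsing/preprocessing steps.
filenames = ['/path/to/0.tfrecord', '/path/to/1.tfrecord']  # hypothetical paths
dataset = tf.data.TFRecordDataset(filenames)
dataset = dataset.map(lambda serialized: serialized, num_parallel_calls=12)
dataset = dataset.batch(32)
dataset = dataset.prefetch(12)
iterator = dataset.make_one_shot_iterator()
next_batch = iterator.get_next()  # Feed this to training / eval / visualization.
```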
## Estimators
We define training, evaluation, and inference behavior using the
[tf.estimator.Estimator
API](https://www.tensorflow.org/api_docs/python/tf/estimator/Estimator). See
`estimators/mvtcn_estimator.py` for an example of how multi-view TCN training,
evaluation, and inference is implemented.
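As a rough illustration of the Estimator pattern (not the actual TCN model
function), a `model_fn` wires features to an embedding, a loss, and a train op:
```python
import tensorflow as tf

def model_fn(features, labels, mode, params):
  # Hypothetical stand-in for the TCN embedder: a single dense layer mapping
  # flattened image features to an embedding.
  embeddings = tf.layers.dense(features['images_flat'], params['embedding_size'])
  if mode == tf.estimator.ModeKeys.PREDICT:
    return tf.estimator.EstimatorSpec(mode, predictions={'embeddings': embeddings})
  # Placeholder loss; the real estimators plug in the metric learning losses.
  loss = tf.reduce_mean(tf.square(embeddings))
  train_op = tf.train.AdamOptimizer(1e-3).minimize(
      loss, global_step=tf.train.get_global_step())
  return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)

estimator = tf.estimator.Estimator(
    model_fn=model_fn, model_dir='/tmp/tcn/example',  # hypothetical model dir
    params={'embedding_size': 32})
```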
## Models
Different embedder architectures are implemented in model.py. We used the
`InceptionConvSSFCEmbedder` in the pouring experiments, but we're also
evaluating `Resnet` embedders.
## Losses
We use the
[tf.contrib.losses.metric_learning](https://www.tensorflow.org/versions/master/api_docs/python/tf/contrib/losses/metric_learning)
library's implementations of triplet loss with semi-hard negative mining and
npairs loss. In our experiments, npairs loss has better empirical convergence
and produces the best qualitative visualizations, and will likely be our choice
for future experiments. See the
[paper](http://www.nec-labs.com/uploads/images/Department-Images/MediaAnalytics/papers/nips16_npairmetriclearning.pdf)
for details on the algorithm.
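For reference, the contrib metric-learning losses are invoked roughly as below;
how anchors, positives, and labels are built from TCN batches is handled by the
estimator and data provider code.
```python
import tensorflow as tf

# Sketch: a batch of embeddings with integer labels, where an anchor and its
# positive share a label (this is how the data provider assigns labels).
embeddings = tf.placeholder(tf.float32, [None, 32])
labels = tf.placeholder(tf.int32, [None])

# Triplet loss with semi-hard negative mining.
triplet = tf.contrib.losses.metric_learning.triplet_semihard_loss(
    labels=labels, embeddings=embeddings, margin=0.2)

# Npairs loss takes anchors and positives separately, plus their shared labels.
anchors = tf.placeholder(tf.float32, [None, 32])
positives = tf.placeholder(tf.float32, [None, 32])
pair_labels = tf.placeholder(tf.int32, [None])
npairs = tf.contrib.losses.metric_learning.npairs_loss(
    labels=pair_labels, embeddings_anchor=anchors, embeddings_positive=positives)
```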
## Inference
We support 3 modes of inference for trained TCN models:
* Mode 1: Input is a tf.Estimator input_fn (see
[this](https://www.tensorflow.org/api_docs/python/tf/estimator/Estimator#predict)
for details). Output is an iterator over embeddings and additional metadata.
See `labeled_eval.py` for a usage example.
* Mode 2: Input is a TFRecord (or a list of TFRecords). This returns an
iterator over tuples of (embeddings, raw_image_strings, sequence_name),
where embeddings is the [num views, sequence length, embedding size] numpy
array holding the full embedded sequence (for all views), raw_image_strings
is a [num views, sequence length] string array holding the jpeg-encoded raw
image strings, and sequence_name is the name of the sequence. See
`generate_videos.py` for a usage example.
* Mode 3: Input is a numpy array of size [num images, height, width, num
channels]. This returns a tuple of (embeddings, raw_image_strings), where
embeddings is a 2-D float32 numpy array holding [num_images, embedding_size]
image embeddings, and raw_image_strings is a 1-D string numpy array holding
[batch_size] jpeg-encoded image strings. This can be used as follows:
```python
images = np.random.uniform(0, 1, size=(batch_size, 1080, 1920, 3))
embeddings, _ = estimator.inference(
images, checkpoint_path=checkpoint_path)
```
See `estimators/base_estimator.py` for details.
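Mode 2 usage looks roughly like the loop in `alignment.py`; `estimator`,
`records`, `checkpoint_path`, and `batch_size` are assumed to be defined as in
that script:
```python
# Sketch of Mode 2, modeled on the loop in alignment.py.
for embeddings, raw_image_strings, sequence_name in estimator.inference(
    records, checkpoint_path, batch_size):
  # embeddings: [num views, sequence length, embedding size], one row per view.
  print(sequence_name, embeddings.shape)
```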
## Configuration
Data pipelines, training, eval, and visualization are all configured using
key-value parameters passed as [YAML](https://en.wikipedia.org/wiki/YAML) files.
Configurations can be nested, e.g.:
```yaml
learning:
optimizer: 'adam'
learning_rate: 0.001
```
### T objects
YAML configs are converted to LuaTable-like `T` objects (see
`utils/luatables.py`), which behave like a python `dict` but allow you to use
dot notation to access (nested) keys. For example, we could access the learning
rate in the above config snippet via `config.learning.learning_rate`.
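A minimal sketch of the idea (the real implementation lives in
`utils/luatables.py`):
```python
class T(dict):
  """Minimal sketch: a dict with attribute (dot) access to its keys."""

  def __getattr__(self, key):
    try:
      return self[key]
    except KeyError:
      raise AttributeError(key)

  def __setattr__(self, key, value):
    self[key] = value


config = T(learning=T(optimizer='adam', learning_rate=0.001))
assert config.learning.learning_rate == 0.001
```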
### Multiple Configs
Multiple configs can be passed to the various binaries as a comma separated list
of config paths via the `--config_paths` flag. This allows us to specify a
default config that applies to all experiments (e.g. how often to write
checkpoints, default embedder hyperparams) and one config per experiment holding
just the hyperparams specific to that experiment (path to data, etc.).
See `configs/tcn_default.yml` for an example of our default config and
`configs/pouring.yml` for an example of how we define the pouring experiments.
Configs are applied left to right. For example, consider two config files:
default.yml
```yaml
learning:
learning_rate: 0.001 # Default learning rate.
optimizer: 'adam'
```
myexperiment.yml
```yaml
learning:
learning_rate: 1.0 # Experiment learning rate (overwrites default).
data:
training: '/path/to/myexperiment/training.tfrecord'
```
Running
```bash
bazel run train.py --config_paths='default.yml,myexperiment.yml'
```
results in a final merged config called final_training_config.yml
```yaml
learning:
optimizer: 'adam'
learning_rate: 1.0
data:
training: '/path/to/myexperiment/training.tfrecord'
```
which is created automatically and stored in the experiment log directory
alongside model checkpoints and tensorboard summaries. This gives us a record of
the exact configs that went into each trial.
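Conceptually, the left-to-right merge behaves like a recursive dict update; a
rough sketch (not the actual `util.ParseConfigsToLuaTable` loader):
```python
import yaml

def merge(base, override):
  """Recursively merge `override` into `base`; keys in later configs win."""
  for key, value in override.items():
    if isinstance(value, dict) and isinstance(base.get(key), dict):
      merge(base[key], value)
    else:
      base[key] = value
  return base

final_config = {}
for path in ['default.yml', 'myexperiment.yml']:
  with open(path) as f:
    merge(final_config, yaml.safe_load(f) or {})
```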
## Monitoring training
We usually look at two validation metrics during training: knn classification
error and multi-view alignment.
### KNN-Classification Error
In cases where we have labeled validation data, we can compute the average
cross-sequence KNN classification error (1.0 - recall@k=1) over all embedded
labeled images in the validation set. See `labeled_eval.py`.
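A rough numpy sketch of the metric (the real evaluation in `labeled_eval.py`
additionally restricts neighbors to other sequences):
```python
import numpy as np

def knn_classification_error(embeddings, labels):
  """Sketch of 1.0 - recall@k=1: fraction of embedded images whose nearest
  neighbor (excluding themselves) carries a different label."""
  embeddings = np.asarray(embeddings, dtype=np.float32)
  labels = np.asarray(labels)
  dists = np.linalg.norm(embeddings[:, None] - embeddings[None, :], axis=-1)
  np.fill_diagonal(dists, np.inf)  # Never match an image to itself.
  nearest = np.argmin(dists, axis=1)
  return float(np.mean(labels[nearest] != labels))
```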
### Multi-view Alignment
In cases where there is no labeled validation data, we can look at how well
our model aligns multiple views of the same embedded validation sequences. That is,
for each embedded validation sequence, for all cross-view pairs, we compute the
scaled absolute distance between ground truth time indices and knn time indices.
See `alignment.py`.
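A rough numpy sketch of the per-pair computation (see `alignment.py` for the
full version, which averages over all sequences and view pairs):
```python
import numpy as np

def view_pair_alignment(embeddings_i, embeddings_j):
  """Sketch of the alignment metric for a single cross-view pair:
  |time_i - knn_time| / sequence_length, averaged over frames."""
  embeddings_i = np.asarray(embeddings_i)
  embeddings_j = np.asarray(embeddings_j)
  seq_len = len(embeddings_i)
  dists = np.linalg.norm(
      embeddings_i[:, None] - embeddings_j[None, :], axis=-1)
  knn_times = np.argmin(dists, axis=1)  # Nearest frame in view j per frame in view i.
  times = np.arange(seq_len)
  return float(np.mean(np.abs(times - knn_times) / float(seq_len)))
```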
## Visualization
We visualize the embedding space learned by our models in two ways: nearest
neighbor imitation videos and PCA/T-SNE.
### Nearest Neighbor Imitation Videos
One of the easiest ways to evaluate your model's understanding is to see how
well the model can semantically align two videos via nearest neighbors in
embedding space.
Consider the case where we have multiple validation demo videos of a human or
robot performing the same task. For example, in the pouring experiments, we
collected many different multiview validation videos of a person pouring the
contents of one container into another, then setting the container down. If we'd
like to see how well our embeddings generalize across viewpoint, object/agent
appearance, and background, we can construct what we call "Nearest Neighbor
Imitation" videos, by embedding some validation query sequence `i` from view 1,
and finding the nearest neighbor for each query frame in some embedded target
sequence `j` filmed from view 1.
[Here's](https://sermanet.github.io/tcn/docs/figs/pouring_human.mov.gif) an
example of the final product.
See `generate_videos.py` for details.
### PCA & T-SNE Visualization
We can also embed a set of images taken randomly from validation videos and
visualize the embedding space using PCA projection and T-SNE in the tensorboard
projector. See `visualize_embeddings.py` for details.
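Under the hood this relies on the tensorboard projector plugin; below is a
minimal sketch of hooking an embedding variable into the projector. The names
and paths are hypothetical, and `visualize_embeddings.py` also writes the
sprite image and metadata referenced here.
```python
import tensorflow as tf
from tensorflow.contrib.tensorboard.plugins import projector

embedding_var = tf.Variable(tf.zeros([1000, 32]), name='tcn_embeddings')
log_dir = '/tmp/tcn/pouring/tcn_viz/embedding_viz'
writer = tf.summary.FileWriter(log_dir)

config = projector.ProjectorConfig()
emb = config.embeddings.add()
emb.tensor_name = embedding_var.name
emb.metadata_path = 'metadata.tsv'    # Per-row labels (assumed written separately).
emb.sprite.image_path = 'sprite.png'  # Thumbnail sprite (assumed written separately).
emb.sprite.single_image_dim.extend([64, 64])
projector.visualize_embeddings(writer, config)

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  tf.train.Saver([embedding_var]).save(sess, log_dir + '/model.ckpt')
```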
## Tutorial Part I: Collecting Multi-View Webcam Videos
Here we give an end-to-end example of how to collect your own multiview webcam
videos and convert them to the TFRecord format expected by training.
Note: This was tested with up to 8 concurrent [Logitech c930e
webcams](https://www.logitech.com/en-us/product/c930e-webcam) extended with
[Plugable 5 Meter (16 Foot) USB 2.0 Active Repeater Extension
Cables](https://www.amazon.com/gp/product/B006LFL4X0/ref=oh_aui_detailpage_o05_s00?ie=UTF8&psc=1).
### Collect webcam videos
Go to `dataset/webcam.py`.
1. Plug your webcams in and run
```bash
ls -ltrh /dev/video*
```
You should see one device listed per connected webcam.
2. Define some environment variables describing the dataset you're collecting.
```bash
dataset=tutorial # Name of the dataset.
mode=train # E.g. 'train', 'validation', 'test', 'demo'.
num_views=2 # Number of webcams.
viddir=/tmp/tcn/videos # Output directory for the videos.
tmp_imagedir=/tmp/tcn/tmp_images # Temp directory to hold images.
debug_vids=1 # Whether or not to generate side-by-side debug videos.
export DISPLAY=:0.0 # This allows real time matplotlib display.
```
3. Run the webcam.py script.
```bash
bazel build -c opt --copt=-mavx webcam && \
bazel-bin/webcam \
--dataset $dataset \
--mode $mode \
--num_views $num_views \
--tmp_imagedir $tmp_imagedir \
--viddir $viddir \
--debug_vids 1
```
4. Hit Ctrl-C when done collecting, upon which the script will compile videos
for each view and optionally a debug video concatenating multiple
simultaneous views.
5. If the `--seqname` flag isn't set, the script will name the first sequence '0',
the second sequence '1', and so on (meaning you can just keep rerunning step
3.). When you are finished, you should see an output viddir with the
following structure:
```bash
videos/0_view0.mov
videos/0_view1.mov
...
videos/0_viewM.mov
videos/1_viewM.mov
...
videos/N_viewM.mov
for N sequences and M webcam views.
```
### Create TFRecords
Use `dataset/videos_to_tfrecords.py` to convert the directory of videos into a
directory of TFRecords files, one per multi-view sequence.
```bash
viddir=/tmp/tcn/videos
dataset=tutorial
mode=train
videos=$viddir/$dataset
bazel build -c opt videos_to_tfrecords && \
bazel-bin/videos_to_tfrecords --logtostderr \
--input_dir $videos/$mode \
--output_dir ~/tcn_data/$dataset/$mode \
--max_per_shard 400
```
Setting `--max_per_shard` > 0 allows you to shard training data. We've observed
that sharding long training sequences provides better performance in terms of
global steps/sec.
This should be left at the default of 0 for validation / test data.
You should now have a directory of TFRecords files with the following structure:
```bash
output_dir/0.tfrecord
...
output_dir/N.tfrecord
1 TFRecord file for each of N multi-view sequences.
```
Now we're ready to move on to part II: training, evaluation, and visualization.
## Tutorial Part II: Training, Evaluation, and Visualization
Here we give an end-to-end example of how to train, evaluate, and visualize the
embedding space learned by TCN models.
### Download Data
We will be using the 'Multiview Pouring' dataset, which can be downloaded using
the download.sh script
[here](https://sites.google.com/site/brainrobotdata/home/multiview-pouring).
The rest of the tutorial will assume that you have your data downloaded to a
folder at `~/tcn_data`.
```bash
mkdir ~/tcn_data
mv ~/Downloads/download.sh ~/tcn_data
cd ~/tcn_data
./download.sh
```
You should now have the following path containing all the data:
```bash
ls ~/tcn_data/multiview-pouring
labels README.txt tfrecords videos
```
### Download Pretrained Inception Checkpoint
If you haven't already, run the script that downloads the pretrained InceptionV3
checkpoint:
```bash
python download_pretrained.py
```
### Define A Config
For our experiment, we create 2 configs:
* `configs/tcn_default.yml`: This contains all the default hyperparameters
that generally don't vary across experiments.
* `configs/pouring.yml`: This contains all the hyperparameters that are
specific to the pouring experiment.
Important note about `configs/pouring.yml`:
* data.eval_cropping: We use 'pad200' for the pouring dataset, which was
filmed rather close up on iPhone cameras. A better choice for data filmed on
webcam is likely 'crop_center'. See preprocessing.py for options.
### Train
Run the training binary:
```bash
logdir=/tmp/tcn/pouring
c=configs
configs=$c/tcn_default.yml,$c/pouring.yml
bazel build -c opt --copt=-mavx --config=cuda train && \
bazel-bin/train \
--config_paths $configs --logdir $logdir
```
### Evaluate
Run the binary that computes running validation loss. Set `export
CUDA_VISIBLE_DEVICES=` to run on CPU.
```bash
bazel build -c opt --copt=-mavx eval && \
bazel-bin/eval \
--config_paths $configs --logdir $logdir
```
Run the binary that computes running validation cross-view sequence alignment.
Set `export CUDA_VISIBLE_DEVICES=` to run on CPU.
```bash
bazel build -c opt --copt=-mavx alignment && \
bazel-bin/alignment \
--config_paths $configs --checkpointdir $logdir --outdir $logdir
```
Run the binary that computes running labeled KNN validation error. Set `export
CUDA_VISIBLE_DEVICES=` to run on CPU.
```bash
bazel build -c opt --copt=-mavx labeled_eval && \
bazel-bin/labeled_eval \
--config_paths $configs --checkpointdir $logdir --outdir $logdir
```
### Monitor training
Run `tensorboard --logdir=$logdir`. After a bit of training, you should see
curves that look like this:
#### Training loss
<img src="g3doc/loss.png" title="Training Loss" />
#### Validation loss
<img src="g3doc/val_loss.png" title="Validation Loss" />
#### Validation Alignment
<img src="g3doc/alignment.png" title="Validation Alignment" />
#### Average Validation KNN Classification Error
<img src="g3doc/avg_error.png" title="Validation Average KNN Error" />
#### Individual Validation KNN Classification Errors
<img src="g3doc/all_error.png" title="All Validation Average KNN Errors" />
### Visualize
To visualize the embedding space learned by a model, we can:
#### Generate Imitation Videos
```bash
# Use the automatically generated final config file as config.
configs=$logdir/final_training_config.yml
# Visualize checkpoint 40001.
checkpoint_iter=40001
# Use validation records for visualization.
records=~/tcn_data/multiview-pouring/tfrecords/val
# Write videos to this location.
outdir=$logdir/tcn_viz/imitation_vids
```
```bash
bazel build -c opt --config=cuda --copt=-mavx generate_videos && \
bazel-bin/generate_videos \
--config_paths $configs \
--checkpointdir $logdir \
--checkpoint_iter $checkpoint_iter \
--query_records_dir $records \
--target_records_dir $records \
--outdir $outdir
```
After the script completes, you should see a directory of videos with names
like:
`$outdir/qtrain_clearodwalla_to_clear1_realv1_imtrain_clearsoda_to_white13_realv0.mp4`
that look like this: <img src="g3doc/im.gif" title="Imitation Video" />
#### T-SNE / PCA Visualization
Run the binary that generates embeddings and metadata.
```bash
outdir=$logdir/tcn_viz/embedding_viz
bazel build -c opt --config=cuda --copt=-mavx visualize_embeddings && \
bazel-bin/visualize_embeddings \
--config_paths $configs \
--checkpointdir $logdir \
--checkpoint_iter $checkpoint_iter \
--embedding_records $records \
--outdir $outdir \
--num_embed 1000 \
--sprite_dim 64
```
Run tensorboard, pointed at the embedding viz output directory.
```bash
tensorboard --logdir=$outdir
```
You should see something like this in tensorboard.
<img src="g3doc/pca.png" title="PCA" />
workspace(name = "tcn")
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Calculates test sequence alignment score."""
from __future__ import absolute_import
from __future__ import division
import os
import numpy as np
from estimators.get_estimator import get_estimator
from utils import util
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.INFO)
tf.flags.DEFINE_string(
'config_paths', '',
"""
Path to YAML configuration files defining FLAG values. Multiple files
can be separated by the `#` symbol. Files are merged recursively. Setting
a key in these files is equivalent to setting the FLAG value with
the same name.
""")
tf.flags.DEFINE_string(
'model_params', '{}', 'YAML configuration string for the model parameters.')
tf.app.flags.DEFINE_string(
'checkpoint_iter', '', 'Evaluate this specific checkpoint.')
tf.app.flags.DEFINE_string(
'checkpointdir', '/tmp/tcn', 'Path to model checkpoints.')
tf.app.flags.DEFINE_string('outdir', '/tmp/tcn', 'Path to write summaries to.')
FLAGS = tf.app.flags.FLAGS
def compute_average_alignment(
seqname_to_embeddings, num_views, summary_writer, training_step):
"""Computes the average cross-view alignment for all sequence view pairs.
Args:
seqname_to_embeddings: Dict, mapping sequence name to a
[num_views, embedding size] numpy matrix holding all embedded views.
num_views: Int, number of simultaneous views in the dataset.
summary_writer: A `SummaryWriter` object.
training_step: Int, the training step of the model used to embed images.
Alignment is the scaled absolute difference between the ground truth time
and the knn aligned time.
|time_i - knn_time| / sequence_length
"""
all_alignments = []
for _, view_embeddings in seqname_to_embeddings.items():
for idx_i in range(num_views):
for idx_j in range(idx_i+1, num_views):
embeddings_view_i = view_embeddings[idx_i]
embeddings_view_j = view_embeddings[idx_j]
seq_len = len(embeddings_view_i)
times_i = np.array(range(seq_len))
# Get the nearest time_index for each embedding in view_i.
times_j = np.array([util.KNNIdsWithDistances(
q, embeddings_view_j, k=1)[0][0] for q in embeddings_view_i])
# Compute sequence view pair alignment.
alignment = np.mean(
np.abs(np.array(times_i)-np.array(times_j))/float(seq_len))
all_alignments.append(alignment)
print('alignment so far %f' % alignment)
average_alignment = np.mean(all_alignments)
print('Average alignment %f' % average_alignment)
summ = tf.Summary(value=[tf.Summary.Value(
tag='validation/alignment', simple_value=average_alignment)])
summary_writer.add_summary(summ, int(training_step))
def evaluate_once(
config, checkpointdir, validation_records, checkpoint_path, batch_size,
num_views):
"""Evaluates and reports the validation alignment."""
# Choose an estimator based on training strategy.
estimator = get_estimator(config, checkpointdir)
# Embed all validation sequences.
seqname_to_embeddings = {}
for (view_embeddings, _, seqname) in estimator.inference(
validation_records, checkpoint_path, batch_size):
seqname_to_embeddings[seqname] = view_embeddings
# Compute and report alignment statistics.
ckpt_step = int(checkpoint_path.split('-')[-1])
summary_dir = os.path.join(FLAGS.outdir, 'alignment_summaries')
summary_writer = tf.summary.FileWriter(summary_dir)
compute_average_alignment(
seqname_to_embeddings, num_views, summary_writer, ckpt_step)
def main(_):
# Parse config dict from yaml config files / command line flags.
config = util.ParseConfigsToLuaTable(FLAGS.config_paths, FLAGS.model_params)
num_views = config.data.num_views
validation_records = util.GetFilesRecursively(config.data.validation)
batch_size = config.data.batch_size
checkpointdir = FLAGS.checkpointdir
# If evaluating a specific checkpoint, do that.
if FLAGS.checkpoint_iter:
checkpoint_path = os.path.join(
'%s/model.ckpt-%s' % (checkpointdir, FLAGS.checkpoint_iter))
evaluate_once(
config, checkpointdir, validation_records, checkpoint_path, batch_size,
num_views)
else:
for checkpoint_path in tf.contrib.training.checkpoints_iterator(
checkpointdir):
evaluate_once(
config, checkpointdir, validation_records, checkpoint_path,
batch_size, num_views)
if __name__ == '__main__':
tf.app.run()
# Train with Multi-View TCN.
training_strategy: 'mvtcn'
# Use the 'inception_conv_ss_fc' embedder, which has the structure:
# InceptionV3 -> 2 conv adaptation layers -> spatial softmax -> fully connected
# -> embedding.
embedder_strategy: 'inception_conv_ss_fc'
# Use npairs loss.
loss_strategy: 'npairs'
learning:
learning_rate: 0.0001
# Set some hyperparameters for our embedder.
inception_conv_ss_fc:
# Don't finetune the pre-trained weights.
finetune_inception: false
dropout:
# Don't dropout convolutional activations.
keep_conv: 1.0
# Use a dropout of 0.8 on the fully connected activations.
keep_fc: 0.8
# Use a dropout of 0.8 on the inception activations.
keep_pretrained: 0.8
# Size of the TCN embedding.
embedding_size: 32
data:
raw_height: 480
raw_width: 360
batch_size: 32
examples_per_sequence: 32
num_views: 2
preprocessing:
# Inference-time image cropping strategy.
eval_cropping: 'pad200'
augmentation:
# Do scale augmentation.
minscale: 0.8 # When downscaling, zoom in to 80% of the central bounding box.
maxscale: 3.0 # When upscaling, zoom out to 300% of the central bounding box.
proportion_scaled_up: 0.5 # Proportion of the time to scale up rather than down.
color: true # Do color augmentation.
fast_mode: true
# Paths to the data.
training: '~/tcn_data/multiview-pouring/tfrecords/train'
validation: '~/tcn_data/multiview-pouring/tfrecords/val'
test: 'path/to/test'
labeled:
image_attr_keys: ['image/view0', 'image/view1', 'task']
label_attr_keys: ['contact', 'distance', 'liquid_flowing', 'has_liquid', 'container_angle']
validation: '~/tcn_data/multiview-pouring/monolithic-labeled/val'
test: '~/tcn_data/multiview-pouring/monolithic-labeled/test'
logging:
checkpoint:
save_checkpoints_steps: 1000
\ No newline at end of file
# These configs are the defaults we used for both the pouring and pose
# experiments.
# Train on TPU?
use_tpu: false # Default is to run without TPU locally.
tpu:
num_shards: 1
iterations: 100
# SGD / general learning hyperparameters.
learning:
max_step: 1000000
learning_rate: 0.001
decay_steps: 10000
decay_factor: 1.00
l2_reg_weight: 0.000001
optimizer: 'adam'
# Default metric learning loss hyperparameters.
triplet_semihard:
embedding_l2: true # Suggestion from Hyun Oh Song's slides.
margin: .2 # Default value for Facenet.
npairs:
embedding_l2: false # Suggestion from Hyun Oh Song's slides.
clustering_loss:
embedding_l2: true # Suggestion from Hyun Oh Song's slides.
margin: 1.0 # Default in deep_metric_learning.
lifted_struct:
embedding_l2: false # Suggestion from Hyun Oh Song's slides.
margin: 1.0
contrastive:
embedding_l2: true # Suggestion from Hyun Oh Song's slides.
margin: 1.0
# Which method to use to train the embedding.
# Options are "mvtcn", "svtcn".
training_strategy: 'mvtcn'
# Which embedder architecture to use.
# Options are 'inception_conv_ss_fc' (used in pouring / pose experiments),
# 'resnet'.
embedder_strategy: 'inception_conv_ss_fc'
# Size of the TCN embedding.
embedding_size: 32
# Default hyperparameters for the different embedder architectures.
inception_conv_ss_fc:
pretrained_checkpoint: 'pretrained_checkpoints/inception/inception_v3.ckpt'
pretrained_layer: 'Mixed_5d'
additional_conv_sizes: [512, 512]
fc_hidden_sizes: [2048]
finetune: false
dropout:
keep_pretrained: 1.0
keep_conv: 1.0
keep_fc: 1.0
resnet:
pretrained_checkpoint: 'pretrained_checkpoints/resnet/resnet_v2_50.ckpt'
pretrained_layer: 4
finetune: false
adaptation_blocks: '512_3-512_3'
emb_connection: 'conv'
fc_hidden_sizes: 'None'
dropout:
keep_pretrained: 1.0
# Loss hyperparameters.
mvtcn:
# Size of the window in timesteps to get random anchor-positive pairs for
# training.
window: 580 # 29fps * 20 seconds.
svtcn:
pos_radius: 6 # 0.2 seconds * 29fps ~ 6 timesteps.
neg_radius: 12 # 2.0 * pos_radius.
# Data configs.
data:
height: 299
width: 299
preprocessing:
# Strategy to use when cropping images at inference time.
# See preprocessing.py for options.
eval_cropping: 'crop_center'
# Training scale, color augmentation hyperparameters.
augmentation:
# See preprocessing.py for a discussion of how to use these parameters.
minscale: 1.0
maxscale: 1.0
proportion_scaled_up: 0.5
color: true
fast_mode: true
num_parallel_calls: 12
sequence_prefetch_size: 12
batch_prefetch_size: 12
batch_size: 36
eval_batch_size: 36
embed_batch_size: 128
val:
recall_at_k_list: [1]
num_eval_samples: 1000
eval_interval_secs: 300
logging:
summary:
image_summaries: false
save_summaries_steps: 100
flush_secs: 600
checkpoint:
num_to_keep: 0 # Keep all checkpoints.
save_checkpoints_steps: 1000
secs: 1800
\ No newline at end of file
use_tpu: False
training_strategy: 'mvtcn'
loss_strategy: 'triplet_semihard'
learning:
max_step: 2
optimizer: 'adam'
embedding_size: 8
data:
embed_batch_size: 12
batch_size: 12
examples_per_sequence: 12
num_views: 2
num_parallel_calls: 1
sequence_prefetch_size: 1
batch_prefetch_size: 1
logging:
summary:
image_summaries: false
save_summaries_steps: 100
flush_secs: 600
save_summaries_secs: 60
checkpoint:
num_to_keep: 0 # Keep all checkpoints.
save_checkpoints_steps: 1000
secs: 1800
\ No newline at end of file
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Defines data providers used in training and evaluating TCNs."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
import random
import numpy as np
import preprocessing
import tensorflow as tf
def record_dataset(filename):
"""Generate a TFRecordDataset from a `filename`."""
return tf.data.TFRecordDataset(filename)
def full_sequence_provider(file_list, num_views):
"""Provides full preprocessed image sequences.
Args:
file_list: List of strings, paths to TFRecords to preprocess.
num_views: Int, the number of simultaneous viewpoints at each timestep in
the dataset.
Returns:
preprocessed: A 4-D float32 `Tensor` holding a sequence of preprocessed
images.
raw_image_strings: A 2-D string `Tensor` holding a sequence of raw
jpeg-encoded image strings.
task: String, the name of the sequence.
seq_len: Int, the number of timesteps in the sequence.
"""
def _parse_sequence(x):
context, views, seq_len = parse_sequence_example(x, num_views)
task = context['task']
return views, task, seq_len
data_files = tf.contrib.slim.parallel_reader.get_data_files(file_list)
dataset = tf.data.Dataset.from_tensor_slices(data_files)
dataset = dataset.repeat(1)
# Get a dataset of sequences.
dataset = dataset.flat_map(record_dataset)
# Build a dataset of TFRecord files.
dataset = dataset.repeat(1)
# Prefetch a number of opened files.
dataset = dataset.prefetch(12)
# Use _parse_sequence to deserialize (but not decode) image strings.
dataset = dataset.map(_parse_sequence, num_parallel_calls=12)
# Prefetch batches of images.
dataset = dataset.prefetch(12)
dataset = dataset.make_one_shot_iterator()
views, task, seq_len = dataset.get_next()
return views, task, seq_len
def parse_labeled_example(
example_proto, view_index, preprocess_fn, image_attr_keys, label_attr_keys):
"""Parses a labeled test example from a specified view.
Args:
example_proto: A scalar string Tensor.
view_index: Int, index on which view to parse.
preprocess_fn: A function with the signature (raw_images, is_training) ->
preprocessed_images, where raw_images is a 4-D float32 image `Tensor`
of raw images, is_training is a Boolean describing if we're in training,
and preprocessed_images is a 4-D float32 image `Tensor` holding
preprocessed images.
image_attr_keys: List of Strings, names for image keys.
label_attr_keys: List of Strings, names for label attributes.
Returns:
data: A tuple of images, attributes and tasks `Tensors`.
"""
features = {}
for attr_key in image_attr_keys:
features[attr_key] = tf.FixedLenFeature((), tf.string)
for attr_key in label_attr_keys:
features[attr_key] = tf.FixedLenFeature((), tf.int64)
parsed_features = tf.parse_single_example(example_proto, features)
image_only_keys = [i for i in image_attr_keys if 'image' in i]
view_image_key = image_only_keys[view_index]
image = preprocessing.decode_image(parsed_features[view_image_key])
preprocessed = preprocess_fn(image, is_training=False)
attributes = [parsed_features[k] for k in label_attr_keys]
task = parsed_features['task']
return tuple([preprocessed] + attributes + [task])
def labeled_data_provider(
filenames, preprocess_fn, view_index, image_attr_keys, label_attr_keys,
batch_size=32, num_epochs=1):
"""Gets a batched dataset iterator over annotated test images + labels.
Provides a single view, specified in `view_index`.
Args:
filenames: List of Strings, paths to tfrecords on disk.
preprocess_fn: A function with the signature (raw_images, is_training) ->
preprocessed_images, where raw_images is a 4-D float32 image `Tensor`
of raw images, is_training is a Boolean describing if we're in training,
and preprocessed_images is a 4-D float32 image `Tensor` holding
preprocessed images.
view_index: Int, the index of the view to embed.
image_attr_keys: List of Strings, names for image keys.
label_attr_keys: List of Strings, names for label attributes.
batch_size: Int, size of the batch.
num_epochs: Int, number of epochs over the classification dataset.
Returns:
batch_images: 4-d float `Tensor` holding the batch images for the view.
labels: K-d int `Tensor` holding the K label attributes.
tasks: 1-D String `Tensor`, holding the task names for each batch element.
"""
dataset = tf.data.TFRecordDataset(filenames)
# pylint: disable=g-long-lambda
dataset = dataset.map(
lambda p: parse_labeled_example(
p, view_index, preprocess_fn, image_attr_keys, label_attr_keys))
dataset = dataset.repeat(num_epochs)
dataset = dataset.batch(batch_size)
data_iterator = dataset.make_one_shot_iterator()
batch_data = data_iterator.get_next()
batch_images = batch_data[0]
batch_labels = tf.stack(batch_data[1:-1], 1)
batch_tasks = batch_data[-1]
batch_images = set_image_tensor_batch_dim(batch_images, batch_size)
batch_labels.set_shape([batch_size, len(label_attr_keys)])
batch_tasks.set_shape([batch_size])
return batch_images, batch_labels, batch_tasks
def parse_sequence_example(serialized_example, num_views):
"""Parses a serialized sequence example into views, sequence length data."""
context_features = {
'task': tf.FixedLenFeature(shape=[], dtype=tf.string),
'len': tf.FixedLenFeature(shape=[], dtype=tf.int64)
}
view_names = ['view%d' % i for i in range(num_views)]
fixed_features = [
tf.FixedLenSequenceFeature(
shape=[], dtype=tf.string) for _ in range(len(view_names))]
sequence_features = dict(zip(view_names, fixed_features))
context_parse, sequence_parse = tf.parse_single_sequence_example(
serialized=serialized_example,
context_features=context_features,
sequence_features=sequence_features)
views = tf.stack([sequence_parse[v] for v in view_names])
lens = [sequence_parse[v].get_shape().as_list()[0] for v in view_names]
assert len(set(lens)) == 1
seq_len = tf.shape(sequence_parse[view_names[0]])[0]
return context_parse, views, seq_len
def get_shuffled_input_records(file_list):
"""Build a tf.data.Dataset of shuffled input TFRecords that repeats."""
dataset = tf.data.Dataset.from_tensor_slices(file_list)
dataset = dataset.shuffle(len(file_list))
dataset = dataset.repeat()
dataset = dataset.flat_map(record_dataset)
dataset = dataset.repeat()
return dataset
def get_tcn_anchor_pos_indices(seq_len, num_views, num_pairs, window):
"""Gets batch TCN anchor positive timestep and view indices.
This gets random (anchor, positive) timesteps from a sequence, and chooses
2 random differing viewpoints for each anchor positive pair.
Args:
seq_len: Int, the size of the batch sequence in timesteps.
num_views: Int, the number of simultaneous viewpoints at each timestep.
num_pairs: Int, the number of pairs to build.
window: Int, the window (in frames) from which to take anchor, positive
and negative indices.
Returns:
ap_time_indices: 1-D Int `Tensor` with size [num_pairs], holding the
timestep for each (anchor,pos) pair.
a_view_indices: 1-D Int `Tensor` with size [num_pairs], holding the
view index for each anchor.
p_view_indices: 1-D Int `Tensor` with size [num_pairs], holding the
view index for each positive.
"""
# Get anchor, positive time indices.
def f1():
# Choose a random window-length range from the sequence.
range_min = tf.random_shuffle(tf.range(seq_len-window))[0]
range_max = range_min+window
return tf.range(range_min, range_max)
def f2():
# Consider the full sequence.
return tf.range(seq_len)
time_indices = tf.cond(tf.greater(seq_len, window), f1, f2)
shuffled_indices = tf.random_shuffle(time_indices)
num_pairs = tf.minimum(seq_len, num_pairs)
ap_time_indices = shuffled_indices[:num_pairs]
# Get opposing anchor, positive view indices.
view_indices = tf.tile(
tf.expand_dims(tf.range(num_views), 0), (num_pairs, 1))
shuffled_view_indices = tf.map_fn(tf.random_shuffle, view_indices)
a_view_indices = shuffled_view_indices[:, 0]
p_view_indices = shuffled_view_indices[:, 1]
return ap_time_indices, a_view_indices, p_view_indices
def set_image_tensor_batch_dim(tensor, batch_dim):
"""Sets the batch dimension on an image tensor."""
shape = tensor.get_shape()
tensor.set_shape([batch_dim, shape[1], shape[2], shape[3]])
return tensor
def parse_sequence_to_pairs_batch(
serialized_example, preprocess_fn, is_training, num_views, batch_size,
window):
"""Parses a serialized sequence example into a batch of preprocessed data.
Args:
serialized_example: A serialized SequenceExample.
preprocess_fn: A function with the signature (raw_images, is_training) ->
preprocessed_images.
is_training: Boolean, whether or not we're in training.
num_views: Int, the number of simultaneous viewpoints at each timestep in
the dataset.
batch_size: Int, size of the batch to get.
window: Int, only take pairs from a maximum window of this size.
Returns:
preprocessed: A 4-D float32 `Tensor` holding preprocessed images.
anchor_images: A 4-D float32 `Tensor` holding raw anchor images.
pos_images: A 4-D float32 `Tensor` holding raw positive images.
"""
_, views, seq_len = parse_sequence_example(serialized_example, num_views)
# Get random (anchor, positive) timestep and viewpoint indices.
num_pairs = batch_size // 2
ap_time_indices, a_view_indices, p_view_indices = get_tcn_anchor_pos_indices(
seq_len, num_views, num_pairs, window)
# Gather the image strings.
combined_anchor_indices = tf.concat(
[tf.expand_dims(a_view_indices, 1),
tf.expand_dims(ap_time_indices, 1)], 1)
combined_pos_indices = tf.concat(
[tf.expand_dims(p_view_indices, 1),
tf.expand_dims(ap_time_indices, 1)], 1)
anchor_images = tf.gather_nd(views, combined_anchor_indices)
pos_images = tf.gather_nd(views, combined_pos_indices)
# Decode images.
anchor_images = tf.map_fn(
preprocessing.decode_image, anchor_images, dtype=tf.float32)
pos_images = tf.map_fn(
preprocessing.decode_image, pos_images, dtype=tf.float32)
# Concatenate [anchor, positive] images into a batch and preprocess it.
concatenated = tf.concat([anchor_images, pos_images], 0)
preprocessed = preprocess_fn(concatenated, is_training)
anchor_prepro, positive_prepro = tf.split(preprocessed, num_or_size_splits=2,
axis=0)
# Set static batch dimensions for all image tensors
ims = [anchor_prepro, positive_prepro, anchor_images, pos_images]
ims = [set_image_tensor_batch_dim(i, num_pairs) for i in ims]
[anchor_prepro, positive_prepro, anchor_images, pos_images] = ims
# Assign each anchor and positive the same label.
anchor_labels = tf.range(1, num_pairs+1)
positive_labels = tf.range(1, num_pairs+1)
return (anchor_prepro, positive_prepro, anchor_images, pos_images,
anchor_labels, positive_labels, seq_len)
def multiview_pairs_provider(file_list,
preprocess_fn,
num_views,
window,
is_training,
batch_size,
examples_per_seq=2,
num_parallel_calls=12,
sequence_prefetch_size=12,
batch_prefetch_size=12):
"""Provides multi-view TCN anchor-positive image pairs.
Returns batches of Multi-view TCN pairs, where each pair consists of an
anchor and a positive coming from different views from the same timestep.
Batches are filled one entire sequence at a time until
batch_size is exhausted. Pairs are chosen randomly without replacement
within a sequence.
Used by:
* triplet semihard loss.
* clustering loss.
* npairs loss.
* lifted struct loss.
* contrastive loss.
Args:
file_list: List of Strings, paths to tfrecords.
preprocess_fn: A function with the signature (raw_images, is_training) ->
preprocessed_images, where raw_images is a 4-D float32 image `Tensor`
of raw images, is_training is a Boolean describing if we're in training,
and preprocessed_images is a 4-D float32 image `Tensor` holding
preprocessed images.
num_views: Int, the number of simultaneous viewpoints at each timestep.
window: Int, size of the window (in frames) from which to draw batch ids.
is_training: Boolean, whether or not we're in training.
batch_size: Int, how many examples in the batch (num pairs * 2).
examples_per_seq: Int, how many examples to take per sequence.
num_parallel_calls: Int, the number of elements to process in parallel by
mapper.
sequence_prefetch_size: Int, size of the buffer used to prefetch sequences.
batch_prefetch_size: Int, size of the buffer used to prefetch batches.
Returns:
batch_images: A 4-D float32 `Tensor` holding preprocessed batch images.
anchor_labels: A 1-D int32 `Tensor` holding anchor image labels.
anchor_images: A 4-D float32 `Tensor` holding raw anchor images.
positive_labels: A 1-D int32 `Tensor` holding positive image labels.
pos_images: A 4-D float32 `Tensor` holding raw positive images.
"""
def _parse_sequence(x):
return parse_sequence_to_pairs_batch(
x, preprocess_fn, is_training, num_views, examples_per_seq, window)
# Build a buffer of shuffled input TFRecords that repeats forever.
dataset = get_shuffled_input_records(file_list)
# Prefetch a number of opened TFRecords.
dataset = dataset.prefetch(sequence_prefetch_size)
# Use _parse_sequence to map sequences to batches (one sequence per batch).
dataset = dataset.map(
_parse_sequence, num_parallel_calls=num_parallel_calls)
# Filter out sequences that don't have at least examples_per_seq.
def seq_greater_than_min(seqlen, maximum):
return seqlen >= maximum
filter_fn = functools.partial(seq_greater_than_min, maximum=examples_per_seq)
dataset = dataset.filter(lambda a, b, c, d, e, f, seqlen: filter_fn(seqlen))
# Take a number of sequences for the batch.
assert batch_size % examples_per_seq == 0
sequences_per_batch = batch_size // examples_per_seq
dataset = dataset.batch(sequences_per_batch)
# Prefetch batches of images.
dataset = dataset.prefetch(batch_prefetch_size)
iterator = dataset.make_one_shot_iterator()
data = iterator.get_next()
# Pull out images, reshape to [batch_size, ...], concatenate anchor and pos.
ims = list(data[:4])
anchor_labels, positive_labels = data[4:6]
# Set labels shape.
anchor_labels.set_shape([sequences_per_batch, None])
positive_labels.set_shape([sequences_per_batch, None])
def _reshape_to_batchsize(im):
"""[num_sequences, num_per_seq, ...] images to [batch_size, ...]."""
sequence_ims = tf.split(im, num_or_size_splits=sequences_per_batch, axis=0)
sequence_ims = [tf.squeeze(i) for i in sequence_ims]
return tf.concat(sequence_ims, axis=0)
# Reshape labels.
anchor_labels = _reshape_to_batchsize(anchor_labels)
positive_labels = _reshape_to_batchsize(positive_labels)
def _set_shape(im):
"""Sets a static shape for an image tensor of [sequences_per_batch,...] ."""
shape = im.get_shape()
im.set_shape([sequences_per_batch, shape[1], shape[2], shape[3], shape[4]])
return im
ims = [_set_shape(im) for im in ims]
ims = [_reshape_to_batchsize(im) for im in ims]
anchor_prepro, positive_prepro, anchor_images, pos_images = ims
batch_images = tf.concat([anchor_prepro, positive_prepro], axis=0)
return batch_images, anchor_labels, positive_labels, anchor_images, pos_images
def get_svtcn_indices(seq_len, batch_size, num_views):
"""Gets a random window of contiguous time indices from a sequence.
Args:
seq_len: Int, number of timesteps in the image sequence.
batch_size: Int, size of the batch to construct.
num_views: Int, the number of simultaneous viewpoints at each
timestep in the dataset.
Returns:
time_indices: 1-D Int `Tensor` with size [batch_size], holding the
timestep for each batch image.
view_indices: 1-D Int `Tensor` with size [batch_size], holding the
view for each batch image. This is consistent across the batch.
"""
# Get anchor, positive time indices.
def f1():
# Choose a random contiguous range from within the sequence.
range_min = tf.random_shuffle(tf.range(seq_len-batch_size))[0]
range_max = range_min+batch_size
return tf.range(range_min, range_max)
def f2():
# Consider the full sequence.
return tf.range(seq_len)
time_indices = tf.cond(tf.greater(seq_len, batch_size), f1, f2)
# Get opposing anchor, positive view indices.
random_view = tf.random_shuffle(tf.range(num_views))[0]
view_indices = tf.tile([random_view], (batch_size,))
return time_indices, view_indices
def parse_sequence_to_svtcn_batch(
serialized_example, preprocess_fn, is_training, num_views, batch_size):
"""Parses a serialized sequence example into a batch of SVTCN data."""
_, views, seq_len = parse_sequence_example(serialized_example, num_views)
# Get svtcn indices.
time_indices, view_indices = get_svtcn_indices(seq_len, batch_size, num_views)
combined_indices = tf.concat(
[tf.expand_dims(view_indices, 1),
tf.expand_dims(time_indices, 1)], 1)
# Gather the image strings.
images = tf.gather_nd(views, combined_indices)
# Decode images.
images = tf.map_fn(preprocessing.decode_image, images, dtype=tf.float32)
# Concatenate anchor and positive images, preprocess the batch.
preprocessed = preprocess_fn(images, is_training)
return preprocessed, images, time_indices
def singleview_tcn_provider(file_list,
preprocess_fn,
num_views,
is_training,
batch_size,
num_parallel_calls=12,
sequence_prefetch_size=12,
batch_prefetch_size=12):
"""Provides data to train singleview TCNs.
Args:
file_list: List of Strings, paths to tfrecords.
preprocess_fn: A function with the signature (raw_images, is_training) ->
preprocessed_images, where raw_images is a 4-D float32 image `Tensor`
of raw images, is_training is a Boolean describing if we're in training,
and preprocessed_images is a 4-D float32 image `Tensor` holding
preprocessed images.
num_views: Int, the number of simultaneous viewpoints at each timestep.
is_training: Boolean, whether or not we're in training.
batch_size: Int, how many examples in the batch.
num_parallel_calls: Int, the number of elements to process in parallel by
mapper.
sequence_prefetch_size: Int, size of the buffer used to prefetch sequences.
batch_prefetch_size: Int, size of the buffer used to prefetch batches.
Returns:
batch_images: A 4-D float32 `Tensor` of preprocessed images.
raw_images: A 4-D float32 `Tensor` of raw images.
timesteps: A 1-D int32 `Tensor` of timesteps associated with each image.
"""
def _parse_sequence(x):
return parse_sequence_to_svtcn_batch(
x, preprocess_fn, is_training, num_views, batch_size)
# Build a buffer of shuffled input TFRecords that repeats forever.
dataset = get_shuffled_input_records(file_list)
# Prefetch a number of opened files.
dataset = dataset.prefetch(sequence_prefetch_size)
# Use _parse_sequence to map sequences to image batches.
dataset = dataset.map(
_parse_sequence, num_parallel_calls=num_parallel_calls)
# Prefetch batches of images.
dataset = dataset.prefetch(batch_prefetch_size)
dataset = dataset.make_one_shot_iterator()
batch_images, raw_images, timesteps = dataset.get_next()
return batch_images, raw_images, timesteps
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for data_providers."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import data_providers
import tensorflow as tf
class DataTest(tf.test.TestCase):
def testMVTripletIndices(self):
"""Ensures anchor/pos indices for a TCN batch are valid."""
tf.set_random_seed(0)
window = 580
batch_size = 36
num_pairs = batch_size // 2
num_views = 2
seq_len = 600
# Get anchor time and view indices for this sequence.
(_, a_view_indices,
p_view_indices) = data_providers.get_tcn_anchor_pos_indices(
seq_len, num_views, num_pairs, window)
with self.test_session() as sess:
(np_a_view_indices,
np_p_view_indices) = sess.run([a_view_indices, p_view_indices])
# Assert no overlap between anchor and pos view indices.
np.testing.assert_equal(
np.any(np.not_equal(np_a_view_indices, np_p_view_indices)), True)
# Assert set of view indices is a subset of expected set of view indices.
view_set = set(range(num_views))
self.assertTrue(set(np_a_view_indices).issubset(view_set))
self.assertTrue(set(np_p_view_indices).issubset(view_set))
def testSVTripletIndices(self):
"""Ensures time indices for a SV triplet batch are valid."""
seq_len = 600
batch_size = 36
num_views = 2
time_indices, _ = data_providers.get_svtcn_indices(
seq_len, batch_size, num_views)
with self.test_session() as sess:
np_time_indices = sess.run(time_indices)
first = np_time_indices[0]
last = np_time_indices[-1]
# Make sure batch time indices are a contiguous range.
self.assertTrue(np.array_equal(np_time_indices, range(first, last+1)))
if __name__ == "__main__":
tf.test.main()
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Converts temp directories of images to videos."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import os
import shutil
# pylint: disable=invalid-name
parser = argparse.ArgumentParser()
parser.add_argument(
'--view_dirs', type=str, default='',
help='Comma-separated list of temp view image directories.')
parser.add_argument(
'--vid_paths', type=str, default='',
help='Comma-separated list of video output paths.')
parser.add_argument(
'--debug_path', type=str, default='',
help='Output path to debug video.')
parser.add_argument(
'--debug_lhs_view', type=str, default='',
help='Output path to debug video.')
parser.add_argument(
'--debug_rhs_view', type=str, default='',
help='Output path to debug video.')
def create_vids(view_dirs, vid_paths, debug_path=None,
debug_lhs_view=0, debug_rhs_view=1):
"""Creates one video per view per sequence."""
# Create the view videos.
for (view_dir, vidpath) in zip(view_dirs, vid_paths):
encode_vid_cmd = r'mencoder mf://%s/*.png \
-mf fps=29:type=png \
-ovc lavc -lavcopts vcodec=mpeg4:mbd=2:trell \
-oac copy -o %s' % (view_dir, vidpath)
os.system(encode_vid_cmd)
# Optionally create a debug side-by-side video.
if debug_path:
lhs = vid_paths[int(debug_lhs_view)]
rhs = vid_paths[int(debug_rhs_view)]
os.system(r"avconv \
-i %s \
-i %s \
-filter_complex '[0:v]pad=iw*2:ih[int];[int][1:v]overlay=W/2:0[vid]' \
-map [vid] \
-c:v libx264 \
-crf 23 \
-preset veryfast \
%s" % (lhs, rhs, debug_path))
def main():
FLAGS, _ = parser.parse_known_args()
assert FLAGS.view_dirs
assert FLAGS.vid_paths
view_dirs = FLAGS.view_dirs.split(',')
vid_paths = FLAGS.vid_paths.split(',')
create_vids(view_dirs, vid_paths, FLAGS.debug_path,
FLAGS.debug_lhs_view, FLAGS.debug_rhs_view)
# Cleanup temp image dirs.
for i in view_dirs:
shutil.rmtree(i)
if __name__ == '__main__':
main()
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Converts videos to training, validation, test, and debug tfrecords on cns.
Example usage:
# From phone videos.
x=learning/brain/research/tcn/videos_to_tfrecords && \
blaze build -c opt $x && \
set=tmp && videos=~/data/tcn/datasets/$set/ && \
blaze-bin/$x --logtostderr --output_dir /cns/oi-d/home/$USER/tcn_data/$set \
  --input_dir $videos/train \
--debug $dataset/debug --rotate 90 --max_per_shard 400
# From webcam videos.
mode=train
x=learning/brain/research/tcn/videos_to_tfrecords && \
blaze build -c opt $x && \
set=tmp && videos=/tmp/tcn/videos/$set/ && \
blaze-bin/$x --logtostderr \
--output_dir /cns/oi-d/home/$USER/tcn_data/$set/$mode \
--input_dir $videos/$mode --max_per_shard 400
"""
import glob
import math
import multiprocessing
from multiprocessing.pool import ThreadPool
import os
from random import shuffle
import re
from io import BytesIO
import cv2
from PIL import Image
from PIL import ImageFile
from preprocessing import cv2resizeminedge
from preprocessing import cv2rotateimage
from preprocessing import shapestring
from utils.progress import Progress
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.INFO)
tf.app.flags.DEFINE_string('view_pattern', '_view[_]*[0]+[.].*',
'view regexp pattern for first view')
tf.app.flags.DEFINE_string('input_dir', '', '''input data path''')
tf.app.flags.DEFINE_integer('resize_min_edge', 0,
'''resize the smallest edge to this size.''')
tf.app.flags.DEFINE_integer('rotate', 0, '''rotate the image in degrees.''')
tf.app.flags.DEFINE_string('rotate_if_matching', None,
'rotate only if video path matches regexp.')
tf.app.flags.DEFINE_string('output_dir', '', 'output directory for the dataset')
tf.app.flags.DEFINE_integer(
'max_per_shard', -1, 'max # of frames per data chunk')
tf.app.flags.DEFINE_integer('expected_views', 2, 'expected number of views')
tf.app.flags.DEFINE_integer('log_frequency', 50, 'frequency of logging')
tf.app.flags.DEFINE_integer(
'max_views_discrepancy', 100,
'Maximum length difference (in frames) allowed between views')
tf.app.flags.DEFINE_boolean('overwrite', False, 'overwrite output files')
FLAGS = tf.app.flags.FLAGS
feature = tf.train.Feature
bytes_feature = lambda v: feature(bytes_list=tf.train.BytesList(value=v))
int64_feature = lambda v: feature(int64_list=tf.train.Int64List(value=v))
float_feature = lambda v: feature(float_list=tf.train.FloatList(value=v))
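# Illustrative behavior of the helpers above (values are hypothetical):
#   int64_feature([42])   -> tf.train.Feature(int64_list=Int64List(value=[42]))
#   float_feature([0.5])  -> tf.train.Feature(float_list=FloatList(value=[0.5]))
#   bytes_feature([jpeg_str]) wraps one encoded image for a SequenceExample.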
def FindPatternFiles(path, view_pattern, errors):
"""Recursively find all files matching a certain pattern."""
if not path:
return None
tf.logging.info(
'Recursively searching for files matching pattern \'%s\' in %s' %
(view_pattern, path))
view_patt = re.compile('.*' + view_pattern)
sequences = []
for root, _, filenames in os.walk(path, followlinks=True):
path_root = root[:len(path)]
assert path_root == path
for filename in filenames:
if view_patt.match(filename):
fullpath = os.path.join(root, re.sub(view_pattern, '', filename))
shortpath = re.sub(path, '', fullpath).lstrip('/')
# Determine if this sequence should be sharded or not.
shard = False
if FLAGS.max_per_shard > 0:
shard = True
# Retrieve number of frames for this sequence.
num_views, length, view_paths, num_frames = GetViewInfo(
fullpath + view_pattern[0] + '*')
if num_views != FLAGS.expected_views:
tf.logging.info('Expected %d views but found: %s' %
(FLAGS.expected_views, str(view_paths)))
assert num_views == FLAGS.expected_views
assert length > 0
# Drop sequences if view lengths differ too much.
if max(num_frames) - min(num_frames) > FLAGS.max_views_discrepancy:
          error_msg = (
              'Error: ignoring sequence with view lengths differing by more '
              'than %d frames: %s in %s') % (FLAGS.max_views_discrepancy,
                                             str(num_frames), fullpath)
errors.append(error_msg)
tf.logging.error(error_msg)
else:
# Append sequence info.
sequences.append({'full': fullpath, 'name': shortpath, 'len': length,
'start': 0, 'end': length, 'num_views': num_views,
'shard': shard})
return sorted(sequences, key=lambda k: k['name'])
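# Example of how the default --view_pattern groups files (hypothetical layout):
# an input_dir containing seq1_view0.mp4 and seq1_view1.mp4 yields one sequence
# entry with fullpath '<input_dir>/seq1', name 'seq1', and num_views == 2.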
def ShardSequences(sequences, max_per_shard):
"""Find all sequences, shard and randomize them."""
total_shards_len = 0
total_shards = 0
assert max_per_shard > 0
for sequence in sequences:
if sequence['shard']:
sequence['shard'] = False # Reset shard flag.
length = sequence['len']
start = sequence['start']
end = sequence['end']
name = sequence['name']
assert end - start == length
if length > max_per_shard:
# Dividing sequence into smaller shards.
num_shards = int(math.floor(length / max_per_shard)) + 1
size = int(math.ceil(length / num_shards))
tf.logging.info(
'splitting sequence of length %d into %d shards of size %d' %
(length, num_shards, size))
last_end = 0
for i in range(num_shards):
shard_start = last_end
shard_end = min(length, shard_start + size)
if i == num_shards - 1:
shard_end = length
shard_len = shard_end - shard_start
total_shards_len += shard_len
shard_name = name + '_shard%02d' % i
last_end = shard_end
# Enqueuing shard.
if i == 0: # Replace current sequence.
sequence['len'] = shard_len
sequence['start'] = shard_start
sequence['end'] = shard_end
sequence['name'] = shard_name
else: # Enqueue new sequence.
sequences.append(
{'full': sequence['full'], 'name': shard_name,
'len': shard_len, 'start': shard_start, 'end': shard_end,
'num_views': sequence['num_views'], 'shard': False})
total_shards += num_shards
assert last_end == length
# Print resulting sharding.
if total_shards > 0:
tf.logging.info('%d shards of average length %d' %
(total_shards, total_shards_len / total_shards))
return sorted(sequences, key=lambda k: k['name'])
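# Worked example of the sharding arithmetic above (hypothetical numbers):
# a 900-frame sequence with --max_per_shard 400 gives
#   num_shards = floor(900 / 400) + 1 = 3, size = ceil(900 / 3) = 300,
# i.e. shards covering frames [0, 300), [300, 600), [600, 900).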
def RandomizeSets(sets):
"""Randomize each set."""
  for _, sequences in sorted(sets.items()):
if sequences:
# Randomize order.
shuffle(sequences)
def GetSpecificFrame(vid_path, frame_index):
"""Gets a frame at a specified index in a video."""
cap = cv2.VideoCapture(vid_path)
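  # 1 is cv2.CAP_PROP_POS_FRAMES: seek to the requested frame before reading.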
cap.set(1, frame_index)
_, bgr = cap.read()
cap.release()
rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
return rgb
def JpegString(image, jpeg_quality=90):
"""Returns given PIL.Image instance as jpeg string.
Args:
image: A PIL image.
jpeg_quality: The image quality, on a scale from 1 (worst) to 95 (best).
Returns:
a jpeg_string.
"""
# This fix to PIL makes sure that we don't get an error when saving large
# jpeg files. This is a workaround for a bug in PIL. The value should be
# substantially larger than the size of the image being saved.
ImageFile.MAXBLOCK = 640 * 512 * 64
  output_jpeg = BytesIO()
image.save(output_jpeg, 'jpeg', quality=jpeg_quality, optimize=True)
return output_jpeg.getvalue()
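# A minimal round-trip sketch for JpegString (hypothetical image, using only
# the PIL and io imports above):
#   img = Image.new('RGB', (640, 480))
#   decoded = Image.open(BytesIO(JpegString(img)))  # back to a PIL image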
def ParallelPreprocessing(args):
"""Parallel preprocessing: rotation, resize and jpeg encoding to string."""
(vid_path, timestep, num_timesteps, view) = args
try:
image = GetSpecificFrame(vid_path, timestep)
# Resizing.
resize_str = ''
if FLAGS.resize_min_edge > 0:
resize_str += ', resize ' + shapestring(image)
image = cv2resizeminedge(image, FLAGS.resize_min_edge)
resize_str += ' => ' + shapestring(image)
# Rotating.
rotate = None
if FLAGS.rotate:
rotate = FLAGS.rotate
if FLAGS.rotate_if_matching is not None:
rotate = None
patt = re.compile(FLAGS.rotate_if_matching)
if patt.match(vid_path) is not None:
rotate = FLAGS.rotate
if rotate is not None:
      image = cv2rotateimage(image, rotate)
# Jpeg encoding.
image = Image.fromarray(image)
im_string = bytes_feature([JpegString(image)])
if timestep % FLAGS.log_frequency == 0:
tf.logging.info('Loaded frame %d / %d for %s (rotation %s%s) from %s' %
(timestep, num_timesteps, view, str(rotate), resize_str,
vid_path))
return im_string
except cv2.error as e:
tf.logging.error('Error while loading frame %d of %s: %s' %
(timestep, vid_path, str(e)))
return None
def GetNumFrames(vid_path):
"""Gets the number of frames in a video."""
cap = cv2.VideoCapture(vid_path)
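  # 7 is cv2.CAP_PROP_FRAME_COUNT: the total number of frames in the video.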
total_frames = cap.get(7)
cap.release()
return int(total_frames)
def GetViewInfo(views_fullname):
"""Return information about a group of views."""
view_paths = sorted(glob.glob(views_fullname))
num_frames = [GetNumFrames(i) for i in view_paths]
min_num_frames = min(num_frames)
num_views = len(view_paths)
return num_views, min_num_frames, view_paths, num_frames
def AddSequence(sequence, writer, progress, errors):
"""Converts a sequence to a SequenceExample.
Sequences have multiple viewpoint videos. Extract all frames from all
viewpoint videos in parallel, build a single SequenceExample containing
all viewpoint images for every timestep.
Args:
sequence: a dict with information on a sequence.
writer: A TFRecordWriter.
progress: A Progress object to report processing progress.
errors: a list of string to append to in case of errors.
"""
fullname = sequence['full']
shortname = sequence['name']
start = sequence['start']
end = sequence['end']
num_timesteps = sequence['len']
# Build a list of all view paths for this fullname.
path = fullname + FLAGS.view_pattern[0] + '*'
tf.logging.info('Loading sequence from ' + path)
view_paths = sorted(glob.glob(path))
# Extract all images for all views
num_frames = [GetNumFrames(i) for i in view_paths]
tf.logging.info('Loading %s with [%d, %d[ (%d frames) from: %s %s' %
(shortname, start, end, num_timesteps,
str(num_frames), str(view_paths)))
num_views = len(view_paths)
total_timesteps = int(min(num_frames))
assert num_views == FLAGS.expected_views
assert num_views == sequence['num_views']
# Create a worker pool to parallelize loading/rotating
worker_pool = ThreadPool(multiprocessing.cpu_count())
# Collect all images for each view.
view_to_feature_list = {}
view_images = []
for view_idx, view in enumerate(
['view'+str(i) for i in range(num_views)]):
# Flatten list to process in parallel
work = []
for i in range(start, end):
work.append((view_paths[view_idx], i, total_timesteps, view))
# Load and rotate images in parallel
view_images.append(worker_pool.map(ParallelPreprocessing, work))
# Report progress.
progress.Add(len(view_images[view_idx]))
tf.logging.info('%s' % str(progress))
# Remove error frames from all views
  # view_images lists are indexed from 0 regardless of the shard's start frame.
  i = 0
num_errors = 0
while i < len(view_images[0]):
remove_frame = False
# Check if one or more views have an error for this frame.
for view_idx in range(num_views):
if view_images[view_idx][i] is None:
remove_frame = True
error_msg = 'Removing frame %d for all views for %s ' % (i, fullname)
errors.append(error_msg)
tf.logging.error(error_msg)
# Remove faulty frames.
if remove_frame:
num_errors += 1
for view_idx in range(num_views):
del view_images[view_idx][i]
else:
i += 1
# Ignore sequences that have errors.
if num_errors > 0:
error_msg = 'Dropping sequence because of frame errors for %s' % fullname
errors.append(error_msg)
tf.logging.error(error_msg)
else:
# Build FeatureList objects for each view.
for view_idx, view in enumerate(
['view'+str(i) for i in range(num_views)]):
# Construct FeatureList from repeated feature.
view_to_feature_list[view] = tf.train.FeatureList(
feature=view_images[view_idx])
context_features = tf.train.Features(feature={
        'task': bytes_feature([shortname.encode('utf-8')]),
'len': int64_feature([num_timesteps])
})
feature_lists = tf.train.FeatureLists(feature_list=view_to_feature_list)
ex = tf.train.SequenceExample(
context=context_features, feature_lists=feature_lists)
writer.write(ex.SerializeToString())
tf.logging.info('Done adding %s with %d timesteps'
% (fullname, num_timesteps))
def PrintSequencesInfo(sequences, prefix):
"""Print information about sequences and return the total number of frames."""
tf.logging.info('')
tf.logging.info(prefix)
num_frames = 0
for sequence in sequences:
shard_str = ''
if sequence['shard']:
shard_str = ' (sharding)'
tf.logging.info('frames [%d, %d[\t(%d frames * %d views)%s\t%s' % (
sequence['start'], sequence['end'], sequence['len'],
sequence['num_views'], shard_str, sequence['name']))
num_frames += sequence['len'] * sequence['num_views']
tf.logging.info(('%d frames (all views), %d sequences, average sequence'
' length (all views): %d') %
(num_frames, len(sequences), num_frames / len(sequences)))
tf.logging.info('')
return num_frames
def CheckRecord(filename, sequence):
"""Check that an existing tfrecord corresponds to the expected sequence."""
num_sequences = 0
total_frames = 0
for serialized_example in tf.python_io.tf_record_iterator(filename):
num_sequences += 1
example = tf.train.SequenceExample()
example.ParseFromString(serialized_example)
length = example.context.feature['len'].int64_list.value[0]
    name = example.context.feature['task'].bytes_list.value[0].decode('utf-8')
total_frames += len(example.feature_lists.feature_list) * length
if sequence['name'] != name or sequence['len'] != length:
return False, total_frames
if num_sequences == 0:
return False, total_frames
return True, total_frames
def AddSequences():
"""Creates one training, validation."""
errors = []
# Generate datasets file lists.
sequences = FindPatternFiles(FLAGS.input_dir, FLAGS.view_pattern, errors)
num_frames = PrintSequencesInfo(sequences,
'Found the following datasets and files:')
# Sharding and randomizing sets.
if FLAGS.max_per_shard > 0:
sequences = ShardSequences(sequences, FLAGS.max_per_shard)
num_frames = PrintSequencesInfo(sequences, 'After sharding:')
tf.logging.info('')
# Process sets.
progress = Progress(num_frames)
output_list = []
for sequence in sequences:
record_name = os.path.join(
FLAGS.output_dir, '%s.tfrecord' % sequence['name'])
if tf.gfile.Exists(record_name) and not FLAGS.overwrite:
ok, num_frames = CheckRecord(record_name, sequence)
if ok:
progress.Add(num_frames)
tf.logging.info('Skipping existing output file: %s' % record_name)
continue
else:
tf.logging.info('File does not match sequence, reprocessing...')
output_dir = os.path.dirname(record_name)
if not tf.gfile.Exists(output_dir):
tf.logging.info('Creating output directory: %s' % output_dir)
tf.gfile.MakeDirs(output_dir)
output_list.append(record_name)
tf.logging.info('Writing to ' + record_name)
writer = tf.python_io.TFRecordWriter(record_name)
AddSequence(sequence, writer, progress, errors)
writer.close()
tf.logging.info('Wrote dataset files: ' + str(output_list))
tf.logging.info('All errors (%d): %s' % (len(errors), str(errors)))
def main(_):
AddSequences()
if __name__ == '__main__':
tf.app.run()
# Copyright 2017 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Collect images from multiple simultaneous webcams.
Usage:
1. Define some environment variables that describe what you're collecting.
dataset=your_dataset_name
mode=train
num_views=2
viddir=/tmp/tcn/videos
tmp_imagedir=/tmp/tcn/tmp_images
debug_vids=1
2. Run the script.
export DISPLAY=:0.0 && \
root=learning/brain/research/tcn && \
bazel build -c opt --copt=-mavx tcn/webcam && \
bazel-bin/tcn/webcam \
--dataset $dataset \
--mode $mode \
--num_views $num_views \
--tmp_imagedir $tmp_imagedir \
--viddir $viddir \
--debug_vids 1 \
--logtostderr
3. Hit Ctrl-C when done collecting, upon which the script will compile videos
for each view and optionally a debug video concatenating multiple
simultaneous views.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import multiprocessing
from multiprocessing import Process
import os
import subprocess
import sys
import time
import cv2
import matplotlib
matplotlib.use('TkAgg')
from matplotlib import animation # pylint: disable=g-import-not-at-top
import matplotlib.pyplot as plt
import numpy as np
from six.moves import input
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.INFO)
tf.flags.DEFINE_string('dataset', '', "Name of the dataset we're collecting.")
tf.flags.DEFINE_string('mode', '',
                       "What type of data we're collecting. E.g.: "
                       '`train`,`valid`,`test`, or `demo`')
tf.flags.DEFINE_string('seqname', '',
                       'Name of this sequence. If empty, the script will use '
                       'the name seq_N+1 where seq_N is the latest '
                       'integer-named sequence in the videos directory.')
tf.flags.DEFINE_integer('num_views', 2,
'Number of webcams.')
tf.flags.DEFINE_string('tmp_imagedir', '/tmp/tcn/data',
'Temporary outdir to write images.')
tf.flags.DEFINE_string('viddir', '/tmp/tcn/videos',
'Base directory to write debug videos.')
tf.flags.DEFINE_boolean('debug_vids', True,
                        'Whether to generate debug vids with multiple '
                        'concatenated views.')
tf.flags.DEFINE_string('debug_lhs_view', '0',
'Which viewpoint to use for the lhs video.')
tf.flags.DEFINE_string('debug_rhs_view', '1',
'Which viewpoint to use for the rhs video.')
tf.flags.DEFINE_integer('height', 1080, 'Raw input height.')
tf.flags.DEFINE_integer('width', 1920, 'Raw input width.')
tf.flags.DEFINE_string('webcam_ports', None,
'Comma-separated list of each webcam usb port.')
FLAGS = tf.app.flags.FLAGS
class ImageQueue(object):
"""An image queue holding each stream's most recent image.
Basically implements a process-safe collections.deque(maxlen=1).
"""
def __init__(self):
self.lock = multiprocessing.Lock()
self._queue = multiprocessing.Queue(maxsize=1)
def append(self, data):
with self.lock:
if self._queue.full():
# Pop the first element.
_ = self._queue.get()
self._queue.put(data)
def get(self):
with self.lock:
return self._queue.get()
def empty(self):
return self._queue.empty()
def close(self):
return self._queue.close()
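# Illustrative ImageQueue semantics (hypothetical images): only the most recent
# item is retained, mirroring collections.deque(maxlen=1):
#   q = ImageQueue()
#   q.append(im0)
#   q.append(im1)  # im0 is silently dropped
#   q.get()        # returns im1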
class WebcamViewer(object):
"""A class which displays a live stream from the webcams."""
def __init__(self, display_queues):
"""Create a WebcamViewer instance."""
self.height = FLAGS.height
self.width = FLAGS.width
self.queues = display_queues
def _get_next_images(self):
"""Gets the next image to display."""
# Wait for one image per view.
not_found = True
while not_found:
      if any(q.empty() for q in self.queues):
# At least one image queue is empty; wait.
continue
else:
# Retrieve the images.
latest = [q.get() for q in self.queues]
combined = np.concatenate(latest, axis=1)
not_found = False
return combined
def run(self):
"""Displays the Kcam live stream in a window.
This function blocks until the window is closed.
"""
fig, rgb_axis = plt.subplots()
image_rows = self.height
image_cols = self.width * FLAGS.num_views
initial_image = np.zeros((image_rows, image_cols, 3))
rgb_image = rgb_axis.imshow(initial_image, interpolation='nearest')
def update_figure(frame_index):
"""Animation function for matplotlib FuncAnimation. Updates the image.
Args:
frame_index: The frame number.
Returns:
An iterable of matplotlib drawables to clear.
"""
_ = frame_index
images = self._get_next_images()
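      # OpenCV delivers frames as BGR; reorder channels to RGB for matplotlib.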
images = images[..., [2, 1, 0]]
rgb_image.set_array(images)
return rgb_image,
# We must keep a reference to this animation in order for it to work.
unused_animation = animation.FuncAnimation(
fig, update_figure, interval=50, blit=True)
mng = plt.get_current_fig_manager()
mng.resize(*mng.window.maxsize())
plt.show()
def reconcile(queues, write_queue):
"""Gets a list of concurrent images from each view queue.
This waits for latest images to be available in all view queues,
then continuously:
- Creates a list of current images for each view.
- Writes the list to a queue of image lists to write to disk.
Args:
queues: A list of `ImageQueues`, holding the latest image from each webcam.
write_queue: A multiprocessing.Queue holding lists of concurrent images.
"""
# Loop forever.
while True:
# Wait till all queues have an image.
    if any(q.empty() for q in queues):
continue
else:
# Retrieve all views' images.
latest = [q.get() for q in queues]
# Copy the list of all concurrent images to the write queue.
write_queue.put(latest)
def persist(write_queue, view_dirs):
"""Pulls lists of concurrent images off a write queue, writes them to disk.
Args:
write_queue: A multiprocessing.Queue holding lists of concurrent images;
one image per view.
view_dirs: A list of strings, holding the output image directories for each
view.
"""
timestep = 0
while True:
# Wait till there is work in the queue.
if write_queue.empty():
continue
# Get a list of concurrent images to write to disk.
view_ims = write_queue.get()
for view_idx, image in enumerate(view_ims):
view_base = view_dirs[view_idx]
# Assign all concurrent view images the same sequence timestep.
fname = os.path.join(view_base, '%s.png' % str(timestep).zfill(10))
cv2.imwrite(fname, image)
# Move to the next timestep.
timestep += 1
def get_image(camera):
"""Captures a single image from the camera and returns it in PIL format."""
data = camera.read()
_, im = data
return im
def capture_webcam(camera, display_queue, reconcile_queue):
"""Captures images from simultaneous webcams, writes them to queues.
Args:
camera: A cv2.VideoCapture object representing an open webcam stream.
display_queue: An ImageQueue.
reconcile_queue: An ImageQueue.
"""
# Take some ramp images to allow cams to adjust for brightness etc.
for i in range(60):
tf.logging.info('Taking ramp image %d.' % i)
get_image(camera)
cnt = 0
start = time.time()
while True:
# Get images for all cameras.
im = get_image(camera)
# Replace the current image in the display and reconcile queues.
display_queue.append(im)
reconcile_queue.append(im)
cnt += 1
current = time.time()
if cnt % 100 == 0:
tf.logging.info('Collected %s of video, %d frames at ~%.2f fps.' % (
timer(start, current), cnt, cnt/(current-start)))
def timer(start, end):
"""Returns a formatted time elapsed."""
hours, rem = divmod(end-start, 3600)
minutes, seconds = divmod(rem, 60)
return '{:0>2}:{:0>2}:{:05.2f}'.format(int(hours), int(minutes), seconds)
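# e.g. timer(0, 3723.5) returns '01:02:03.50' (hours:minutes:seconds).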
def display_webcams(display_queues):
"""Builds an WebcamViewer to animate incoming images, runs it."""
viewer = WebcamViewer(display_queues)
viewer.run()
def create_vids(view_dirs, seqname):
"""Creates one video per view per sequence."""
vidbase = os.path.join(FLAGS.viddir, FLAGS.dataset, FLAGS.mode)
if not os.path.exists(vidbase):
os.makedirs(vidbase)
vidpaths = []
for idx, view_dir in enumerate(view_dirs):
vidname = os.path.join(vidbase, '%s_view%d.mp4' % (seqname, idx))
encode_vid_cmd = r'mencoder mf://%s/*.png \
-mf fps=29:type=png \
-ovc lavc -lavcopts vcodec=mpeg4:mbd=2:trell \
-oac copy -o %s' % (view_dir, vidname)
os.system(encode_vid_cmd)
vidpaths.append(vidname)
debugpath = None
if FLAGS.debug_vids:
    lhs = vidpaths[int(FLAGS.debug_lhs_view)]
    rhs = vidpaths[int(FLAGS.debug_rhs_view)]
debug_base = os.path.join('%s_debug' % FLAGS.viddir, FLAGS.dataset,
FLAGS.mode)
if not os.path.exists(debug_base):
os.makedirs(debug_base)
debugpath = '%s/%s.mp4' % (debug_base, seqname)
os.system(r"avconv \
-i %s \
-i %s \
-filter_complex '[0:v]pad=iw*2:ih[int];[int][1:v]overlay=W/2:0[vid]' \
-map [vid] \
-c:v libx264 \
-crf 23 \
-preset veryfast \
%s" % (lhs, rhs, debugpath))
return vidpaths, debugpath
def setup_paths():
"""Sets up the necessary paths to collect videos."""
assert FLAGS.dataset
assert FLAGS.mode
assert FLAGS.num_views
# Setup directory for final images used to create videos for this sequence.
tmp_imagedir = os.path.join(FLAGS.tmp_imagedir, FLAGS.dataset, FLAGS.mode)
if not os.path.exists(tmp_imagedir):
os.makedirs(tmp_imagedir)
# Create a base directory to hold all sequence videos if it doesn't exist.
vidbase = os.path.join(FLAGS.viddir, FLAGS.dataset, FLAGS.mode)
if not os.path.exists(vidbase):
os.makedirs(vidbase)
# Get one directory per concurrent view and a sequence name.
view_dirs, seqname = get_view_dirs(vidbase, tmp_imagedir)
# Get an output path to each view's video.
vid_paths = []
for idx, _ in enumerate(view_dirs):
vid_path = os.path.join(vidbase, '%s_view%d.mp4' % (seqname, idx))
vid_paths.append(vid_path)
# Optionally build paths to debug_videos.
debug_path = None
if FLAGS.debug_vids:
debug_base = os.path.join('%s_debug' % FLAGS.viddir, FLAGS.dataset,
FLAGS.mode)
if not os.path.exists(debug_base):
os.makedirs(debug_base)
debug_path = '%s/%s.mp4' % (debug_base, seqname)
return view_dirs, vid_paths, debug_path
def get_view_dirs(vidbase, tmp_imagedir):
"""Creates and returns one view directory per webcam."""
# Create and append a sequence name.
if FLAGS.seqname:
seqname = FLAGS.seqname
else:
# If there's no video directory, this is the first sequence.
if not os.listdir(vidbase):
seqname = '0'
else:
# Otherwise, get the latest sequence name and increment it.
seq_names = [i.split('_')[0] for i in os.listdir(vidbase)]
latest_seq = sorted(map(int, seq_names), reverse=True)[0]
seqname = str(latest_seq+1)
tf.logging.info('No seqname specified, using: %s' % seqname)
view_dirs = [os.path.join(
tmp_imagedir, '%s_view%d' % (seqname, v)) for v in range(FLAGS.num_views)]
for d in view_dirs:
if not os.path.exists(d):
os.makedirs(d)
return view_dirs, seqname
def get_cameras():
"""Opens cameras using cv2, ensures they can take images."""
# Try to get free webcam ports.
if FLAGS.webcam_ports:
    ports = [int(p) for p in FLAGS.webcam_ports.split(',')]
else:
ports = range(FLAGS.num_views)
cameras = [cv2.VideoCapture(i) for i in ports]
if not all([i.isOpened() for i in cameras]):
try:
# Try to find and kill hanging cv2 process_ids.
output = subprocess.check_output(['lsof -t /dev/video*'], shell=True)
tf.logging.info('Found hanging cv2 process_ids: \n')
tf.logging.info(output)
tf.logging.info('Killing hanging processes...')
for process_id in output.split('\n')[:-1]:
subprocess.call(['kill %s' % process_id], shell=True)
time.sleep(3)
# Recapture webcams.
cameras = [cv2.VideoCapture(i) for i in ports]
except subprocess.CalledProcessError:
raise ValueError(
'Cannot connect to cameras. Try running: \n'
'ls -ltrh /dev/video* \n '
'to see which ports your webcams are connected to. Then hand those '
'ports as a comma-separated list to --webcam_ports, e.g. '
'--webcam_ports 0,1')
# Verify each camera is able to capture images.
  ims = [get_image(camera) for camera in cameras]
  assert all(im is not None for im in ims)
return cameras
def launch_images_to_videos(view_dirs, vid_paths, debug_path):
"""Launch job in separate process to convert images to videos."""
f = 'learning/brain/research/tcn/dataset/images_to_videos.py'
cmd = ['python %s ' % f]
cmd += ['--view_dirs %s ' % ','.join(i for i in view_dirs)]
cmd += ['--vid_paths %s ' % ','.join(i for i in vid_paths)]
cmd += ['--debug_path %s ' % debug_path]
cmd += ['--debug_lhs_view %s ' % FLAGS.debug_lhs_view]
cmd += ['--debug_rhs_view %s ' % FLAGS.debug_rhs_view]
cmd += [' & ']
cmd = ''.join(i for i in cmd)
# Call images_to_videos asynchronously.
fnull = open(os.devnull, 'w')
subprocess.Popen([cmd], stdout=fnull, stderr=subprocess.STDOUT, shell=True)
for p in vid_paths:
tf.logging.info('Writing final video to: %s' % p)
if debug_path:
tf.logging.info('Writing debug video to: %s' % debug_path)
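# The assembled command resembles (hypothetical view/video arguments):
#   python learning/brain/research/tcn/dataset/images_to_videos.py \
#     --view_dirs d0,d1 --vid_paths v0.mp4,v1.mp4 --debug_path debug.mp4 \
#     --debug_lhs_view 0 --debug_rhs_view 1 &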
def main(_):
# Initialize the camera capture objects.
cameras = get_cameras()
# Get one output directory per view.
view_dirs, vid_paths, debug_path = setup_paths()
try:
# Wait for user input.
try:
tf.logging.info('About to write to:')
for v in view_dirs:
tf.logging.info(v)
input('Press Enter to continue...')
except SyntaxError:
pass
# Create a queue per view for displaying and saving images.
display_queues = [ImageQueue() for _ in range(FLAGS.num_views)]
reconcile_queues = [ImageQueue() for _ in range(FLAGS.num_views)]
# Create a queue for collecting all tuples of multi-view images to write to
# disk.
write_queue = multiprocessing.Queue()
processes = []
# Create a process to display collected images in real time.
processes.append(Process(target=display_webcams, args=(display_queues,)))
# Create a process to collect the latest simultaneous images from each view.
processes.append(Process(
target=reconcile, args=(reconcile_queues, write_queue,)))
# Create a process to collect the latest simultaneous images from each view.
processes.append(Process(
target=persist, args=(write_queue, view_dirs,)))
for (cam, dq, rq) in zip(cameras, display_queues, reconcile_queues):
processes.append(Process(
target=capture_webcam, args=(cam, dq, rq,)))
for p in processes:
p.start()
for p in processes:
p.join()
except KeyboardInterrupt:
# Close the queues.
for q in display_queues + reconcile_queues:
q.close()
# Release the cameras.
for cam in cameras:
cam.release()
# Launch images_to_videos script asynchronously.
launch_images_to_videos(view_dirs, vid_paths, debug_path)
try:
sys.exit(0)
except SystemExit:
os._exit(0) # pylint: disable=protected-access
if __name__ == '__main__':
tf.app.run()