Commit 3e93722a authored by Neal Wu, committed by GitHub

Merge branch 'master' into master

parents 2335c9fc 4de34a4c
@@ -9,15 +9,15 @@ To propose a model for inclusion please submit a pull request.
 ## Models
 - [autoencoder](autoencoder) -- various autoencoders
+- [differential_privacy](differential_privacy) -- privacy-preserving student models from multiple teachers
+- [im2txt](im2txt) -- image-to-text neural network for image captioning.
 - [inception](inception) -- deep convolutional networks for computer vision
 - [namignizer](namignizer) -- recognize and generate names
 - [neural_gpu](neural_gpu) -- highly parallel neural computer
-- [privacy](privacy) -- privacy-preserving student models from multiple teachers
+- [neural_programmer](neural_programmer) -- neural network augmented with logic and mathematical operations.
 - [resnet](resnet) -- deep and wide residual networks
 - [slim](slim) -- image classification models in TF-Slim
 - [swivel](swivel) -- the Swivel algorithm for generating word embeddings
 - [syntaxnet](syntaxnet) -- neural models of natural language syntax
 - [textsum](textsum) -- sequence-to-sequence with attention model for text summarization.
 - [transformer](transformer) -- spatial transformer network, which allows the spatial manipulation of data within the network
-- [im2txt](im2txt) -- image-to-text neural network for image captioning.
-- [neural_programmer](neural programmer) -- neural network augmented with logic and mathematic operations.
@@ -341,7 +341,7 @@ def loss_fun(logits, labels):
   # Calculate the cross entropy between labels and predictions
   labels = tf.cast(labels, tf.int64)
   cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
-      logits, labels, name='cross_entropy_per_example')
+      logits=logits, labels=labels, name='cross_entropy_per_example')
   # Calculate the average cross entropy loss across the batch.
   cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
...
@@ -119,7 +119,7 @@ approximately 10 times slower.
 First ensure that you have installed the following required packages:
 * **Bazel** ([instructions](http://bazel.io/docs/install.html)).
-* **TensorFlow** ([instructions](https://www.tensorflow.org/versions/master/get_started/os_setup.html)).
+* **TensorFlow** r0.12 or greater ([instructions](https://www.tensorflow.org/versions/master/get_started/os_setup.html)).
 * **NumPy** ([instructions](http://www.scipy.org/install.html)).
 * **Natural Language Toolkit (NLTK)**:
     * First install NLTK ([instructions](http://www.nltk.org/install.html)).
...
@@ -54,6 +54,16 @@ class Caption(object):
       return -1
     else:
       return 1

+  # For Python 3 compatibility (__cmp__ is deprecated).
+  def __lt__(self, other):
+    assert isinstance(other, Caption)
+    return self.score < other.score
+
+  # Also for Python 3 compatibility.
+  def __eq__(self, other):
+    assert isinstance(other, Caption)
+    return self.score == other.score
+

 class TopN(object):
...
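For context on the `__lt__`/`__eq__` methods added above: Python 3 no longer calls `__cmp__`, so objects that get pushed onto a heap (as captions are during beam search) must be orderable via `__lt__` themselves. A minimal, standalone illustration of the requirement, not code from this repository:

```python
import heapq

class Scored(object):
  """Toy stand-in for Caption: heapq in Python 3 orders items via __lt__."""

  def __init__(self, score):
    self.score = score

  def __lt__(self, other):
    return self.score < other.score

  def __eq__(self, other):
    return self.score == other.score

heap = []
for s in [0.3, 0.9, 0.1]:
  heapq.heappush(heap, Scored(s))
print(heapq.heappop(heap).score)  # 0.1 -- the smallest score comes out first.
```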
@@ -41,7 +41,7 @@ class InceptionV3Test(tf.test.TestCase):
   def _countInceptionParameters(self):
     """Counts the number of parameters in the inception model at top scope."""
     counter = {}
-    for v in tf.all_variables():
+    for v in tf.global_variables():
       name_tokens = v.op.name.split("/")
       if name_tokens[0] == "InceptionV3":
         name = "InceptionV3/" + name_tokens[1]
@@ -85,7 +85,7 @@ class InceptionV3Test(tf.test.TestCase):
     self.assertEqual([self._batch_size, 2048], embeddings.get_shape().as_list())
     self._verifyParameterCounts()
-    self._assertCollectionSize(376, tf.GraphKeys.VARIABLES)
+    self._assertCollectionSize(376, tf.GraphKeys.GLOBAL_VARIABLES)
     self._assertCollectionSize(188, tf.GraphKeys.TRAINABLE_VARIABLES)
     self._assertCollectionSize(188, tf.GraphKeys.UPDATE_OPS)
     self._assertCollectionSize(94, tf.GraphKeys.REGULARIZATION_LOSSES)
@@ -98,7 +98,7 @@ class InceptionV3Test(tf.test.TestCase):
     self.assertEqual([self._batch_size, 2048], embeddings.get_shape().as_list())
     self._verifyParameterCounts()
-    self._assertCollectionSize(376, tf.GraphKeys.VARIABLES)
+    self._assertCollectionSize(376, tf.GraphKeys.GLOBAL_VARIABLES)
     self._assertCollectionSize(188, tf.GraphKeys.TRAINABLE_VARIABLES)
     self._assertCollectionSize(0, tf.GraphKeys.UPDATE_OPS)
     self._assertCollectionSize(94, tf.GraphKeys.REGULARIZATION_LOSSES)
@@ -111,7 +111,7 @@ class InceptionV3Test(tf.test.TestCase):
     self.assertEqual([self._batch_size, 2048], embeddings.get_shape().as_list())
     self._verifyParameterCounts()
-    self._assertCollectionSize(376, tf.GraphKeys.VARIABLES)
+    self._assertCollectionSize(376, tf.GraphKeys.GLOBAL_VARIABLES)
     self._assertCollectionSize(0, tf.GraphKeys.TRAINABLE_VARIABLES)
     self._assertCollectionSize(0, tf.GraphKeys.UPDATE_OPS)
     self._assertCollectionSize(0, tf.GraphKeys.REGULARIZATION_LOSSES)
@@ -124,7 +124,7 @@ class InceptionV3Test(tf.test.TestCase):
     self.assertEqual([self._batch_size, 2048], embeddings.get_shape().as_list())
     self._verifyParameterCounts()
-    self._assertCollectionSize(376, tf.GraphKeys.VARIABLES)
+    self._assertCollectionSize(376, tf.GraphKeys.GLOBAL_VARIABLES)
     self._assertCollectionSize(0, tf.GraphKeys.TRAINABLE_VARIABLES)
     self._assertCollectionSize(0, tf.GraphKeys.UPDATE_OPS)
     self._assertCollectionSize(0, tf.GraphKeys.REGULARIZATION_LOSSES)
...
@@ -92,7 +92,7 @@ def process_image(encoded_image,
   # only logged in thread 0.
   def image_summary(name, image):
     if not thread_id:
-      tf.image_summary(name, tf.expand_dims(image, 0))
+      tf.summary.image(name, tf.expand_dims(image, 0))
   # Decode image into a float32 Tensor of shape [?, ?, 3] with values in [0, 1).
   with tf.name_scope("decode", values=[encoded_image]):
@@ -128,6 +128,6 @@ def process_image(encoded_image,
   image_summary("final_image", image)
   # Rescale to [-1,1] instead of [0, 1]
-  image = tf.sub(image, 0.5)
-  image = tf.mul(image, 2.0)
+  image = tf.subtract(image, 0.5)
+  image = tf.multiply(image, 2.0)
   return image
@@ -116,7 +116,7 @@ def prefetch_input_data(reader,
       enqueue_ops.append(values_queue.enqueue([value]))
   tf.train.queue_runner.add_queue_runner(tf.train.queue_runner.QueueRunner(
       values_queue, enqueue_ops))
-  tf.scalar_summary(
+  tf.summary.scalar(
       "queue/%s/fraction_of_%d_full" % (values_queue.name, capacity),
       tf.cast(values_queue.size(), tf.float32) * (1. / capacity))
@@ -181,7 +181,7 @@ def batch_with_dynamic_pad(images_and_captions,
   enqueue_list = []
   for image, caption in images_and_captions:
     caption_length = tf.shape(caption)[0]
-    input_length = tf.expand_dims(tf.sub(caption_length, 1), 0)
+    input_length = tf.expand_dims(tf.subtract(caption_length, 1), 0)
     input_seq = tf.slice(caption, [0], input_length)
     target_seq = tf.slice(caption, [1], input_length)
@@ -197,8 +197,8 @@ def batch_with_dynamic_pad(images_and_captions,
   if add_summaries:
     lengths = tf.add(tf.reduce_sum(mask, 1), 1)
-    tf.scalar_summary("caption_length/batch_min", tf.reduce_min(lengths))
-    tf.scalar_summary("caption_length/batch_max", tf.reduce_max(lengths))
-    tf.scalar_summary("caption_length/batch_mean", tf.reduce_mean(lengths))
+    tf.summary.scalar("caption_length/batch_min", tf.reduce_min(lengths))
+    tf.summary.scalar("caption_length/batch_max", tf.reduce_max(lengths))
+    tf.summary.scalar("caption_length/batch_mean", tf.reduce_mean(lengths))
   return images, input_seqs, target_seqs, mask
@@ -244,10 +244,10 @@ class ShowAndTellModel(object):
     # This LSTM cell has biases and outputs tanh(new_c) * sigmoid(o), but the
     # modified LSTM in the "Show and Tell" paper has no biases and outputs
     # new_c * sigmoid(o).
-    lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(
+    lstm_cell = tf.contrib.rnn.BasicLSTMCell(
         num_units=self.config.num_lstm_units, state_is_tuple=True)
     if self.mode == "train":
-      lstm_cell = tf.nn.rnn_cell.DropoutWrapper(
+      lstm_cell = tf.contrib.rnn.DropoutWrapper(
           lstm_cell,
           input_keep_prob=self.config.lstm_dropout_keep_prob,
           output_keep_prob=self.config.lstm_dropout_keep_prob)
@@ -264,13 +264,13 @@ class ShowAndTellModel(object):
       if self.mode == "inference":
         # In inference mode, use concatenated states for convenient feeding and
         # fetching.
-        tf.concat(1, initial_state, name="initial_state")
+        tf.concat_v2(initial_state, 1, name="initial_state")
         # Placeholder for feeding a batch of concatenated states.
         state_feed = tf.placeholder(dtype=tf.float32,
                                     shape=[None, sum(lstm_cell.state_size)],
                                     name="state_feed")
-        state_tuple = tf.split(1, 2, state_feed)
+        state_tuple = tf.split(value=state_feed, num_or_size_splits=2, axis=1)
         # Run a single LSTM step.
         lstm_outputs, state_tuple = lstm_cell(
@@ -278,7 +278,7 @@ class ShowAndTellModel(object):
             state=state_tuple)
         # Concatentate the resulting state.
-        tf.concat(1, state_tuple, name="state")
+        tf.concat_v2(state_tuple, 1, name="state")
       else:
         # Run the batch of sequence embeddings through the LSTM.
         sequence_length = tf.reduce_sum(self.input_mask, 1)
@@ -307,18 +307,19 @@ class ShowAndTellModel(object):
       weights = tf.to_float(tf.reshape(self.input_mask, [-1]))
       # Compute losses.
-      losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, targets)
-      batch_loss = tf.div(tf.reduce_sum(tf.mul(losses, weights)),
+      losses = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=targets,
+                                                              logits=logits)
+      batch_loss = tf.div(tf.reduce_sum(tf.multiply(losses, weights)),
                           tf.reduce_sum(weights),
                           name="batch_loss")
-      tf.contrib.losses.add_loss(batch_loss)
-      total_loss = tf.contrib.losses.get_total_loss()
+      tf.losses.add_loss(batch_loss)
+      total_loss = tf.losses.get_total_loss()
       # Add summaries.
-      tf.scalar_summary("batch_loss", batch_loss)
-      tf.scalar_summary("total_loss", total_loss)
+      tf.summary.scalar("losses/batch_loss", batch_loss)
+      tf.summary.scalar("losses/total_loss", total_loss)
       for var in tf.trainable_variables():
-        tf.histogram_summary(var.op.name, var)
+        tf.summary.histogram("parameters/" + var.op.name, var)
       self.total_loss = total_loss
       self.target_cross_entropy_losses = losses  # Used in evaluation.
...
@@ -63,7 +63,7 @@ class ShowAndTellModelTest(tf.test.TestCase):
   def _countModelParameters(self):
     """Counts the number of parameters in the model at top level scope."""
     counter = {}
-    for v in tf.all_variables():
+    for v in tf.global_variables():
       name = v.op.name.split("/")[0]
       num_params = v.get_shape().num_elements()
       assert num_params
@@ -98,7 +98,7 @@ class ShowAndTellModelTest(tf.test.TestCase):
     fetches = expected_shapes.keys()
     with self.test_session() as sess:
-      sess.run(tf.initialize_all_variables())
+      sess.run(tf.global_variables_initializer())
       outputs = sess.run(fetches, feed_dict)
       for index, output in enumerate(outputs):
...
@@ -137,13 +137,13 @@ def _convert_to_example(filename, image_buffer, label, text, height, width):
   example = tf.train.Example(features=tf.train.Features(feature={
       'image/height': _int64_feature(height),
       'image/width': _int64_feature(width),
-      'image/colorspace': _bytes_feature(colorspace),
+      'image/colorspace': _bytes_feature(tf.compat.as_bytes(colorspace)),
       'image/channels': _int64_feature(channels),
       'image/class/label': _int64_feature(label),
-      'image/class/text': _bytes_feature(text),
-      'image/format': _bytes_feature(image_format),
-      'image/filename': _bytes_feature(os.path.basename(filename)),
-      'image/encoded': _bytes_feature(image_buffer)}))
+      'image/class/text': _bytes_feature(tf.compat.as_bytes(text)),
+      'image/format': _bytes_feature(tf.compat.as_bytes(image_format)),
+      'image/filename': _bytes_feature(tf.compat.as_bytes(os.path.basename(filename))),
+      'image/encoded': _bytes_feature(tf.compat.as_bytes(image_buffer))}))
   return example
...
@@ -79,7 +79,7 @@ RMSPROP_MOMENTUM = 0.9  # Momentum in RMSProp.
 RMSPROP_EPSILON = 1.0  # Epsilon term for RMSProp.
-def _tower_loss(images, labels, num_classes, scope):
+def _tower_loss(images, labels, num_classes, scope, reuse_variables=None):
   """Calculate the total loss on a single tower running the ImageNet model.
   We perform 'batch splitting'. This means that we cut up a batch across
@@ -103,9 +103,10 @@ def _tower_loss(images, labels, num_classes, scope):
   restore_logits = not FLAGS.fine_tune
   # Build inference Graph.
-  logits = inception.inference(images, num_classes, for_training=True,
-                               restore_logits=restore_logits,
-                               scope=scope)
+  with tf.variable_scope(tf.get_variable_scope(), reuse=reuse_variables):
+    logits = inception.inference(images, num_classes, for_training=True,
+                                 restore_logits=restore_logits,
+                                 scope=scope)
   # Build the portion of the Graph calculating the losses. Note that we will
   # assemble the total_loss using a custom function below.
@@ -220,13 +221,14 @@ def train(dataset):
     # Number of classes in the Dataset label set plus 1.
     # Label 0 is reserved for an (unused) background class.
     num_classes = dataset.num_classes() + 1
     # Split the batch of images and labels for towers.
     images_splits = tf.split(0, FLAGS.num_gpus, images)
     labels_splits = tf.split(0, FLAGS.num_gpus, labels)
     # Calculate the gradients for each model tower.
     tower_grads = []
+    reuse_variables = None
     for i in range(FLAGS.num_gpus):
       with tf.device('/gpu:%d' % i):
         with tf.name_scope('%s_%d' % (inception.TOWER_NAME, i)) as scope:
@@ -236,10 +238,10 @@ def train(dataset):
           # function constructs the entire ImageNet model but shares the
           # variables across all towers.
           loss = _tower_loss(images_splits[i], labels_splits[i], num_classes,
-                             scope)
+                             scope, reuse_variables)
           # Reuse variables for the next tower.
-          tf.get_variable_scope().reuse_variables()
+          reuse_variables = True
           # Retain the summaries from the final tower.
           summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)
...
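For context on the `reuse_variables` change above: every GPU tower now builds the inference graph inside the same variable scope, with only the first tower allowed to create variables and all later towers reusing them. A minimal, standalone sketch of that pattern (illustrative only, not the repository's code):

```python
import tensorflow as tf

def tower(x, reuse):
  # The first call (reuse=None) creates "shared/w"; later calls reuse it.
  with tf.variable_scope("shared", reuse=reuse):
    w = tf.get_variable("w", shape=[3, 2], initializer=tf.ones_initializer())
  return tf.matmul(x, w)

x = tf.placeholder(tf.float32, [None, 3])
outputs = []
reuse = None
for i in range(2):
  with tf.name_scope("tower_%d" % i):
    outputs.append(tower(x, reuse))
  reuse = True  # Every tower after the first shares the same weights.
```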
@@ -163,8 +163,8 @@ def cross_entropy_loss(logits, one_hot_labels, label_smoothing=0,
     smooth_positives = 1.0 - label_smoothing
     smooth_negatives = label_smoothing / num_classes
     one_hot_labels = one_hot_labels * smooth_positives + smooth_negatives
-  cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits,
-                                                          one_hot_labels,
+  cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=logits,
+                                                          labels=one_hot_labels,
                                                           name='xentropy')
   weight = tf.convert_to_tensor(weight,
                                 dtype=logits.dtype.base_dtype,
...
@@ -91,7 +91,7 @@ def batch_norm(inputs,
   if scale:
     gamma = variables.variable('gamma',
                                params_shape,
-                               initializer=tf.ones_initializer,
+                               initializer=tf.ones_initializer(),
                                trainable=trainable,
                                restore=restore)
   # Create moving_mean and moving_variance add them to
@@ -105,7 +105,7 @@ def batch_norm(inputs,
                                        collections=moving_collections)
   moving_variance = variables.variable('moving_variance',
                                        params_shape,
-                                       initializer=tf.ones_initializer,
+                                       initializer=tf.ones_initializer(),
                                        trainable=False,
                                        restore=restore,
                                        collections=moving_collections)
...
@@ -72,7 +72,7 @@ class Parameters:
         self.RandomUniformInit([1, embedding_dims]))
     params["break_conditional"] = tf.Variable(
         self.RandomUniformInit([2 * embedding_dims, embedding_dims]))
-    init = tf.initialize_all_variables()
+    init = tf.global_variables_initializer()
     return params, global_step, init
   def RandomUniformInit(self, shape):
...
<font size=4><b>Visual Dynamics: Probabilistic Future Frame Synthesis via Cross Convolutional Networks.</b></font>
<b>Introduction</b>
https://arxiv.org/pdf/1607.02586v1.pdf
This is an implementation based on my understanding, with small
variations. It doesn't necessarily represent the paper published
by the original authors.
Authors: Xin Pan (Github: panyx0718), Anelia Angelova
<b>Results:</b>
<left>
![Sample1](g3doc/cross_conv.png)
</left>
<left>
![Sample2](g3doc/cross_conv2.png)
</left>
<left>
![Loss](g3doc/cross_conv3.png)
</left>
<b>Prerequisites:</b>
1. Install TensorFlow (r0.12) and Bazel.
2. Download the Sprites dataset or generate the moving-object dataset.
Sprites data is located here:
http://www.scottreed.info/files/nips2015-analogy-data.tar.gz
Convert the .mat files into images and use sprites_gen.py to convert them
to tf.SequenceExample records (see the sketch below).
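For reference, the record format the training code expects mirrors the example_gen.py and reader.py code included later in this commit: each record is a tf.SequenceExample whose `moving_objs` feature list holds consecutive flattened frames. A rough sketch of packing one frame pair (the .mat parsing itself is dataset-specific and not shown; the path and array values below are placeholders):

```python
import numpy as np
import tensorflow as tf

def frames_to_sequence_example(frame1, frame2):
  """Pack two consecutive [H, W, 3] float frames into a tf.SequenceExample."""
  example = tf.train.SequenceExample()
  feature_list = example.feature_lists.feature_list['moving_objs']
  for frame in (frame1, frame2):
    feature = feature_list.feature.add()
    feature.float_list.value.extend(np.reshape(frame, [-1]).tolist())
  return example

# Placeholder usage: write a single record to the training file.
writer = tf.python_io.TFRecordWriter('data/tfrecords')
writer.write(frames_to_sequence_example(
    np.zeros([60, 60, 3]), np.ones([60, 60, 3])).SerializeToString())
writer.close()
```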
<b>How to run:</b>
```shell
ls -R
.:
data next_frame_prediction WORKSPACE
./data:
tfrecords tfrecords_test
./next_frame_prediction:
cross_conv g3doc README.md
./next_frame_prediction/cross_conv:
BUILD eval.py objects_gen.py model.py reader.py sprites_gen.py train.py
./next_frame_prediction/g3doc:
cross_conv2.png cross_conv3.png cross_conv.png
# Build everything.
bazel build -c opt next_frame_prediction/...
# The following example runs the generated 2d objects.
# For Sprites dataset, image_size should be 60, norm_scale should be 255.0.
# Batch size is normally 16~64, depending on your memory size.
#
# Run training.
bazel-bin/next_frame_prediction/cross_conv/train \
--batch_size=1 \
--data_filepattern=data/tfrecords \
--image_size=64 \
--log_root=/tmp/predict
step: 1, loss: 24.428671
step: 2, loss: 19.211605
step: 3, loss: 5.543143
step: 4, loss: 3.035339
step: 5, loss: 1.771392
step: 6, loss: 2.099824
step: 7, loss: 1.747665
step: 8, loss: 1.572436
step: 9, loss: 1.586816
step: 10, loss: 1.434191
#
# Run eval.
bazel-bin/next_frame_prediction/cross_conv/eval \
--batch_size=1 \
--data_filepattern=data/tfrecords_test \
--image_size=64 \
--log_root=/tmp/predict
```
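Before training, a quick way to sanity-check that records were actually written (an illustrative snippet, assuming the `data/tfrecords` path used above):

```python
import tensorflow as tf

# Count the serialized tf.SequenceExample records in the training file.
num_records = sum(1 for _ in tf.python_io.tf_record_iterator('data/tfrecords'))
print('records: %d' % num_records)
```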
licenses(["notice"]) # Apache 2.0
package_group(
name = "internal",
packages = [
"//next_frame_prediction/...",
],
)
package(default_visibility = [":internal"])
py_library(
name = "model",
srcs = ["model.py"],
)
py_library(
name = "reader",
srcs = ["reader.py"],
)
py_binary(
name = "train",
srcs = ["train.py"],
deps = [
":model",
":reader",
],
)
py_binary(
name = "eval",
srcs = ["eval.py"],
deps = [
":model",
":reader",
],
)
py_binary(
name = "example_gen",
srcs = ["example_gen.py"],
)
py_binary(
name = "sprites_gen",
srcs = ["sprites_gen.py"],
)
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Eval Cross Convolutional Model."""
import io
import os
import sys
import time
import numpy as np
import tensorflow as tf
import model as cross_conv_model
import reader
FLAGS = tf.flags.FLAGS
tf.flags.DEFINE_string('log_root', '/tmp/moving_obj', 'The root dir of output.')
tf.flags.DEFINE_string('data_filepattern',
'est',
'training data file pattern.')
tf.flags.DEFINE_integer('batch_size', 1, 'Batch size.')
tf.flags.DEFINE_integer('image_size', 64, 'Image height and width.')
tf.flags.DEFINE_float('norm_scale', 1.0, 'Normalize the original image')
tf.flags.DEFINE_float('scale', 10.0,
'Scale the image after norm_scale and move the diff '
'to the positive realm.')
tf.flags.DEFINE_integer('sequence_length', 2, 'tf.SequenceExample length.')
tf.flags.DEFINE_integer('eval_batch_count', 100,
                        'Number of batches to average the result over.')
tf.flags.DEFINE_bool('l2_loss', True, 'If true, include l2_loss.')
tf.flags.DEFINE_bool('reconstr_loss', False, 'If true, include reconstr_loss.')
tf.flags.DEFINE_bool('kl_loss', True, 'If true, include KL loss.')
slim = tf.contrib.slim
def _Eval():
params = dict()
params['batch_size'] = FLAGS.batch_size
params['seq_len'] = FLAGS.sequence_length
params['image_size'] = FLAGS.image_size
params['is_training'] = False
params['norm_scale'] = FLAGS.norm_scale
params['scale'] = FLAGS.scale
params['l2_loss'] = FLAGS.l2_loss
params['reconstr_loss'] = FLAGS.reconstr_loss
params['kl_loss'] = FLAGS.kl_loss
eval_dir = os.path.join(FLAGS.log_root, 'eval')
images = reader.ReadInput(
FLAGS.data_filepattern, shuffle=False, params=params)
images *= params['scale']
  # Increasing the value makes training much faster.
image_diff_list = reader.SequenceToImageAndDiff(images)
model = cross_conv_model.CrossConvModel(image_diff_list, params)
model.Build()
summary_writer = tf.summary.FileWriter(eval_dir)
saver = tf.train.Saver()
sess = tf.Session('', config=tf.ConfigProto(allow_soft_placement=True))
tf.train.start_queue_runners(sess)
while True:
time.sleep(60)
try:
ckpt_state = tf.train.get_checkpoint_state(FLAGS.log_root)
except tf.errors.OutOfRangeError as e:
sys.stderr.write('Cannot restore checkpoint: %s\n' % e)
continue
if not (ckpt_state and ckpt_state.model_checkpoint_path):
sys.stderr.write('No model to eval yet at %s\n' % FLAGS.log_root)
continue
sys.stderr.write('Loading checkpoint %s\n' %
ckpt_state.model_checkpoint_path)
saver.restore(sess, ckpt_state.model_checkpoint_path)
# Use the empirical distribution of z from training set.
if not tf.gfile.Exists(os.path.join(FLAGS.log_root, 'z_mean.npy')):
sys.stderr.write('No z at %s\n' % FLAGS.log_root)
continue
with tf.gfile.Open(os.path.join(FLAGS.log_root, 'z_mean.npy')) as f:
sample_z_mean = np.load(io.BytesIO(f.read()))
with tf.gfile.Open(
os.path.join(FLAGS.log_root, 'z_stddev_log.npy')) as f:
sample_z_stddev_log = np.load(io.BytesIO(f.read()))
total_loss = 0.0
for _ in xrange(FLAGS.eval_batch_count):
loss_val, total_steps, summaries = sess.run(
[model.loss, model.global_step, model.summary_op],
feed_dict={model.z_mean: sample_z_mean,
model.z_stddev_log: sample_z_stddev_log})
total_loss += loss_val
summary_writer.add_summary(summaries, total_steps)
sys.stderr.write('steps: %d, loss: %f\n' %
(total_steps, total_loss / FLAGS.eval_batch_count))
def main(_):
_Eval()
if __name__ == '__main__':
tf.app.run()
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Generate examples of two objects moving in different directions."""
import random
import sys
import numpy as np
import tensorflow as tf
tf.flags.DEFINE_string('out_file', '',
'Output file for the tfrecords.')
def _add_object(obj_type, image, image2, xpos, ypos):
"""Add a moving obj to two consecutive images."""
obj_size = random.randint(8, 10)
channel = random.randint(0, 2)
move = random.randint(6, 10)
obj = np.zeros([obj_size, obj_size, 3])
if obj_type == 'rectangle':
xpos2 = xpos + move
ypos2 = ypos
for i in xrange(obj_size):
obj[i, 0:i+1, channel] = [1.0 for _ in xrange(i+1)]
elif obj_type == 'square':
xpos2 = xpos
ypos2 = ypos + move
obj[:, :, channel] = 1.0
for x in xrange(obj_size):
for y in xrange(obj_size):
if obj[x, y, channel] == 1.0:
image[xpos+x, ypos+y, channel] = 1.0
image2[xpos2+x, ypos2+y, channel] = 1.0
def _images_to_example(image, image2):
"""Convert two consecutive images to SequenceExample."""
example = tf.SequenceExample()
feature_list = example.feature_lists.feature_list['moving_objs']
feature = feature_list.feature.add()
feature.float_list.value.extend(np.reshape(image, [-1]).tolist())
feature = feature_list.feature.add()
feature.float_list.value.extend(np.reshape(image2, [-1]).tolist())
return example
def generate_input():
"""Generate tfrecords."""
writer = tf.python_io.TFRecordWriter(tf.flags.FLAGS.out_file)
writer2 = tf.python_io.TFRecordWriter(tf.flags.FLAGS.out_file + '_test')
examples = []
for xpos in xrange(0, 40, 3):
for ypos in xrange(0, 40, 3):
for xpos2 in xrange(0, 40, 3):
for ypos2 in xrange(0, 40, 3):
image = np.zeros([64, 64, 3])
image2 = np.zeros([64, 64, 3])
_add_object('rectangle', image, image2, xpos, ypos)
_add_object('square', image, image2, xpos2, ypos2)
examples.append(_images_to_example(image, image2))
  sys.stderr.write('Finished generating examples.\n')
random.shuffle(examples)
for count, ex in enumerate(examples):
if count % 10 == 0:
writer2.write(ex.SerializeToString())
else:
writer.write(ex.SerializeToString())
def main(_):
generate_input()
if __name__ == '__main__':
tf.app.run()
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Cross Convolutional Model.
https://arxiv.org/pdf/1607.02586v1.pdf
"""
import math
import sys
import tensorflow as tf
slim = tf.contrib.slim
class CrossConvModel(object):
def __init__(self, image_diff_list, params):
"""Constructor.
Args:
image_diff_list: A list of (image, diff) tuples, with shape
[batch_size, image_size, image_size, 3] and image_sizes as
[32, 64, 128, 256].
params: Dict of parameters.
"""
self.images = [i for (i, _) in image_diff_list]
# Move the diff to the positive realm.
self.diffs = [(d + params['scale']) / 2 for (i, d) in image_diff_list]
self.params = params
def Build(self):
with tf.device('/gpu:0'):
with slim.arg_scope([slim.conv2d],
activation_fn=tf.nn.relu,
normalizer_fn=slim.batch_norm,
normalizer_params={'is_training':
self.params['is_training']}):
self._BuildMotionKernel()
encoded_images = self._BuildImageEncoder()
cross_conved_images = self._CrossConv(encoded_images)
self._BuildImageDecoder(cross_conved_images)
self._BuildLoss()
image = self.images[1]
diff = self.diffs[1]
self.global_step = tf.Variable(0, name='global_step', trainable=False)
if self.params['is_training']:
self._BuildTrainOp()
diff = diff * 2.0 - self.params['scale']
diff_output = self.diff_output * 2.0 - self.params['scale']
concat_image = tf.concat(
1, [image, image + diff_output, image + diff, diff_output])
tf.summary.image('origin_predict_expect_predictdiff', concat_image)
self.summary_op = tf.summary.merge_all()
return self.loss
def _BuildTrainOp(self):
lrn_rate = tf.maximum(
0.01, # min_lr_rate.
tf.train.exponential_decay(
self.params['learning_rate'], self.global_step, 10000, 0.5))
tf.summary.scalar('learning rate', lrn_rate)
optimizer = tf.train.GradientDescentOptimizer(lrn_rate)
self.train_op = slim.learning.create_train_op(
self.loss, optimizer, global_step=self.global_step)
def _BuildLoss(self):
    # 1. reconstr_loss doesn't seem to do better than l2 loss.
    # 2. Only works when using reduce_mean. reduce_sum doesn't work.
    # 3. It seems kl loss doesn't play an important role.
self.loss = 0
with tf.variable_scope('loss'):
if self.params['l2_loss']:
l2_loss = tf.reduce_mean(tf.square(self.diff_output - self.diffs[1]))
tf.summary.scalar('l2_loss', l2_loss)
self.loss += l2_loss
if self.params['reconstr_loss']:
reconstr_loss = (-tf.reduce_mean(
self.diffs[1] * (1e-10 + self.diff_output) +
(1-self.diffs[1]) * tf.log(1e-10 + 1 - self.diff_output)))
reconstr_loss = tf.check_numerics(reconstr_loss, 'reconstr_loss')
tf.summary.scalar('reconstr_loss', reconstr_loss)
self.loss += reconstr_loss
if self.params['kl_loss']:
kl_loss = (0.5 * tf.reduce_mean(
tf.square(self.z_mean) + tf.square(self.z_stddev) -
2 * self.z_stddev_log - 1))
tf.summary.scalar('kl_loss', kl_loss)
self.loss += kl_loss
tf.summary.scalar('loss', self.loss)
def _BuildMotionKernel(self):
image = self.images[-2]
diff = self.diffs[-2]
shape = image.get_shape().as_list()
assert shape[1] == shape[2] and shape[1] == 128
batch_size = shape[0]
net = tf.concat(3, [image, diff])
with tf.variable_scope('motion_encoder'):
with slim.arg_scope([slim.conv2d], padding='VALID'):
net = slim.conv2d(net, 96, [5, 5], stride=1)
net = slim.max_pool2d(net, [2, 2])
net = slim.conv2d(net, 96, [5, 5], stride=1)
net = slim.max_pool2d(net, [2, 2])
net = slim.conv2d(net, 128, [5, 5], stride=1)
net = slim.conv2d(net, 128, [5, 5], stride=1)
net = slim.max_pool2d(net, [2, 2])
net = slim.conv2d(net, 256, [4, 4], stride=1)
net = slim.conv2d(net, 256, [3, 3], stride=1)
z = tf.reshape(net, shape=[batch_size, -1])
self.z_mean, self.z_stddev_log = tf.split(
split_dim=1, num_split=2, value=z)
self.z_stddev = tf.exp(self.z_stddev_log)
epsilon = tf.random_normal(
self.z_mean.get_shape().as_list(), 0, 1, dtype=tf.float32)
kernel = self.z_mean + tf.multiply(self.z_stddev, epsilon)
width = int(math.sqrt(kernel.get_shape().as_list()[1] // 128))
kernel = tf.reshape(kernel, [batch_size, width, width, 128])
with tf.variable_scope('kernel_decoder'):
with slim.arg_scope([slim.conv2d], padding='SAME'):
kernel = slim.conv2d(kernel, 128, [5, 5], stride=1)
self.kernel = slim.conv2d(kernel, 128, [5, 5], stride=1)
sys.stderr.write('kernel shape: %s\n' % kernel.get_shape())
def _BuildImageEncoder(self):
feature_maps = []
for (i, image) in enumerate(self.images):
with tf.variable_scope('image_encoder_%d' % i):
with slim.arg_scope([slim.conv2d, slim.max_pool2d], padding='SAME'):
net = slim.conv2d(image, 64, [5, 5], stride=1)
net = slim.conv2d(net, 64, [5, 5], stride=1)
net = slim.max_pool2d(net, [5, 5])
net = slim.conv2d(net, 64, [5, 5], stride=1)
net = slim.conv2d(net, 32, [5, 5], stride=1)
net = slim.max_pool2d(net, [2, 2])
sys.stderr.write('image_conv shape: %s\n' % net.get_shape())
feature_maps.append(net)
return feature_maps
def _CrossConvHelper(self, encoded_image, kernel):
"""Cross Convolution.
    The encoded image and kernel are of the same shape, namely
    [batch_size, image_size, image_size, channels]. They are split into
    [image_size, image_size] image squares and [kernel_size, kernel_size]
    kernel squares, and each kernel square is used to convolve the
    corresponding image square.
    """
images = tf.expand_dims(encoded_image, 0)
kernels = tf.expand_dims(kernel, 3)
return tf.nn.depthwise_conv2d(images, kernels, [1, 1, 1, 1], 'SAME')
def _CrossConv(self, encoded_images):
"""Apply the motion kernel on the encoded_images."""
cross_conved_images = []
kernels = tf.split(split_dim=3, num_split=4, value=self.kernel)
for (i, encoded_image) in enumerate(encoded_images):
with tf.variable_scope('cross_conv_%d' % i):
kernel = kernels[i]
encoded_image = tf.unstack(encoded_image, axis=0)
kernel = tf.unstack(kernel, axis=0)
assert len(encoded_image) == len(kernel)
assert len(encoded_image) == self.params['batch_size']
conved_image = []
for j in xrange(len(encoded_image)):
conved_image.append(self._CrossConvHelper(
encoded_image[j], kernel[j]))
cross_conved_images.append(tf.concat(0, conved_image))
sys.stderr.write('cross_conved shape: %s\n' %
cross_conved_images[-1].get_shape())
return cross_conved_images
def _Deconv(self, net, out_filters, kernel_size, stride):
shape = net.get_shape().as_list()
in_filters = shape[3]
kernel_shape = [kernel_size, kernel_size, out_filters, in_filters]
weights = tf.get_variable(
name='weights',
shape=kernel_shape,
dtype=tf.float32,
initializer=tf.truncated_normal_initializer(stddev=0.01))
out_height = shape[1] * stride
out_width = shape[2] * stride
batch_size = shape[0]
output_shape = [batch_size, out_height, out_width, out_filters]
net = tf.nn.conv2d_transpose(net, weights, output_shape,
[1, stride, stride, 1], padding='SAME')
slim.batch_norm(net)
return net
def _BuildImageDecoder(self, cross_conved_images):
"""Decode the cross_conved feature maps into the predicted images."""
nets = []
for i, cross_conved_image in enumerate(cross_conved_images):
with tf.variable_scope('image_decoder_%d' % i):
stride = 64 / cross_conved_image.get_shape().as_list()[1]
# TODO(xpan): Alternative solution for upsampling?
nets.append(self._Deconv(
cross_conved_image, 64, kernel_size=3, stride=stride))
net = tf.concat(3, nets)
net = slim.conv2d(net, 128, [9, 9], padding='SAME', stride=1)
net = slim.conv2d(net, 128, [1, 1], padding='SAME', stride=1)
net = slim.conv2d(net, 3, [1, 1], padding='SAME', stride=1)
self.diff_output = net
sys.stderr.write('diff_output shape: %s\n' % self.diff_output.get_shape())
# Copyright 2016 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Read image sequence."""
import tensorflow as tf
def SequenceToImageAndDiff(images):
"""Convert image sequence batch into image and diff batch.
Each image pair is converted to the first image and their diff.
Batch size will increase if sequence length is larger than 2.
Args:
images: Image sequence with shape
[batch_size, seq_len, image_size, image_size, channel]
Returns:
the list of (image, diff) tuples with shape
[batch_size2, image_size, image_size, channel]. image_sizes are
[32, 64, 128, 256].
"""
image_diff_list = []
image_seq = tf.unstack(images, axis=1)
for size in [32, 64, 128, 256]:
resized_images = [
tf.image.resize_images(i, [size, size]) for i in image_seq]
diffs = []
for i in xrange(0, len(resized_images)-1):
diffs.append(resized_images[i+1] - resized_images[i])
image_diff_list.append(
(tf.concat(0, resized_images[:-1]), tf.concat(0, diffs)))
return image_diff_list
def ReadInput(data_filepattern, shuffle, params):
"""Read the tf.SequenceExample tfrecord files.
Args:
data_filepattern: tf.SequenceExample tfrecord filepattern.
shuffle: Whether to shuffle the examples.
params: parameter dict.
Returns:
image sequence batch [batch_size, seq_len, image_size, image_size, channel].
"""
image_size = params['image_size']
filenames = tf.gfile.Glob(data_filepattern)
filename_queue = tf.train.string_input_producer(filenames, shuffle=shuffle)
reader = tf.TFRecordReader()
_, example = reader.read(filename_queue)
  feature_spec = {
      'moving_objs': tf.FixedLenSequenceFeature(
          shape=[image_size * image_size * 3], dtype=tf.float32)}
  _, features = tf.parse_single_sequence_example(
      example, sequence_features=feature_spec)
moving_objs = tf.reshape(
features['moving_objs'], [params['seq_len'], image_size, image_size, 3])
if shuffle:
examples = tf.train.shuffle_batch(
[moving_objs],
batch_size=params['batch_size'],
num_threads=64,
capacity=params['batch_size'] * 100,
min_after_dequeue=params['batch_size'] * 4)
else:
examples = tf.train.batch([moving_objs],
batch_size=params['batch_size'],
num_threads=16,
capacity=params['batch_size'])
examples /= params['norm_scale']
return examples