Merge pull request #1 from tensorflow/master

update to tensorflow/model master

Merge pull request #1 from tensorflow/master
update to tensorflow/model master
68a18b70 · Toby Boyd · GitHub · bc70271a · 2c4fea8d · 68a18b70
Commit 68a18b70 authored Jun 08, 2017 by Toby Boyd Committed by GitHub Jun 08, 2017
20 changed files
--- a/neural_gpu/wmt_utils.py
+++ b/neural_gpu/wmt_utils.py
@@ -60,7 +60,7 @@ def maybe_download(directory, filename, url):
    print "Downloading %s to %s" % (url, filepath)
    filepath, _ = urllib.request.urlretrieve(url, filepath)
    statinfo = os.stat(filepath)
-    print "Succesfully downloaded", filename, statinfo.st_size, "bytes"
+    print "Successfully downloaded", filename, statinfo.st_size, "bytes"
  return filepath

--- a/neural_programmer/data_utils.py
+++ b/neural_programmer/data_utils.py
@@ -223,7 +223,7 @@ def list_join(a):
 def group_by_max(table, number):
-  #computes the most frequently occuring entry in a column
+  #computes the most frequently occurring entry in a column
  answer = []
  for i in range(len(table)):
    temp = []

--- a/neural_programmer/model.py
+++ b/neural_programmer/model.py
@@ -121,21 +121,21 @@ class Graph():
      if (self.utility.FLAGS.rnn_dropout > 0.0):
        question_hidden = question_hidden * rnn_dropout_mask
      hidden_vectors.append(tf.expand_dims(question_hidden, 0))
-    hidden_vectors = tf.concat(0, hidden_vectors)
+    hidden_vectors = tf.concat(axis=0, values=hidden_vectors)
    return question_hidden, hidden_vectors
  def history_recurrent_step(self, curr_hprev, hprev):
    #A single RNN step for controller or history RNN
    return tf.tanh(
        tf.matmul(
-            tf.concat(1, [hprev, curr_hprev]), self.params[
+            tf.concat(axis=1, values=[hprev, curr_hprev]), self.params[
                "history_recurrent"])) + self.params["history_recurrent_bias"]
  def question_number_softmax(self, hidden_vectors):
    #Attention on question to decide the question number to be passed to comparison ops
    def compute_ans(op_embedding, comparison):
      op_embedding = tf.expand_dims(op_embedding, 0)
-      #dot product of operation embedding with hidden state to the left of the number occurence
+      #dot product of operation embedding with hidden state to the left of the number occurrence
      first = tf.transpose(
          tf.matmul(op_embedding,
                    tf.transpose(
@@ -150,13 +150,13 @@ class Graph():
                            tf.expand_dims(
                                tf.transpose(self.batch_ordinal_question_one), 2
                            ), [1, 1, self.utility.FLAGS.embedding_dims]), 0))))
-      question_number_softmax = tf.nn.softmax(tf.concat(1, [first, second]))
+      question_number_softmax = tf.nn.softmax(tf.concat(axis=1, values=[first, second]))
      if (self.mode == "test"):
        cond = tf.equal(question_number_softmax,
                        tf.reshape(
                            tf.reduce_max(question_number_softmax, 1),
                            [self.batch_size, 1]))
-        question_number_softmax = tf.select(
+        question_number_softmax = tf.where(
            cond,
            tf.fill(tf.shape(question_number_softmax), 1.0),
            tf.fill(tf.shape(question_number_softmax), 0.0))
@@ -164,7 +164,7 @@ class Graph():
                                          self.data_type)
      ans = tf.reshape(
          tf.reduce_sum(question_number_softmax * tf.concat(
-              1, [self.batch_question_number, self.batch_question_number_one]),
+              axis=1, values=[self.batch_question_number, self.batch_question_number_one]),
                        1), [self.batch_size, 1])
      return ans
@@ -225,7 +225,7 @@ class Graph():
    column_controller_vector = nn_utils.apply_dropout(
        column_controller_vector, self.utility.FLAGS.dropout, self.mode)
    self.full_column_hidden_vectors = tf.concat(
-        1, [self.column_hidden_vectors, self.word_column_hidden_vectors])
+        axis=1, values=[self.column_hidden_vectors, self.word_column_hidden_vectors])
    self.full_column_hidden_vectors += self.summary_text_entry_embeddings
    self.full_column_hidden_vectors = nn_utils.apply_dropout(
        self.full_column_hidden_vectors, self.utility.FLAGS.dropout, self.mode)
@@ -258,7 +258,7 @@ class Graph():
          temp_ans.append(curr_prob)
        else:
          temp_ans.append(tf.zeros_like(curr_prob))
-      temp_ans = tf.transpose(tf.concat(0, temp_ans))
+      temp_ans = tf.transpose(tf.concat(axis=0, values=temp_ans))
      answer += temp_ans
    return answer
@@ -266,7 +266,7 @@ class Graph():
    #converts soft selection to hard selection. used at test time
    cond = tf.equal(
        softmax, tf.reshape(tf.reduce_max(softmax, 1), [self.batch_size, 1]))
-    softmax = tf.select(
+    softmax = tf.where(
        cond, tf.fill(tf.shape(softmax), 1.0), tf.fill(tf.shape(softmax), 0.0))
    softmax = tf.cast(softmax, self.data_type)
    return softmax
@@ -297,7 +297,7 @@ class Graph():
      curr_prob = curr_prob * tf.expand_dims((1 - sum_prob), 2)
      curr_prob = curr_prob * tf.expand_dims(
          tf.cast((1 - sum_prob) > 0.0, self.data_type), 2)
-      answer = tf.select(select_mask, curr_prob, answer)
+      answer = tf.where(select_mask, curr_prob, answer)
      sum_prob += tf.reduce_sum(curr_prob, 2)
    return answer
@@ -335,11 +335,11 @@ class Graph():
                               1)  #BS * max_elements
    select_min = tf.reduce_sum(init_min * select_full_column_softmax,
                               1)  #BS * max_elements
-    select_prev = tf.concat(1, [
+    select_prev = tf.concat(axis=1, values=[
        tf.slice(select, [0, 1], [self.batch_size, self.max_elements - 1]),
        tf.cast(tf.zeros([self.batch_size, 1]), self.data_type)
    ])
-    select_next = tf.concat(1, [
+    select_next = tf.concat(axis=1, values=[
        tf.cast(tf.zeros([self.batch_size, 1]), self.data_type), tf.slice(
            select, [0, 0], [self.batch_size, self.max_elements - 1])
    ])
@@ -352,11 +352,11 @@ class Graph():
    length_content = 1
    length_select = 13
    length_print = 1
-    values = tf.concat(1, [count])
+    values = tf.concat(axis=1, values=[count])
    softmax_content = tf.slice(softmax, [0, 0],
                               [self.batch_size, length_content])
    #compute scalar output
-    output = tf.reduce_sum(tf.mul(softmax_content, values), 1)
+    output = tf.reduce_sum(tf.multiply(softmax_content, values), 1)
    #compute lookup answer
    softmax_print = tf.slice(softmax, [0, length_content + length_select],
                             [self.batch_size, length_print])
@@ -384,7 +384,7 @@ class Graph():
    ]
    select = tf.reduce_sum(
        tf.tile(tf.expand_dims(softmax_select, 2), [1, 1, self.max_elements]) *
-        tf.concat(1, select_lists), 1)
+        tf.concat(axis=1, values=select_lists), 1)
    select = select * self.select_whole_mask
    return output, select
@@ -396,11 +396,11 @@ class Graph():
        self.batch_question_attention_mask)  #batch_size * embedding_dims
    controller_vector = tf.nn.relu(
        tf.matmul(hprev, self.params["controller_prev"]) + tf.matmul(
-            tf.concat(1, [question_embedding, attention_vector]), self.params[
+            tf.concat(axis=1, values=[question_embedding, attention_vector]), self.params[
                "controller"]))
    column_controller_vector = tf.nn.relu(
        tf.matmul(hprev, self.params["column_controller_prev"]) + tf.matmul(
-            tf.concat(1, [question_embedding, attention_vector]), self.params[
+            tf.concat(axis=1, values=[question_embedding, attention_vector]), self.params[
                "column_controller"]))
    controller_vector = nn_utils.apply_dropout(
        controller_vector, self.utility.FLAGS.dropout, self.mode)
@@ -413,7 +413,7 @@ class Graph():
        tf.matmul(tf.transpose(self.params_unit), tf.transpose(softmax)))
    column_controller_vector = tf.nn.relu(
        tf.matmul(
-            tf.concat(1, [
+            tf.concat(axis=1, values=[
                column_controller_vector, weighted_op_representation
            ]), self.params["break_conditional"]))
    full_column_softmax = self.compute_column_softmax(column_controller_vector,
@@ -429,7 +429,7 @@ class Graph():
  def compute_lookup_error(self, val):
    #computes lookup error.
    cond = tf.equal(self.batch_print_answer, val)
-    inter = tf.select(
+    inter = tf.where(
        cond, self.init_print_error,
        tf.tile(
            tf.reshape(tf.constant(1e10, self.data_type), [1, 1, 1]), [
@@ -450,12 +450,12 @@ class Graph():
  def error_computation(self):
    #computes the error of each example in a batch
-    math_error = 0.5 * tf.square(tf.sub(self.scalar_output, self.batch_answer))
+    math_error = 0.5 * tf.square(tf.subtract(self.scalar_output, self.batch_answer))
    #scale math error
    math_error = math_error / self.rows
    math_error = tf.minimum(math_error, self.utility.FLAGS.max_math_error *
                            tf.ones(tf.shape(math_error), self.data_type))
-    self.init_print_error = tf.select(
+    self.init_print_error = tf.where(
        self.batch_gold_select, -1 * tf.log(self.batch_lookup_answer + 1e-300 +
                                            self.invert_select_full_mask), -1 *
        tf.log(1 - self.batch_lookup_answer)) * self.select_full_mask
@@ -466,24 +466,24 @@ class Graph():
      print_error += self.compute_lookup_error(val + 0.0)
    print_error = print_error * self.utility.FLAGS.print_cost / self.num_entries
    if (self.mode == "train"):
-      error = tf.select(
+      error = tf.where(
          tf.logical_and(
              tf.not_equal(self.batch_answer, 0.0),
              tf.not_equal(
                  tf.reduce_sum(tf.reduce_sum(self.batch_print_answer, 1), 1),
                  0.0)),
          self.soft_min(math_error, print_error),
-          tf.select(
+          tf.where(
              tf.not_equal(self.batch_answer, 0.0), math_error, print_error))
    else:
-      error = tf.select(
+      error = tf.where(
          tf.logical_and(
              tf.equal(self.scalar_output, 0.0),
              tf.equal(
                  tf.reduce_sum(tf.reduce_sum(self.batch_lookup_answer, 1), 1),
                  0.0)),
          tf.ones_like(math_error),
-          tf.select(
+          tf.where(
              tf.equal(self.scalar_output, 0.0), print_error, math_error))
    return error
@@ -558,7 +558,7 @@ class Graph():
      input_col = tf.reduce_sum(
          tf.expand_dims(soft_column_softmax, 2) *
          self.full_column_hidden_vectors, 1)
-      history_input = tf.concat(1, [input_op, input_col])
+      history_input = tf.concat(axis=1, values=[input_op, input_col])
      history_input = nn_utils.apply_dropout(
          history_input, self.utility.FLAGS.dropout, self.mode)
      hprev = self.history_recurrent_step(history_input, hprev)
@@ -567,7 +567,7 @@ class Graph():
    self.scalar_output = output
    error = self.error_computation()
    cond = tf.less(error, 0.0001, name="cond")
-    correct_add = tf.select(
+    correct_add = tf.where(
        cond, tf.fill(tf.shape(cond), 1.0), tf.fill(tf.shape(cond), 0.0))
    correct = tf.reduce_sum(correct_add)
    error = error / batch_size
@@ -579,11 +579,11 @@ class Graph():
    #Sets mask variables and performs batch processing
    self.batch_gold_select = self.batch_print_answer > 0.0
    self.full_column_mask = tf.concat(
-        1, [self.batch_number_column_mask, self.batch_word_column_mask])
+        axis=1, values=[self.batch_number_column_mask, self.batch_word_column_mask])
    self.full_processed_column = tf.concat(
-        1,
+        axis=1,
-        [self.batch_processed_number_column, self.batch_processed_word_column])
+        values=[self.batch_processed_number_column, self.batch_processed_word_column])
-    self.full_processed_sorted_index_column = tf.concat(1, [
+    self.full_processed_sorted_index_column = tf.concat(axis=1, values=[
        self.batch_processed_sorted_index_number_column,
        self.batch_processed_sorted_index_word_column
    ])
@@ -603,7 +603,7 @@ class Graph():
            tf.equal(self.batch_word_column_entry_mask,
                     self.utility.dummy_token_id)), self.data_type)
    self.select_full_mask = tf.concat(
-        1, [self.select_mask, self.select_word_mask])
+        axis=1, values=[self.select_mask, self.select_word_mask])
    self.select_whole_mask = tf.maximum(
        tf.reshape(
            tf.slice(self.select_mask, [0, 0, 0],
@@ -614,7 +614,7 @@ class Graph():
                     [self.batch_size, 1, self.max_elements]),
            [self.batch_size, self.max_elements]))
    self.invert_select_full_mask = tf.cast(
-        tf.concat(1, [
+        tf.concat(axis=1, values=[
            tf.equal(self.batch_number_column, self.utility.FLAGS.pad_int),
            tf.equal(self.batch_word_column_entry_mask,
                     self.utility.dummy_token_id)

--- a/neural_programmer/neural_programmer.py
+++ b/neural_programmer/neural_programmer.py
--- a/neural_programmer/nn_utils.py
+++ b/neural_programmer/nn_utils.py
--- a/neural_programmer/parameters.py
+++ b/neural_programmer/parameters.py
--- a/neural_programmer/wiki_data.py
+++ b/neural_programmer/wiki_data.py
--- a/next_frame_prediction/README.md
+++ b/next_frame_prediction/README.md
@@ -12,17 +12,11 @@ Authors: Xin Pan (Github: panyx0718), Anelia Angelova
 <b>Results:</b>
-<left>
 ![Sample1](g3doc/cross_conv.png)
-</left>
-<left>
 ![Sample2](g3doc/cross_conv2.png)
-</left>
-<left>
 ![Loss](g3doc/cross_conv3.png)
-</left>
 <b>Prerequisite:</b>
@@ -40,7 +34,7 @@ to tf.SequenceExample.
 <b>How to run:</b>
 ```shell
-ls -R
+$ ls -R
 .:
 data  next_frame_prediction  WORKSPACE
@@ -58,18 +52,18 @@ cross_conv2.png  cross_conv3.png  cross_conv.png
 # Build everything.
-bazel build -c opt next_frame_prediction/...
+$ bazel build -c opt next_frame_prediction/...
 # The following example runs the generated 2d objects.
 # For Sprites dataset, image_size should be 60, norm_scale should be 255.0.
 # Batch size is normally 16~64, depending on your memory size.
-#
 # Run training.
-bazel-bin/next_frame_prediction/cross_conv/train \
+$ bazel-bin/next_frame_prediction/cross_conv/train \
-  --batch_size=1 \
+    --batch_size=1 \
-  --data_filepattern=data/tfrecords \
+    --data_filepattern=data/tfrecords \
-  --image_size=64 \
+    --image_size=64 \
-  --log_root=/tmp/predict
+    --log_root=/tmp/predict
 step: 1, loss: 24.428671
 step: 2, loss: 19.211605
@@ -81,11 +75,11 @@ step: 7, loss: 1.747665
 step: 8, loss: 1.572436
 step: 9, loss: 1.586816
 step: 10, loss: 1.434191
-#
 # Run eval.
-bazel-bin/next_frame_prediction/cross_conv/eval \
+$ bazel-bin/next_frame_prediction/cross_conv/eval \
-  --batch_size=1 \
+    --batch_size=1 \
-  --data_filepattern=data/tfrecords_test \
+    --data_filepattern=data/tfrecords_test \
-  --image_size=64 \
+    --image_size=64 \
-  --log_root=/tmp/predict
+    --log_root=/tmp/predict
 ```
--- a/next_frame_prediction/cross_conv/model.py
+++ b/next_frame_prediction/cross_conv/model.py
@@ -65,7 +65,7 @@ class CrossConvModel(object):
      diff = diff * 2.0 - self.params['scale']
      diff_output = self.diff_output * 2.0 - self.params['scale']
      concat_image = tf.concat(
-          1, [image, image + diff_output, image + diff, diff_output])
+          axis=1, values=[image, image + diff_output, image + diff, diff_output])
      tf.summary.image('origin_predict_expect_predictdiff', concat_image)
      self.summary_op = tf.summary.merge_all()
      return self.loss
@@ -113,7 +113,7 @@ class CrossConvModel(object):
    assert shape[1] == shape[2] and shape[1] == 128
    batch_size = shape[0]
-    net = tf.concat(3, [image, diff])
+    net = tf.concat(axis=3, values=[image, diff])
    with tf.variable_scope('motion_encoder'):
      with slim.arg_scope([slim.conv2d], padding='VALID'):
        net = slim.conv2d(net, 96, [5, 5], stride=1)
@@ -128,7 +128,7 @@ class CrossConvModel(object):
        z = tf.reshape(net, shape=[batch_size, -1])
        self.z_mean, self.z_stddev_log = tf.split(
-            split_dim=1, num_split=2, value=z)
+            axis=1, num_or_size_splits=2, value=z)
        self.z_stddev = tf.exp(self.z_stddev_log)
        epsilon = tf.random_normal(
@@ -174,7 +174,7 @@ class CrossConvModel(object):
  def _CrossConv(self, encoded_images):
    """Apply the motion kernel on the encoded_images."""
    cross_conved_images = []
-    kernels = tf.split(split_dim=3, num_split=4, value=self.kernel)
+    kernels = tf.split(axis=3, num_or_size_splits=4, value=self.kernel)
    for (i, encoded_image) in enumerate(encoded_images):
      with tf.variable_scope('cross_conv_%d' % i):
        kernel = kernels[i]
@@ -187,7 +187,7 @@ class CrossConvModel(object):
        for j in xrange(len(encoded_image)):
          conved_image.append(self._CrossConvHelper(
              encoded_image[j], kernel[j]))
-        cross_conved_images.append(tf.concat(0, conved_image))
+        cross_conved_images.append(tf.concat(axis=0, values=conved_image))
        sys.stderr.write('cross_conved shape: %s\n' %
                         cross_conved_images[-1].get_shape())
    return cross_conved_images
@@ -224,7 +224,7 @@ class CrossConvModel(object):
        nets.append(self._Deconv(
            cross_conved_image, 64, kernel_size=3, stride=stride))
-    net = tf.concat(3, nets)
+    net = tf.concat(axis=3, values=nets)
    net = slim.conv2d(net, 128, [9, 9], padding='SAME', stride=1)
    net = slim.conv2d(net, 128, [1, 1], padding='SAME', stride=1)
    net = slim.conv2d(net, 3, [1, 1], padding='SAME', stride=1)

--- a/next_frame_prediction/cross_conv/reader.py
+++ b/next_frame_prediction/cross_conv/reader.py
@@ -42,7 +42,7 @@ def SequenceToImageAndDiff(images):
    for i in xrange(0, len(resized_images)-1):
      diffs.append(resized_images[i+1] - resized_images[i])
    image_diff_list.append(
-        (tf.concat(0, resized_images[:-1]), tf.concat(0, diffs)))
+        (tf.concat(axis=0, values=resized_images[:-1]), tf.concat(axis=0, values=diffs)))
  return image_diff_list

--- a/real_nvp/real_nvp_multiscale_dataset.py
+++ b/real_nvp/real_nvp_multiscale_dataset.py
@@ -332,7 +332,7 @@ def masked_conv_aff_coupling(input_, mask_in, dim, name,
                     residual_blocks=residual_blocks,
                     bottleneck=bottleneck, skip=skip)
        mask = tf.mod(mask_channel + mask, 2)
-        res = tf.split(res, 2, 3)
+        res = tf.split(axis=3, num_or_size_splits=2, value=res)
        shift, log_rescaling = res[-2], res[-1]
        scale = variable_on_cpu(
            "rescaling_scale", [],
@@ -486,9 +486,9 @@ def conv_ch_aff_coupling(input_, dim, name,
            scope.reuse_variables()
        if change_bottom:
-            input_, canvas = tf.split(input_, 2, 3)
+            input_, canvas = tf.split(axis=3, num_or_size_splits=2, value=input_)
        else:
-            canvas, input_ = tf.split(input_, 2, 3)
+            canvas, input_ = tf.split(axis=3, num_or_size_splits=2, value=input_)
        shape = input_.get_shape().as_list()
        batch_size = shape[0]
        height = shape[1]
@@ -509,7 +509,7 @@ def conv_ch_aff_coupling(input_, dim, name,
                     train=train, weight_norm=weight_norm,
                     residual_blocks=residual_blocks,
                     bottleneck=bottleneck, skip=skip)
-        shift, log_rescaling = tf.split(res, 2, 3)
+        shift, log_rescaling = tf.split(axis=3, num_or_size_splits=2, value=res)
        scale = variable_on_cpu(
            "scale", [],
            tf.constant_initializer(1.))
@@ -570,9 +570,9 @@ def conv_ch_add_coupling(input_, dim, name,
            scope.reuse_variables()
        if change_bottom:
-            input_, canvas = tf.split(input_, 2, 3)
+            input_, canvas = tf.split(axis=3, num_or_size_splits=2, value=input_)
        else:
-            canvas, input_ = tf.split(input_, 2, 3)
+            canvas, input_ = tf.split(axis=3, num_or_size_splits=2, value=input_)
        shape = input_.get_shape().as_list()
        channels = shape[3]
        res = input_
@@ -736,8 +736,8 @@ def rec_masked_conv_coupling(input_, hps, scale_idx, n_scale,
                log_diff_1 = log_diff[:, :, :, :channels]
                log_diff_2 = log_diff[:, :, :, channels:]
            else:
-                res_1, res_2 = tf.split(res, 2, 3)
+                res_1, res_2 = tf.split(axis=3, num_or_size_splits=2, value=res)
-                log_diff_1, log_diff_2 = tf.split(log_diff, 2, 3)
+                log_diff_1, log_diff_2 = tf.split(axis=3, num_or_size_splits=2, value=log_diff)
            res_1, inc_log_diff = rec_masked_conv_coupling(
                input_=res_1, hps=hps, scale_idx=scale_idx + 1, n_scale=n_scale,
                use_batch_norm=use_batch_norm, weight_norm=weight_norm,
@@ -798,8 +798,8 @@ def rec_masked_deconv_coupling(input_, hps, scale_idx, n_scale,
                log_diff_1 = log_diff[:, :, :, :channels]
                log_diff_2 = log_diff[:, :, :, channels:]
            else:
-                res_1, res_2 = tf.split(res, 2, 3)
+                res_1, res_2 = tf.split(axis=3, num_or_size_splits=2, value=res)
-                log_diff_1, log_diff_2 = tf.split(log_diff, 2, 3)
+                log_diff_1, log_diff_2 = tf.split(axis=3, num_or_size_splits=2, value=log_diff)
            res_1, log_diff_1 = rec_masked_deconv_coupling(
                input_=res_1, hps=hps,
                scale_idx=scale_idx + 1, n_scale=n_scale,
@@ -1305,7 +1305,7 @@ class RealNVP(object):
            z_lost = z_complete
            for scale_idx in xrange(hps.n_scale - 1):
                z_lost = squeeze_2x2_ordered(z_lost)
-                z_lost, _ = tf.split(z_lost, 2, 3)
+                z_lost, _ = tf.split(axis=3, num_or_size_splits=2, value=z_lost)
                z_compressed = z_lost
                z_noisy = z_lost
                for _ in xrange(scale_idx + 1):

--- a/real_nvp/real_nvp_utils.py
+++ b/real_nvp/real_nvp_utils.py
@@ -99,8 +99,8 @@ def conv_layer(input_,
                    filter_size[1] - input_.get_shape().as_list()[2],
                    input_.get_shape().as_list()[3]
                ])
-                res = tf.concat(1, [pad_1, res])
+                res = tf.concat(axis=1, values=[pad_1, res])
-                res = tf.concat(2, [pad_2, res])
+                res = tf.concat(axis=2, values=[pad_2, res])
        res = tf.nn.conv2d(
            input=res,
            filter=weights,
@@ -139,8 +139,8 @@ def depool_2x2(input_, stride=2):
    channels = shape[3]
    res = tf.reshape(input_, [batch_size, height, 1, width, 1, channels])
    res = tf.concat(
-        2, [res, tf.zeros([batch_size, height, stride - 1, width, 1, channels])])
+        axis=2, values=[res, tf.zeros([batch_size, height, stride - 1, width, 1, channels])])
-    res = tf.concat(4, [
+    res = tf.concat(axis=4, values=[
        res, tf.zeros([batch_size, height, stride, width, stride - 1, channels])
    ])
    res = tf.reshape(res, [batch_size, stride * height, stride * width, channels])
@@ -158,11 +158,11 @@ def batch_random_flip(input_):
    height = shape[1]
    width = shape[2]
    channels = shape[3]
-    res = tf.split(0, batch_size, input_)
+    res = tf.split(axis=0, num_or_size_splits=batch_size, value=input_)
    res = [elem[0, :, :, :] for elem in res]
    res = [tf.image.random_flip_left_right(elem) for elem in res]
    res = [tf.reshape(elem, [1, height, width, channels]) for elem in res]
-    res = tf.concat(0, res)
+    res = tf.concat(axis=0, values=res)
    return res
@@ -175,7 +175,7 @@ def as_one_hot(input_, n_indices):
    n_elem = numpy.prod(shape)
    indices = tf.range(n_elem)
    indices = tf.cast(indices, tf.int64)
-    indices_input = tf.concat(0, [indices, tf.reshape(input_, [-1])])
+    indices_input = tf.concat(axis=0, values=[indices, tf.reshape(input_, [-1])])
    indices_input = tf.reshape(indices_input, [2, -1])
    indices_input = tf.transpose(indices_input)
    res = tf.sparse_to_dense(

--- a/resnet/README.md
+++ b/resnet/README.md
@@ -23,7 +23,7 @@ https://arxiv.org/pdf/1605.07146v1.pdf
 <b>Settings:</b>
 * Random split 50k training set into 45k/5k train/eval split.
-* Pad to 36x36 and random crop. Horizontal flip. Per-image whitenting. 
+* Pad to 36x36 and random crop. Horizontal flip. Per-image whitening.
 * Momentum optimizer 0.9.
 * Learning rate schedule: 0.1 (40k), 0.01 (60k), 0.001 (>60k).
 * L2 weight decay: 0.002.
@@ -31,13 +31,9 @@ https://arxiv.org/pdf/1605.07146v1.pdf
 <b>Results:</b>
-<left>
 ![Precisions](g3doc/cifar_resnet.gif)
-</left>
-<left>
-![Precisions Legends](g3doc/cifar_resnet_legends.gif)
-</left>
+![Precisions Legends](g3doc/cifar_resnet_legends.gif)
 CIFAR-10 Model|Best Precision|Steps
 --------------|--------------|------
@@ -69,40 +65,40 @@ curl -o cifar-100-binary.tar.gz https://www.cs.toronto.edu/~kriz/cifar-100-binar
 <b>How to run:</b>
 ```shell
-# cd to the your workspace.
+# cd to the models repository and run with bash. Expected command output shown.
-# It contains an empty WORKSPACE file, resnet codes and cifar10 dataset.
+# The directory should contain an empty WORKSPACE file, the resnet code, and the cifar10 dataset.
-# Note: User can split 5k from train set for eval set.
+# Note: The user can split 5k from train set for eval set.
-ls -R
+$ ls -R
-  .:
+.:
-  cifar10  resnet  WORKSPACE
+cifar10  resnet  WORKSPACE
-  ./cifar10:
+./cifar10:
-  data_batch_1.bin  data_batch_2.bin  data_batch_3.bin  data_batch_4.bin
+data_batch_1.bin  data_batch_2.bin  data_batch_3.bin  data_batch_4.bin
-  data_batch_5.bin  test_batch.bin
+data_batch_5.bin  test_batch.bin
-  ./resnet:
+./resnet:
-  BUILD  cifar_input.py  g3doc  README.md  resnet_main.py  resnet_model.py
+BUILD  cifar_input.py  g3doc  README.md  resnet_main.py  resnet_model.py
 # Build everything for GPU.
-bazel build -c opt --config=cuda resnet/...
+$ bazel build -c opt --config=cuda resnet/...
 # Train the model.
-bazel-bin/resnet/resnet_main --train_data_path=cifar10/data_batch* \
+$ bazel-bin/resnet/resnet_main --train_data_path=cifar10/data_batch* \
-                             --log_root=/tmp/resnet_model \
+                               --log_root=/tmp/resnet_model \
-                             --train_dir=/tmp/resnet_model/train \
+                               --train_dir=/tmp/resnet_model/train \
-                             --dataset='cifar10' \
+                               --dataset='cifar10' \
-                             --num_gpus=1
+                               --num_gpus=1
 # While the model is training, you can also check on its progress using tensorboard:
-tensorboard --logdir=/tmp/resnet_model
+$ tensorboard --logdir=/tmp/resnet_model
 # Evaluate the model.
 # Avoid running on the same GPU as the training job at the same time,
 # otherwise, you might run out of memory.
-bazel-bin/resnet/resnet_main --eval_data_path=cifar10/test_batch.bin \
+$ bazel-bin/resnet/resnet_main --eval_data_path=cifar10/test_batch.bin \
-                             --log_root=/tmp/resnet_model \
+                               --log_root=/tmp/resnet_model \
-                             --eval_dir=/tmp/resnet_model/test \
+                               --eval_dir=/tmp/resnet_model/test \
-                             --mode=eval \
+                               --mode=eval \
-                             --dataset='cifar10' \
+                               --dataset='cifar10' \
-                             --num_gpus=0
+                               --num_gpus=0
 ```
--- a/resnet/resnet_model.py
+++ b/resnet/resnet_model.py
@@ -85,7 +85,7 @@ class ResNet(object):
      # comparably good performance.
      # https://arxiv.org/pdf/1605.07146v1.pdf
      # filters = [16, 160, 320, 640]
-      # Update hps.num_residual_units to 9
+      # Update hps.num_residual_units to 4
    with tf.variable_scope('unit_1_0'):
      x = res_func(x, filters[0], filters[1], self._stride_arr(strides[0]),
@@ -128,7 +128,7 @@ class ResNet(object):
  def _build_train_op(self):
    """Build training specific ops for the graph."""
    self.lrn_rate = tf.constant(self.hps.lrn_rate, tf.float32)
-    tf.summary.scalar('learning rate', self.lrn_rate)
+    tf.summary.scalar('learning_rate', self.lrn_rate)
    trainable_variables = tf.trainable_variables()
    grads = tf.gradients(self.cost, trainable_variables)

--- a/skip_thoughts/.gitignore
+++ b/skip_thoughts/.gitignore
+/bazel-bin
+/bazel-ci_build-cache
+/bazel-genfiles
+/bazel-out
+/bazel-skip_thoughts
+/bazel-testlogs
+/bazel-tf
+*.pyc
--- a/skip_thoughts/README.md
+++ b/skip_thoughts/README.md
+# Skip-Thought Vectors
+This is a TensorFlow implementation of the model described in:
+Jamie Ryan Kiros, Yukun Zhu, Ruslan Salakhutdinov, Richard S. Zemel,
+Antonio Torralba, Raquel Urtasun, Sanja Fidler.
+[Skip-Thought Vectors](https://papers.nips.cc/paper/5950-skip-thought-vectors.pdf).
+*In NIPS, 2015.*
+## Contact
+***Code author:*** Chris Shallue
+***Pull requests and issues:*** @cshallue
+## Contents
+* [Model Overview](#model-overview)
+* [Getting Started](#getting-started)
+    * [Install Required Packages](#install-required-packages)
+    * [Download Pretrained Models (Optional)](#download-pretrained-models-optional)
+* [Training a Model](#training-a-model)
+    * [Prepare the Training Data](#prepare-the-training-data)
+    * [Run the Training Script](#run-the-training-script)
+    * [Track Training Progress](#track-training-progress)
+* [Expanding the Vocabulary](#expanding-the-vocabulary)
+    * [Overview](#overview)
+    * [Preparation](#preparation)
+    * [Run the Vocabulary Expansion Script](#run-the-vocabulary-expansion-script)
+* [Evaluating a Model](#evaluating-a-model)
+    * [Overview](#overview-1)
+    * [Preparation](#preparation-1)
+    * [Run the Evaluation Tasks](#run-the-evaluation-tasks)
+* [Encoding Sentences](#encoding-sentences)
+## Model Overview
+The *Skip-Thoughts* model is a sentence encoder. It learns to encode input
+sentences into a fixed-dimensional vector representation that is useful for many
+tasks, for example to detect paraphrases or to classify whether a product review
+is positive or negative. See the
+[Skip-Thought Vectors](https://papers.nips.cc/paper/5950-skip-thought-vectors.pdf)
+paper for details of the model architecture and more example applications.
+A trained *Skip-Thoughts* model will encode similar sentences near each other
+in the embedding vector space. The following examples show the nearest neighbor by
+cosine similarity of some sentences from the
+[movie review dataset](https://www.cs.cornell.edu/people/pabo/movie-review-data/).
+| Input sentence | Nearest Neighbor |
+|----------------|------------------|
+| Simplistic, silly and tedious. | Trite, banal, cliched, mostly inoffensive. |
+| Not so much farcical as sour. | Not only unfunny, but downright repellent. |
+| A sensitive and astute first feature by Anne-Sophie Birot. | Absorbing character study by André Turpin. |
+| An enthralling, entertaining feature. | A slick, engrossing melodrama. |
+## Getting Started
+### Install Required Packages
+First ensure that you have installed the following required packages:
+* **Bazel** ([instructions](http://bazel.build/docs/install.html))
+* **TensorFlow** ([instructions](https://www.tensorflow.org/install/))
+* **NumPy** ([instructions](http://www.scipy.org/install.html))
+* **scikit-learn** ([instructions](http://scikit-learn.org/stable/install.html))
+* **Natural Language Toolkit (NLTK)**
+    * First install NLTK ([instructions](http://www.nltk.org/install.html))
+    * Then install the NLTK data ([instructions](http://www.nltk.org/data.html))
+* **gensim** ([instructions](https://radimrehurek.com/gensim/install.html))
+    * Only required if you will be expanding your vocabulary with the [word2vec](https://code.google.com/archive/p/word2vec/) model.
+### Download Pretrained Models (Optional)
+You can download model checkpoints pretrained on the
+[BookCorpus](http://yknzhu.wixsite.com/mbweb) dataset in the following
+configurations:
+* Unidirectional RNN encoder ("uni-skip" in the paper)
+* Bidirectional RNN encoder ("bi-skip" in the paper)
+```shell
+# Directory to download the pretrained models to.
+PRETRAINED_MODELS_DIR="${HOME}/skip_thoughts/pretrained/"
+mkdir -p ${PRETRAINED_MODELS_DIR}
+cd ${PRETRAINED_MODELS_DIR}
+# Download and extract the unidirectional model.
+wget "http://download.tensorflow.org/models/skip_thoughts_uni_2017_02_02.tar.gz"
+tar -xvf skip_thoughts_uni_2017_02_02.tar.gz
+rm skip_thoughts_uni_2017_02_02.tar.gz
+# Download and extract the bidirectional model.
+wget "http://download.tensorflow.org/models/skip_thoughts_bi_2017_02_16.tar.gz"
+tar -xvf skip_thoughts_bi_2017_02_16.tar.gz
+rm skip_thoughts_bi_2017_02_16.tar.gz
+```
+You can now skip to the sections [Evaluating a Model](#evaluating-a-model) and
+[Encoding Sentences](#encoding-sentences).
+## Training a Model
+### Prepare the Training Data
+To train a model you will need to provide training data in TFRecord format. The
+TFRecord format consists of a set of sharded files containing serialized
+`tf.Example` protocol buffers. Each `tf.Example` proto contains three
+sentences:
+  * `encode`: The sentence to encode.
+  * `decode_pre`: The sentence preceding `encode` in the original text.
+  * `decode_post`: The sentence following `encode` in the original text.
+Each sentence is a list of words. During preprocessing, a dictionary is created
+that assigns each word in the vocabulary to an integer-valued id. Each sentence
+is encoded as a list of integer word ids in the `tf.Example` protos.
+We have provided a script to preprocess any set of text-files into this format.
+You may wish to use the [BookCorpus](http://yknzhu.wixsite.com/mbweb) dataset.
+Note that the preprocessing script may take **12 hours** or more to complete
+on this large dataset.
+```shell
+# Comma-separated list of globs matching the input files. The format of
+# the input files is assumed to be a list of newline-separated sentences, where
+# each sentence is already tokenized.
+INPUT_FILES="${HOME}/skip_thoughts/bookcorpus/*.txt"
+# Location to save the preprocessed training and validation data.
+DATA_DIR="${HOME}/skip_thoughts/data"
+# Build the preprocessing script.
+cd tensorflow-models/skip_thoughts
+bazel build -c opt //skip_thoughts/data:preprocess_dataset
+# Run the preprocessing script.
+bazel-bin/skip_thoughts/data/preprocess_dataset \
+  --input_files=${INPUT_FILES} \
+  --output_dir=${DATA_DIR}
+```
+When the script finishes you will find 100 training files and 1 validation file
+in `DATA_DIR`. The files will match the patterns `train-?????-of-00100` and
+`validation-00000-of-00001` respectively.
+The script will also produce a file named `vocab.txt`. The format of this file
+is a list of newline-separated words where the word id is the corresponding
+0-based line index. Words are sorted by descending order of frequency in the
+input data. Only the top 20,000 words are assigned unique ids; all other words
+are assigned the "unknown id" of 1 in the processed data.
+### Run the Training Script
+Execute the following commands to start the training script. By default it will
+run for 500k steps (around 9 days on a GeForce GTX 1080 GPU).
+```shell
+# Directory containing the preprocessed data.
+DATA_DIR="${HOME}/skip_thoughts/data"
+# Directory to save the model.
+MODEL_DIR="${HOME}/skip_thoughts/model"
+# Build the model.
+cd tensorflow-models/skip_thoughts
+bazel build -c opt //skip_thoughts/...
+# Run the training script.
+bazel-bin/skip_thoughts/train \
+  --input_file_pattern="${DATA_DIR}/train-?????-of-00100" \
+  --train_dir="${MODEL_DIR}/train"
+```
+### Track Training Progress
+Optionally, you can run the `track_perplexity` script in a separate process.
+This will log per-word perplexity on the validation set, which allows training
+progress to be monitored on
+[TensorBoard](https://www.tensorflow.org/get_started/summaries_and_tensorboard).
+Note that you may run out of memory if you run this script on the same GPU
+as the training script. You can set the environment variable
+`CUDA_VISIBLE_DEVICES=""` to force the script to run on CPU. If it runs too
+slowly on CPU, you can decrease the value of `--num_eval_examples`.
+```shell
+DATA_DIR="${HOME}/skip_thoughts/data"
+MODEL_DIR="${HOME}/skip_thoughts/model"
+# Ignore GPU devices (only necessary if your GPU is currently memory
+# constrained, for example, by running the training script).
+export CUDA_VISIBLE_DEVICES=""
+# Run the evaluation script. This will run in a loop, periodically loading the
+# latest model checkpoint file and computing evaluation metrics.
+bazel-bin/skip_thoughts/track_perplexity \
+  --input_file_pattern="${DATA_DIR}/validation-?????-of-00001" \
+  --checkpoint_dir="${MODEL_DIR}/train" \
+  --eval_dir="${MODEL_DIR}/val" \
+  --num_eval_examples=50000
+```
+If you started the `track_perplexity` script, run a
+[TensorBoard](https://www.tensorflow.org/get_started/summaries_and_tensorboard)
+server in a separate process for real-time monitoring of training summaries and
+validation perplexity.
+```shell
+MODEL_DIR="${HOME}/skip_thoughts/model"
+# Run a TensorBoard server.
+tensorboard --logdir="${MODEL_DIR}"
+```
+## Expanding the Vocabulary
+### Overview
+The vocabulary generated by the preprocessing script contains only 20,000 words,
+which is insufficient for many tasks. For example, a sentence from Wikipedia
+might contain nouns that do not appear in this vocabulary.
+A solution to this problem described in the
+[Skip-Thought Vectors](https://papers.nips.cc/paper/5950-skip-thought-vectors.pdf)
+paper is to learn a mapping that transfers word representations from one model to
+another. This idea is based on the "Translation Matrix" method from the paper
+[Exploiting Similarities Among Languages for Machine Translation](https://arxiv.org/abs/1309.4168).
+Specifically, we will load the word embeddings from a trained *Skip-Thoughts*
+model and from a trained [word2vec model](https://arxiv.org/pdf/1301.3781.pdf)
+(which has a much larger vocabulary). We will train a linear regression model
+without regularization to learn a linear mapping from the word2vec embedding
+space to the *Skip-Thoughts* embedding space. We will then apply the linear
+model to all words in the word2vec vocabulary, yielding vectors in the
+*Skip-Thoughts* word embedding space for the union of the two vocabularies.
+The linear regression task is to learn a parameter matrix *W* to minimize
+*|| X - Y \* W ||<sup>2</sup>*, where *X* is a matrix of *Skip-Thoughts*
+embeddings of shape `[num_words, dim1]`, *Y* is a matrix of word2vec embeddings
+of shape `[num_words, dim2]`, and *W* is a matrix of shape `[dim2, dim1]`.
+### Preparation
+First you will need to download and unpack a pretrained
+[word2vec model](https://arxiv.org/pdf/1301.3781.pdf) from
+[this website](https://code.google.com/archive/p/word2vec/)
+([direct download link](https://drive.google.com/file/d/0B7XkCwpI5KDYNlNUTTlSS21pQmM/edit?usp=sharing)).
+This model was trained on the Google News dataset (about 100 billion words).
+Also ensure that you have already [installed gensim](https://radimrehurek.com/gensim/install.html).
+### Run the Vocabulary Expansion Script
+```shell
+# Path to checkpoint file or a directory containing checkpoint files (the script
+# will select the most recent).
+CHECKPOINT_PATH="${HOME}/skip_thoughts/model/train"
+# Vocabulary file generated by the preprocessing script.
+SKIP_THOUGHTS_VOCAB="${HOME}/skip_thoughts/data/vocab.txt"
+# Path to downloaded word2vec model.
+WORD2VEC_MODEL="${HOME}/skip_thoughts/googlenews/GoogleNews-vectors-negative300.bin"
+# Output directory.
+EXP_VOCAB_DIR="${HOME}/skip_thoughts/exp_vocab"
+# Build the vocabulary expansion script.
+cd tensorflow-models/skip_thoughts
+bazel build -c opt //skip_thoughts:vocabulary_expansion
+# Run the vocabulary expansion script.
+bazel-bin/skip_thoughts/vocabulary_expansion \
+  --skip_thoughts_model=${CHECKPOINT_PATH} \
+  --skip_thoughts_vocab=${SKIP_THOUGHTS_VOCAB} \
+  --word2vec_model=${WORD2VEC_MODEL} \
+  --output_dir=${EXP_VOCAB_DIR}
+```
+## Evaluating a Model
+### Overview
+The model can be evaluated using the benchmark tasks described in the
+[Skip-Thought Vectors](https://papers.nips.cc/paper/5950-skip-thought-vectors.pdf)
+paper. The following tasks are supported (refer to the paper for full details):
+ * **SICK** semantic relatedness task.
+ * **MSRP** (Microsoft Research Paraphrase Corpus) paraphrase detection task.
+ * Classification tasks:
+   * **MR** movie review sentiment task.
+   * **CR** customer product review task.
+   * **SUBJ** subjectivity/objectivity task.
+   * **MPQA** opinion polarity task.
+   * **TREC** question-type classification task.
+### Preparation
+You will need to clone or download the
+[skip-thoughts GitHub repository](https://github.com/ryankiros/skip-thoughts) by
+[ryankiros](https://github.com/ryankiros) (the first author of the Skip-Thoughts
+paper):
+```shell
+# Folder to clone the repository to.
+ST_KIROS_DIR="${HOME}/skip_thoughts/skipthoughts_kiros"
+# Clone the repository.
+git clone git@github.com:ryankiros/skip-thoughts.git "${ST_KIROS_DIR}/skipthoughts"
+# Make the package importable.
+export PYTHONPATH="${ST_KIROS_DIR}/:${PYTHONPATH}"
+```
+You will also need to download the data needed for each evaluation task. See the
+instructions [here](https://github.com/ryankiros/skip-thoughts).
+For example, the CR (customer review) dataset is found [here](http://nlp.stanford.edu/~sidaw/home/projects:nbsvm). For this task we want the
+files `custrev.pos` and `custrev.neg`.
+### Run the Evaluation Tasks
+In the following example we will evaluate a unidirectional model ("uni-skip" in
+the paper) on the CR task. To use a bidirectional model ("bi-skip" in the
+paper), simply pass the flags `--bi_vocab_file`, `--bi_embeddings_file` and
+`--bi_checkpoint_path` instead. To use the "combine-skip" model described in the
+paper you will need to pass both the unidirectional and bidirectional flags.
+```shell
+# Path to checkpoint file or a directory containing checkpoint files (the script
+# will select the most recent).
+CHECKPOINT_PATH="${HOME}/skip_thoughts/model/train"
+# Vocabulary file generated by the vocabulary expansion script.
+VOCAB_FILE="${HOME}/skip_thoughts/exp_vocab/vocab.txt"
+# Embeddings file generated by the vocabulary expansion script.
+EMBEDDINGS_FILE="${HOME}/skip_thoughts/exp_vocab/embeddings.npy"
+# Directory containing files custrev.pos and custrev.neg.
+EVAL_DATA_DIR="${HOME}/skip_thoughts/eval_data"
+# Build the evaluation script.
+cd tensorflow-models/skip_thoughts
+bazel build -c opt //skip_thoughts:evaluate
+# Run the evaluation script.
+bazel-bin/skip_thoughts/evaluate \
+  --eval_task=CR \
+  --data_dir=${EVAL_DATA_DIR} \
+  --uni_vocab_file=${VOCAB_FILE} \
+  --uni_embeddings_file=${EMBEDDINGS_FILE} \
+  --uni_checkpoint_path=${CHECKPOINT_PATH}
+```
+Output:
+```python
+[0.82539682539682535, 0.84084880636604775, 0.83023872679045096,
+ 0.86206896551724133, 0.83554376657824936, 0.85676392572944293,
+ 0.84084880636604775, 0.83023872679045096, 0.85145888594164454,
+ 0.82758620689655171]
+```
+The output is a list of accuracies of 10 cross-validation classification models.
+To get a single number, simply take the average:
+```python
+ipython  # Launch iPython.
+In [0]:
+import numpy as np
+np.mean([0.82539682539682535, 0.84084880636604775, 0.83023872679045096,
+         0.86206896551724133, 0.83554376657824936, 0.85676392572944293,
+         0.84084880636604775, 0.83023872679045096, 0.85145888594164454,
+         0.82758620689655171])
+Out [0]: 0.84009936423729525
+```
+## Encoding Sentences
+In this example we will encode data from the
+[movie review dataset](https://www.cs.cornell.edu/people/pabo/movie-review-data/)
+(specifically the [sentence polarity dataset v1.0](https://www.cs.cornell.edu/people/pabo/movie-review-data/rt-polaritydata.tar.gz)).
+```python
+ipython  # Launch iPython.
+In [0]:
+# Imports.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import numpy as np
+import os.path
+import scipy.spatial.distance as sd
+from skip_thoughts import configuration
+from skip_thoughts import encoder_manager
+In [1]:
+# Set paths to the model.
+VOCAB_FILE = "/path/to/vocab.txt"
+EMBEDDING_MATRIX_FILE = "/path/to/embeddings.npy"
+CHECKPOINT_PATH = "/path/to/model.ckpt-9999"
+# The following directory should contain files rt-polarity.neg and
+# rt-polarity.pos.
+MR_DATA_DIR = "/dir/containing/mr/data"
+In [2]:
+# Set up the encoder. Here we are using a single unidirectional model.
+# To use a bidirectional model as well, call load_model() again with
+# configuration.model_config(bidirectional_encoder=True) and paths to the
+# bidirectional model's files. The encoder will use the concatenation of
+# all loaded models.
+encoder = encoder_manager.EncoderManager()
+encoder.load_model(configuration.model_config(),
+                   vocabulary_file=VOCAB_FILE,
+                   embedding_matrix_file=EMBEDDING_MATRIX_FILE,
+                   checkpoint_path=CHECKPOINT_PATH)
+In [3]:
+# Load the movie review dataset.
+data = []
+with open(os.path.join(MR_DATA_DIR, 'rt-polarity.neg'), 'rb') as f:
+  data.extend([line.decode('latin-1').strip() for line in f])
+with open(os.path.join(MR_DATA_DIR, 'rt-polarity.pos'), 'rb') as f:
+  data.extend([line.decode('latin-1').strip() for line in f])
+In [4]:
+# Generate Skip-Thought Vectors for each sentence in the dataset.
+encodings = encoder.encode(data)
+In [5]:
+# Define a helper function to generate nearest neighbors.
+def get_nn(ind, num=10):
+  encoding = encodings[ind]
+  scores = sd.cdist([encoding], encodings, "cosine")[0]
+  sorted_ids = np.argsort(scores)
+  print("Sentence:")
+  print("", data[ind])
+  print("\nNearest neighbors:")
+  for i in range(1, num + 1):
+    print(" %d. %s (%.3f)" %
+          (i, data[sorted_ids[i]], scores[sorted_ids[i]]))
+In [6]:
+# Compute nearest neighbors of the first sentence in the dataset.
+get_nn(0)
+```
+Output:
+```
+Sentence:
+ simplistic , silly and tedious .
+Nearest neighbors:
+ 1. trite , banal , cliched , mostly inoffensive . (0.247)
+ 2. banal and predictable . (0.253)
+ 3. witless , pointless , tasteless and idiotic . (0.272)
+ 4. loud , silly , stupid and pointless . (0.295)
+ 5. grating and tedious . (0.299)
+ 6. idiotic and ugly . (0.330)
+ 7. black-and-white and unrealistic . (0.335)
+ 8. hopelessly inane , humorless and under-inspired . (0.335)
+ 9. shallow , noisy and pretentious . (0.340)
+ 10. . . . unlikable , uninteresting , unfunny , and completely , utterly inept . (0.346)
+```
--- a/skip_thoughts/WORKSPACE
+++ b/skip_thoughts/WORKSPACE
--- a/skip_thoughts/skip_thoughts/BUILD
+++ b/skip_thoughts/skip_thoughts/BUILD
+package(default_visibility = [":internal"])
+licenses(["notice"])  # Apache 2.0
+exports_files(["LICENSE"])
+package_group(
+    name = "internal",
+    packages = [
+        "//skip_thoughts/...",
+    ],
+)
+py_library(
+    name = "configuration",
+    srcs = ["configuration.py"],
+    srcs_version = "PY2AND3",
+)
+py_library(
+    name = "skip_thoughts_model",
+    srcs = ["skip_thoughts_model.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        "//skip_thoughts/ops:gru_cell",
+        "//skip_thoughts/ops:input_ops",
+    ],
+)
+py_test(
+    name = "skip_thoughts_model_test",
+    size = "large",
+    srcs = ["skip_thoughts_model_test.py"],
+    deps = [
+        ":configuration",
+        ":skip_thoughts_model",
+    ],
+)
+py_binary(
+    name = "train",
+    srcs = ["train.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":configuration",
+        ":skip_thoughts_model",
+    ],
+)
+py_binary(
+    name = "track_perplexity",
+    srcs = ["track_perplexity.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":configuration",
+        ":skip_thoughts_model",
+    ],
+)
+py_binary(
+    name = "vocabulary_expansion",
+    srcs = ["vocabulary_expansion.py"],
+    srcs_version = "PY2AND3",
+)
+py_library(
+    name = "skip_thoughts_encoder",
+    srcs = ["skip_thoughts_encoder.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":skip_thoughts_model",
+        "//skip_thoughts/data:special_words",
+    ],
+)
+py_library(
+    name = "encoder_manager",
+    srcs = ["encoder_manager.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":skip_thoughts_encoder",
+    ],
+)
+py_binary(
+    name = "evaluate",
+    srcs = ["evaluate.py"],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":encoder_manager",
+        "//skip_thoughts:configuration",
+    ],
+)
--- a/skip_thoughts/skip_thoughts/__init__.py
+++ b/skip_thoughts/skip_thoughts/__init__.py
--- a/skip_thoughts/skip_thoughts/configuration.py
+++ b/skip_thoughts/skip_thoughts/configuration.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Default configuration for model architecture and training."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+class _HParams(object):
+  """Wrapper for configuration parameters."""
+  pass
+def model_config(input_file_pattern=None,
+                 input_queue_capacity=640000,
+                 num_input_reader_threads=1,
+                 shuffle_input_data=True,
+                 uniform_init_scale=0.1,
+                 vocab_size=20000,
+                 batch_size=128,
+                 word_embedding_dim=620,
+                 bidirectional_encoder=False,
+                 encoder_dim=2400):
+  """Creates a model configuration object.
+  Args:
+    input_file_pattern: File pattern of sharded TFRecord files containing
+      tf.Example protobufs.
+    input_queue_capacity: Number of examples to keep in the input queue.
+    num_input_reader_threads: Number of threads for prefetching input
+      tf.Examples.
+    shuffle_input_data: Whether to shuffle the input data.
+    uniform_init_scale: Scale of random uniform initializer.
+    vocab_size: Number of unique words in the vocab.
+    batch_size: Batch size (training and evaluation only).
+    word_embedding_dim: Word embedding dimension.
+    bidirectional_encoder: Whether to use a bidirectional or unidirectional
+      encoder RNN.
+    encoder_dim: Number of output dimensions of the sentence encoder.
+  Returns:
+    An object containing model configuration parameters.
+  """
+  config = _HParams()
+  config.input_file_pattern = input_file_pattern
+  config.input_queue_capacity = input_queue_capacity
+  config.num_input_reader_threads = num_input_reader_threads
+  config.shuffle_input_data = shuffle_input_data
+  config.uniform_init_scale = uniform_init_scale
+  config.vocab_size = vocab_size
+  config.batch_size = batch_size
+  config.word_embedding_dim = word_embedding_dim
+  config.bidirectional_encoder = bidirectional_encoder
+  config.encoder_dim = encoder_dim
+  return config
+def training_config(learning_rate=0.0008,
+                    learning_rate_decay_factor=0.5,
+                    learning_rate_decay_steps=400000,
+                    number_of_steps=500000,
+                    clip_gradient_norm=5.0,
+                    save_model_secs=600,
+                    save_summaries_secs=600):
+  """Creates a training configuration object.
+  Args:
+    learning_rate: Initial learning rate.
+    learning_rate_decay_factor: If > 0, the learning rate decay factor.
+    learning_rate_decay_steps: The number of steps before the learning rate
+      decays by learning_rate_decay_factor.
+    number_of_steps: The total number of training steps to run. Passing None
+      will cause the training script to run indefinitely.
+    clip_gradient_norm: If not None, then clip gradients to this value.
+    save_model_secs: How often (in seconds) to save model checkpoints.
+    save_summaries_secs: How often (in seconds) to save model summaries.
+  Returns:
+    An object containing training configuration parameters.
+  Raises:
+    ValueError: If learning_rate_decay_factor is set and
+      learning_rate_decay_steps is unset.
+  """
+  if learning_rate_decay_factor and not learning_rate_decay_steps:
+    raise ValueError(
+        "learning_rate_decay_factor requires learning_rate_decay_steps.")
+  config = _HParams()
+  config.learning_rate = learning_rate
+  config.learning_rate_decay_factor = learning_rate_decay_factor
+  config.learning_rate_decay_steps = learning_rate_decay_steps
+  config.number_of_steps = number_of_steps
+  config.clip_gradient_norm = clip_gradient_norm
+  config.save_model_secs = save_model_secs
+  config.save_summaries_secs = save_summaries_secs
+  return config