Commit b7523ee5 authored by Ivan Bogatyy

parents 66723d7d 2c6d74b7
@@ -21,6 +21,7 @@ To propose a model for inclusion please submit a pull request.
 - [next_frame_prediction](next_frame_prediction): probabilistic future frame synthesis via cross convolutional networks.
 - [real_nvp](real_nvp): density estimation using real-valued non-volume preserving (real NVP) transformations.
 - [resnet](resnet): deep and wide residual networks.
+- [skip_thoughts](skip_thoughts): recurrent neural network sentence-to-vector encoder.
 - [slim](slim): image classification models in TF-Slim.
 - [street](street): identify the name of a street (in France) from an image using a Deep RNN.
 - [swivel](swivel): the Swivel algorithm for generating word embeddings.
...
@@ -4,7 +4,7 @@ import sklearn.preprocessing as prep
 import tensorflow as tf
 from tensorflow.examples.tutorials.mnist import input_data
-from autoencoder.autoencoder_models.DenoisingAutoencoder import AdditiveGaussianNoiseAutoencoder
+from autoencoder_models.DenoisingAutoencoder import AdditiveGaussianNoiseAutoencoder

 mnist = input_data.read_data_sets('MNIST_data', one_hot = True)
@@ -45,7 +45,6 @@ for epoch in range(training_epochs):
     # Display logs per epoch step
     if epoch % display_step == 0:
-        print "Epoch:", '%04d' % (epoch + 1), \
-            "cost=", "{:.9f}".format(avg_cost)
+        print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(avg_cost))

-print "Total cost: " + str(autoencoder.calc_total_cost(X_test))
+print("Total cost: " + str(autoencoder.calc_total_cost(X_test)))
@@ -4,7 +4,7 @@ import sklearn.preprocessing as prep
 import tensorflow as tf
 from tensorflow.examples.tutorials.mnist import input_data
-from autoencoder.autoencoder_models.Autoencoder import Autoencoder
+from autoencoder_models.Autoencoder import Autoencoder

 mnist = input_data.read_data_sets('MNIST_data', one_hot = True)
@@ -44,7 +44,6 @@ for epoch in range(training_epochs):
     # Display logs per epoch step
     if epoch % display_step == 0:
-        print "Epoch:", '%04d' % (epoch + 1), \
-            "cost=", "{:.9f}".format(avg_cost)
+        print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(avg_cost))

-print "Total cost: " + str(autoencoder.calc_total_cost(X_test))
+print("Total cost: " + str(autoencoder.calc_total_cost(X_test)))
@@ -4,7 +4,7 @@ import sklearn.preprocessing as prep
 import tensorflow as tf
 from tensorflow.examples.tutorials.mnist import input_data
-from autoencoder.autoencoder_models.DenoisingAutoencoder import MaskingNoiseAutoencoder
+from autoencoder_models.DenoisingAutoencoder import MaskingNoiseAutoencoder

 mnist = input_data.read_data_sets('MNIST_data', one_hot = True)
@@ -43,7 +43,6 @@ for epoch in range(training_epochs):
         avg_cost += cost / n_samples * batch_size

     if epoch % display_step == 0:
-        print "Epoch:", '%04d' % (epoch + 1), \
-            "cost=", "{:.9f}".format(avg_cost)
+        print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(avg_cost))

-print "Total cost: " + str(autoencoder.calc_total_cost(X_test))
+print("Total cost: " + str(autoencoder.calc_total_cost(X_test)))
import numpy as np
import tensorflow as tf

def xavier_init(fan_in, fan_out, constant = 1):
    low = -constant * np.sqrt(6.0 / (fan_in + fan_out))
    high = constant * np.sqrt(6.0 / (fan_in + fan_out))
    return tf.random_uniform((fan_in, fan_out),
                             minval = low, maxval = high,
                             dtype = tf.float32)
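This deleted helper implements the Glorot ("Xavier") uniform rule, drawing weights from U[-sqrt(6/(fan_in+fan_out)), +sqrt(6/(fan_in+fan_out))]. The replacement used throughout this commit is the library initializer, which samples from the same bound; a minimal sketch of the swapped-in call (TF 1.x; the 784x200 shape is only illustrative):

```python
import tensorflow as tf

# tf.contrib.layers.xavier_initializer() defaults to the uniform Glorot bound
# sqrt(6 / (fan_in + fan_out)), matching the removed xavier_init helper.
w1 = tf.get_variable("w1", shape=[784, 200],
                     initializer=tf.contrib.layers.xavier_initializer())
```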
@@ -4,7 +4,7 @@ import sklearn.preprocessing as prep
 import tensorflow as tf
 from tensorflow.examples.tutorials.mnist import input_data
-from autoencoder.autoencoder_models.VariationalAutoencoder import VariationalAutoencoder
+from autoencoder_models.VariationalAutoencoder import VariationalAutoencoder

 mnist = input_data.read_data_sets('MNIST_data', one_hot = True)
@@ -47,7 +47,6 @@ for epoch in range(training_epochs):
     # Display logs per epoch step
     if epoch % display_step == 0:
-        print "Epoch:", '%04d' % (epoch + 1), \
-            "cost=", "{:.9f}".format(avg_cost)
+        print("Epoch:", '%04d' % (epoch + 1), "cost=", "{:.9f}".format(avg_cost))

-print "Total cost: " + str(autoencoder.calc_total_cost(X_test))
+print("Total cost: " + str(autoencoder.calc_total_cost(X_test)))
 import tensorflow as tf
-import numpy as np
-import autoencoder.Utils

 class Autoencoder(object):
@@ -28,7 +26,8 @@ class Autoencoder(object):
     def _initialize_weights(self):
         all_weights = dict()
-        all_weights['w1'] = tf.Variable(autoencoder.Utils.xavier_init(self.n_input, self.n_hidden))
+        all_weights['w1'] = tf.get_variable("w1", shape=[self.n_input, self.n_hidden],
+                                            initializer=tf.contrib.layers.xavier_initializer())
         all_weights['b1'] = tf.Variable(tf.zeros([self.n_hidden], dtype=tf.float32))
         all_weights['w2'] = tf.Variable(tf.zeros([self.n_hidden, self.n_input], dtype=tf.float32))
         all_weights['b2'] = tf.Variable(tf.zeros([self.n_input], dtype=tf.float32))
@@ -46,7 +45,7 @@ class Autoencoder(object):
     def generate(self, hidden = None):
         if hidden is None:
-            hidden = np.random.normal(size=self.weights["b1"])
+            hidden = self.sess.run(tf.random_normal([1, self.n_hidden]))
         return self.sess.run(self.reconstruction, feed_dict={self.hidden: hidden})

     def reconstruct(self, X):
...
 import tensorflow as tf
-import numpy as np
-import autoencoder.Utils

 class AdditiveGaussianNoiseAutoencoder(object):
     def __init__(self, n_input, n_hidden, transfer_function = tf.nn.softplus, optimizer = tf.train.AdamOptimizer(),
@@ -31,7 +28,8 @@ class AdditiveGaussianNoiseAutoencoder(object):
     def _initialize_weights(self):
         all_weights = dict()
-        all_weights['w1'] = tf.Variable(autoencoder.Utils.xavier_init(self.n_input, self.n_hidden))
+        all_weights['w1'] = tf.get_variable("w1", shape=[self.n_input, self.n_hidden],
+                                            initializer=tf.contrib.layers.xavier_initializer())
         all_weights['b1'] = tf.Variable(tf.zeros([self.n_hidden], dtype = tf.float32))
         all_weights['w2'] = tf.Variable(tf.zeros([self.n_hidden, self.n_input], dtype = tf.float32))
         all_weights['b2'] = tf.Variable(tf.zeros([self.n_input], dtype = tf.float32))
@@ -53,9 +51,9 @@ class AdditiveGaussianNoiseAutoencoder(object):
             self.scale: self.training_scale
         })

-    def generate(self, hidden = None):
+    def generate(self, hidden=None):
         if hidden is None:
-            hidden = np.random.normal(size = self.weights["b1"])
+            hidden = self.sess.run(tf.random_normal([1, self.n_hidden]))
         return self.sess.run(self.reconstruction, feed_dict = {self.hidden: hidden})

     def reconstruct(self, X):
@@ -98,7 +96,8 @@ class MaskingNoiseAutoencoder(object):
     def _initialize_weights(self):
         all_weights = dict()
-        all_weights['w1'] = tf.Variable(autoencoder.Utils.xavier_init(self.n_input, self.n_hidden))
+        all_weights['w1'] = tf.get_variable("w1", shape=[self.n_input, self.n_hidden],
+                                            initializer=tf.contrib.layers.xavier_initializer())
         all_weights['b1'] = tf.Variable(tf.zeros([self.n_hidden], dtype = tf.float32))
         all_weights['w2'] = tf.Variable(tf.zeros([self.n_hidden, self.n_input], dtype = tf.float32))
         all_weights['b2'] = tf.Variable(tf.zeros([self.n_input], dtype = tf.float32))
@@ -115,9 +114,9 @@ class MaskingNoiseAutoencoder(object):
     def transform(self, X):
         return self.sess.run(self.hidden, feed_dict = {self.x: X, self.keep_prob: 1.0})

-    def generate(self, hidden = None):
+    def generate(self, hidden=None):
         if hidden is None:
-            hidden = np.random.normal(size = self.weights["b1"])
+            hidden = self.sess.run(tf.random_normal([1, self.n_hidden]))
         return self.sess.run(self.reconstruction, feed_dict = {self.hidden: hidden})

     def reconstruct(self, X):
...
 import tensorflow as tf
 import numpy as np
-import autoencoder.Utils

 class VariationalAutoencoder(object):
@@ -36,8 +35,10 @@ class VariationalAutoencoder(object):
     def _initialize_weights(self):
         all_weights = dict()
-        all_weights['w1'] = tf.Variable(autoencoder.Utils.xavier_init(self.n_input, self.n_hidden))
-        all_weights['log_sigma_w1'] = tf.Variable(autoencoder.Utils.xavier_init(self.n_input, self.n_hidden))
+        all_weights['w1'] = tf.get_variable("w1", shape=[self.n_input, self.n_hidden],
+                                            initializer=tf.contrib.layers.xavier_initializer())
+        all_weights['log_sigma_w1'] = tf.get_variable("log_sigma_w1", shape=[self.n_input, self.n_hidden],
+                                                      initializer=tf.contrib.layers.xavier_initializer())
         all_weights['b1'] = tf.Variable(tf.zeros([self.n_hidden], dtype=tf.float32))
         all_weights['log_sigma_b1'] = tf.Variable(tf.zeros([self.n_hidden], dtype=tf.float32))
         all_weights['w2'] = tf.Variable(tf.zeros([self.n_hidden, self.n_input], dtype=tf.float32))
...
@@ -37,9 +37,7 @@ Full text available at: http://arxiv.org/abs/1609.06647
 The *Show and Tell* model is a deep neural network that learns how to describe
 the content of images. For example:

-<center>
 ![Example captions](g3doc/example_captions.jpg)
-</center>

 ### Architecture
@@ -66,9 +64,7 @@ learned during training.
 The following diagram illustrates the model architecture.

-<center>
 ![Show and Tell Architecture](g3doc/show_and_tell_architecture.png)
-</center>

 In this diagram, \{*s*<sub>0</sub>, *s*<sub>1</sub>, ..., *s*<sub>*N*-1</sub>\}
 are the words of the caption and \{*w*<sub>*e*</sub>*s*<sub>0</sub>,
@@ -137,8 +133,7 @@ Each caption is a list of words. During preprocessing, a dictionary is created
 that assigns each word in the vocabulary to an integer-valued id. Each caption
 is encoded as a list of integer word ids in the `tf.SequenceExample` protos.

-We have provided a script to download and preprocess the [MSCOCO]
-(http://mscoco.org/) image captioning data set into this format. Downloading
+We have provided a script to download and preprocess the [MSCOCO](http://mscoco.org/) image captioning data set into this format. Downloading
 and preprocessing the data may take several hours depending on your network and
 computer speed. Please be patient.
@@ -266,8 +261,7 @@ tensorboard --logdir="${MODEL_DIR}"
 ### Fine Tune the Inception v3 Model

 Your model will already be able to generate reasonable captions after the first
-phase of training. Try it out! (See [Generating Captions]
-(#generating-captions)).
+phase of training. Try it out! (See [Generating Captions](#generating-captions)).

 You can further improve the performance of the model by running a
 second training phase to jointly fine-tune the parameters of the *Inception v3*
@@ -337,6 +331,4 @@ expected.
 Here is the image:

-<center>
 ![Surfer](g3doc/COCO_val2014_000000224477.jpg)
-</center>
@@ -261,7 +261,12 @@ def _process_image_files_batch(coder, thread_index, ranges, name, filenames,
       label = labels[i]
       text = texts[i]

-      image_buffer, height, width = _process_image(filename, coder)
+      try:
+        image_buffer, height, width = _process_image(filename, coder)
+      except Exception as e:
+        print(e)
+        print('SKIPPED: Unexpected error while decoding %s.' % filename)
+        continue

       example = _convert_to_example(filename, image_buffer, label,
                                     text, height, width)
...
@@ -128,7 +128,7 @@ class ResNet(object):
   def _build_train_op(self):
     """Build training specific ops for the graph."""
     self.lrn_rate = tf.constant(self.hps.lrn_rate, tf.float32)
-    tf.summary.scalar('learning rate', self.lrn_rate)
+    tf.summary.scalar('learning_rate', self.lrn_rate)

     trainable_variables = tf.trainable_variables()
     grads = tf.gradients(self.cost, trainable_variables)
...
/bazel-bin
/bazel-ci_build-cache
/bazel-genfiles
/bazel-out
/bazel-skip_thoughts
/bazel-testlogs
/bazel-tf
*.pyc
# Skip-Thought Vectors
This is a TensorFlow implementation of the model described in:
Jamie Ryan Kiros, Yukun Zhu, Ruslan Salakhutdinov, Richard S. Zemel,
Antonio Torralba, Raquel Urtasun, Sanja Fidler.
[Skip-Thought Vectors](https://papers.nips.cc/paper/5950-skip-thought-vectors.pdf).
*In NIPS, 2015.*
## Contact
***Code author:*** Chris Shallue
***Pull requests and issues:*** @cshallue
## Contents
* [Model Overview](#model-overview)
* [Getting Started](#getting-started)
* [Install Required Packages](#install-required-packages)
* [Download Pretrained Models (Optional)](#download-pretrained-models-optional)
* [Training a Model](#training-a-model)
* [Prepare the Training Data](#prepare-the-training-data)
* [Run the Training Script](#run-the-training-script)
* [Track Training Progress](#track-training-progress)
* [Expanding the Vocabulary](#expanding-the-vocabulary)
* [Overview](#overview)
* [Preparation](#preparation)
* [Run the Vocabulary Expansion Script](#run-the-vocabulary-expansion-script)
* [Evaluating a Model](#evaluating-a-model)
* [Overview](#overview-1)
* [Preparation](#preparation-1)
* [Run the Evaluation Tasks](#run-the-evaluation-tasks)
* [Encoding Sentences](#encoding-sentences)
## Model Overview
The *Skip-Thoughts* model is a sentence encoder. It learns to encode input
sentences into a fixed-dimensional vector representation that is useful for
many tasks, for example detecting paraphrases or classifying whether a product
review is positive or negative. See the
[Skip-Thought Vectors](https://papers.nips.cc/paper/5950-skip-thought-vectors.pdf)
paper for details of the model architecture and more example applications.

A trained *Skip-Thoughts* model encodes similar sentences near each other in
the embedding vector space. The following examples show the nearest neighbor,
by cosine similarity, of some sentences from the
[movie review dataset](https://www.cs.cornell.edu/people/pabo/movie-review-data/).
| Input sentence | Nearest Neighbor |
|----------------|------------------|
| Simplistic, silly and tedious. | Trite, banal, cliched, mostly inoffensive. |
| Not so much farcical as sour. | Not only unfunny, but downright repellent. |
| A sensitive and astute first feature by Anne-Sophie Birot. | Absorbing character study by André Turpin . |
| An enthralling, entertaining feature. | A slick, engrossing melodrama. |
## Getting Started
### Install Required Packages
First ensure that you have installed the following required packages:
* **Bazel** ([instructions](http://bazel.build/docs/install.html))
* **TensorFlow** ([instructions](https://www.tensorflow.org/install/))
* **NumPy** ([instructions](http://www.scipy.org/install.html))
* **scikit-learn** ([instructions](http://scikit-learn.org/stable/install.html))
* **Natural Language Toolkit (NLTK)**
* First install NLTK ([instructions](http://www.nltk.org/install.html))
* Then install the NLTK data ([instructions](http://www.nltk.org/data.html))
* **gensim** ([instructions](https://radimrehurek.com/gensim/install.html))
* Only required if you will be expanding your vocabulary with the [word2vec](https://code.google.com/archive/p/word2vec/) model.
### Download Pretrained Models (Optional)
You can download model checkpoints pretrained on the
[BookCorpus](http://yknzhu.wixsite.com/mbweb) dataset in the following
configurations:
* Unidirectional RNN encoder ("uni-skip" in the paper)
* Bidirectional RNN encoder ("bi-skip" in the paper)
```shell
# Directory to download the pretrained models to.
PRETRAINED_MODELS_DIR="${HOME}/skip_thoughts/pretrained/"
mkdir -p ${PRETRAINED_MODELS_DIR}
cd ${PRETRAINED_MODELS_DIR}
# Download and extract the unidirectional model.
wget "http://download.tensorflow.org/models/skip_thoughts_uni_2017_02_02.tar.gz"
tar -xvf skip_thoughts_uni_2017_02_02.tar.gz
rm skip_thoughts_uni_2017_02_02.tar.gz
# Download and extract the bidirectional model.
wget "http://download.tensorflow.org/models/skip_thoughts_bi_2017_02_16.tar.gz"
tar -xvf skip_thoughts_bi_2017_02_16.tar.gz
rm skip_thoughts_bi_2017_02_16.tar.gz
```
You can now skip to the sections [Evaluating a Model](#evaluating-a-model) and
[Encoding Sentences](#encoding-sentences).
## Training a Model
### Prepare the Training Data
To train a model you will need to provide training data in TFRecord format. The
TFRecord format consists of a set of sharded files containing serialized
`tf.Example` protocol buffers. Each `tf.Example` proto contains three
sentences:
* `encode`: The sentence to encode.
* `decode_pre`: The sentence preceding `encode` in the original text.
* `decode_post`: The sentence following `encode` in the original text.
Each sentence is a list of words. During preprocessing, a dictionary is created
that assigns each word in the vocabulary to an integer-valued id. Each sentence
is encoded as a list of integer word ids in the `tf.Example` protos.
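For concreteness, here is a minimal sketch of how one such `tf.Example` proto could be assembled; the `_int64_feature` helper and the word ids are illustrative, not taken from the preprocessing script:

```python
import tensorflow as tf

def _int64_feature(values):
  # Wrap a list of integer word ids as an int64 feature.
  return tf.train.Feature(int64_list=tf.train.Int64List(value=values))

# Hypothetical word ids for three consecutive sentences.
example = tf.train.Example(features=tf.train.Features(feature={
    "encode": _int64_feature([5, 9, 2]),       # the sentence to encode
    "decode_pre": _int64_feature([7, 3, 11]),  # the preceding sentence
    "decode_post": _int64_feature([4, 8, 6]),  # the following sentence
}))
serialized = example.SerializeToString()
```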
We have provided a script to preprocess any set of text files into this format.
You may wish to use the [BookCorpus](http://yknzhu.wixsite.com/mbweb) dataset.
Note that the preprocessing script may take **12 hours** or more to complete
on this large dataset.
```shell
# Comma-separated list of globs matching the input files. The format of
# the input files is assumed to be a list of newline-separated sentences, where
# each sentence is already tokenized.
INPUT_FILES="${HOME}/skip_thoughts/bookcorpus/*.txt"
# Location to save the preprocessed training and validation data.
DATA_DIR="${HOME}/skip_thoughts/data"
# Build the preprocessing script.
bazel build -c opt skip_thoughts/data/preprocess_dataset
# Run the preprocessing script.
bazel-bin/skip_thoughts/data/preprocess_dataset \
  --input_files=${INPUT_FILES} \
  --output_dir=${DATA_DIR}
```
When the script finishes, you will find 100 training files and 1 validation
file in `DATA_DIR`. The files will match the patterns `train-?????-of-00100`
and `validation-00000-of-00001` respectively.

The script will also produce a file named `vocab.txt`. The format of this file
is a list of newline-separated words, where each word's id is its 0-based line
index. Words are sorted in descending order of frequency in the input data.
Only the top 20,000 words are assigned unique ids; all other words are
assigned the "unknown id" of 1 in the processed data.
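Given that format, loading the vocabulary into a word-to-id dictionary is a one-liner (a sketch; the path is hypothetical):

```python
# Each line of vocab.txt holds one word; its 0-based line index is its id.
with open("/path/to/vocab.txt") as f:
  word_to_id = {word.strip(): i for i, word in enumerate(f)}
```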
### Run the Training Script
Execute the following commands to start the training script. By default it will
run for 500k steps (around 9 days on a GeForce GTX 1080 GPU).
```shell
# Directory containing the preprocessed data.
DATA_DIR="${HOME}/skip_thoughts/data"
# Directory to save the model.
MODEL_DIR="${HOME}/skip_thoughts/model"
# Build the model.
bazel build -c opt skip_thoughts/...
# Run the training script.
bazel-bin/skip_thoughts/train \
  --input_file_pattern="${DATA_DIR}/train-?????-of-00100" \
  --train_dir="${MODEL_DIR}/train"
```
### Track Training Progress
Optionally, you can run the `track_perplexity` script in a separate process.
This logs per-word perplexity on the validation set, which allows training
progress to be monitored on
[TensorBoard](https://www.tensorflow.org/get_started/summaries_and_tensorboard).

Note that you may run out of memory if you run this script on the same GPU as
the training script. You can set the environment variable
`CUDA_VISIBLE_DEVICES=""` to force the script to run on CPU. If it runs too
slowly on CPU, you can decrease the value of `--num_eval_examples`.
```shell
DATA_DIR="${HOME}/skip_thoughts/data"
MODEL_DIR="${HOME}/skip_thoughts/model"
# Ignore GPU devices (only necessary if your GPU is currently memory
# constrained, for example, by running the training script).
export CUDA_VISIBLE_DEVICES=""
# Run the evaluation script. This will run in a loop, periodically loading the
# latest model checkpoint file and computing evaluation metrics.
bazel-bin/skip_thoughts/track_perplexity \
  --input_file_pattern="${DATA_DIR}/validation-?????-of-00001" \
  --checkpoint_dir="${MODEL_DIR}/train" \
  --eval_dir="${MODEL_DIR}/val" \
  --num_eval_examples=50000
```
If you started the `track_perplexity` script, run a
[TensorBoard](https://www.tensorflow.org/get_started/summaries_and_tensorboard)
server in a separate process for real-time monitoring of training summaries and
validation perplexity.
```shell
MODEL_DIR="${HOME}/skip_thoughts/model"
# Run a TensorBoard server.
tensorboard --logdir="${MODEL_DIR}"
```
## Expanding the Vocabulary
### Overview
The vocabulary generated by the preprocessing script contains only 20,000
words, which is insufficient for many tasks. For example, a sentence from
Wikipedia might contain nouns that do not appear in this vocabulary.

A solution to this problem, described in the
[Skip-Thought Vectors](https://papers.nips.cc/paper/5950-skip-thought-vectors.pdf)
paper, is to learn a mapping that transfers word representations from one
model to another. This idea is based on the "Translation Matrix" method from the paper
[Exploiting Similarities Among Languages for Machine Translation](https://arxiv.org/abs/1309.4168).

Specifically, we will load the word embeddings from a trained *Skip-Thoughts*
model and from a trained [word2vec model](https://arxiv.org/pdf/1301.3781.pdf)
(which has a much larger vocabulary). We will train a linear regression model
without regularization to learn a linear mapping from the word2vec embedding
space to the *Skip-Thoughts* embedding space. We will then apply the linear
model to all words in the word2vec vocabulary, yielding vectors in the
*Skip-Thoughts* word embedding space for the union of the two vocabularies.
The linear regression task is to learn a parameter matrix *W* to minimize
*|| X - Y \* W ||<sup>2</sup>*, where *X* is a matrix of *Skip-Thoughts*
embeddings of shape `[num_words, dim1]`, *Y* is a matrix of word2vec embeddings
of shape `[num_words, dim2]`, and *W* is a matrix of shape `[dim2, dim1]`.
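As a sketch of this step, the closed-form least-squares fit can be written with NumPy; the random matrices simply stand in for the real embedding tables (620-d Skip-Thoughts word embeddings and 300-d word2vec vectors, per the defaults elsewhere in this document):

```python
import numpy as np

num_words, dim1, dim2 = 10000, 620, 300
X = np.random.randn(num_words, dim1)  # Skip-Thoughts embeddings of shared words.
Y = np.random.randn(num_words, dim2)  # word2vec embeddings of the same words.

# Solve min_W ||X - Y W||^2 without regularization; W has shape [dim2, dim1].
W, _, _, _ = np.linalg.lstsq(Y, X, rcond=None)

# Any word2vec vector can now be mapped into the Skip-Thoughts space.
mapped = Y[0].dot(W)
```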
### Preparation
First you will need to download and unpack a pretrained
[word2vec model](https://arxiv.org/pdf/1301.3781.pdf) from
[this website](https://code.google.com/archive/p/word2vec/)
([direct download link](https://drive.google.com/file/d/0B7XkCwpI5KDYNlNUTTlSS21pQmM/edit?usp=sharing)).
This model was trained on the Google News dataset (about 100 billion words).
Also ensure that you have already [installed gensim](https://radimrehurek.com/gensim/install.html).
### Run the Vocabulary Expansion Script
```shell
# Path to checkpoint file or a directory containing checkpoint files (the script
# will select the most recent).
CHECKPOINT_PATH="${HOME}/skip_thoughts/model/train"
# Vocabulary file generated by the preprocessing script.
SKIP_THOUGHTS_VOCAB="${HOME}/skip_thoughts/data/vocab.txt"
# Path to downloaded word2vec model.
WORD2VEC_MODEL="${HOME}/skip_thoughts/googlenews/GoogleNews-vectors-negative300.bin"
# Output directory.
EXP_VOCAB_DIR="${HOME}/skip_thoughts/exp_vocab"
# Build the vocabulary expansion script.
bazel build -c opt skip_thoughts/vocabulary_expansion
# Run the vocabulary expansion script.
bazel-bin/skip_thoughts/vocabulary_expansion \
  --skip_thoughts_model=${CHECKPOINT_PATH} \
  --skip_thoughts_vocab=${SKIP_THOUGHTS_VOCAB} \
  --word2vec_model=${WORD2VEC_MODEL} \
  --output_dir=${EXP_VOCAB_DIR}
```
## Evaluating a Model
### Overview
The model can be evaluated using the benchmark tasks described in the
[Skip-Thought Vectors](https://papers.nips.cc/paper/5950-skip-thought-vectors.pdf)
paper. The following tasks are supported (refer to the paper for full details):
* **SICK** semantic relatedness task.
* **MSRP** (Microsoft Research Paraphrase Corpus) paraphrase detection task.
* Binary classification tasks:
* **MR** movie review sentiment task.
* **CR** customer product review task.
* **SUBJ** subjectivity/objectivity task.
* **MPQA** opinion polarity task.
* **TREC** question-type classification task.
### Preparation
You will need to clone or download the
[skip-thoughts GitHub repository](https://github.com/ryankiros/skip-thoughts) by
[ryankiros](https://github.com/ryankiros) (the first author of the Skip-Thoughts
paper):
```shell
# Folder to clone the repository to.
ST_KIROS_DIR="${HOME}/skip_thoughts/skipthoughts_kiros"
# Clone the repository.
git clone git@github.com:ryankiros/skip-thoughts.git "${ST_KIROS_DIR}/skipthoughts"
# Make the package importable.
export PYTHONPATH="${ST_KIROS_DIR}/:${PYTHONPATH}"
```
You will also need to download the data needed for each evaluation task. See the
instructions [here](https://github.com/ryankiros/skip-thoughts).
For example, the CR (customer review) dataset can be found
[here](http://nlp.stanford.edu/~sidaw/home/projects:nbsvm). For this task, we
need the files `custrev.pos` and `custrev.neg`.
### Run the Evaluation Tasks
In the following example we will evaluate a unidirectional model ("uni-skip" in
the paper) on the CR task. To use a bidirectional model ("bi-skip" in the
paper), simply pass the flags `--bi_vocab_file`, `--bi_embeddings_file` and
`--bi_checkpoint_path` instead. To use the "combine-skip" model described in the
paper you will need to pass both the unidirectional and bidirectional flags.
```shell
# Path to checkpoint file or a directory containing checkpoint files (the script
# will select the most recent).
CHECKPOINT_PATH="${HOME}/skip_thoughts/model/train"
# Vocabulary file generated by the vocabulary expansion script.
VOCAB_FILE="${HOME}/skip_thoughts/exp_vocab/vocab.txt"
# Embeddings file generated by the vocabulary expansion script.
EMBEDDINGS_FILE="${HOME}/skip_thoughts/exp_vocab/embeddings.npy"
# Directory containing files custrev.pos and custrev.neg.
EVAL_DATA_DIR="${HOME}/skip_thoughts/eval_data"
# Build the evaluation script.
bazel build -c opt skip_thoughts/evaluate
# Run the evaluation script.
bazel-bin/skip_thoughts/evaluate \
  --eval_task=CR \
  --data_dir=${EVAL_DATA_DIR} \
  --uni_vocab_file=${VOCAB_FILE} \
  --uni_embeddings_file=${EMBEDDINGS_FILE} \
  --uni_checkpoint_path=${CHECKPOINT_PATH}
```
Output:
```python
[0.82539682539682535, 0.84084880636604775, 0.83023872679045096,
0.86206896551724133, 0.83554376657824936, 0.85676392572944293,
0.84084880636604775, 0.83023872679045096, 0.85145888594164454,
0.82758620689655171]
```
The output is a list of accuracies of 10 cross-validation classification models.
To get a single number, simply take the average:
```python
ipython # Launch IPython.
In [0]:
import numpy as np
np.mean([0.82539682539682535, 0.84084880636604775, 0.83023872679045096,
         0.86206896551724133, 0.83554376657824936, 0.85676392572944293,
         0.84084880636604775, 0.83023872679045096, 0.85145888594164454,
         0.82758620689655171])
Out [0]: 0.84009936423729525
```
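To evaluate the "combine-skip" model mentioned above, the same script would be invoked with both the unidirectional and bidirectional flags; a sketch (the `BI_*` variables are illustrative and would point at the bidirectional model's files):

```shell
bazel-bin/skip_thoughts/evaluate \
  --eval_task=CR \
  --data_dir=${EVAL_DATA_DIR} \
  --uni_vocab_file=${VOCAB_FILE} \
  --uni_embeddings_file=${EMBEDDINGS_FILE} \
  --uni_checkpoint_path=${CHECKPOINT_PATH} \
  --bi_vocab_file=${BI_VOCAB_FILE} \
  --bi_embeddings_file=${BI_EMBEDDINGS_FILE} \
  --bi_checkpoint_path=${BI_CHECKPOINT_PATH}
```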
## Encoding Sentences
In this example we will encode data from the
[movie review dataset](https://www.cs.cornell.edu/people/pabo/movie-review-data/)
(specifically the [sentence polarity dataset v1.0](https://www.cs.cornell.edu/people/pabo/movie-review-data/rt-polaritydata.tar.gz)).
```python
ipython # Launch IPython.
In [0]:
# Imports.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import os.path
import scipy.spatial.distance as sd
from skip_thoughts import configuration
from skip_thoughts import encoder_manager
In [1]:
# Set paths to the model.
VOCAB_FILE = "/path/to/vocab.txt"
EMBEDDING_MATRIX_FILE = "/path/to/embeddings.npy"
CHECKPOINT_PATH = "/path/to/model.ckpt-9999"
# The following directory should contain files rt-polarity.neg and
# rt-polarity.pos.
MR_DATA_DIR = "/dir/containing/mr/data"
In [2]:
# Set up the encoder. Here we are using a single unidirectional model.
# To use a bidirectional model as well, call load_model() again with
# configuration.model_config(bidirectional_encoder=True) and paths to the
# bidirectional model's files. The encoder will use the concatenation of
# all loaded models.
encoder = encoder_manager.EncoderManager()
encoder.load_model(configuration.model_config(),
                   vocabulary_file=VOCAB_FILE,
                   embedding_matrix_file=EMBEDDING_MATRIX_FILE,
                   checkpoint_path=CHECKPOINT_PATH)
In [3]:
# Load the movie review dataset.
data = []
with open(os.path.join(MR_DATA_DIR, 'rt-polarity.neg'), 'rb') as f:
  data.extend([line.decode('latin-1').strip() for line in f])
with open(os.path.join(MR_DATA_DIR, 'rt-polarity.pos'), 'rb') as f:
  data.extend([line.decode('latin-1').strip() for line in f])
In [4]:
# Generate Skip-Thought Vectors for each sentence in the dataset.
encodings = encoder.encode(data)
In [5]:
# Define a helper function to generate nearest neighbors.
def get_nn(ind, num=10):
  encoding = encodings[ind]
  scores = sd.cdist([encoding], encodings, "cosine")[0]
  sorted_ids = np.argsort(scores)
  print("Sentence:")
  print("", data[ind])
  print("\nNearest neighbors:")
  for i in range(1, num + 1):
    print(" %d. %s (%.3f)" %
          (i, data[sorted_ids[i]], scores[sorted_ids[i]]))
In [6]:
# Compute nearest neighbors of the first sentence in the dataset.
get_nn(0)
```
Output:
```
Sentence:
simplistic , silly and tedious .
Nearest neighbors:
1. trite , banal , cliched , mostly inoffensive . (0.247)
2. banal and predictable . (0.253)
3. witless , pointless , tasteless and idiotic . (0.272)
4. loud , silly , stupid and pointless . (0.295)
5. grating and tedious . (0.299)
6. idiotic and ugly . (0.330)
7. black-and-white and unrealistic . (0.335)
8. hopelessly inane , humorless and under-inspired . (0.335)
9. shallow , noisy and pretentious . (0.340)
10. . . . unlikable , uninteresting , unfunny , and completely , utterly inept . (0.346)
```
package(default_visibility = [":internal"])

licenses(["notice"])  # Apache 2.0

exports_files(["LICENSE"])

package_group(
    name = "internal",
    packages = [
        "//skip_thoughts/...",
    ],
)

py_library(
    name = "configuration",
    srcs = ["configuration.py"],
    srcs_version = "PY2AND3",
)

py_library(
    name = "skip_thoughts_model",
    srcs = ["skip_thoughts_model.py"],
    srcs_version = "PY2AND3",
    deps = [
        "//skip_thoughts/ops:gru_cell",
        "//skip_thoughts/ops:input_ops",
    ],
)

py_test(
    name = "skip_thoughts_model_test",
    size = "large",
    srcs = ["skip_thoughts_model_test.py"],
    deps = [
        ":configuration",
        ":skip_thoughts_model",
    ],
)

py_binary(
    name = "train",
    srcs = ["train.py"],
    srcs_version = "PY2AND3",
    deps = [
        ":configuration",
        ":skip_thoughts_model",
    ],
)

py_binary(
    name = "track_perplexity",
    srcs = ["track_perplexity.py"],
    srcs_version = "PY2AND3",
    deps = [
        ":configuration",
        ":skip_thoughts_model",
    ],
)

py_binary(
    name = "vocabulary_expansion",
    srcs = ["vocabulary_expansion.py"],
    srcs_version = "PY2AND3",
)

py_library(
    name = "skip_thoughts_encoder",
    srcs = ["skip_thoughts_encoder.py"],
    srcs_version = "PY2AND3",
    deps = [
        ":skip_thoughts_model",
        "//skip_thoughts/data:special_words",
    ],
)

py_library(
    name = "encoder_manager",
    srcs = ["encoder_manager.py"],
    srcs_version = "PY2AND3",
    deps = [
        ":skip_thoughts_encoder",
    ],
)

py_binary(
    name = "evaluate",
    srcs = ["evaluate.py"],
    srcs_version = "PY2AND3",
    deps = [
        ":encoder_manager",
        "//skip_thoughts:configuration",
    ],
)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Default configuration for model architecture and training."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
class _HParams(object):
  """Wrapper for configuration parameters."""
  pass


def model_config(input_file_pattern=None,
                 input_queue_capacity=640000,
                 num_input_reader_threads=1,
                 shuffle_input_data=True,
                 uniform_init_scale=0.1,
                 vocab_size=20000,
                 batch_size=128,
                 word_embedding_dim=620,
                 bidirectional_encoder=False,
                 encoder_dim=2400):
  """Creates a model configuration object.

  Args:
    input_file_pattern: File pattern of sharded TFRecord files containing
      tf.Example protobufs.
    input_queue_capacity: Number of examples to keep in the input queue.
    num_input_reader_threads: Number of threads for prefetching input
      tf.Examples.
    shuffle_input_data: Whether to shuffle the input data.
    uniform_init_scale: Scale of random uniform initializer.
    vocab_size: Number of unique words in the vocab.
    batch_size: Batch size (training and evaluation only).
    word_embedding_dim: Word embedding dimension.
    bidirectional_encoder: Whether to use a bidirectional or unidirectional
      encoder RNN.
    encoder_dim: Number of output dimensions of the sentence encoder.

  Returns:
    An object containing model configuration parameters.
  """
  config = _HParams()
  config.input_file_pattern = input_file_pattern
  config.input_queue_capacity = input_queue_capacity
  config.num_input_reader_threads = num_input_reader_threads
  config.shuffle_input_data = shuffle_input_data
  config.uniform_init_scale = uniform_init_scale
  config.vocab_size = vocab_size
  config.batch_size = batch_size
  config.word_embedding_dim = word_embedding_dim
  config.bidirectional_encoder = bidirectional_encoder
  config.encoder_dim = encoder_dim
  return config


def training_config(learning_rate=0.0008,
                    learning_rate_decay_factor=0.5,
                    learning_rate_decay_steps=400000,
                    number_of_steps=500000,
                    clip_gradient_norm=5.0,
                    save_model_secs=600,
                    save_summaries_secs=600):
  """Creates a training configuration object.

  Args:
    learning_rate: Initial learning rate.
    learning_rate_decay_factor: If > 0, the learning rate decay factor.
    learning_rate_decay_steps: The number of steps before the learning rate
      decays by learning_rate_decay_factor.
    number_of_steps: The total number of training steps to run. Passing None
      will cause the training script to run indefinitely.
    clip_gradient_norm: If not None, then clip gradients to this value.
    save_model_secs: How often (in seconds) to save model checkpoints.
    save_summaries_secs: How often (in seconds) to save model summaries.

  Returns:
    An object containing training configuration parameters.

  Raises:
    ValueError: If learning_rate_decay_factor is set and
      learning_rate_decay_steps is unset.
  """
  if learning_rate_decay_factor and not learning_rate_decay_steps:
    raise ValueError(
        "learning_rate_decay_factor requires learning_rate_decay_steps.")

  config = _HParams()
  config.learning_rate = learning_rate
  config.learning_rate_decay_factor = learning_rate_decay_factor
  config.learning_rate_decay_steps = learning_rate_decay_steps
  config.number_of_steps = number_of_steps
  config.clip_gradient_norm = clip_gradient_norm
  config.save_model_secs = save_model_secs
  config.save_summaries_secs = save_summaries_secs
  return config
package(default_visibility = ["//skip_thoughts:internal"])

licenses(["notice"])  # Apache 2.0

exports_files(["LICENSE"])

py_library(
    name = "special_words",
    srcs = ["special_words.py"],
    srcs_version = "PY2AND3",
    deps = [],
)

py_binary(
    name = "preprocess_dataset",
    srcs = [
        "preprocess_dataset.py",
    ],
    srcs_version = "PY2AND3",
    deps = [
        ":special_words",
    ],
)