Merge pull request #1 from tensorflow/master

update to tensorflow/model master

Merge pull request #1 from tensorflow/master
update to tensorflow/model master
68a18b70 · Toby Boyd · GitHub · bc70271a · 2c4fea8d · 68a18b70
Commit 68a18b70 authored Jun 08, 2017 by Toby Boyd Committed by GitHub Jun 08, 2017
20 changed files
--- a/skip_thoughts/skip_thoughts/data/BUILD
+++ b/skip_thoughts/skip_thoughts/data/BUILD
+package(default_visibility = ["//skip_thoughts:internal"])
+
+licenses(["notice"])  # Apache 2.0
+
+exports_files(["LICENSE"])
+
+py_library(
+    name = "special_words",
+    srcs = ["special_words.py"],
+    srcs_version = "PY2AND3",
+    deps = [],
+)
+
+py_binary(
+    name = "preprocess_dataset",
+    srcs = [
+        "preprocess_dataset.py",
+    ],
+    srcs_version = "PY2AND3",
+    deps = [
+        ":special_words",
+    ],
+)
--- a/skip_thoughts/skip_thoughts/data/__init__.py
+++ b/skip_thoughts/skip_thoughts/data/__init__.py
--- a/skip_thoughts/skip_thoughts/data/preprocess_dataset.py
+++ b/skip_thoughts/skip_thoughts/data/preprocess_dataset.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Converts a set of text files to TFRecord format with Example protos.
+
+Each Example proto in the output contains the following fields:
+
+  decode_pre: list of int64 ids corresponding to the "previous" sentence.
+  encode: list of int64 ids corresponding to the "current" sentence.
+  decode_post: list of int64 ids corresponding to the "post" sentence.
+
+In addition, the following files are generated:
+
+  vocab.txt: List of "<word> <id>" pairs, where <id> is the integer
+             encoding of <word> in the Example protos.
+  word_counts.txt: List of "<word> <count>" pairs, where <count> is the number
+                   of occurrences of <word> in the input files.
+
+The vocabulary of word ids is constructed from the top --num_words by word
+count. All other words get the <unk> word id.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import os
+
+
+import numpy as np
+import tensorflow as tf
+
+from skip_thoughts.data import special_words
+
+FLAGS = tf.flags.FLAGS
+
+# Command-line flags controlling input discovery, vocabulary construction and
+# the layout of the generated TFRecord shards.
+tf.flags.DEFINE_string("input_files", None,
+                       "Comma-separated list of globs matching the input "
+                       "files. The format of the input files is assumed to be "
+                       "a list of newline-separated sentences, where each "
+                       "sentence is already tokenized.")
+
+tf.flags.DEFINE_string("vocab_file", "",
+                       "(Optional) existing vocab file. Otherwise, a new vocab "
+                       "file is created and written to the output directory. "
+                       "The file format is a list of newline-separated words, "
+                       "where the word id is the corresponding 0-based index "
+                       "in the file.")
+
+tf.flags.DEFINE_string("output_dir", None, "Output directory.")
+
+tf.flags.DEFINE_integer("train_output_shards", 100,
+                        "Number of output shards for the training set.")
+
+tf.flags.DEFINE_integer("validation_output_shards", 1,
+                        "Number of output shards for the validation set.")
+
+# The first --num_validation_sentences entries of the shuffled dataset become
+# the validation split; the remainder is used for training.
+tf.flags.DEFINE_integer("num_validation_sentences", 50000,
+                        "Number of sentences to hold out for the validation "
+                        "set.")
+
+tf.flags.DEFINE_integer("num_words", 20000,
+                        "Number of words to include in the output.")
+
+tf.flags.DEFINE_integer("max_sentences", 0,
+                        "If > 0, the maximum number of sentences to output.")
+
+tf.flags.DEFINE_integer("max_sentence_length", 30,
+                        "If > 0, exclude sentences whose encode, decode_pre "
+                        "OR decode_post sentence exceeds this length.")
+
+tf.flags.DEFINE_boolean("add_eos", True,
+                        "Whether to add end-of-sentence ids to the output.")
+
+tf.logging.set_verbosity(tf.logging.INFO)
+
+
+def _build_vocabulary(input_files):
+  """Loads or builds the model vocabulary.
+
+  If --vocab_file is set, the vocabulary is read from that file (one word per
+  line; the word id is the 0-based line index). Otherwise word frequencies are
+  counted over `input_files`, the most frequent words are kept so that the
+  vocabulary (including the reserved EOS and UNK entries) has at most
+  --num_words entries, and vocab.txt and word_counts.txt are written to
+  --output_dir.
+
+  Args:
+    input_files: List of pre-tokenized input .txt files.
+
+  Returns:
+    vocab: A dictionary of word to id.
+  """
+  if FLAGS.vocab_file:
+    tf.logging.info("Loading existing vocab file.")
+    vocab = collections.OrderedDict()
+    with tf.gfile.GFile(FLAGS.vocab_file, mode="r") as f:
+      for i, line in enumerate(f):
+        # as_text() accepts both bytes and unicode lines, so this works on
+        # Python 2 and Python 3 (str has no .decode() on Python 3).
+        word = tf.compat.as_text(line).strip()
+        assert word not in vocab, "Attempting to add word twice: %s" % word
+        vocab[word] = i
+    tf.logging.info("Read vocab of size %d from %s",
+                    len(vocab), FLAGS.vocab_file)
+    return vocab
+
+  tf.logging.info("Creating vocabulary.")
+  num = 0
+  wordcount = collections.Counter()
+  for input_file in input_files:
+    tf.logging.info("Processing file: %s", input_file)
+    # Use a context manager so each input file is closed after counting.
+    with tf.gfile.FastGFile(input_file) as f:
+      for sentence in f:
+        wordcount.update(sentence.split())
+
+        num += 1
+        if num % 1000000 == 0:
+          tf.logging.info("Processed %d sentences", num)
+
+  tf.logging.info("Processed %d sentences total", num)
+
+  # Materialize as lists: on Python 3 keys()/values() are views that can be
+  # neither indexed nor converted to a numeric array by np.argsort().
+  words = list(wordcount.keys())
+  freqs = list(wordcount.values())
+  sorted_indices = np.argsort(freqs)[::-1]  # Most frequent first.
+
+  vocab = collections.OrderedDict()
+  vocab[special_words.EOS] = special_words.EOS_ID
+  vocab[special_words.UNK] = special_words.UNK_ID
+  # Two slots are reserved for EOS and UNK, hence num_words - 2.
+  for w_id, w_index in enumerate(sorted_indices[0:FLAGS.num_words - 2]):
+    vocab[words[w_index]] = w_id + 2  # 0: EOS, 1: UNK.
+
+  tf.logging.info("Created vocab with %d words", len(vocab))
+
+  vocab_file = os.path.join(FLAGS.output_dir, "vocab.txt")
+  with tf.gfile.FastGFile(vocab_file, "w") as f:
+    f.write("\n".join(vocab.keys()))
+  tf.logging.info("Wrote vocab file to %s", vocab_file)
+
+  word_counts_file = os.path.join(FLAGS.output_dir, "word_counts.txt")
+  with tf.gfile.FastGFile(word_counts_file, "w") as f:
+    for i in sorted_indices:
+      f.write("%s %d\n" % (words[i], freqs[i]))
+  tf.logging.info("Wrote word counts file to %s", word_counts_file)
+
+  return vocab
+
+
+def _int64_feature(value):
+  """Wraps an iterable of integer-like values in an Int64List Feature."""
+  int_values = [int(item) for item in value]
+  int64_list = tf.train.Int64List(value=int_values)
+  return tf.train.Feature(int64_list=int64_list)
+
+
+def _sentence_to_ids(sentence, vocab):
+  """Maps each word of a sentence to its vocab id.
+
+  Words missing from `vocab` map to special_words.UNK_ID. When --add_eos is
+  set, special_words.EOS_ID is appended to the result.
+  """
+  word_ids = []
+  for word in sentence:
+    word_ids.append(vocab.get(word, special_words.UNK_ID))
+  if not FLAGS.add_eos:
+    return word_ids
+  word_ids.append(special_words.EOS_ID)
+  return word_ids
+
+
+def _create_serialized_example(predecessor, current, successor, vocab):
+  """Builds an Example proto for a sentence triple and serializes it."""
+  sentences_by_key = (
+      ("decode_pre", predecessor),
+      ("encode", current),
+      ("decode_post", successor),
+  )
+  feature_map = {}
+  for key, words in sentences_by_key:
+    feature_map[key] = _int64_feature(_sentence_to_ids(words, vocab))
+
+  features = tf.train.Features(feature=feature_map)
+  return tf.train.Example(features=features).SerializeToString()
+
+
+def _process_input_file(filename, vocab, stats):
+  """Turns the sentences of one input file into serialized Example protos.
+
+  A sliding window of three consecutive sentences is kept while reading the
+  file; every window in which all three sentences are non-empty is a
+  candidate example.
+
+  Args:
+    filename: Path to a pre-tokenized input .txt file.
+    vocab: A dictionary of word to id.
+    stats: A Counter object for statistics.
+
+  Returns:
+    processed: A list of serialized Example protos
+  """
+  tf.logging.info("Processing input file: %s", filename)
+  examples = []
+
+  # Sliding window over the file (each entry is a list of words).
+  prev_words = None
+  cur_words = None
+
+  for line in tf.gfile.FastGFile(filename):
+    stats["sentences_seen"] += 1
+    next_words = line.split()
+
+    # The first 2 sentences per file will be skipped.
+    if prev_words and cur_words and next_words:
+      stats["sentences_considered"] += 1
+
+      # Note that we are going to insert <EOS> later, so we only allow
+      # sentences with strictly less than max_sentence_length to pass.
+      limit = FLAGS.max_sentence_length
+      window = (prev_words, cur_words, next_words)
+      if limit and any(len(words) >= limit for words in window):
+        stats["sentences_too_long"] += 1
+      else:
+        examples.append(
+            _create_serialized_example(prev_words, cur_words, next_words,
+                                       vocab))
+        stats["sentences_output"] += 1
+
+    # Advance the window by one sentence.
+    prev_words, cur_words = cur_words, next_words
+
+    seen = stats["sentences_seen"]
+    written = stats["sentences_output"]
+    if seen and seen % 100000 == 0:
+      tf.logging.info("Processed %d sentences (%d output)", seen, written)
+    if FLAGS.max_sentences and written >= FLAGS.max_sentences:
+      break
+
+  tf.logging.info("Completed processing file %s", filename)
+  return examples
+
+
+def _write_shard(filename, dataset, indices):
+  """Writes dataset[j], for each j in indices, to one TFRecord file."""
+  with tf.python_io.TFRecordWriter(filename) as record_writer:
+    # Lazily look up each record so writes happen in index order.
+    for record in (dataset[idx] for idx in indices):
+      record_writer.write(record)
+
+
+def _write_dataset(name, dataset, indices, num_shards):
+  """Splits the selected records into contiguous shards and writes them.
+
+  Args:
+    name: Name of the dataset (e.g. "train").
+    dataset: List of serialized Example protos.
+    indices: List of indices of 'dataset' to be written.
+    num_shards: The number of output shards.
+  """
+  tf.logging.info("Writing dataset %s", name)
+  total = len(indices)
+  # num_shards + 1 evenly spaced cut points over [0, total].
+  borders = np.int32(np.linspace(0, total, num_shards + 1))
+  for shard, (start, end) in enumerate(zip(borders[:-1], borders[1:])):
+    shard_name = "%s-%.5d-of-%.5d" % (name, shard, num_shards)
+    filename = os.path.join(FLAGS.output_dir, shard_name)
+    _write_shard(filename, dataset, indices[start:end])
+    tf.logging.info("Wrote dataset indices [%d, %d) to output shard %s",
+                    start, end, filename)
+  tf.logging.info("Finished writing %d sentences in dataset %s.",
+                  total, name)
+
+
+def main(unused_argv):
+  """Builds the vocabulary and writes the train/validation TFRecord shards.
+
+  Args:
+    unused_argv: Unused.
+
+  Raises:
+    ValueError: If --input_files or --output_dir is missing, or if one of the
+      comma-separated input patterns matches no files.
+  """
+  if not FLAGS.input_files:
+    raise ValueError("--input_files is required.")
+  if not FLAGS.output_dir:
+    raise ValueError("--output_dir is required.")
+
+  if not tf.gfile.IsDirectory(FLAGS.output_dir):
+    tf.gfile.MakeDirs(FLAGS.output_dir)
+
+  input_files = []
+  for pattern in FLAGS.input_files.split(","):
+    # Glob each pattern on its own; globbing the whole comma-separated flag
+    # value would treat the commas as part of a single pattern.
+    match = tf.gfile.Glob(pattern)
+    if not match:
+      raise ValueError("Found no files matching %s" % pattern)
+    input_files.extend(match)
+  tf.logging.info("Found %d input files.", len(input_files))
+
+  vocab = _build_vocabulary(input_files)
+
+  tf.logging.info("Generating dataset.")
+  stats = collections.Counter()
+  dataset = []
+  for filename in input_files:
+    dataset.extend(_process_input_file(filename, vocab, stats))
+    # Stop reading further files once --max_sentences examples were produced.
+    if FLAGS.max_sentences and stats["sentences_output"] >= FLAGS.max_sentences:
+      break
+
+  tf.logging.info("Generated dataset with %d sentences.", len(dataset))
+  for k, v in stats.items():
+    tf.logging.info("%s: %d", k, v)
+
+  tf.logging.info("Shuffling dataset.")
+  np.random.seed(123)  # Fixed seed so the train/validation split is repeatable.
+  shuffled_indices = np.random.permutation(len(dataset))
+  val_indices = shuffled_indices[:FLAGS.num_validation_sentences]
+  train_indices = shuffled_indices[FLAGS.num_validation_sentences:]
+
+  _write_dataset("train", dataset, train_indices, FLAGS.train_output_shards)
+  _write_dataset("validation", dataset, val_indices,
+                 FLAGS.validation_output_shards)
+
+
+if __name__ == "__main__":
+  tf.app.run()
--- a/tutorials/rnn/seq2seq.py
+++ b/tutorials/rnn/seq2seq.py
-# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -12,11 +12,16 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Import seq2seq python ops for backward compatibility."""
+"""Special word constants.

-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
+NOTE: The ids of the EOS and UNK constants should not be modified. It is assumed
+that these always occupy the first two ids.
+"""

-raise ImportError(
-    "This module is deprecated. Use tf.contrib.legacy_seq2seq instead.")
+# End of sentence.
+EOS = "<eos>"
+EOS_ID = 0
+
+# Unknown.
+UNK = "<unk>"
+UNK_ID = 1
--- a/skip_thoughts/skip_thoughts/encoder_manager.py
+++ b/skip_thoughts/skip_thoughts/encoder_manager.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Manager class for loading and encoding with multiple skip-thoughts models.
+
+If multiple models are loaded at once then the encode() function returns the
+concatenation of the outputs of each model.
+
+Example usage:
+  manager = EncoderManager()
+  manager.load_model(model_config_1, vocabulary_file_1, embedding_matrix_file_1,
+                     checkpoint_path_1)
+  manager.load_model(model_config_2, vocabulary_file_2, embedding_matrix_file_2,
+                     checkpoint_path_2)
+  encodings = manager.encode(data)
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+
+
+import numpy as np
+import tensorflow as tf
+
+from skip_thoughts import skip_thoughts_encoder
+
+
+class EncoderManager(object):
+  """Manager class for loading and encoding with skip-thoughts models.
+
+  Each call to load_model() adds one encoder with its own tf.Graph and
+  tf.Session. encode() runs every loaded encoder on the same input and
+  concatenates their outputs along axis 1.
+  """
+
+  def __init__(self):
+    # encoders[i] is evaluated with sessions[i]; the two lists stay parallel.
+    self.encoders = []
+    self.sessions = []
+
+  def load_model(self, model_config, vocabulary_file, embedding_matrix_file,
+                 checkpoint_path):
+    """Loads a skip-thoughts model.
+
+    Args:
+      model_config: Object containing parameters for building the model.
+      vocabulary_file: Path to vocabulary file containing a list of newline-
+        separated words where the word id is the corresponding 0-based index in
+        the file.
+      embedding_matrix_file: Path to a serialized numpy array of shape
+        [vocab_size, embedding_dim].
+      checkpoint_path: SkipThoughtsModel checkpoint file or a directory
+        containing a checkpoint file.
+    """
+    tf.logging.info("Reading vocabulary from %s", vocabulary_file)
+    with tf.gfile.GFile(vocabulary_file, mode="r") as f:
+      lines = list(f.readlines())
+    # as_text() accepts both bytes and unicode lines, so this works on
+    # Python 2 and Python 3 (str has no .decode() on Python 3).
+    reverse_vocab = [tf.compat.as_text(line).strip() for line in lines]
+    tf.logging.info("Loaded vocabulary with %d words.", len(reverse_vocab))
+
+    tf.logging.info("Loading embedding matrix from %s", embedding_matrix_file)
+    # Note: tf.gfile.GFile doesn't work here because np.load() calls f.seek()
+    # with 3 arguments. The file is opened in binary mode because np.load()
+    # reads raw bytes.
+    with open(embedding_matrix_file, "rb") as f:
+      embedding_matrix = np.load(f)
+    tf.logging.info("Loaded embedding matrix with shape %s",
+                    embedding_matrix.shape)
+
+    # Pair the i-th word with the i-th row of the embedding matrix.
+    word_embeddings = collections.OrderedDict(
+        zip(reverse_vocab, embedding_matrix))
+
+    # Build each model in its own graph so several models can coexist.
+    g = tf.Graph()
+    with g.as_default():
+      encoder = skip_thoughts_encoder.SkipThoughtsEncoder(word_embeddings)
+      restore_model = encoder.build_graph_from_config(model_config,
+                                                      checkpoint_path)
+
+    sess = tf.Session(graph=g)
+    restore_model(sess)
+
+    self.encoders.append(encoder)
+    self.sessions.append(sess)
+
+  def encode(self,
+             data,
+             use_norm=True,
+             verbose=False,
+             batch_size=128,
+             use_eos=False):
+    """Encodes a sequence of sentences as skip-thought vectors.
+
+    Args:
+      data: A list of input strings.
+      use_norm: If True, normalize output skip-thought vectors to unit L2 norm.
+      verbose: Whether to log every batch.
+      batch_size: Batch size for the RNN encoders.
+      use_eos: If True, append the end-of-sentence word to each input sentence.
+
+    Returns:
+      thought_vectors: A numpy array formed by concatenating, along axis 1, the
+        encodings of 'data' produced by each loaded model.
+
+    Raises:
+      ValueError: If called before calling load_model.
+    """
+    if not self.encoders:
+      raise ValueError(
+          "Must call load_model at least once before calling encode.")
+
+    encoded = []
+    for encoder, sess in zip(self.encoders, self.sessions):
+      encoded.append(
+          np.array(
+              encoder.encode(
+                  sess,
+                  data,
+                  use_norm=use_norm,
+                  verbose=verbose,
+                  batch_size=batch_size,
+                  use_eos=use_eos)))
+
+    return np.concatenate(encoded, axis=1)
+
+  def close(self):
+    """Closes the active TensorFlow Sessions."""
+    for sess in self.sessions:
+      sess.close()
--- a/skip_thoughts/skip_thoughts/evaluate.py
+++ b/skip_thoughts/skip_thoughts/evaluate.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Script to evaluate a skip-thoughts model.
+
+This script can evaluate a model with a unidirectional encoder ("uni-skip" in
+the paper); or a model with a bidirectional encoder ("bi-skip"); or the
+combination of a model with a unidirectional encoder and a model with a
+bidirectional encoder ("combine-skip").
+
+The uni-skip model (if it exists) is specified by the flags
+--uni_vocab_file, --uni_embeddings_file, --uni_checkpoint_path.
+
+The bi-skip model (if it exists) is specified by the flags
+--bi_vocab_file, --bi_embeddings_file, --bi_checkpoint_path.
+
+The evaluation tasks have different running times. SICK may take 5-10 minutes.
+MSRP, TREC and CR may take 20-60 minutes. SUBJ, MPQA and MR may take 2+ hours.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+
+from skipthoughts import eval_classification
+from skipthoughts import eval_msrp
+from skipthoughts import eval_sick
+from skipthoughts import eval_trec
+import tensorflow as tf
+
+from skip_thoughts import configuration
+from skip_thoughts import encoder_manager
+
+FLAGS = tf.flags.FLAGS
+
+# Which benchmark to run and where its data lives.
+tf.flags.DEFINE_string("eval_task", "CR",
+                       "Name of the evaluation task to run. Available tasks: "
+                       "MR, CR, SUBJ, MPQA, SICK, MSRP, TREC.")
+
+tf.flags.DEFINE_string("data_dir", None, "Directory containing training data.")
+
+# Each model is described by three flags (vocabulary, embeddings, checkpoint);
+# the uni_* flags describe the unidirectional model and the bi_* flags the
+# bidirectional one. A model is loaded only when its checkpoint path is set.
+tf.flags.DEFINE_string("uni_vocab_file", None,
+                       "Path to vocabulary file containing a list of newline-"
+                       "separated words where the word id is the "
+                       "corresponding 0-based index in the file.")
+tf.flags.DEFINE_string("bi_vocab_file", None,
+                       "Path to vocabulary file containing a list of newline-"
+                       "separated words where the word id is the "
+                       "corresponding 0-based index in the file.")
+
+tf.flags.DEFINE_string("uni_embeddings_file", None,
+                       "Path to serialized numpy array of shape "
+                       "[vocab_size, embedding_dim].")
+tf.flags.DEFINE_string("bi_embeddings_file", None,
+                       "Path to serialized numpy array of shape "
+                       "[vocab_size, embedding_dim].")
+
+tf.flags.DEFINE_string("uni_checkpoint_path", None,
+                       "Checkpoint file or directory containing a checkpoint "
+                       "file.")
+tf.flags.DEFINE_string("bi_checkpoint_path", None,
+                       "Checkpoint file or directory containing a checkpoint "
+                       "file.")
+
+tf.logging.set_verbosity(tf.logging.INFO)
+
+
+def main(unused_argv):
+  """Loads the requested encoder(s) and runs the selected evaluation task.
+
+  Args:
+    unused_argv: Unused.
+
+  Raises:
+    ValueError: If --data_dir is missing or --eval_task is not recognized.
+  """
+  if not FLAGS.data_dir:
+    raise ValueError("--data_dir is required.")
+
+  encoder = encoder_manager.EncoderManager()
+
+  # Close the TensorFlow sessions even when loading or evaluation fails.
+  try:
+    # Maybe load unidirectional encoder.
+    if FLAGS.uni_checkpoint_path:
+      print("Loading unidirectional model...")
+      uni_config = configuration.model_config()
+      encoder.load_model(uni_config, FLAGS.uni_vocab_file,
+                         FLAGS.uni_embeddings_file, FLAGS.uni_checkpoint_path)
+
+    # Maybe load bidirectional encoder.
+    if FLAGS.bi_checkpoint_path:
+      print("Loading bidirectional model...")
+      bi_config = configuration.model_config(bidirectional_encoder=True)
+      encoder.load_model(bi_config, FLAGS.bi_vocab_file,
+                         FLAGS.bi_embeddings_file, FLAGS.bi_checkpoint_path)
+
+    if FLAGS.eval_task in ["MR", "CR", "SUBJ", "MPQA"]:
+      eval_classification.eval_nested_kfold(
+          encoder, FLAGS.eval_task, FLAGS.data_dir, use_nb=False)
+    elif FLAGS.eval_task == "SICK":
+      eval_sick.evaluate(encoder, evaltest=True, loc=FLAGS.data_dir)
+    elif FLAGS.eval_task == "MSRP":
+      eval_msrp.evaluate(
+          encoder,
+          evalcv=True,
+          evaltest=True,
+          use_feats=True,
+          loc=FLAGS.data_dir)
+    elif FLAGS.eval_task == "TREC":
+      eval_trec.evaluate(
+          encoder, evalcv=True, evaltest=True, loc=FLAGS.data_dir)
+    else:
+      raise ValueError("Unrecognized eval_task: %s" % FLAGS.eval_task)
+  finally:
+    encoder.close()
+
+
+if __name__ == "__main__":
+  tf.app.run()
--- a/skip_thoughts/skip_thoughts/ops/BUILD
+++ b/skip_thoughts/skip_thoughts/ops/BUILD
+package(default_visibility = ["//skip_thoughts:internal"])
+
+licenses(["notice"])  # Apache 2.0
+
+exports_files(["LICENSE"])
+
+py_library(
+    name = "input_ops",
+    srcs = ["input_ops.py"],
+    srcs_version = "PY2AND3",
+)
+
+py_library(
+    name = "gru_cell",
+    srcs = ["gru_cell.py"],
+    srcs_version = "PY2AND3",
+)
--- a/skip_thoughts/skip_thoughts/ops/__init__.py
+++ b/skip_thoughts/skip_thoughts/ops/__init__.py
--- a/skip_thoughts/skip_thoughts/ops/gru_cell.py
+++ b/skip_thoughts/skip_thoughts/ops/gru_cell.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""GRU cell implementation for the skip-thought vectors model."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+
+import tensorflow as tf
+
+_layer_norm = tf.contrib.layers.layer_norm
+
+
+class LayerNormGRUCell(tf.contrib.rnn.RNNCell):
+  """GRU cell with layer normalization.
+
+  The layer normalization implementation is based on:
+
+    https://arxiv.org/abs/1607.06450.
+
+  "Layer Normalization"
+  Jimmy Lei Ba, Jamie Ryan Kiros, Geoffrey E. Hinton
+  """
+
+  def __init__(self,
+               num_units,
+               w_initializer,
+               u_initializer,
+               b_initializer,
+               activation=tf.nn.tanh):
+    """Initializes the cell.
+
+    Args:
+      num_units: Number of cell units.
+      w_initializer: Initializer for the "W" (input) parameter matrices.
+      u_initializer: Initializer for the "U" (recurrent) parameter matrices.
+      b_initializer: Initializer for the "b" (bias) parameter vectors.
+      activation: Cell activation function.
+    """
+    self._num_units = num_units
+    self._w_initializer = w_initializer
+    self._u_initializer = u_initializer
+    # NOTE(review): b_initializer is stored but not read anywhere in this
+    # class.
+    self._b_initializer = b_initializer
+    self._activation = activation
+
+  @property
+  def state_size(self):
+    """Size of the recurrent state: num_units."""
+    return self._num_units
+
+  @property
+  def output_size(self):
+    """Size of the cell output: num_units."""
+    return self._num_units
+
+  def _w_h_initializer(self):
+    """Returns an initializer for the "W_h" parameter matrix.
+
+    See equation (23) in the paper. The "W_h" parameter matrix is the
+    concatenation of two parameter submatrices. The matrix returned is
+    [U_z, U_r].
+
+    Returns:
+      An initializer function that, called with shape
+      [num_units, 2 * num_units], produces a Tensor of that shape as described
+      above.
+    """
+
+    def _initializer(shape, dtype=tf.float32, partition_info=None):
+      num_units = self._num_units
+      assert shape == [num_units, 2 * num_units]
+      # Each half is drawn independently from the "U" initializer.
+      u_z = self._u_initializer([num_units, num_units], dtype, partition_info)
+      u_r = self._u_initializer([num_units, num_units], dtype, partition_info)
+      return tf.concat([u_z, u_r], 1)
+
+    return _initializer
+
+  def _w_x_initializer(self, input_dim):
+    """Returns an initializer for the "W_x" parameter matrix.
+
+    See equation (23) in the paper. The "W_x" parameter matrix is the
+    concatenation of two parameter submatrices. The matrix returned is
+    [W_z, W_r].
+
+    Args:
+      input_dim: The dimension of the cell inputs.
+
+    Returns:
+      An initializer function that, called with shape
+      [input_dim, 2 * num_units], produces a Tensor of that shape as described
+      above.
+    """
+
+    def _initializer(shape, dtype=tf.float32, partition_info=None):
+      num_units = self._num_units
+      assert shape == [input_dim, 2 * num_units]
+      # Each half is drawn independently from the "W" initializer.
+      w_z = self._w_initializer([input_dim, num_units], dtype, partition_info)
+      w_r = self._w_initializer([input_dim, num_units], dtype, partition_info)
+      return tf.concat([w_z, w_r], 1)
+
+    return _initializer
+
+  def __call__(self, inputs, state, scope=None):
+    """GRU cell with layer normalization.
+
+    Args:
+      inputs: Input Tensor; its static dimension 1 is used as the input size.
+      state: Previous state Tensor; multiplied by [num_units, ...] matrices.
+      scope: Optional variable scope; defaults to "gru_cell".
+
+    Returns:
+      A pair (new_h, new_h): the new state serves as both output and state.
+    """
+    input_dim = inputs.get_shape().as_list()[1]
+    num_units = self._num_units
+
+    with tf.variable_scope(scope or "gru_cell"):
+      with tf.variable_scope("gates"):
+        w_h = tf.get_variable(
+            "w_h", [num_units, 2 * num_units],
+            initializer=self._w_h_initializer())
+        w_x = tf.get_variable(
+            "w_x", [input_dim, 2 * num_units],
+            initializer=self._w_x_initializer(input_dim))
+        # Both gates are computed together: the recurrent and input
+        # projections are layer-normalized separately and then summed.
+        z_and_r = (_layer_norm(tf.matmul(state, w_h), scope="layer_norm/w_h") +
+                   _layer_norm(tf.matmul(inputs, w_x), scope="layer_norm/w_x"))
+        # Split the activated gates into two equal parts along axis 1.
+        z, r = tf.split(tf.sigmoid(z_and_r), 2, 1)
+      with tf.variable_scope("candidate"):
+        w = tf.get_variable(
+            "w", [input_dim, num_units], initializer=self._w_initializer)
+        u = tf.get_variable(
+            "u", [num_units, num_units], initializer=self._u_initializer)
+        # r scales only the (normalized) recurrent contribution.
+        h_hat = (r * _layer_norm(tf.matmul(state, u), scope="layer_norm/u") +
+                 _layer_norm(tf.matmul(inputs, w), scope="layer_norm/w"))
+      # z interpolates between the previous state and the activated candidate.
+      new_h = (1 - z) * state + z * self._activation(h_hat)
+    return new_h, new_h
--- a/skip_thoughts/skip_thoughts/ops/input_ops.py
+++ b/skip_thoughts/skip_thoughts/ops/input_ops.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Input ops."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+
+
+import tensorflow as tf
+
+# A SentenceBatch is a pair of Tensors:
+#  ids: Batch of input sentences represented as sequences of word ids: an int64
+#    Tensor with shape [batch_size, padded_length].
+#  mask: Boolean mask distinguishing real words (1) from padded words (0): an
+#    int32 Tensor with shape [batch_size, padded_length].
+SentenceBatch = collections.namedtuple("SentenceBatch", ("ids", "mask"))
+
+
+def parse_example_batch(serialized):
+  """Parses serialized tf.Example protos into three SentenceBatch tuples.
+
+  Args:
+    serialized: A 1-D string Tensor; a batch of serialized tf.Example protos.
+  Returns:
+    encode: A SentenceBatch of encode sentences.
+    decode_pre: A SentenceBatch of "previous" sentences to decode.
+    decode_post: A SentenceBatch of "post" sentences to decode.
+  """
+  sentence_keys = ("encode", "decode_pre", "decode_post")
+  parsed = tf.parse_example(
+      serialized,
+      features={key: tf.VarLenFeature(dtype=tf.int64) for key in sentence_keys})
+
+  batches = []
+  for key in sentence_keys:
+    sparse = parsed[key]
+    dense_ids = tf.sparse_tensor_to_dense(sparse)  # Padding with zeroes.
+    # Ones at every position that holds a real value, zero elsewhere.
+    ones = tf.ones_like(sparse.values, dtype=tf.int32)
+    dense_mask = tf.sparse_to_dense(sparse.indices, sparse.dense_shape, ones)
+    batches.append(SentenceBatch(ids=dense_ids, mask=dense_mask))
+  return tuple(batches)
+
+
+def prefetch_input_data(reader,
+                        file_pattern,
+                        shuffle,
+                        capacity,
+                        num_reader_threads=1):
+  """Prefetches string values from disk into an input queue.
+
+  Builds a filename queue from the files matching `file_pattern`, creates
+  `num_reader_threads` read ops that enqueue into a values queue, registers a
+  QueueRunner for them and adds a scalar summary of the queue fill fraction.
+
+  Args:
+    reader: Instance of tf.ReaderBase.
+    file_pattern: Comma-separated list of file patterns (e.g.
+        "/tmp/train_data-?????-of-00100", where '?' acts as a wildcard that
+        matches any character).
+    shuffle: Boolean; whether to randomly shuffle the input data.
+    capacity: Queue capacity (number of records).
+    num_reader_threads: Number of reader threads feeding into the queue.
+
+  Returns:
+    A Queue containing prefetched string values.
+  """
+  data_files = []
+  for pattern in file_pattern.split(","):
+    data_files.extend(tf.gfile.Glob(pattern))
+  if not data_files:
+    # NOTE(review): nothing is raised here; whether execution stops depends on
+    # tf.logging.fatal -- confirm it aborts, otherwise an empty file list is
+    # passed on below.
+    tf.logging.fatal("Found no input files matching %s", file_pattern)
+  else:
+    tf.logging.info("Prefetching values from %d files matching %s",
+                    len(data_files), file_pattern)
+
+  # The filename queue capacity is fixed at 16, independent of `capacity`.
+  filename_queue = tf.train.string_input_producer(
+      data_files, shuffle=shuffle, capacity=16, name="filename_queue")
+
+  if shuffle:
+    # Keep at least 60% of the capacity in the queue after each dequeue.
+    min_after_dequeue = int(0.6 * capacity)
+    values_queue = tf.RandomShuffleQueue(
+        capacity=capacity,
+        min_after_dequeue=min_after_dequeue,
+        dtypes=[tf.string],
+        shapes=[[]],
+        name="random_input_queue")
+  else:
+    values_queue = tf.FIFOQueue(
+        capacity=capacity,
+        dtypes=[tf.string],
+        shapes=[[]],
+        name="fifo_input_queue")
+
+  # One read-and-enqueue op per reader thread, all feeding the same queue.
+  enqueue_ops = []
+  for _ in range(num_reader_threads):
+    _, value = reader.read(filename_queue)
+    enqueue_ops.append(values_queue.enqueue([value]))
+  tf.train.queue_runner.add_queue_runner(
+      tf.train.queue_runner.QueueRunner(values_queue, enqueue_ops))
+  # Summary value is the current queue size divided by `capacity`.
+  tf.summary.scalar("queue/%s/fraction_of_%d_full" % (values_queue.name,
+                                                      capacity),
+                    tf.cast(values_queue.size(), tf.float32) * (1.0 / capacity))
+
+  return values_queue
--- a/skip_thoughts/skip_thoughts/skip_thoughts_encoder.py
+++ b/skip_thoughts/skip_thoughts/skip_thoughts_encoder.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Class for encoding text using a trained SkipThoughtsModel.
+
+Example usage:
+  g = tf.Graph()
+  with g.as_default():
+    encoder = SkipThoughtsEncoder(embeddings)
+    restore_fn = encoder.build_graph_from_config(model_config, checkpoint_path)
+
+  with tf.Session(graph=g) as sess:
+    restore_fn(sess)
+    skip_thought_vectors = encoder.encode(sess, data)
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os.path
+
+
+import nltk
+import nltk.tokenize
+import numpy as np
+import tensorflow as tf
+
+from skip_thoughts import skip_thoughts_model
+from skip_thoughts.data import special_words
+
+
+def _pad(seq, target_len):
+  """Zero-pads a sequence of word embeddings to a fixed length.
+
+  Args:
+    seq: Sequence of word embeddings (1D numpy arrays).
+    target_len: Desired padded sequence length.
+
+  Returns:
+    embeddings: A [target_len, emb_dim] array holding the input embeddings
+      followed by zero rows.
+    mask: An int8 0/1 vector of length target_len with ones at the positions
+      of real embeddings and zeros at padded positions.
+
+  Raises:
+    ValueError: If len(seq) is not in the interval (0, target_len].
+  """
+  num_words = len(seq)
+  if not 0 < num_words <= target_len:
+    raise ValueError("Expected 0 < len(seq) <= %d, got %d" % (target_len,
+                                                              num_words))
+
+  # The first embedding determines the embedding width and dtype.
+  first = seq[0]
+  embeddings = np.zeros(shape=(target_len, first.shape[0]), dtype=first.dtype)
+  mask = np.zeros(shape=(target_len,), dtype=np.int8)
+  for row, vector in enumerate(seq):
+    embeddings[row] = vector
+  mask[:num_words] = 1
+  return embeddings, mask
+
+
+def _batch_and_pad(sequences):
+  """Pads every sequence to the longest one and stacks them into a batch.
+
+  Args:
+    sequences: A list of batch_size sequences of word embeddings.
+
+  Returns:
+    embeddings: A numpy array with shape [batch_size, padded_length, emb_dim].
+    mask: A numpy 0/1 array with shape [batch_size, padded_length] with zeros
+      corresponding to padded elements.
+  """
+  # All sequences are padded to the length of the longest one in the batch.
+  padded_length = max(len(seq) for seq in sequences)
+  padded = [_pad(seq, padded_length) for seq in sequences]
+  embeddings = np.array([emb for emb, _ in padded])
+  mask = np.array([m for _, m in padded])
+  return embeddings, mask
+
+
+class SkipThoughtsEncoder(object):
+  """Skip-thoughts sentence encoder."""
+
+  def __init__(self, embeddings):
+    """Initializes the encoder.
+
+    Loads the NLTK English punkt sentence tokenizer and stores the embedding
+    lookup table. No TensorFlow graph is built here.
+
+    Args:
+      embeddings: Dictionary of word to embedding vector (1D numpy array).
+    """
+    # Loaded from the NLTK data resource "tokenizers/punkt/english.pickle".
+    self._sentence_detector = nltk.data.load("tokenizers/punkt/english.pickle")
+    self._embeddings = embeddings
+
+  def _create_restore_fn(self, checkpoint_path, saver):
+    """Returns a callable that restores model variables from a checkpoint.
+
+    Args:
+      checkpoint_path: Checkpoint file or a directory containing a checkpoint
+        file. A directory is resolved to its latest checkpoint immediately.
+      saver: Saver for restoring variables from the checkpoint file.
+
+    Returns:
+      restore_fn: A function such that restore_fn(sess) loads model variables
+        from the checkpoint file.
+
+    Raises:
+      ValueError: If checkpoint_path is a directory that contains no
+        checkpoint file.
+    """
+    if tf.gfile.IsDirectory(checkpoint_path):
+      checkpoint_dir = checkpoint_path
+      checkpoint_path = tf.train.latest_checkpoint(checkpoint_dir)
+      if not checkpoint_path:
+        raise ValueError("No checkpoint file found in: %s" % checkpoint_dir)
+
+    def _restore_fn(sess):
+      # Log before and after so a slow or failing restore is easy to spot.
+      tf.logging.info("Loading model from checkpoint: %s", checkpoint_path)
+      saver.restore(sess, checkpoint_path)
+      checkpoint_name = os.path.basename(checkpoint_path)
+      tf.logging.info("Successfully loaded checkpoint: %s", checkpoint_name)
+
+    return _restore_fn
+
+  def build_graph_from_config(self, model_config, checkpoint_path):
+    """Builds the inference graph from a configuration object.
+
+    A SkipThoughtsModel is constructed in "encode" mode and built into the
+    current default graph.
+
+    Args:
+      model_config: Object containing configuration for building the model.
+      checkpoint_path: Checkpoint file or a directory containing a checkpoint
+        file.
+
+    Returns:
+      restore_fn: A function such that restore_fn(sess) loads model variables
+        from the checkpoint file.
+    """
+    tf.logging.info("Building model.")
+    model = skip_thoughts_model.SkipThoughtsModel(model_config, mode="encode")
+    model.build()
+    # The Saver is created only after the model has been built.
+    saver = tf.train.Saver()
+
+    return self._create_restore_fn(checkpoint_path, saver)
+
+  def build_graph_from_proto(self, graph_def_file, saver_def_file,
+                             checkpoint_path):
+    """Builds the inference graph from serialized GraphDef and SaverDef protos.
+
+    Both files are read in binary mode and parsed as serialized protos.
+
+    Args:
+      graph_def_file: File containing a serialized GraphDef proto.
+      saver_def_file: File containing a serialized SaverDef proto.
+      checkpoint_path: Checkpoint file or a directory containing a checkpoint
+        file.
+
+    Returns:
+      restore_fn: A function such that restore_fn(sess) loads model variables
+        from the checkpoint file.
+    """
+    # Load the Graph.
+    tf.logging.info("Loading GraphDef from file: %s", graph_def_file)
+    graph_def = tf.GraphDef()
+    with tf.gfile.FastGFile(graph_def_file, "rb") as f:
+      graph_def.ParseFromString(f.read())
+    # Import with an empty name prefix so the imported names are not prefixed;
+    # encode() fetches and feeds tensors by fixed names.
+    tf.import_graph_def(graph_def, name="")
+
+    # Load the Saver.
+    tf.logging.info("Loading SaverDef from file: %s", saver_def_file)
+    saver_def = tf.train.SaverDef()
+    with tf.gfile.FastGFile(saver_def_file, "rb") as f:
+      saver_def.ParseFromString(f.read())
+    saver = tf.train.Saver(saver_def=saver_def)
+
+    return self._create_restore_fn(checkpoint_path, saver)
+
+  def _tokenize(self, item):
+    """Splits an input string into sentences, then into a flat word list."""
+    sentences = self._sentence_detector.tokenize(item)
+    return [
+        word
+        for sentence in sentences
+        for word in nltk.tokenize.word_tokenize(sentence)
+    ]
+
+  def _word_to_embedding(self, w):
+    """Looks up the embedding of a word, falling back to the UNK embedding."""
+    # The UNK embedding is fetched unconditionally, so a vocabulary without
+    # an UNK entry fails here even for known words.
+    unknown = self._embeddings[special_words.UNK]
+    return self._embeddings.get(w, unknown)
+
+  def _preprocess(self, data, use_eos):
+    """Tokenizes input strings and maps every word to its embedding.
+
+    Args:
+      data: A list of input strings.
+      use_eos: Whether to append the end-of-sentence word to each sentence.
+
+    Returns:
+      embeddings: A list of word embedding sequences corresponding to the input
+        strings.
+    """
+    embedded = []
+    for text in data:
+      words = self._tokenize(text)
+      if use_eos:
+        words.append(special_words.EOS)
+      embedded.append(list(map(self._word_to_embedding, words)))
+    return embedded
+
+  def encode(self,
+             sess,
+             data,
+             use_norm=True,
+             verbose=True,
+             batch_size=128,
+             use_eos=False):
+    """Encodes a sequence of sentences as skip-thought vectors.
+
+    The input is preprocessed into word embeddings and run through the graph
+    in consecutive slices of batch_size sentences.
+
+    Args:
+      sess: TensorFlow Session.
+      data: A list of input strings.
+      use_norm: Whether to normalize skip-thought vectors to unit L2 norm.
+      verbose: Whether to log every batch.
+      batch_size: Batch size for the encoder.
+      use_eos: Whether to append the end-of-sentence word to each input
+        sentence.
+
+    Returns:
+      thought_vectors: A list of numpy arrays corresponding to the skip-thought
+        encodings of sentences in 'data'.
+    """
+    sequences = self._preprocess(data, use_eos)
+    starts = np.arange(0, len(sequences), batch_size)
+    num_batches = len(starts)
+
+    thought_vectors = []
+    for batch, begin in enumerate(starts):
+      if verbose:
+        tf.logging.info("Batch %d / %d.", batch, num_batches)
+
+      chunk = sequences[begin:begin + batch_size]
+      embeddings, mask = _batch_and_pad(chunk)
+      # Tensors are addressed by name so this works for graphs built from a
+      # config as well as graphs imported from a GraphDef.
+      batch_vectors = sess.run(
+          "encoder/thought_vectors:0",
+          feed_dict={
+              "encode_emb:0": embeddings,
+              "encode_mask:0": mask,
+          })
+      thought_vectors.extend(batch_vectors)
+
+    if not use_norm:
+      return thought_vectors
+    return [v / np.linalg.norm(v) for v in thought_vectors]
--- a/skip_thoughts/skip_thoughts/skip_thoughts_model.py
+++ b/skip_thoughts/skip_thoughts/skip_thoughts_model.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Skip-Thoughts model for learning sentence vectors.
+
+The model is based on the paper:
+
+  "Skip-Thought Vectors"
+  Ryan Kiros, Yukun Zhu, Ruslan Salakhutdinov, Richard S. Zemel,
+  Antonio Torralba, Raquel Urtasun, Sanja Fidler.
+  https://papers.nips.cc/paper/5950-skip-thought-vectors.pdf
+
+Layer normalization is applied based on the paper:
+
+  "Layer Normalization"
+  Jimmy Lei Ba, Jamie Ryan Kiros, Geoffrey E. Hinton
+  https://arxiv.org/abs/1607.06450
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+
+import tensorflow as tf
+
+from skip_thoughts.ops import gru_cell
+from skip_thoughts.ops import input_ops
+
+
+def random_orthonormal_initializer(shape, dtype=tf.float32,
+                                   partition_info=None):  # pylint: disable=unused-argument
+  """Variable initializer that produces a random orthonormal matrix.
+
+  Args:
+    shape: Shape of the variable; must describe a square 2-D matrix.
+    dtype: dtype of the random normal matrix that is decomposed.
+    partition_info: Unused.
+
+  Returns:
+    The `u` factor of the SVD of a random normal matrix with the given shape.
+
+  Raises:
+    ValueError: If shape is not 2-D and square.
+  """
+  if len(shape) != 2 or shape[0] != shape[1]:
+    # Wrap shape in a 1-tuple: a tuple-valued shape would otherwise be
+    # unpacked as several format arguments and raise TypeError instead of the
+    # intended ValueError.
+    raise ValueError("Expecting square shape, got %s" % (shape,))
+  _, u, _ = tf.svd(tf.random_normal(shape, dtype=dtype), full_matrices=True)
+  return u
+
+
+class SkipThoughtsModel(object):
+  """Skip-thoughts model."""
+
+  def __init__(self, config, mode="train", input_reader=None):
+    """Basic setup. The actual TensorFlow graph is constructed in build().
+
+    Args:
+      config: Object containing configuration parameters.
+      mode: "train", "eval" or "encode".
+      input_reader: Reader instance used for reading the input serialized
+        tf.Example protocol buffers (it is handed to
+        input_ops.prefetch_input_data). Defaults to a new TFRecordReader.
+
+    Raises:
+      ValueError: If mode is invalid.
+    """
+    if mode not in ["train", "eval", "encode"]:
+      raise ValueError("Unrecognized mode: %s" % mode)
+
+    self.config = config
+    self.mode = mode
+    # A default reader is created for every mode, including "encode", where
+    # build_inputs() never reads from it.
+    self.reader = input_reader if input_reader else tf.TFRecordReader()
+
+    # Initializer used for non-recurrent weights: uniform in
+    # [-uniform_init_scale, uniform_init_scale].
+    self.uniform_initializer = tf.random_uniform_initializer(
+        minval=-self.config.uniform_init_scale,
+        maxval=self.config.uniform_init_scale)
+
+    # Input sentences represented as sequences of word ids. "encode" is the
+    # source sentence, "decode_pre" is the previous sentence and "decode_post"
+    # is the next sentence.
+    # Each is an int64 Tensor with shape [batch_size, padded_length].
+    # All three stay None in "encode" mode.
+    self.encode_ids = None
+    self.decode_pre_ids = None
+    self.decode_post_ids = None
+
+    # Boolean masks distinguishing real words (1) from padded words (0).
+    # Each is an int32 Tensor with shape [batch_size, padded_length], except
+    # in "encode" mode, where encode_mask is an int8 placeholder and the two
+    # decode masks stay None.
+    self.encode_mask = None
+    self.decode_pre_mask = None
+    self.decode_post_mask = None
+
+    # Input sentences represented as sequences of word embeddings.
+    # Each is a float32 Tensor with shape [batch_size, padded_length, emb_dim].
+    self.encode_emb = None
+    self.decode_pre_emb = None
+    self.decode_post_emb = None
+
+    # The output from the sentence encoder.
+    # A float32 Tensor with shape [batch_size, num_gru_units].
+    self.thought_vectors = None
+
+    # The cross entropy losses and corresponding weights of the decoders. Used
+    # for evaluation. One entry is appended per decoder by _build_decoder().
+    self.target_cross_entropy_losses = []
+    self.target_cross_entropy_loss_weights = []
+
+    # The total loss to optimize. Set by build_loss(); stays None in "encode"
+    # mode.
+    self.total_loss = None
+
+  def build_inputs(self):
+    """Builds the ops for reading input data.
+
+    In "encode" mode only a mask placeholder named "encode_mask" is created;
+    all ids and the decode masks are None. In the other modes a batch of
+    serialized tf.Example protos is dequeued from a prefetch queue and parsed
+    into ids and masks.
+
+    Outputs:
+      self.encode_ids
+      self.decode_pre_ids
+      self.decode_post_ids
+      self.encode_mask
+      self.decode_pre_mask
+      self.decode_post_mask
+    """
+    if self.mode == "encode":
+      # Word embeddings are fed from an external vocabulary which has possibly
+      # been expanded (see vocabulary_expansion.py).
+      encode_ids = None
+      decode_pre_ids = None
+      decode_post_ids = None
+      # Note the int8 dtype: this differs from the int32 masks produced by
+      # input_ops.parse_example_batch in the other modes.
+      encode_mask = tf.placeholder(tf.int8, (None, None), name="encode_mask")
+      decode_pre_mask = None
+      decode_post_mask = None
+    else:
+      # Prefetch serialized tf.Example protos.
+      input_queue = input_ops.prefetch_input_data(
+          self.reader,
+          self.config.input_file_pattern,
+          shuffle=self.config.shuffle_input_data,
+          capacity=self.config.input_queue_capacity,
+          num_reader_threads=self.config.num_input_reader_threads)
+
+      # Deserialize a batch.
+      serialized = input_queue.dequeue_many(self.config.batch_size)
+      encode, decode_pre, decode_post = input_ops.parse_example_batch(
+          serialized)
+
+      encode_ids = encode.ids
+      decode_pre_ids = decode_pre.ids
+      decode_post_ids = decode_post.ids
+
+      encode_mask = encode.mask
+      decode_pre_mask = decode_pre.mask
+      decode_post_mask = decode_post.mask
+
+    # Publish the results on the instance for the later build_* stages.
+    self.encode_ids = encode_ids
+    self.decode_pre_ids = decode_pre_ids
+    self.decode_post_ids = decode_post_ids
+
+    self.encode_mask = encode_mask
+    self.decode_pre_mask = decode_pre_mask
+    self.decode_post_mask = decode_post_mask
+
+  def build_word_embeddings(self):
+    """Builds the word embeddings.
+
+    In "encode" mode the encoder embeddings are a float32 placeholder named
+    "encode_emb" and no embedding variable is created. In the other modes a
+    single "word_embedding" variable is created and shared by the three
+    lookups.
+
+    Inputs:
+      self.encode_ids
+      self.decode_pre_ids
+      self.decode_post_ids
+
+    Outputs:
+      self.encode_emb
+      self.decode_pre_emb
+      self.decode_post_emb
+    """
+    if self.mode == "encode":
+      # Word embeddings are fed from an external vocabulary which has possibly
+      # been expanded (see vocabulary_expansion.py).
+      encode_emb = tf.placeholder(tf.float32, (
+          None, None, self.config.word_embedding_dim), "encode_emb")
+      # No sequences to decode.
+      decode_pre_emb = None
+      decode_post_emb = None
+    else:
+      # One [vocab_size, word_embedding_dim] table for all three sentences.
+      word_emb = tf.get_variable(
+          name="word_embedding",
+          shape=[self.config.vocab_size, self.config.word_embedding_dim],
+          initializer=self.uniform_initializer)
+
+      encode_emb = tf.nn.embedding_lookup(word_emb, self.encode_ids)
+      decode_pre_emb = tf.nn.embedding_lookup(word_emb, self.decode_pre_ids)
+      decode_post_emb = tf.nn.embedding_lookup(word_emb, self.decode_post_ids)
+
+    self.encode_emb = encode_emb
+    self.decode_pre_emb = decode_pre_emb
+    self.decode_post_emb = decode_post_emb
+
+  def _initialize_gru_cell(self, num_units):
+    """Initializes a GRU cell.
+
+    The Variables of the GRU cell are initialized in a way that exactly matches
+    the skip-thoughts paper: recurrent weights are initialized from random
+    orthonormal matrices and non-recurrent weights are initialized from random
+    uniform matrices.
+
+    Args:
+      num_units: Number of output units.
+
+    Returns:
+      cell: An instance of RNNCell with variable initializers that match the
+        skip-thoughts paper.
+    """
+    # w_initializer gets the uniform initializer, u_initializer the random
+    # orthonormal one, and biases start at zero.
+    return gru_cell.LayerNormGRUCell(
+        num_units,
+        w_initializer=self.uniform_initializer,
+        u_initializer=random_orthonormal_initializer,
+        b_initializer=tf.constant_initializer(0.0))
+
+  def build_encoder(self):
+    """Builds the sentence encoder.
+
+    The encoder is a unidirectional GRU, or a bidirectional GRU whose two
+    final states are concatenated, depending on config.bidirectional_encoder.
+    In both cases the resulting Tensor is named "encoder/thought_vectors".
+
+    Inputs:
+      self.encode_emb
+      self.encode_mask
+
+    Outputs:
+      self.thought_vectors
+
+    Raises:
+      ValueError: if config.bidirectional_encoder is True and config.encoder_dim
+        is odd.
+    """
+    with tf.variable_scope("encoder") as scope:
+      # Cast the mask to int32 before summing. In "encode" mode the mask is an
+      # int8 placeholder, so summing it in its own dtype would overflow for
+      # sentences longer than 127 words.
+      length = tf.reduce_sum(
+          tf.to_int32(self.encode_mask), 1, name="length")
+
+      if self.config.bidirectional_encoder:
+        if self.config.encoder_dim % 2:
+          raise ValueError(
+              "encoder_dim must be even when using a bidirectional encoder.")
+        # Each direction gets half of the units so that the concatenated
+        # states have encoder_dim units in total.
+        num_units = self.config.encoder_dim // 2
+        cell_fw = self._initialize_gru_cell(num_units)  # Forward encoder
+        cell_bw = self._initialize_gru_cell(num_units)  # Backward encoder
+        _, states = tf.nn.bidirectional_dynamic_rnn(
+            cell_fw=cell_fw,
+            cell_bw=cell_bw,
+            inputs=self.encode_emb,
+            sequence_length=length,
+            dtype=tf.float32,
+            scope=scope)
+        thought_vectors = tf.concat(states, 1, name="thought_vectors")
+      else:
+        cell = self._initialize_gru_cell(self.config.encoder_dim)
+        _, state = tf.nn.dynamic_rnn(
+            cell=cell,
+            inputs=self.encode_emb,
+            sequence_length=length,
+            dtype=tf.float32,
+            scope=scope)
+        # Use an identity operation to name the Tensor in the Graph.
+        thought_vectors = tf.identity(state, name="thought_vectors")
+
+    self.thought_vectors = thought_vectors
+
+  def _build_decoder(self, name, embeddings, targets, mask, initial_state,
+                     reuse_logits):
+    """Builds a sentence decoder.
+
+    The decoder is a GRU with config.encoder_dim units followed by a fully
+    connected logits layer over the vocabulary. The masked sum of the
+    per-word cross entropy losses is registered via tf.losses.add_loss, and
+    the unreduced losses and weights are appended to
+    self.target_cross_entropy_losses and
+    self.target_cross_entropy_loss_weights.
+
+    Args:
+      name: Decoder name.
+      embeddings: Batch of sentences to decode; a float32 Tensor with shape
+        [batch_size, padded_length, emb_dim].
+      targets: Batch of target word ids; an int64 Tensor with shape
+        [batch_size, padded_length].
+      mask: A 0/1 Tensor with shape [batch_size, padded_length].
+      initial_state: Initial state of the GRU. A float32 Tensor with shape
+        [batch_size, num_gru_cells].
+      reuse_logits: Whether to reuse the logits weights.
+    """
+    # Decoder RNN.
+    cell = self._initialize_gru_cell(self.config.encoder_dim)
+    with tf.variable_scope(name) as scope:
+      # Add a padding word at the start of each sentence (to correspond to the
+      # prediction of the first word) and remove the last word.
+      decoder_input = tf.pad(
+          embeddings[:, :-1, :], [[0, 0], [1, 0], [0, 0]], name="input")
+      length = tf.reduce_sum(mask, 1, name="length")
+      decoder_output, _ = tf.nn.dynamic_rnn(
+          cell=cell,
+          inputs=decoder_input,
+          sequence_length=length,
+          initial_state=initial_state,
+          scope=scope)
+
+    # Stack batch vertically.
+    decoder_output = tf.reshape(decoder_output, [-1, self.config.encoder_dim])
+    targets = tf.reshape(targets, [-1])
+    weights = tf.to_float(tf.reshape(mask, [-1]))
+
+    # Logits. The "logits" scope is not nested under the decoder's scope, so
+    # reuse_logits lets a second decoder share the same weights.
+    with tf.variable_scope("logits", reuse=reuse_logits) as scope:
+      logits = tf.contrib.layers.fully_connected(
+          inputs=decoder_output,
+          num_outputs=self.config.vocab_size,
+          activation_fn=None,
+          weights_initializer=self.uniform_initializer,
+          scope=scope)
+
+    losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
+        labels=targets, logits=logits)
+    # The batch loss is a masked sum, not a mean, over all word positions.
+    batch_loss = tf.reduce_sum(losses * weights)
+    tf.losses.add_loss(batch_loss)
+
+    tf.summary.scalar("losses/" + name, batch_loss)
+
+    self.target_cross_entropy_losses.append(losses)
+    self.target_cross_entropy_loss_weights.append(weights)
+
+  def build_decoders(self):
+    """Builds the "previous" and "post" sentence decoders.
+
+    Nothing is built in "encode" mode.
+
+    Inputs:
+      self.decode_pre_emb
+      self.decode_post_emb
+      self.decode_pre_ids
+      self.decode_post_ids
+      self.decode_pre_mask
+      self.decode_post_mask
+      self.thought_vectors
+
+    Outputs:
+      self.target_cross_entropy_losses
+      self.target_cross_entropy_loss_weights
+    """
+    if self.mode == "encode":
+      return
+
+    # (name, embeddings, target ids, mask, reuse_logits). The pre-sentence
+    # decoder creates the logits weights; the post-sentence decoder reuses
+    # them.
+    decoders = (
+        ("decoder_pre", self.decode_pre_emb, self.decode_pre_ids,
+         self.decode_pre_mask, False),
+        ("decoder_post", self.decode_post_emb, self.decode_post_ids,
+         self.decode_post_mask, True),
+    )
+    for name, emb, ids, mask, reuse_logits in decoders:
+      self._build_decoder(name, emb, ids, mask, self.thought_vectors,
+                          reuse_logits)
+
+  def build_loss(self):
+    """Builds the total loss Tensor and its summary.
+
+    Nothing is built in "encode" mode, so self.total_loss is left untouched.
+
+    Outputs:
+      self.total_loss
+    """
+    if self.mode == "encode":
+      return
+
+    self.total_loss = tf.losses.get_total_loss()
+    tf.summary.scalar("losses/total", self.total_loss)
+
+  def build_global_step(self):
+    """Builds the global step Tensor.
+
+    Called by build() in every mode. Note that self.global_step is first
+    assigned here rather than in __init__.
+
+    Outputs:
+      self.global_step
+    """
+    self.global_step = tf.contrib.framework.create_global_step()
+
+  def build(self):
+    """Creates all ops for training, evaluation or encoding."""
+    # Stages run in dependency order: each consumes attributes set by the
+    # stages before it.
+    stages = (
+        self.build_inputs,
+        self.build_word_embeddings,
+        self.build_encoder,
+        self.build_decoders,
+        self.build_loss,
+        self.build_global_step,
+    )
+    for stage in stages:
+      stage()
--- a/skip_thoughts/skip_thoughts/skip_thoughts_model_test.py
+++ b/skip_thoughts/skip_thoughts/skip_thoughts_model_test.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for tensorflow_models.skip_thoughts.skip_thoughts_model."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+
+import numpy as np
+import tensorflow as tf
+
+from skip_thoughts import configuration
+from skip_thoughts import skip_thoughts_model
+
+
+class SkipThoughtsModel(skip_thoughts_model.SkipThoughtsModel):
+  """Test double for SkipThoughtsModel that never reads input from disk."""
+
+  def build_inputs(self):
+    """Builds inputs from random Tensors, except in "encode" mode."""
+    if self.mode == "encode":
+      # Encode mode doesn't read from disk, so defer to parent.
+      return super(SkipThoughtsModel, self).build_inputs()
+
+    def _random_ids():
+      # A [batch_size, 15] int64 batch of ids drawn from [0, vocab_size).
+      return tf.random_uniform(
+          [self.config.batch_size, 15],
+          minval=0,
+          maxval=self.config.vocab_size,
+          dtype=tf.int64)
+
+    # Replace disk I/O with random Tensors.
+    self.encode_ids = _random_ids()
+    self.decode_pre_ids = _random_ids()
+    self.decode_post_ids = _random_ids()
+
+    # Every position counts as a real word.
+    self.encode_mask = tf.ones_like(self.encode_ids)
+    self.decode_pre_mask = tf.ones_like(self.decode_pre_ids)
+    self.decode_post_mask = tf.ones_like(self.decode_post_ids)
+
+
+class SkipThoughtsModelTest(tf.test.TestCase):
+  """Checks parameter counts and output shapes of SkipThoughtsModel."""
+
+  def setUp(self):
+    super(SkipThoughtsModelTest, self).setUp()
+    self._model_config = configuration.model_config()
+
+  def _countModelParameters(self):
+    """Counts the number of parameters in the model at top level scope."""
+    counter = {}
+    for v in tf.global_variables():
+      # Group variables by the first component of their op name.
+      name = v.op.name.split("/")[0]
+      num_params = v.get_shape().num_elements()
+      if not num_params:
+        self.fail("Could not infer num_elements from Variable %s" % v.op.name)
+      counter[name] = counter.get(name, 0) + num_params
+    return counter
+
+  def _checkModelParameters(self):
+    """Verifies the number of parameters in the model."""
+    param_counts = self._countModelParameters()
+    expected_param_counts = {
+        # vocab_size * embedding_size
+        "word_embedding": 12400000,
+        # GRU Cells
+        "encoder": 21772800,
+        "decoder_pre": 21772800,
+        "decoder_post": 21772800,
+        # (encoder_dim + 1) * vocab_size
+        "logits": 48020000,
+        "global_step": 1,
+    }
+    self.assertDictEqual(expected_param_counts, param_counts)
+
+  def _checkOutputs(self, expected_shapes, feed_dict=None):
+    """Verifies that the model produces expected outputs.
+
+    Args:
+      expected_shapes: A dict mapping Tensor or Tensor name to expected output
+        shape.
+      feed_dict: Values of Tensors to feed into Session.run().
+    """
+    # Materialize the keys as a list: a Python 3 dict view cannot be indexed,
+    # and a list keeps the fetches aligned with the returned outputs.
+    fetches = list(expected_shapes.keys())
+
+    with self.test_session() as sess:
+      sess.run(tf.global_variables_initializer())
+      outputs = sess.run(fetches, feed_dict)
+
+    for tensor, output in zip(fetches, outputs):
+      expected = expected_shapes[tensor]
+      actual = output.shape
+      if expected != actual:
+        self.fail("Tensor %s has shape %s (expected %s)." % (tensor, actual,
+                                                             expected))
+
+  def _checkBuildWithDecoders(self, mode):
+    """Builds the model in a decoding mode and checks parameters and shapes.
+
+    Args:
+      mode: "train" or "eval".
+    """
+    model = SkipThoughtsModel(self._model_config, mode=mode)
+    model.build()
+
+    self._checkModelParameters()
+
+    expected_shapes = {
+        # [batch_size, length]
+        model.encode_ids: (128, 15),
+        model.decode_pre_ids: (128, 15),
+        model.decode_post_ids: (128, 15),
+        model.encode_mask: (128, 15),
+        model.decode_pre_mask: (128, 15),
+        model.decode_post_mask: (128, 15),
+        # [batch_size, length, word_embedding_dim]
+        model.encode_emb: (128, 15, 620),
+        model.decode_pre_emb: (128, 15, 620),
+        model.decode_post_emb: (128, 15, 620),
+        # [batch_size, encoder_dim]
+        model.thought_vectors: (128, 2400),
+        # [batch_size * length]
+        model.target_cross_entropy_losses[0]: (1920,),
+        model.target_cross_entropy_losses[1]: (1920,),
+        # [batch_size * length]
+        model.target_cross_entropy_loss_weights[0]: (1920,),
+        model.target_cross_entropy_loss_weights[1]: (1920,),
+        # Scalar
+        model.total_loss: (),
+    }
+    self._checkOutputs(expected_shapes)
+
+  def testBuildForTraining(self):
+    self._checkBuildWithDecoders("train")
+
+  def testBuildForEval(self):
+    self._checkBuildWithDecoders("eval")
+
+  def testBuildForEncode(self):
+    model = SkipThoughtsModel(self._model_config, mode="encode")
+    model.build()
+
+    # Test feeding a batch of word embeddings to get skip thought vectors.
+    encode_emb = np.random.rand(64, 15, 620)
+    encode_mask = np.ones((64, 15), dtype=np.int64)
+    feed_dict = {model.encode_emb: encode_emb, model.encode_mask: encode_mask}
+    expected_shapes = {
+        # [batch_size, encoder_dim]
+        model.thought_vectors: (64, 2400),
+    }
+    self._checkOutputs(expected_shapes, feed_dict)
+
+
+# Run the test cases in this module when it is executed as a script.
+if __name__ == "__main__":
+  tf.test.main()
--- a/skip_thoughts/skip_thoughts/track_perplexity.py
+++ b/skip_thoughts/skip_thoughts/track_perplexity.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tracks training progress via per-word perplexity.
+
+This script should be run concurrently with training so that summaries show up
+in TensorBoard.
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import math
+import os.path
+import time
+
+
+import numpy as np
+import tensorflow as tf
+
+from skip_thoughts import configuration
+from skip_thoughts import skip_thoughts_model
+
+# Command-line flags for the perplexity-tracking job.
+FLAGS = tf.flags.FLAGS
+
+# Input data, checkpoint and event-log locations; none has a default.
+tf.flags.DEFINE_string("input_file_pattern", None,
+                       "File pattern of sharded TFRecord input files.")
+tf.flags.DEFINE_string("checkpoint_dir", None,
+                       "Directory containing model checkpoints.")
+tf.flags.DEFINE_string("eval_dir", None, "Directory to write event logs to.")
+
+# Evaluation schedule and size.
+tf.flags.DEFINE_integer("eval_interval_secs", 600,
+                        "Interval between evaluation runs.")
+tf.flags.DEFINE_integer("num_eval_examples", 50000,
+                        "Number of examples for evaluation.")
+
+tf.flags.DEFINE_integer("min_global_step", 100,
+                        "Minimum global step to run evaluation.")
+
+# Set at import time so INFO-level log messages are emitted.
+tf.logging.set_verbosity(tf.logging.INFO)
+
+
+def evaluate_model(sess, losses, weights, num_batches, global_step,
+                   summary_writer, summary_op):
+  """Computes perplexity-per-word over the evaluation dataset.
+
+  Summaries and perplexity-per-word are written out to the eval directory.
+
+  Args:
+    sess: Session object.
+    losses: A Tensor of any shape; the target cross entropy losses for the
+      current batch.
+    weights: A Tensor of weights corresponding to losses.
+    num_batches: Integer; the number of evaluation batches.
+    global_step: Integer; global step of the model checkpoint.
+    summary_writer: Instance of SummaryWriter.
+    summary_op: Op for generating model summaries.
+  """
+  # Log model summaries on a single batch.
+  summary_str = sess.run(summary_op)
+  summary_writer.add_summary(summary_str, global_step)
+
+  start_time = time.time()
+  sum_losses = 0.0
+  sum_weights = 0.0
+  for i in xrange(num_batches):
+    batch_losses, batch_weights = sess.run([losses, weights])
+    sum_losses += np.sum(batch_losses * batch_weights)
+    sum_weights += np.sum(batch_weights)
+    if not i % 100:
+      tf.logging.info("Computed losses for %d of %d batches.", i + 1,
+                      num_batches)
+  eval_time = time.time() - start_time
+
+  perplexity = math.exp(sum_losses / sum_weights)
+  tf.logging.info("Perplexity = %f (%.2f sec)", perplexity, eval_time)
+
+  # Log perplexity to the SummaryWriter.
+  summary = tf.Summary()
+  value = summary.value.add()
+  value.simple_value = perplexity
+  value.tag = "perplexity"
+  summary_writer.add_summary(summary, global_step)
+
+  # Write the Events file to the eval directory.
+  summary_writer.flush()
+  tf.logging.info("Finished processing evaluation at global step %d.",
+                  global_step)
+
+
+def run_once(model, losses, weights, saver, summary_writer, summary_op):
+  """Evaluates the latest model checkpoint.
+
+  Args:
+    model: Instance of SkipThoughtsModel; the model to evaluate.
+    losses: Tensor; the target cross entropy losses for the current batch.
+    weights: A Tensor of weights corresponding to losses.
+    saver: Instance of tf.train.Saver for restoring model Variables.
+    summary_writer: Instance of FileWriter.
+    summary_op: Op for generating model summaries.
+  """
+  model_path = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
+  if not model_path:
+    tf.logging.info("Skipping evaluation. No checkpoint found in: %s",
+                    FLAGS.checkpoint_dir)
+    return
+
+  with tf.Session() as sess:
+    # Load model from checkpoint.
+    tf.logging.info("Loading model from checkpoint: %s", model_path)
+    saver.restore(sess, model_path)
+    global_step = tf.train.global_step(sess, model.global_step.name)
+    tf.logging.info("Successfully loaded %s at global step = %d.",
+                    os.path.basename(model_path), global_step)
+    if global_step < FLAGS.min_global_step:
+      tf.logging.info("Skipping evaluation. Global step = %d < %d", global_step,
+                      FLAGS.min_global_step)
+      return
+
+    # Start the queue runners.
+    coord = tf.train.Coordinator()
+    threads = tf.train.start_queue_runners(coord=coord)
+
+    num_eval_batches = int(
+        math.ceil(FLAGS.num_eval_examples / model.config.batch_size))
+
+    # Run evaluation on the latest checkpoint.
+    try:
+      evaluate_model(sess, losses, weights, num_eval_batches, global_step,
+                     summary_writer, summary_op)
+    except tf.InvalidArgumentError:
+      tf.logging.error(
+          "Evaluation raised InvalidArgumentError (e.g. due to Nans).")
+    finally:
+      coord.request_stop()
+      coord.join(threads, stop_grace_period_secs=10)
+
+
+def main(unused_argv):
+  if not FLAGS.input_file_pattern:
+    raise ValueError("--input_file_pattern is required.")
+  if not FLAGS.checkpoint_dir:
+    raise ValueError("--checkpoint_dir is required.")
+  if not FLAGS.eval_dir:
+    raise ValueError("--eval_dir is required.")
+
+  # Create the evaluation directory if it doesn't exist.
+  eval_dir = FLAGS.eval_dir
+  if not tf.gfile.IsDirectory(eval_dir):
+    tf.logging.info("Creating eval directory: %s", eval_dir)
+    tf.gfile.MakeDirs(eval_dir)
+
+  g = tf.Graph()
+  with g.as_default():
+    # Build the model for evaluation.
+    model_config = configuration.model_config(
+        input_file_pattern=FLAGS.input_file_pattern,
+        input_queue_capacity=FLAGS.num_eval_examples,
+        shuffle_input_data=False)
+    model = skip_thoughts_model.SkipThoughtsModel(model_config, mode="eval")
+    model.build()
+
+    losses = tf.concat(model.target_cross_entropy_losses, 0)
+    weights = tf.concat(model.target_cross_entropy_loss_weights, 0)
+
+    # Create the Saver to restore model Variables.
+    saver = tf.train.Saver()
+
+    # Create the summary operation and the summary writer.
+    summary_op = tf.summary.merge_all()
+    summary_writer = tf.summary.FileWriter(eval_dir)
+
+    g.finalize()
+
+    # Run a new evaluation run every eval_interval_secs.
+    while True:
+      start = time.time()
+      tf.logging.info("Starting evaluation at " + time.strftime(
+          "%Y-%m-%d-%H:%M:%S", time.localtime()))
+      run_once(model, losses, weights, saver, summary_writer, summary_op)
+      time_to_next_eval = start + FLAGS.eval_interval_secs - time.time()
+      if time_to_next_eval > 0:
+        time.sleep(time_to_next_eval)
+
+
+if __name__ == "__main__":
+  tf.app.run()
--- a/skip_thoughts/skip_thoughts/train.py
+++ b/skip_thoughts/skip_thoughts/train.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Train the skip-thoughts model."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+
+import tensorflow as tf
+
+from skip_thoughts import configuration
+from skip_thoughts import skip_thoughts_model
+
+# Command-line flags for the training script.
+FLAGS = tf.flags.FLAGS
+
+# Both flags default to None; main() raises ValueError if either is left unset.
+tf.flags.DEFINE_string("input_file_pattern", None,
+                       "File pattern of sharded TFRecord files containing "
+                       "tf.Example protos.")
+tf.flags.DEFINE_string("train_dir", None,
+                       "Directory for saving and loading checkpoints.")
+
+# Emit log messages at INFO level and above.
+tf.logging.set_verbosity(tf.logging.INFO)
+
+
+def _setup_learning_rate(config, global_step):
+  """Sets up the learning rate with optional exponential decay.
+
+  Args:
+    config: Object containing learning rate configuration parameters.
+    global_step: Tensor; the global step.
+
+  Returns:
+    learning_rate: Tensor; the learning rate with exponential decay.
+  """
+  if config.learning_rate_decay_factor > 0:
+    learning_rate = tf.train.exponential_decay(
+        learning_rate=float(config.learning_rate),
+        global_step=global_step,
+        decay_steps=config.learning_rate_decay_steps,
+        decay_rate=config.learning_rate_decay_factor,
+        staircase=False)
+  else:
+    learning_rate = tf.constant(config.learning_rate)
+  return learning_rate
+
+
+def main(unused_argv):
+  if not FLAGS.input_file_pattern:
+    raise ValueError("--input_file_pattern is required.")
+  if not FLAGS.train_dir:
+    raise ValueError("--train_dir is required.")
+
+  model_config = configuration.model_config(
+      input_file_pattern=FLAGS.input_file_pattern)
+  training_config = configuration.training_config()
+
+  tf.logging.info("Building training graph.")
+  g = tf.Graph()
+  with g.as_default():
+    model = skip_thoughts_model.SkipThoughtsModel(model_config, mode="train")
+    model.build()
+
+    learning_rate = _setup_learning_rate(training_config, model.global_step)
+    optimizer = tf.train.AdamOptimizer(learning_rate)
+
+    train_tensor = tf.contrib.slim.learning.create_train_op(
+        total_loss=model.total_loss,
+        optimizer=optimizer,
+        global_step=model.global_step,
+        clip_gradient_norm=training_config.clip_gradient_norm)
+
+    saver = tf.train.Saver()
+
+  tf.contrib.slim.learning.train(
+      train_op=train_tensor,
+      logdir=FLAGS.train_dir,
+      graph=g,
+      global_step=model.global_step,
+      number_of_steps=training_config.number_of_steps,
+      save_summaries_secs=training_config.save_summaries_secs,
+      saver=saver,
+      save_interval_secs=training_config.save_model_secs)
+
+
+if __name__ == "__main__":
+  tf.app.run()
--- a/skip_thoughts/skip_thoughts/vocabulary_expansion.py
+++ b/skip_thoughts/skip_thoughts/vocabulary_expansion.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Compute an expanded vocabulary of embeddings using a word2vec model.
+
+This script loads the word embeddings from a trained skip-thoughts model and
+from a trained word2vec model (typically with a larger vocabulary). It trains a
+linear regression model without regularization to learn a linear mapping from
+the word2vec embedding space to the skip-thoughts embedding space. The model is
+then applied to all words in the word2vec vocabulary, yielding vectors in the
+skip-thoughts word embedding space for the union of the two vocabularies.
+
+The linear regression task is to learn a parameter matrix W to minimize
+  || X - Y * W ||^2,
+where X is a matrix of skip-thoughts embeddings of shape [num_words, dim1],
+Y is a matrix of word2vec embeddings of shape [num_words, dim2], and W is a
+matrix of shape [dim2, dim1].
+
+This is based on the "Translation Matrix" method from the paper:
+
+  "Exploiting Similarities among Languages for Machine Translation"
+  Tomas Mikolov, Quoc V. Le, Ilya Sutskever
+  https://arxiv.org/abs/1309.4168
+"""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import collections
+import os.path
+
+
+import gensim.models
+import numpy as np
+import sklearn.linear_model
+import tensorflow as tf
+
+# Command-line flags for the vocabulary expansion script. All four string flags
+# default to None; main() raises ValueError if any of them is left unset.
+FLAGS = tf.flags.FLAGS
+
+tf.flags.DEFINE_string("skip_thoughts_model", None,
+                       "Checkpoint file or directory containing a checkpoint "
+                       "file.")
+
+tf.flags.DEFINE_string("skip_thoughts_vocab", None,
+                       "Path to vocabulary file containing a list of newline-"
+                       "separated words where the word id is the "
+                       "corresponding 0-based index in the file.")
+
+tf.flags.DEFINE_string("word2vec_model", None,
+                       "File containing a word2vec model in binary format.")
+
+tf.flags.DEFINE_string("output_dir", None, "Output directory.")
+
+# Emit log messages at INFO level and above.
+tf.logging.set_verbosity(tf.logging.INFO)
+
+
+def _load_skip_thoughts_embeddings(checkpoint_path):
+  """Loads the embedding matrix from a skip-thoughts model checkpoint.
+
+  Args:
+    checkpoint_path: Model checkpoint file or directory containing a checkpoint
+        file.
+
+  Returns:
+    word_embedding: A numpy array of shape [vocab_size, embedding_dim].
+
+  Raises:
+    ValueError: If no checkpoint file matches checkpoint_path.
+  """
+  if tf.gfile.IsDirectory(checkpoint_path):
+    checkpoint_file = tf.train.latest_checkpoint(checkpoint_path)
+    if not checkpoint_file:
+      raise ValueError("No checkpoint file found in %s" % checkpoint_path)
+  else:
+    checkpoint_file = checkpoint_path
+
+  tf.logging.info("Loading skip-thoughts embedding matrix from %s",
+                  checkpoint_file)
+  reader = tf.train.NewCheckpointReader(checkpoint_file)
+  word_embedding = reader.get_tensor("word_embedding")
+  tf.logging.info("Loaded skip-thoughts embedding matrix of shape %s",
+                  word_embedding.shape)
+
+  return word_embedding
+
+
+def _load_vocabulary(filename):
+  """Loads a vocabulary file.
+
+  Args:
+    filename: Path to text file containing newline-separated words.
+
+  Returns:
+    vocab: A dictionary mapping word to word id.
+  """
+  tf.logging.info("Reading vocabulary from %s", filename)
+  vocab = collections.OrderedDict()
+  with tf.gfile.GFile(filename, mode="r") as f:
+    for i, line in enumerate(f):
+      word = line.decode("utf-8").strip()
+      assert word not in vocab, "Attempting to add word twice: %s" % word
+      vocab[word] = i
+  tf.logging.info("Read vocabulary of size %d", len(vocab))
+  return vocab
+
+
+def _expand_vocabulary(skip_thoughts_emb, skip_thoughts_vocab, word2vec):
+  """Runs vocabulary expansion on a skip-thoughts model using a word2vec model.
+
+  Args:
+    skip_thoughts_emb: A numpy array of shape [skip_thoughts_vocab_size,
+        skip_thoughts_embedding_dim].
+    skip_thoughts_vocab: A dictionary of word to id.
+    word2vec: An instance of gensim.models.Word2Vec.
+
+  Returns:
+    combined_emb: A dictionary mapping words to embedding vectors.
+  """
+  # Find words shared between the two vocabularies.
+  tf.logging.info("Finding shared words")
+  shared_words = [w for w in word2vec.vocab if w in skip_thoughts_vocab]
+
+  # Select embedding vectors for shared words.
+  tf.logging.info("Selecting embeddings for %d shared words", len(shared_words))
+  shared_st_emb = skip_thoughts_emb[[
+      skip_thoughts_vocab[w] for w in shared_words
+  ]]
+  shared_w2v_emb = word2vec[shared_words]
+
+  # Train a linear regression model on the shared embedding vectors.
+  tf.logging.info("Training linear regression model")
+  model = sklearn.linear_model.LinearRegression()
+  model.fit(shared_w2v_emb, shared_st_emb)
+
+  # Create the expanded vocabulary.
+  tf.logging.info("Creating embeddings for expanded vocabuary")
+  combined_emb = collections.OrderedDict()
+  for w in word2vec.vocab:
+    # Ignore words with underscores (spaces).
+    if "_" not in w:
+      w_emb = model.predict(word2vec[w].reshape(1, -1))
+      combined_emb[w] = w_emb.reshape(-1)
+
+  for w in skip_thoughts_vocab:
+    combined_emb[w] = skip_thoughts_emb[skip_thoughts_vocab[w]]
+
+  tf.logging.info("Created expanded vocabulary of %d words", len(combined_emb))
+
+  return combined_emb
+
+
+def main(unused_argv):
+  if not FLAGS.skip_thoughts_model:
+    raise ValueError("--skip_thoughts_model is required.")
+  if not FLAGS.skip_thoughts_vocab:
+    raise ValueError("--skip_thoughts_vocab is required.")
+  if not FLAGS.word2vec_model:
+    raise ValueError("--word2vec_model is required.")
+  if not FLAGS.output_dir:
+    raise ValueError("--output_dir is required.")
+
+  if not tf.gfile.IsDirectory(FLAGS.output_dir):
+    tf.gfile.MakeDirs(FLAGS.output_dir)
+
+  # Load the skip-thoughts embeddings and vocabulary.
+  skip_thoughts_emb = _load_skip_thoughts_embeddings(FLAGS.skip_thoughts_model)
+  skip_thoughts_vocab = _load_vocabulary(FLAGS.skip_thoughts_vocab)
+
+  # Load the Word2Vec model.
+  word2vec = gensim.models.Word2Vec.load_word2vec_format(
+      FLAGS.word2vec_model, binary=True)
+
+  # Run vocabulary expansion.
+  embedding_map = _expand_vocabulary(skip_thoughts_emb, skip_thoughts_vocab,
+                                     word2vec)
+
+  # Save the output.
+  vocab = embedding_map.keys()
+  vocab_file = os.path.join(FLAGS.output_dir, "vocab.txt")
+  with tf.gfile.GFile(vocab_file, "w") as f:
+    f.write("\n".join(vocab))
+  tf.logging.info("Wrote vocabulary file to %s", vocab_file)
+
+  embeddings = np.array(embedding_map.values())
+  embeddings_file = os.path.join(FLAGS.output_dir, "embeddings.npy")
+  np.save(embeddings_file, embeddings)
+  tf.logging.info("Wrote embeddings file to %s", embeddings_file)
+
+
+if __name__ == "__main__":
+  tf.app.run()
--- a/slim/BUILD
+++ b/slim/BUILD
 # Description:
 #   Contains files for loading, training and evaluating TF-Slim-based models.

-package(default_visibility = [":internal"])
+package(default_visibility = [
+    ":internal",
+    "//domain_adaptation:__subpackages__",
+])

 licenses(["notice"])  # Apache 2.0


--- a/slim/README.md
+++ b/slim/README.md
@@ -13,7 +13,7 @@ converting them
 to TensorFlow's native TFRecord format and reading them in using TF-Slim's
 data reading and queueing utilities. You can easily train any model on any of
 these datasets, as we demonstrate below. We've also included a
-[jupyter notebook](https://github.com/tensorflow/models/blob/master/slim/slim_walkthough.ipynb),
+[jupyter notebook](https://github.com/tensorflow/models/blob/master/slim/slim_walkthrough.ipynb),
 which provides working examples of how to use TF-Slim for image classification.

 ## Contacts
@@ -41,23 +41,9 @@ prerequisite packages.

 ## Installing latest version of TF-slim

-As of 8/28/16, the latest [stable release of TF](https://www.tensorflow.org/versions/r0.10/get_started/os_setup.html#pip-installation)
-is r0.10, which contains most of TF-Slim but not some later additions. To obtain the
-latest version, you must install the most recent nightly build of
-TensorFlow. You can find the latest nightly binaries at
-[TensorFlow Installation](https://github.com/tensorflow/tensorflow#installation)
-in the section that reads "People who are a little more adventurous can
-also try our nightly binaries". Copy the link address that corresponds to
-the appropriate machine architecture and python version, and pip install
-it. For example:
-
-```shell
-export TF_BINARY_URL=https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0rc0-cp27-none-linux_x86_64.whl
-sudo pip install --upgrade $TF_BINARY_URL
-```
-
-To test this has worked, execute the following command; it should run
-without raising any errors.
+TF-Slim is available as `tf.contrib.slim` via TensorFlow 1.0. To test that your
+installation is working, execute the following command; it should run without
+raising any errors.

 ```
 python -c "import tensorflow.contrib.slim as slim; eval = slim.evaluation.evaluate_once"
@@ -140,7 +126,7 @@ You can use the same script to create the mnist and cifar10 datasets.
 However, for ImageNet, you have to follow the instructions
 [here](https://github.com/tensorflow/models/blob/master/inception/README.md#getting-started).
 Note that you first have to sign up for an account at image-net.org.
-Also, the download can take several hours, and uses about 500MB.
+Also, the download can take several hours, and could use up to 500GB.


 ## Creating a TF-Slim Dataset Descriptor.
@@ -192,12 +178,12 @@ image classification dataset.
 In the table below, we list each model, the corresponding
 TensorFlow model file, the link to the model checkpoint, and the top 1 and top 5
 accuracy (on the imagenet test set).
-Note that the VGG and ResNet parameters have been converted from their original
+Note that the VGG and ResNet V1 parameters have been converted from their original
 caffe formats
 ([here](https://github.com/BVLC/caffe/wiki/Model-Zoo#models-used-by-the-vgg-team-in-ilsvrc-2014)
 and
 [here](https://github.com/KaimingHe/deep-residual-networks)),
-whereas the Inception parameters have been trained internally at
+whereas the Inception and ResNet V2 parameters have been trained internally at
 Google. Also be aware that these accuracies were computed by evaluating using a
 single image crop. Some academic papers report higher accuracy by using multiple
 crops at multiple scales.
@@ -209,12 +195,19 @@ Model | TF-Slim File | Checkpoint | Top-1 Accuracy| Top-5 Accuracy |
 [Inception V3](http://arxiv.org/abs/1512.00567)|[Code](https://github.com/tensorflow/models/blob/master/slim/nets/inception_v3.py)|[inception_v3_2016_08_28.tar.gz](http://download.tensorflow.org/models/inception_v3_2016_08_28.tar.gz)|78.0|93.9|
 [Inception V4](http://arxiv.org/abs/1602.07261)|[Code](https://github.com/tensorflow/models/blob/master/slim/nets/inception_v4.py)|[inception_v4_2016_09_09.tar.gz](http://download.tensorflow.org/models/inception_v4_2016_09_09.tar.gz)|80.2|95.2|
 [Inception-ResNet-v2](http://arxiv.org/abs/1602.07261)|[Code](https://github.com/tensorflow/models/blob/master/slim/nets/inception_resnet_v2.py)|[inception_resnet_v2.tar.gz](http://download.tensorflow.org/models/inception_resnet_v2_2016_08_30.tar.gz)|80.4|95.3|
-[ResNet 50](https://arxiv.org/abs/1512.03385)|[Code](https://github.com/tensorflow/models/blob/master/slim/nets/resnet_v1.py)|[resnet_v1_50.tar.gz](http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz)|75.2|92.2|
-[ResNet 101](https://arxiv.org/abs/1512.03385)|[Code](https://github.com/tensorflow/models/blob/master/slim/nets/resnet_v1.py)|[resnet_v1_101.tar.gz](http://download.tensorflow.org/models/resnet_v1_101_2016_08_28.tar.gz)|76.4|92.9|
-[ResNet 152](https://arxiv.org/abs/1512.03385)|[Code](https://github.com/tensorflow/models/blob/master/slim/nets/resnet_v1.py)|[resnet_v1_152.tar.gz](http://download.tensorflow.org/models/resnet_v1_152_2016_08_28.tar.gz)|76.8|93.2|
+[ResNet V1 50](https://arxiv.org/abs/1512.03385)|[Code](https://github.com/tensorflow/models/blob/master/slim/nets/resnet_v1.py)|[resnet_v1_50.tar.gz](http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz)|75.2|92.2|
+[ResNet V1 101](https://arxiv.org/abs/1512.03385)|[Code](https://github.com/tensorflow/models/blob/master/slim/nets/resnet_v1.py)|[resnet_v1_101.tar.gz](http://download.tensorflow.org/models/resnet_v1_101_2016_08_28.tar.gz)|76.4|92.9|
+[ResNet V1 152](https://arxiv.org/abs/1512.03385)|[Code](https://github.com/tensorflow/models/blob/master/slim/nets/resnet_v1.py)|[resnet_v1_152.tar.gz](http://download.tensorflow.org/models/resnet_v1_152_2016_08_28.tar.gz)|76.8|93.2|
+[ResNet V2 50](https://arxiv.org/abs/1603.05027)^|[Code](https://github.com/tensorflow/models/blob/master/slim/nets/resnet_v2.py)|[resnet_v2_50.tar.gz](http://download.tensorflow.org/models/resnet_v2_50_2017_04_14.tar.gz)|75.6|92.8|
+[ResNet V2 101](https://arxiv.org/abs/1603.05027)^|[Code](https://github.com/tensorflow/models/blob/master/slim/nets/resnet_v2.py)|[resnet_v2_101.tar.gz](http://download.tensorflow.org/models/resnet_v2_101_2017_04_14.tar.gz)|77.0|93.7|
+[ResNet V2 152](https://arxiv.org/abs/1603.05027)^|[Code](https://github.com/tensorflow/models/blob/master/slim/nets/resnet_v2.py)|[resnet_v2_152.tar.gz](http://download.tensorflow.org/models/resnet_v2_152_2017_04_14.tar.gz)|77.8|94.1|
 [VGG 16](http://arxiv.org/abs/1409.1556.pdf)|[Code](https://github.com/tensorflow/models/blob/master/slim/nets/vgg.py)|[vgg_16.tar.gz](http://download.tensorflow.org/models/vgg_16_2016_08_28.tar.gz)|71.5|89.8|
 [VGG 19](http://arxiv.org/abs/1409.1556.pdf)|[Code](https://github.com/tensorflow/models/blob/master/slim/nets/vgg.py)|[vgg_19.tar.gz](http://download.tensorflow.org/models/vgg_19_2016_08_28.tar.gz)|71.1|89.8|

+^ ResNet V2 models use Inception pre-processing and input image size of 299 (use
+`--preprocessing_name inception --eval_image_size 299` when using
+`eval_image_classifier.py`). Performance numbers for ResNet V2 models are
+reported on the ImageNet validation set.

 Here is an example of how to download the Inception V3 checkpoint:

@@ -303,8 +296,8 @@ $ python train_image_classifier.py \
    --dataset_split_name=train \
    --model_name=inception_v3 \
    --checkpoint_path=${CHECKPOINT_PATH} \
-    --checkpoint_exclude_scopes=InceptionV3/Logits,InceptionV3/AuxLogits/Logits \
-    --trainable_scopes=InceptionV3/Logits,InceptionV3/AuxLogits/Logits
+    --checkpoint_exclude_scopes=InceptionV3/Logits,InceptionV3/AuxLogits \
+    --trainable_scopes=InceptionV3/Logits,InceptionV3/AuxLogits
 ```


@@ -358,10 +351,10 @@ following error:
 ```bash
 InvalidArgumentError: Assign requires shapes of both tensors to match. lhs shape= [1001] rhs shape= [1000]
 ```
-This is due to the fact that the VGG and ResNet final layers have only 1000
+This is due to the fact that the VGG and ResNet V1 final layers have only 1000
 outputs rather than 1001.

-To fix this issue, you can set the `--labels_offsets=1` flag. This results in
+To fix this issue, you can set the `--labels_offset=1` flag. This results in
 the ImageNet labels being shifted down by one:



--- a/slim/WORKSPACE
+++ b/slim/WORKSPACE
--- a/slim/datasets/cifar10.py
+++ b/slim/datasets/cifar10.py
@@ -15,7 +15,7 @@
 """Provides data for the Cifar10 dataset.

 The dataset scripts used to create the dataset can be found at:
-tensorflow/models/slim/data/create_cifar10_dataset.py
+tensorflow/models/slim/datasets/download_and_convert_cifar10.py
 """

 from __future__ import absolute_import