"examples/backends/vllm/launch/xpu/agg_multimodal_xpu.sh" did not exist on "75bf1e09930681c76586f281ccff6159a0e50449"
Commit 68a18b70 authored by Toby Boyd's avatar Toby Boyd Committed by GitHub
Browse files

Merge pull request #1 from tensorflow/master

update to tensorflow/model master
parents bc70271a 2c4fea8d
# Bazel build rules for the skip-thoughts dataset preprocessing tools.
# Targets default to being visible only to the //skip_thoughts:internal group.
package(default_visibility = ["//skip_thoughts:internal"])
licenses(["notice"]) # Apache 2.0
exports_files(["LICENSE"])
# Library target for special_words.py; it has no dependencies.
py_library(
name = "special_words",
srcs = ["special_words.py"],
srcs_version = "PY2AND3",
deps = [],
)
# Binary target for preprocess_dataset.py; depends on :special_words.
py_binary(
name = "preprocess_dataset",
srcs = [
"preprocess_dataset.py",
],
srcs_version = "PY2AND3",
deps = [
":special_words",
],
)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Converts a set of text files to TFRecord format with Example protos.
Each Example proto in the output contains the following fields:
decode_pre: list of int64 ids corresponding to the "previous" sentence.
encode: list of int64 ids corresponding to the "current" sentence.
decode_post: list of int64 ids corresponding to the "post" sentence.
In addition, the following files are generated:
vocab.txt: List of "<word> <id>" pairs, where <id> is the integer
encoding of <word> in the Example protos.
word_counts.txt: List of "<word> <count>" pairs, where <count> is the number
of occurrences of <word> in the input files.
The vocabulary of word ids is constructed from the top --num_words by word
count. All other words get the <unk> word id.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import os
import numpy as np
import tensorflow as tf
from skip_thoughts.data import special_words
FLAGS = tf.flags.FLAGS

# Command-line flags: input discovery, vocabulary handling, and the size and
# sharding of the generated TFRecord dataset.
tf.flags.DEFINE_string("input_files", None,
                       "Comma-separated list of globs matching the input "
                       "files. The format of the input files is assumed to be "
                       "a list of newline-separated sentences, where each "
                       "sentence is already tokenized.")

tf.flags.DEFINE_string("vocab_file", "",
                       "(Optional) existing vocab file. Otherwise, a new vocab "
                       "file is created and written to the output directory. "
                       "The file format is a list of newline-separated words, "
                       "where the word id is the corresponding 0-based index "
                       "in the file.")

tf.flags.DEFINE_string("output_dir", None, "Output directory.")

tf.flags.DEFINE_integer("train_output_shards", 100,
                        "Number of output shards for the training set.")

tf.flags.DEFINE_integer("validation_output_shards", 1,
                        "Number of output shards for the validation set.")

# This flag is used as the size of the validation split of the shuffled
# dataset, so its help text describes sentences, not shards.
tf.flags.DEFINE_integer("num_validation_sentences", 50000,
                        "Number of sentences held out for the validation set.")

tf.flags.DEFINE_integer("num_words", 20000,
                        "Number of words to include in the output.")

tf.flags.DEFINE_integer("max_sentences", 0,
                        "If > 0, the maximum number of sentences to output.")

# Adjacent string literals are concatenated verbatim, so the trailing space
# after "OR" is required to keep the help text readable.
tf.flags.DEFINE_integer("max_sentence_length", 30,
                        "If > 0, exclude sentences whose encode, decode_pre OR "
                        "decode_post sentence exceeds this length.")

tf.flags.DEFINE_boolean("add_eos", True,
                        "Whether to add end-of-sentence ids to the output.")

tf.logging.set_verbosity(tf.logging.INFO)
def _build_vocabulary(input_files):
  """Loads or builds the model vocabulary.

  If --vocab_file is set, the vocabulary is read from that file, one word per
  line, with the 0-based line index used as the word id. Otherwise the words in
  `input_files` are counted, the most frequent ones (up to --num_words,
  including the two special words) are kept, and "vocab.txt" and
  "word_counts.txt" are written to --output_dir.

  Args:
    input_files: List of pre-tokenized input .txt files.

  Returns:
    vocab: A dictionary of word to id.
  """
  if FLAGS.vocab_file:
    tf.logging.info("Loading existing vocab file.")
    vocab = collections.OrderedDict()
    with tf.gfile.GFile(FLAGS.vocab_file, mode="r") as f:
      for i, line in enumerate(f):
        # Text-mode reads yield bytes on Python 2 but str on Python 3; only
        # bytes need decoding (str has no .decode on Python 3).
        if isinstance(line, bytes):
          line = line.decode("utf-8")
        word = line.strip()
        assert word not in vocab, "Attempting to add word twice: %s" % word
        vocab[word] = i
    tf.logging.info("Read vocab of size %d from %s",
                    len(vocab), FLAGS.vocab_file)
    return vocab

  tf.logging.info("Creating vocabulary.")
  num = 0
  wordcount = collections.Counter()
  for input_file in input_files:
    tf.logging.info("Processing file: %s", input_file)
    # Use a context manager so each input file is closed after counting.
    with tf.gfile.FastGFile(input_file) as f:
      for sentence in f:
        wordcount.update(sentence.split())

        num += 1
        if num % 1000000 == 0:
          tf.logging.info("Processed %d sentences", num)

  tf.logging.info("Processed %d sentences total", num)

  # Materialize as lists: on Python 3, keys()/values() are views that cannot
  # be indexed and that np.argsort cannot sort element-wise.
  words = list(wordcount.keys())
  freqs = list(wordcount.values())
  sorted_indices = np.argsort(freqs)[::-1]

  vocab = collections.OrderedDict()
  vocab[special_words.EOS] = special_words.EOS_ID
  vocab[special_words.UNK] = special_words.UNK_ID
  # Two ids are reserved for the special words, hence num_words - 2.
  for w_id, w_index in enumerate(sorted_indices[0:FLAGS.num_words - 2]):
    vocab[words[w_index]] = w_id + 2  # 0: EOS, 1: UNK.

  tf.logging.info("Created vocab with %d words", len(vocab))

  vocab_file = os.path.join(FLAGS.output_dir, "vocab.txt")
  with tf.gfile.FastGFile(vocab_file, "w") as f:
    f.write("\n".join(vocab.keys()))
  tf.logging.info("Wrote vocab file to %s", vocab_file)

  word_counts_file = os.path.join(FLAGS.output_dir, "word_counts.txt")
  with tf.gfile.FastGFile(word_counts_file, "w") as f:
    for i in sorted_indices:
      f.write("%s %d\n" % (words[i], freqs[i]))
  tf.logging.info("Wrote word counts file to %s", word_counts_file)

  return vocab
def _int64_feature(value):
  """Builds an Int64 Feature from an iterable of integer-convertible values."""
  int_values = []
  for item in value:
    int_values.append(int(item))
  int64_list = tf.train.Int64List(value=int_values)
  return tf.train.Feature(int64_list=int64_list)
def _sentence_to_ids(sentence, vocab):
  """Maps a sentence (list of words) to word ids, using UNK for unknown words.

  When --add_eos is set, the end-of-sentence id is appended to the result.
  """
  word_ids = []
  for word in sentence:
    word_ids.append(vocab.get(word, special_words.UNK_ID))
  if not FLAGS.add_eos:
    return word_ids
  word_ids.append(special_words.EOS_ID)
  return word_ids
def _create_serialized_example(predecessor, current, successor, vocab):
  """Serializes a (predecessor, current, successor) triple as an Example proto.

  Each sentence is converted to word ids and stored under the keys
  "decode_pre", "encode" and "decode_post" respectively.
  """
  pre_feature = _int64_feature(_sentence_to_ids(predecessor, vocab))
  encode_feature = _int64_feature(_sentence_to_ids(current, vocab))
  post_feature = _int64_feature(_sentence_to_ids(successor, vocab))
  features = tf.train.Features(feature={
      "decode_pre": pre_feature,
      "encode": encode_feature,
      "decode_post": post_feature,
  })
  return tf.train.Example(features=features).SerializeToString()
def _process_input_file(filename, vocab, stats):
  """Turns the sentences of one input file into serialized Example protos.

  A sliding window of three consecutive sentences is kept; every window in
  which all three sentences are non-empty is considered for output.

  Args:
    filename: Path to a pre-tokenized input .txt file.
    vocab: A dictionary of word to id.
    stats: A Counter object for statistics; updated in place.

  Returns:
    processed: A list of serialized Example protos
  """
  tf.logging.info("Processing input file: %s", filename)
  examples = []

  before = None  # Words of the sentence two lines back.
  middle = None  # Words of the previous line.

  for line in tf.gfile.FastGFile(filename):
    stats.update(["sentences_seen"])
    after = line.split()

    # The window is incomplete for the first 2 sentences of a file.
    if before and middle and after:
      stats.update(["sentences_considered"])

      # <EOS> is inserted later, so only sentences strictly shorter than
      # max_sentence_length are allowed through.
      limit = FLAGS.max_sentence_length
      too_long = limit and any(
          len(words) >= limit for words in (before, middle, after))
      if too_long:
        stats.update(["sentences_too_long"])
      else:
        examples.append(
            _create_serialized_example(before, middle, after, vocab))
        stats.update(["sentences_output"])

    # Slide the window forward by one sentence.
    before, middle = middle, after

    num_seen = stats["sentences_seen"]
    num_output = stats["sentences_output"]
    if num_seen and num_seen % 100000 == 0:
      tf.logging.info("Processed %d sentences (%d output)", num_seen,
                      num_output)
    if FLAGS.max_sentences and num_output >= FLAGS.max_sentences:
      break

  tf.logging.info("Completed processing file %s", filename)
  return examples
def _write_shard(filename, dataset, indices):
  """Writes the records dataset[j], for j in indices, to one TFRecord file."""
  with tf.python_io.TFRecordWriter(filename) as record_writer:
    for record in (dataset[index] for index in indices):
      record_writer.write(record)
def _write_dataset(name, dataset, indices, num_shards):
  """Splits the selected records into shards and writes each as a TFRecord.

  Shard files are written to --output_dir and named
  "<name>-<shard>-of-<num_shards>" with zero-padded five-digit numbers.

  Args:
    name: Name of the dataset (e.g. "train").
    dataset: List of serialized Example protos.
    indices: List of indices of 'dataset' to be written.
    num_shards: The number of output shards.
  """
  tf.logging.info("Writing dataset %s", name)
  # Evenly spaced cut points over `indices`, one more than the shard count.
  borders = np.int32(np.linspace(0, len(indices), num_shards + 1))
  for shard in range(num_shards):
    begin = borders[shard]
    end = borders[shard + 1]
    shard_name = "%s-%.5d-of-%.5d" % (name, shard, num_shards)
    filename = os.path.join(FLAGS.output_dir, shard_name)
    _write_shard(filename, dataset, indices[begin:end])
    tf.logging.info("Wrote dataset indices [%d, %d) to output shard %s",
                    begin, end, filename)
  tf.logging.info("Finished writing %d sentences in dataset %s.",
                  len(indices), name)
def main(unused_argv):
  """Builds the vocabulary and writes sharded train/validation TFRecords.

  Args:
    unused_argv: Unused; positional arguments passed in by tf.app.run().

  Raises:
    ValueError: If --input_files or --output_dir is missing, or if one of the
      comma-separated input patterns matches no files.
  """
  if not FLAGS.input_files:
    raise ValueError("--input_files is required.")
  if not FLAGS.output_dir:
    raise ValueError("--output_dir is required.")

  if not tf.gfile.IsDirectory(FLAGS.output_dir):
    tf.gfile.MakeDirs(FLAGS.output_dir)

  input_files = []
  for pattern in FLAGS.input_files.split(","):
    # Glob each pattern on its own. Globbing the full comma-separated flag
    # value would never match when several patterns are given, and would add
    # the same files once per pattern otherwise.
    match = tf.gfile.Glob(pattern)
    if not match:
      raise ValueError("Found no files matching %s" % pattern)
    input_files.extend(match)
  tf.logging.info("Found %d input files.", len(input_files))

  vocab = _build_vocabulary(input_files)

  tf.logging.info("Generating dataset.")
  stats = collections.Counter()
  dataset = []
  for filename in input_files:
    dataset.extend(_process_input_file(filename, vocab, stats))
    # `stats` is shared across files, so the cap applies to the whole run.
    if FLAGS.max_sentences and stats["sentences_output"] >= FLAGS.max_sentences:
      break

  tf.logging.info("Generated dataset with %d sentences.", len(dataset))
  for k, v in stats.items():
    tf.logging.info("%s: %d", k, v)

  tf.logging.info("Shuffling dataset.")
  # Seed the generator so the permutation below is the same on every run.
  np.random.seed(123)
  shuffled_indices = np.random.permutation(len(dataset))
  val_indices = shuffled_indices[:FLAGS.num_validation_sentences]
  train_indices = shuffled_indices[FLAGS.num_validation_sentences:]

  _write_dataset("train", dataset, train_indices, FLAGS.train_output_shards)
  _write_dataset("validation", dataset, val_indices,
                 FLAGS.validation_output_shards)


if __name__ == "__main__":
  tf.app.run()
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
......@@ -12,11 +12,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Special word constants.

NOTE: The ids of the EOS and UNK constants should not be modified. It is assumed
that these always occupy the first two ids.
"""

# End of sentence: the token string and its fixed word id.
EOS = "<eos>"
EOS_ID = 0

# Unknown word: the token string and its fixed word id.
UNK = "<unk>"
UNK_ID = 1
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Manager class for loading and encoding with multiple skip-thoughts models.
If multiple models are loaded at once then the encode() function returns the
concatenation of the outputs of each model.
Example usage:
manager = EncoderManager()
manager.load_model(model_config_1, vocabulary_file_1, embedding_matrix_file_1,
checkpoint_path_1)
manager.load_model(model_config_2, vocabulary_file_2, embedding_matrix_file_2,
checkpoint_path_2)
encodings = manager.encode(data)
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import numpy as np
import tensorflow as tf
from skip_thoughts import skip_thoughts_encoder
class EncoderManager(object):
  """Manager class for loading and encoding with skip-thoughts models.

  Each loaded model gets its own encoder and tf.Session, kept at the same
  position in `self.encoders` and `self.sessions`.
  """

  def __init__(self):
    self.encoders = []
    self.sessions = []

  @staticmethod
  def _to_text(line):
    """Returns a vocabulary line as stripped text.

    Args:
      line: One line of the vocabulary file, as bytes or text.

    Returns:
      The line with surrounding whitespace removed; bytes input is decoded as
      UTF-8 first.
    """
    # Text-mode reads yield bytes on Python 2 but str on Python 3, and str has
    # no .decode() on Python 3.
    if isinstance(line, bytes):
      line = line.decode("utf-8")
    return line.strip()

  def load_model(self, model_config, vocabulary_file, embedding_matrix_file,
                 checkpoint_path):
    """Loads a skip-thoughts model.

    Args:
      model_config: Object containing parameters for building the model.
      vocabulary_file: Path to vocabulary file containing a list of newline-
        separated words where the word id is the corresponding 0-based index in
        the file.
      embedding_matrix_file: Path to a serialized numpy array of shape
        [vocab_size, embedding_dim].
      checkpoint_path: SkipThoughtsModel checkpoint file or a directory
        containing a checkpoint file.
    """
    tf.logging.info("Reading vocabulary from %s", vocabulary_file)
    with tf.gfile.GFile(vocabulary_file, mode="r") as f:
      lines = f.readlines()
    reverse_vocab = [self._to_text(line) for line in lines]
    tf.logging.info("Loaded vocabulary with %d words.", len(reverse_vocab))

    tf.logging.info("Loading embedding matrix from %s", embedding_matrix_file)
    # Note: tf.gfile.GFile doesn't work here because np.load() calls f.seek()
    # with 3 arguments. The file holds binary data, so it must be opened in
    # binary mode; a text-mode handle fails on Python 3.
    with open(embedding_matrix_file, "rb") as f:
      embedding_matrix = np.load(f)
    tf.logging.info("Loaded embedding matrix with shape %s",
                    embedding_matrix.shape)

    # Pair the i-th word with the i-th row of the embedding matrix.
    word_embeddings = collections.OrderedDict(
        zip(reverse_vocab, embedding_matrix))

    # Every model is built in its own graph and run in its own session.
    g = tf.Graph()
    with g.as_default():
      encoder = skip_thoughts_encoder.SkipThoughtsEncoder(word_embeddings)
      restore_model = encoder.build_graph_from_config(model_config,
                                                      checkpoint_path)

    sess = tf.Session(graph=g)
    restore_model(sess)

    self.encoders.append(encoder)
    self.sessions.append(sess)

  def encode(self,
             data,
             use_norm=True,
             verbose=False,
             batch_size=128,
             use_eos=False):
    """Encodes a sequence of sentences as skip-thought vectors.

    Args:
      data: A list of input strings.
      use_norm: If True, normalize output skip-thought vectors to unit L2 norm.
      verbose: Whether to log every batch.
      batch_size: Batch size for the RNN encoders.
      use_eos: If True, append the end-of-sentence word to each input sentence.

    Returns:
      thought_vectors: A numpy array with one row per element of 'data'; the
        outputs of all loaded models are concatenated along axis 1.

    Raises:
      ValueError: If called before calling load_model.
    """
    if not self.encoders:
      raise ValueError(
          "Must call load_model at least once before calling encode.")

    encoded = []
    for encoder, sess in zip(self.encoders, self.sessions):
      encoded.append(
          np.array(
              encoder.encode(
                  sess,
                  data,
                  use_norm=use_norm,
                  verbose=verbose,
                  batch_size=batch_size,
                  use_eos=use_eos)))

    return np.concatenate(encoded, axis=1)

  def close(self):
    """Closes the active TensorFlow Sessions."""
    for sess in self.sessions:
      sess.close()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Script to evaluate a skip-thoughts model.
This script can evaluate a model with a unidirectional encoder ("uni-skip" in
the paper); or a model with a bidirectional encoder ("bi-skip"); or the
combination of a model with a unidirectional encoder and a model with a
bidirectional encoder ("combine-skip").
The uni-skip model (if it exists) is specified by the flags
--uni_vocab_file, --uni_embeddings_file, --uni_checkpoint_path.
The bi-skip model (if it exists) is specified by the flags
--bi_vocab_file, --bi_embeddings_file, --bi_checkpoint_path.
The evaluation tasks have different running times. SICK may take 5-10 minutes.
MSRP, TREC and CR may take 20-60 minutes. SUBJ, MPQA and MR may take 2+ hours.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from skipthoughts import eval_classification
from skipthoughts import eval_msrp
from skipthoughts import eval_sick
from skipthoughts import eval_trec
import tensorflow as tf
from skip_thoughts import configuration
from skip_thoughts import encoder_manager
FLAGS = tf.flags.FLAGS
# Which evaluation task to run, and where its data lives.
tf.flags.DEFINE_string("eval_task", "CR",
"Name of the evaluation task to run. Available tasks: "
"MR, CR, SUBJ, MPQA, SICK, MSRP, TREC.")
tf.flags.DEFINE_string("data_dir", None, "Directory containing training data.")
# Vocabulary files for the unidirectional ("uni") and bidirectional ("bi")
# models.
tf.flags.DEFINE_string("uni_vocab_file", None,
"Path to vocabulary file containing a list of newline-"
"separated words where the word id is the "
"corresponding 0-based index in the file.")
tf.flags.DEFINE_string("bi_vocab_file", None,
"Path to vocabulary file containing a list of newline-"
"separated words where the word id is the "
"corresponding 0-based index in the file.")
# Embedding matrix files for the two models.
tf.flags.DEFINE_string("uni_embeddings_file", None,
"Path to serialized numpy array of shape "
"[vocab_size, embedding_dim].")
tf.flags.DEFINE_string("bi_embeddings_file", None,
"Path to serialized numpy array of shape "
"[vocab_size, embedding_dim].")
# Checkpoint locations; main() only loads a model whose checkpoint path is set.
tf.flags.DEFINE_string("uni_checkpoint_path", None,
"Checkpoint file or directory containing a checkpoint "
"file.")
tf.flags.DEFINE_string("bi_checkpoint_path", None,
"Checkpoint file or directory containing a checkpoint "
"file.")
tf.logging.set_verbosity(tf.logging.INFO)
def main(unused_argv):
  """Loads the configured skip-thoughts model(s) and runs one evaluation task.

  Args:
    unused_argv: Unused; positional arguments passed in by tf.app.run().

  Raises:
    ValueError: If --data_dir is missing, if neither --uni_checkpoint_path nor
      --bi_checkpoint_path is set, or if --eval_task is not recognized.
  """
  if not FLAGS.data_dir:
    raise ValueError("--data_dir is required.")

  encoder = encoder_manager.EncoderManager()

  # The try/finally guarantees that every session opened by load_model is
  # closed even when loading or evaluation raises.
  try:
    # Maybe load unidirectional encoder.
    if FLAGS.uni_checkpoint_path:
      print("Loading unidirectional model...")
      uni_config = configuration.model_config()
      encoder.load_model(uni_config, FLAGS.uni_vocab_file,
                         FLAGS.uni_embeddings_file, FLAGS.uni_checkpoint_path)

    # Maybe load bidirectional encoder.
    if FLAGS.bi_checkpoint_path:
      print("Loading bidirectional model...")
      bi_config = configuration.model_config(bidirectional_encoder=True)
      encoder.load_model(bi_config, FLAGS.bi_vocab_file,
                         FLAGS.bi_embeddings_file, FLAGS.bi_checkpoint_path)

    # Fail early with a clear message rather than deep inside an evaluation
    # task when no model was configured.
    if not encoder.encoders:
      raise ValueError("At least one of --uni_checkpoint_path or "
                       "--bi_checkpoint_path is required.")

    if FLAGS.eval_task in ["MR", "CR", "SUBJ", "MPQA"]:
      eval_classification.eval_nested_kfold(
          encoder, FLAGS.eval_task, FLAGS.data_dir, use_nb=False)
    elif FLAGS.eval_task == "SICK":
      eval_sick.evaluate(encoder, evaltest=True, loc=FLAGS.data_dir)
    elif FLAGS.eval_task == "MSRP":
      eval_msrp.evaluate(
          encoder, evalcv=True, evaltest=True, use_feats=True,
          loc=FLAGS.data_dir)
    elif FLAGS.eval_task == "TREC":
      eval_trec.evaluate(
          encoder, evalcv=True, evaltest=True, loc=FLAGS.data_dir)
    else:
      raise ValueError("Unrecognized eval_task: %s" % FLAGS.eval_task)
  finally:
    encoder.close()


if __name__ == "__main__":
  tf.app.run()
# Bazel build rules for the skip-thoughts op libraries.
# Targets default to being visible only to the //skip_thoughts:internal group.
package(default_visibility = ["//skip_thoughts:internal"])
licenses(["notice"]) # Apache 2.0
exports_files(["LICENSE"])
# Library target for input_ops.py.
py_library(
name = "input_ops",
srcs = ["input_ops.py"],
srcs_version = "PY2AND3",
)
# Library target for gru_cell.py.
py_library(
name = "gru_cell",
srcs = ["gru_cell.py"],
srcs_version = "PY2AND3",
)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""GRU cell implementation for the skip-thought vectors model."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
# Short module-level alias for the tf.contrib layer normalization function.
_layer_norm = tf.contrib.layers.layer_norm
class LayerNormGRUCell(tf.contrib.rnn.RNNCell):
  """GRU cell in which each matrix product is layer-normalized.

  The layer normalization implementation is based on:

    https://arxiv.org/abs/1607.06450.

  "Layer Normalization"
  Jimmy Lei Ba, Jamie Ryan Kiros, Geoffrey E. Hinton
  """

  def __init__(self,
               num_units,
               w_initializer,
               u_initializer,
               b_initializer,
               activation=tf.nn.tanh):
    """Stores the cell configuration.

    Args:
      num_units: Number of cell units.
      w_initializer: Initializer for the "W" (input) parameter matrices.
      u_initializer: Initializer for the "U" (recurrent) parameter matrices.
      b_initializer: Initializer for the "b" (bias) parameter vectors.
      activation: Cell activation function.
    """
    self._activation = activation
    self._b_initializer = b_initializer
    self._u_initializer = u_initializer
    self._w_initializer = w_initializer
    self._num_units = num_units

  @property
  def state_size(self):
    return self._num_units

  @property
  def output_size(self):
    return self._num_units

  def _w_h_initializer(self):
    """Returns an initializer for the "W_h" parameter matrix.

    See equation (23) in the paper. "W_h" is the concatenation [U_z, U_r] of
    two submatrices, each drawn from the "U" initializer.

    Returns:
      An initializer function producing a Tensor with shape
      [num_units, 2 * num_units].
    """

    def _initializer(shape, dtype=tf.float32, partition_info=None):
      size = self._num_units
      assert shape == [size, 2 * size]
      # First draw is U_z, second draw is U_r.
      blocks = [
          self._u_initializer([size, size], dtype, partition_info)
          for _ in range(2)
      ]
      return tf.concat(blocks, 1)

    return _initializer

  def _w_x_initializer(self, input_dim):
    """Returns an initializer for the "W_x" parameter matrix.

    See equation (23) in the paper. "W_x" is the concatenation [W_z, W_r] of
    two submatrices, each drawn from the "W" initializer.

    Args:
      input_dim: The dimension of the cell inputs.

    Returns:
      An initializer function producing a Tensor with shape
      [input_dim, 2 * num_units].
    """

    def _initializer(shape, dtype=tf.float32, partition_info=None):
      size = self._num_units
      assert shape == [input_dim, 2 * size]
      # First draw is W_z, second draw is W_r.
      blocks = [
          self._w_initializer([input_dim, size], dtype, partition_info)
          for _ in range(2)
      ]
      return tf.concat(blocks, 1)

    return _initializer

  def __call__(self, inputs, state, scope=None):
    """Runs one step of the layer-normalized GRU; returns (output, state)."""
    num_units = self._num_units
    input_dim = inputs.get_shape().as_list()[1]

    with tf.variable_scope(scope or "gru_cell"):
      with tf.variable_scope("gates"):
        w_h = tf.get_variable(
            "w_h", [num_units, 2 * num_units],
            initializer=self._w_h_initializer())
        w_x = tf.get_variable(
            "w_x", [input_dim, 2 * num_units],
            initializer=self._w_x_initializer(input_dim))
        state_term = _layer_norm(
            tf.matmul(state, w_h), scope="layer_norm/w_h")
        input_term = _layer_norm(
            tf.matmul(inputs, w_x), scope="layer_norm/w_x")
        # The two gates are computed jointly and split after the sigmoid.
        z, r = tf.split(tf.sigmoid(state_term + input_term), 2, 1)

      with tf.variable_scope("candidate"):
        w = tf.get_variable(
            "w", [input_dim, num_units], initializer=self._w_initializer)
        u = tf.get_variable(
            "u", [num_units, num_units], initializer=self._u_initializer)
        recurrent_term = r * _layer_norm(
            tf.matmul(state, u), scope="layer_norm/u")
        candidate_input = _layer_norm(
            tf.matmul(inputs, w), scope="layer_norm/w")
        h_hat = recurrent_term + candidate_input

      carried = (1 - z) * state
      new_h = carried + z * self._activation(h_hat)

    return new_h, new_h
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Input ops."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import tensorflow as tf
# SentenceBatch bundles two Tensors describing a padded batch of sentences:
#   ids: the sentences as sequences of word ids; an int64 Tensor with shape
#     [batch_size, padded_length].
#   mask: marks real words with 1 and padded positions with 0; an int32 Tensor
#     with shape [batch_size, padded_length].
SentenceBatch = collections.namedtuple("SentenceBatch", ["ids", "mask"])
def parse_example_batch(serialized):
  """Parses serialized tf.Example protos into three SentenceBatch tuples.

  Args:
    serialized: A 1-D string Tensor; a batch of serialized tf.Example protos.

  Returns:
    encode: A SentenceBatch of encode sentences.
    decode_pre: A SentenceBatch of "previous" sentences to decode.
    decode_post: A SentenceBatch of "post" sentences to decode.
  """
  output_names = ("encode", "decode_pre", "decode_post")
  feature_spec = {}
  for name in output_names:
    feature_spec[name] = tf.VarLenFeature(dtype=tf.int64)
  parsed = tf.parse_example(serialized, features=feature_spec)

  batches = []
  for name in output_names:
    sparse = parsed[name]
    # Densifying pads the shorter sentences with zeroes.
    dense_ids = tf.sparse_tensor_to_dense(sparse)
    # Ones at every position that holds a real value, zeroes elsewhere.
    ones = tf.ones_like(sparse.values, dtype=tf.int32)
    dense_mask = tf.sparse_to_dense(sparse.indices, sparse.dense_shape, ones)
    batches.append(SentenceBatch(ids=dense_ids, mask=dense_mask))
  return tuple(batches)
def prefetch_input_data(reader,
                        file_pattern,
                        shuffle,
                        capacity,
                        num_reader_threads=1):
  """Builds a queue that is filled with string records read from files.

  Args:
    reader: Instance of tf.ReaderBase.
    file_pattern: Comma-separated list of file patterns (e.g.
        "/tmp/train_data-?????-of-00100", where '?' acts as a wildcard that
        matches any character).
    shuffle: Boolean; whether to randomly shuffle the input data.
    capacity: Queue capacity (number of records).
    num_reader_threads: Number of reader threads feeding into the queue.

  Returns:
    A Queue containing prefetched string values.
  """
  data_files = [
      path
      for pattern in file_pattern.split(",")
      for path in tf.gfile.Glob(pattern)
  ]
  if data_files:
    tf.logging.info("Prefetching values from %d files matching %s",
                    len(data_files), file_pattern)
  else:
    tf.logging.fatal("Found no input files matching %s", file_pattern)

  filename_queue = tf.train.string_input_producer(
      data_files, shuffle=shuffle, capacity=16, name="filename_queue")

  if not shuffle:
    values_queue = tf.FIFOQueue(
        capacity=capacity,
        dtypes=[tf.string],
        shapes=[[]],
        name="fifo_input_queue")
  else:
    values_queue = tf.RandomShuffleQueue(
        capacity=capacity,
        min_after_dequeue=int(0.6 * capacity),
        dtypes=[tf.string],
        shapes=[[]],
        name="random_input_queue")

  # One read-and-enqueue op per reader thread.
  enqueue_ops = []
  for _ in range(num_reader_threads):
    record = reader.read(filename_queue)[1]
    enqueue_ops.append(values_queue.enqueue([record]))
  runner = tf.train.queue_runner.QueueRunner(values_queue, enqueue_ops)
  tf.train.queue_runner.add_queue_runner(runner)

  # Summary of the fraction of the queue that is currently filled.
  summary_name = "queue/%s/fraction_of_%d_full" % (values_queue.name, capacity)
  fill_fraction = tf.cast(values_queue.size(), tf.float32) * (1.0 / capacity)
  tf.summary.scalar(summary_name, fill_fraction)

  return values_queue
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Class for encoding text using a trained SkipThoughtsModel.
Example usage:
g = tf.Graph()
with g.as_default():
encoder = SkipThoughtsEncoder(embeddings)
restore_fn = encoder.build_graph_from_config(model_config, checkpoint_path)
with tf.Session(graph=g) as sess:
restore_fn(sess)
skip_thought_vectors = encoder.encode(sess, data)
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os.path
import nltk
import nltk.tokenize
import numpy as np
import tensorflow as tf
from skip_thoughts import skip_thoughts_model
from skip_thoughts.data import special_words
def _pad(seq, target_len):
"""Pads a sequence of word embeddings up to the target length.
Args:
seq: Sequence of word embeddings.
target_len: Desired padded sequence length.
Returns:
embeddings: Input sequence padded with zero embeddings up to the target
length.
mask: A 0/1 vector with zeros corresponding to padded embeddings.
Raises:
ValueError: If len(seq) is not in the interval (0, target_len].
"""
seq_len = len(seq)
if seq_len <= 0 or seq_len > target_len:
raise ValueError("Expected 0 < len(seq) <= %d, got %d" % (target_len,
seq_len))
emb_dim = seq[0].shape[0]
padded_seq = np.zeros(shape=(target_len, emb_dim), dtype=seq[0].dtype)
mask = np.zeros(shape=(target_len,), dtype=np.int8)
for i in range(seq_len):
padded_seq[i] = seq[i]
mask[i] = 1
return padded_seq, mask
def _batch_and_pad(sequences):
  """Pads every sequence to the longest length and stacks them into arrays.

  Args:
    sequences: A list of batch_size sequences of word embeddings.

  Returns:
    embeddings: A numpy array with shape [batch_size, padded_length, emb_dim].
    mask: A numpy 0/1 array with shape [batch_size, padded_length] with zeros
      corresponding to padded elements.
  """
  longest = max(len(sequence) for sequence in sequences)
  padded_pairs = [_pad(sequence, longest) for sequence in sequences]
  all_embeddings = [pair[0] for pair in padded_pairs]
  all_masks = [pair[1] for pair in padded_pairs]
  return np.array(all_embeddings), np.array(all_masks)
class SkipThoughtsEncoder(object):
"""Skip-thoughts sentence encoder."""
def __init__(self, embeddings):
  """Stores the embeddings and loads the nltk sentence detector.

  Args:
    embeddings: Dictionary of word to embedding vector (1D numpy array).
  """
  detector = nltk.data.load("tokenizers/punkt/english.pickle")
  self._sentence_detector = detector
  self._embeddings = embeddings
def _create_restore_fn(self, checkpoint_path, saver):
  """Returns a function that restores model variables from a checkpoint.

  Args:
    checkpoint_path: Checkpoint file or a directory containing a checkpoint
      file.
    saver: Saver for restoring variables from the checkpoint file.

  Returns:
    restore_fn: A function such that restore_fn(sess) loads model variables
      from the checkpoint file.

  Raises:
    ValueError: If checkpoint_path does not refer to a checkpoint file or a
      directory containing a checkpoint file.
  """
  resolved_path = checkpoint_path
  if tf.gfile.IsDirectory(checkpoint_path):
    # A directory is resolved to the latest checkpoint it contains.
    resolved_path = tf.train.latest_checkpoint(checkpoint_path)
    if not resolved_path:
      raise ValueError("No checkpoint file found in: %s" % checkpoint_path)

  def _restore_fn(sess):
    tf.logging.info("Loading model from checkpoint: %s", resolved_path)
    saver.restore(sess, resolved_path)
    tf.logging.info("Successfully loaded checkpoint: %s",
                    os.path.basename(resolved_path))

  return _restore_fn
def build_graph_from_config(self, model_config, checkpoint_path):
"""Builds the inference graph from a configuration object.
Args:
model_config: Object containing configuration for building the model.
checkpoint_path: Checkpoint file or a directory containing a checkpoint
file.
Returns:
restore_fn: A function such that restore_fn(sess) loads model variables
from the checkpoint file.
"""
tf.logging.info("Building model.")
model = skip_thoughts_model.SkipThoughtsModel(model_config, mode="encode")
model.build()
saver = tf.train.Saver()
return self._create_restore_fn(checkpoint_path, saver)
def build_graph_from_proto(self, graph_def_file, saver_def_file,
checkpoint_path):
"""Builds the inference graph from serialized GraphDef and SaverDef protos.
Args:
graph_def_file: File containing a serialized GraphDef proto.
saver_def_file: File containing a serialized SaverDef proto.
checkpoint_path: Checkpoint file or a directory containing a checkpoint
file.
Returns:
restore_fn: A function such that restore_fn(sess) loads model variables
from the checkpoint file.
"""
# Load the Graph.
tf.logging.info("Loading GraphDef from file: %s", graph_def_file)
graph_def = tf.GraphDef()
with tf.gfile.FastGFile(graph_def_file, "rb") as f:
graph_def.ParseFromString(f.read())
tf.import_graph_def(graph_def, name="")
# Load the Saver.
tf.logging.info("Loading SaverDef from file: %s", saver_def_file)
saver_def = tf.train.SaverDef()
with tf.gfile.FastGFile(saver_def_file, "rb") as f:
saver_def.ParseFromString(f.read())
saver = tf.train.Saver(saver_def=saver_def)
return self._create_restore_fn(checkpoint_path, saver)
def _tokenize(self, item):
"""Tokenizes an input string into a list of words."""
tokenized = []
for s in self._sentence_detector.tokenize(item):
tokenized.extend(nltk.tokenize.word_tokenize(s))
return tokenized
def _word_to_embedding(self, w):
"""Returns the embedding of a word."""
return self._embeddings.get(w, self._embeddings[special_words.UNK])
def _preprocess(self, data, use_eos):
"""Preprocesses text for the encoder.
Args:
data: A list of input strings.
use_eos: Whether to append the end-of-sentence word to each sentence.
Returns:
embeddings: A list of word embedding sequences corresponding to the input
strings.
"""
preprocessed_data = []
for item in data:
tokenized = self._tokenize(item)
if use_eos:
tokenized.append(special_words.EOS)
preprocessed_data.append([self._word_to_embedding(w) for w in tokenized])
return preprocessed_data
def encode(self,
sess,
data,
use_norm=True,
verbose=True,
batch_size=128,
use_eos=False):
"""Encodes a sequence of sentences as skip-thought vectors.
Args:
sess: TensorFlow Session.
data: A list of input strings.
use_norm: Whether to normalize skip-thought vectors to unit L2 norm.
verbose: Whether to log every batch.
batch_size: Batch size for the encoder.
use_eos: Whether to append the end-of-sentence word to each input
sentence.
Returns:
thought_vectors: A list of numpy arrays corresponding to the skip-thought
encodings of sentences in 'data'.
"""
data = self._preprocess(data, use_eos)
thought_vectors = []
batch_indices = np.arange(0, len(data), batch_size)
for batch, start_index in enumerate(batch_indices):
if verbose:
tf.logging.info("Batch %d / %d.", batch, len(batch_indices))
embeddings, mask = _batch_and_pad(
data[start_index:start_index + batch_size])
feed_dict = {
"encode_emb:0": embeddings,
"encode_mask:0": mask,
}
thought_vectors.extend(
sess.run("encoder/thought_vectors:0", feed_dict=feed_dict))
if use_norm:
thought_vectors = [v / np.linalg.norm(v) for v in thought_vectors]
return thought_vectors
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Skip-Thoughts model for learning sentence vectors.
The model is based on the paper:
"Skip-Thought Vectors"
Ryan Kiros, Yukun Zhu, Ruslan Salakhutdinov, Richard S. Zemel,
Antonio Torralba, Raquel Urtasun, Sanja Fidler.
https://papers.nips.cc/paper/5950-skip-thought-vectors.pdf
Layer normalization is applied based on the paper:
"Layer Normalization"
Jimmy Lei Ba, Jamie Ryan Kiros, Geoffrey E. Hinton
https://arxiv.org/abs/1607.06450
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from skip_thoughts.ops import gru_cell
from skip_thoughts.ops import input_ops
def random_orthonormal_initializer(shape, dtype=tf.float32,
                                   partition_info=None):  # pylint: disable=unused-argument
  """Variable initializer that produces a random orthonormal matrix.

  The matrix is the `u` factor of the singular value decomposition of a matrix
  drawn with tf.random_normal.

  Args:
    shape: Shape of the variable to initialize; must describe a square matrix.
    dtype: Dtype of the random matrix that is decomposed.
    partition_info: Unused; accepted so the function matches the initializer
      calling convention.

  Returns:
    The `u` Tensor returned by tf.svd.

  Raises:
    ValueError: If shape is not two-dimensional and square.
  """
  if len(shape) != 2 or shape[0] != shape[1]:
    # Wrap shape in a 1-tuple: formatting a tuple-valued shape directly with
    # "%s" would raise TypeError instead of the intended ValueError.
    raise ValueError("Expecting square shape, got %s" % (shape,))
  _, u, _ = tf.svd(tf.random_normal(shape, dtype=dtype), full_matrices=True)
  return u
class SkipThoughtsModel(object):
  """Skip-thoughts model.

  A GRU sentence encoder produces a "thought vector" for each input sentence.
  In "train" and "eval" modes two GRU decoders, initialized with that vector,
  predict the previous and the next sentence. In "encode" mode only the
  encoder is built and word embeddings are fed through placeholders.
  """

  def __init__(self, config, mode="train", input_reader=None):
    """Basic setup. The actual TensorFlow graph is constructed in build().

    Args:
      config: Object containing configuration parameters.
      mode: "train", "eval" or "encode".
      input_reader: Subclass of tf.ReaderBase for reading the input serialized
        tf.Example protocol buffers. Defaults to TFRecordReader.

    Raises:
      ValueError: If mode is invalid.
    """
    if mode not in ["train", "eval", "encode"]:
      raise ValueError("Unrecognized mode: %s" % mode)

    self.config = config
    self.mode = mode
    self.reader = input_reader if input_reader else tf.TFRecordReader()

    # Initializer used for non-recurrent weights.
    self.uniform_initializer = tf.random_uniform_initializer(
        minval=-self.config.uniform_init_scale,
        maxval=self.config.uniform_init_scale)

    # Input sentences represented as sequences of word ids. "encode" is the
    # source sentence, "decode_pre" is the previous sentence and "decode_post"
    # is the next sentence.
    # Each is an int64 Tensor with shape [batch_size, padded_length].
    self.encode_ids = None
    self.decode_pre_ids = None
    self.decode_post_ids = None

    # Boolean masks distinguishing real words (1) from padded words (0).
    # Each is an int32 Tensor with shape [batch_size, padded_length].
    self.encode_mask = None
    self.decode_pre_mask = None
    self.decode_post_mask = None

    # Input sentences represented as sequences of word embeddings.
    # Each is a float32 Tensor with shape [batch_size, padded_length, emb_dim].
    self.encode_emb = None
    self.decode_pre_emb = None
    self.decode_post_emb = None

    # The output from the sentence encoder.
    # A float32 Tensor with shape [batch_size, num_gru_units].
    self.thought_vectors = None

    # The cross entropy losses and corresponding weights of the decoders. Used
    # for evaluation.
    self.target_cross_entropy_losses = []
    self.target_cross_entropy_loss_weights = []

    # The total loss to optimize.
    self.total_loss = None

    # The global step Tensor; created by build_global_step().
    self.global_step = None

  def build_inputs(self):
    """Builds the ops for reading input data.

    In "encode" mode only a mask placeholder named "encode_mask" is created;
    every other input is left as None. Otherwise serialized examples are
    prefetched, dequeued in batches and parsed into ids and masks.

    Outputs:
      self.encode_ids
      self.decode_pre_ids
      self.decode_post_ids
      self.encode_mask
      self.decode_pre_mask
      self.decode_post_mask
    """
    if self.mode == "encode":
      # Word embeddings are fed from an external vocabulary which has possibly
      # been expanded (see vocabulary_expansion.py).
      encode_ids = None
      decode_pre_ids = None
      decode_post_ids = None
      encode_mask = tf.placeholder(tf.int8, (None, None), name="encode_mask")
      decode_pre_mask = None
      decode_post_mask = None
    else:
      # Prefetch serialized tf.Example protos.
      input_queue = input_ops.prefetch_input_data(
          self.reader,
          self.config.input_file_pattern,
          shuffle=self.config.shuffle_input_data,
          capacity=self.config.input_queue_capacity,
          num_reader_threads=self.config.num_input_reader_threads)

      # Deserialize a batch.
      serialized = input_queue.dequeue_many(self.config.batch_size)
      encode, decode_pre, decode_post = input_ops.parse_example_batch(
          serialized)

      encode_ids = encode.ids
      decode_pre_ids = decode_pre.ids
      decode_post_ids = decode_post.ids

      encode_mask = encode.mask
      decode_pre_mask = decode_pre.mask
      decode_post_mask = decode_post.mask

    self.encode_ids = encode_ids
    self.decode_pre_ids = decode_pre_ids
    self.decode_post_ids = decode_post_ids

    self.encode_mask = encode_mask
    self.decode_pre_mask = decode_pre_mask
    self.decode_post_mask = decode_post_mask

  def build_word_embeddings(self):
    """Builds the word embeddings.

    In "encode" mode the embeddings are a placeholder named "encode_emb";
    otherwise they are looked up in a trainable "word_embedding" variable.

    Inputs:
      self.encode_ids
      self.decode_pre_ids
      self.decode_post_ids

    Outputs:
      self.encode_emb
      self.decode_pre_emb
      self.decode_post_emb
    """
    if self.mode == "encode":
      # Word embeddings are fed from an external vocabulary which has possibly
      # been expanded (see vocabulary_expansion.py).
      encode_emb = tf.placeholder(tf.float32, (
          None, None, self.config.word_embedding_dim), "encode_emb")
      # No sequences to decode.
      decode_pre_emb = None
      decode_post_emb = None
    else:
      word_emb = tf.get_variable(
          name="word_embedding",
          shape=[self.config.vocab_size, self.config.word_embedding_dim],
          initializer=self.uniform_initializer)

      encode_emb = tf.nn.embedding_lookup(word_emb, self.encode_ids)
      decode_pre_emb = tf.nn.embedding_lookup(word_emb, self.decode_pre_ids)
      decode_post_emb = tf.nn.embedding_lookup(word_emb, self.decode_post_ids)

    self.encode_emb = encode_emb
    self.decode_pre_emb = decode_pre_emb
    self.decode_post_emb = decode_post_emb

  def _initialize_gru_cell(self, num_units):
    """Initializes a GRU cell.

    The Variables of the GRU cell are initialized in a way that exactly matches
    the skip-thoughts paper: recurrent weights are initialized from random
    orthonormal matrices and non-recurrent weights are initialized from random
    uniform matrices.

    Args:
      num_units: Number of output units.

    Returns:
      cell: An instance of RNNCell with variable initializers that match the
        skip-thoughts paper.
    """
    return gru_cell.LayerNormGRUCell(
        num_units,
        w_initializer=self.uniform_initializer,
        u_initializer=random_orthonormal_initializer,
        b_initializer=tf.constant_initializer(0.0))

  def build_encoder(self):
    """Builds the sentence encoder.

    Inputs:
      self.encode_emb
      self.encode_mask

    Outputs:
      self.thought_vectors

    Raises:
      ValueError: if config.bidirectional_encoder is True and config.encoder_dim
        is odd.
    """
    with tf.variable_scope("encoder") as scope:
      # Cast the mask to int32 BEFORE summing. In "encode" mode the mask
      # placeholder is int8, so summing first would overflow for sentences
      # longer than 127 words and produce a wrong sequence length.
      length = tf.reduce_sum(
          tf.to_int32(self.encode_mask), 1, name="length")

      if self.config.bidirectional_encoder:
        if self.config.encoder_dim % 2:
          raise ValueError(
              "encoder_dim must be even when using a bidirectional encoder.")
        # Each direction gets half of the units so that the concatenated
        # state has encoder_dim units.
        num_units = self.config.encoder_dim // 2
        cell_fw = self._initialize_gru_cell(num_units)  # Forward encoder
        cell_bw = self._initialize_gru_cell(num_units)  # Backward encoder
        _, states = tf.nn.bidirectional_dynamic_rnn(
            cell_fw=cell_fw,
            cell_bw=cell_bw,
            inputs=self.encode_emb,
            sequence_length=length,
            dtype=tf.float32,
            scope=scope)
        thought_vectors = tf.concat(states, 1, name="thought_vectors")
      else:
        cell = self._initialize_gru_cell(self.config.encoder_dim)
        _, state = tf.nn.dynamic_rnn(
            cell=cell,
            inputs=self.encode_emb,
            sequence_length=length,
            dtype=tf.float32,
            scope=scope)
        # Use an identity operation to name the Tensor in the Graph.
        thought_vectors = tf.identity(state, name="thought_vectors")

    self.thought_vectors = thought_vectors

  def _build_decoder(self, name, embeddings, targets, mask, initial_state,
                     reuse_logits):
    """Builds a sentence decoder.

    The masked sum of the per-word cross entropy losses is registered with
    tf.losses and logged as a scalar summary. The unreduced losses and their
    weights are appended to self.target_cross_entropy_losses and
    self.target_cross_entropy_loss_weights.

    Args:
      name: Decoder name.
      embeddings: Batch of sentences to decode; a float32 Tensor with shape
        [batch_size, padded_length, emb_dim].
      targets: Batch of target word ids; an int64 Tensor with shape
        [batch_size, padded_length].
      mask: A 0/1 Tensor with shape [batch_size, padded_length].
      initial_state: Initial state of the GRU. A float32 Tensor with shape
        [batch_size, num_gru_cells].
      reuse_logits: Whether to reuse the logits weights.
    """
    # Decoder RNN.
    cell = self._initialize_gru_cell(self.config.encoder_dim)
    with tf.variable_scope(name) as scope:
      # Add a padding word at the start of each sentence (to correspond to the
      # prediction of the first word) and remove the last word.
      decoder_input = tf.pad(
          embeddings[:, :-1, :], [[0, 0], [1, 0], [0, 0]], name="input")
      length = tf.reduce_sum(mask, 1, name="length")
      decoder_output, _ = tf.nn.dynamic_rnn(
          cell=cell,
          inputs=decoder_input,
          sequence_length=length,
          initial_state=initial_state,
          scope=scope)

    # Stack batch vertically.
    decoder_output = tf.reshape(decoder_output, [-1, self.config.encoder_dim])
    targets = tf.reshape(targets, [-1])
    weights = tf.to_float(tf.reshape(mask, [-1]))

    # Logits.
    with tf.variable_scope("logits", reuse=reuse_logits) as scope:
      logits = tf.contrib.layers.fully_connected(
          inputs=decoder_output,
          num_outputs=self.config.vocab_size,
          activation_fn=None,
          weights_initializer=self.uniform_initializer,
          scope=scope)

    losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=targets, logits=logits)
    # Padded positions have weight 0 and so do not contribute to the loss.
    batch_loss = tf.reduce_sum(losses * weights)
    tf.losses.add_loss(batch_loss)

    tf.summary.scalar("losses/" + name, batch_loss)

    self.target_cross_entropy_losses.append(losses)
    self.target_cross_entropy_loss_weights.append(weights)

  def build_decoders(self):
    """Builds the sentence decoders.

    Nothing is built in "encode" mode.

    Inputs:
      self.decode_pre_emb
      self.decode_post_emb
      self.decode_pre_ids
      self.decode_post_ids
      self.decode_pre_mask
      self.decode_post_mask
      self.thought_vectors

    Outputs:
      self.target_cross_entropy_losses
      self.target_cross_entropy_loss_weights
    """
    if self.mode != "encode":
      # Pre-sentence decoder.
      self._build_decoder("decoder_pre", self.decode_pre_emb,
                          self.decode_pre_ids, self.decode_pre_mask,
                          self.thought_vectors, False)

      # Post-sentence decoder. Logits weights are reused.
      self._build_decoder("decoder_post", self.decode_post_emb,
                          self.decode_post_ids, self.decode_post_mask,
                          self.thought_vectors, True)

  def build_loss(self):
    """Builds the loss Tensor.

    Nothing is built in "encode" mode, so self.total_loss stays None there.

    Outputs:
      self.total_loss
    """
    if self.mode != "encode":
      total_loss = tf.losses.get_total_loss()
      tf.summary.scalar("losses/total", total_loss)

      self.total_loss = total_loss

  def build_global_step(self):
    """Builds the global step Tensor.

    Outputs:
      self.global_step
    """
    self.global_step = tf.contrib.framework.create_global_step()

  def build(self):
    """Creates all ops for training, evaluation or encoding."""
    self.build_inputs()
    self.build_word_embeddings()
    self.build_encoder()
    self.build_decoders()
    self.build_loss()
    self.build_global_step()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for tensorflow_models.skip_thoughts.skip_thoughts_model."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow as tf
from skip_thoughts import configuration
from skip_thoughts import skip_thoughts_model
class SkipThoughtsModel(skip_thoughts_model.SkipThoughtsModel):
  """SkipThoughtsModel variant whose inputs are random instead of read from disk."""

  def build_inputs(self):
    """Feeds the model random word ids and all-ones masks outside encode mode."""
    if self.mode == "encode":
      # Encode mode never touches the disk, so the parent implementation is
      # used unchanged.
      return super(SkipThoughtsModel, self).build_inputs()

    def _random_ids():
      # One batch of 15-word sentences with ids drawn from the vocabulary.
      return tf.random_uniform(
          [self.config.batch_size, 15],
          minval=0,
          maxval=self.config.vocab_size,
          dtype=tf.int64)

    self.encode_ids = _random_ids()
    self.decode_pre_ids = _random_ids()
    self.decode_post_ids = _random_ids()

    # Every position counts as a real word.
    self.encode_mask = tf.ones_like(self.encode_ids)
    self.decode_pre_mask = tf.ones_like(self.decode_pre_ids)
    self.decode_post_mask = tf.ones_like(self.decode_post_ids)
class SkipThoughtsModelTest(tf.test.TestCase):
  """Checks parameter counts and output shapes of the model in every mode."""

  def setUp(self):
    super(SkipThoughtsModelTest, self).setUp()
    self._model_config = configuration.model_config()

  def _countModelParameters(self):
    """Counts the number of parameters in the model at top level scope.

    Returns:
      A dict mapping the first component of each global variable's name to the
      total number of elements of the variables sharing that component.
    """
    counter = {}
    for v in tf.global_variables():
      name = v.op.name.split("/")[0]
      num_params = v.get_shape().num_elements()
      if not num_params:
        self.fail("Could not infer num_elements from Variable %s" % v.op.name)
      counter[name] = counter.get(name, 0) + num_params
    return counter

  def _checkModelParameters(self):
    """Verifies the number of parameters in the model."""
    param_counts = self._countModelParameters()
    expected_param_counts = {
        # vocab_size * embedding_size
        "word_embedding": 12400000,
        # GRU Cells
        "encoder": 21772800,
        "decoder_pre": 21772800,
        "decoder_post": 21772800,
        # (encoder_dim + 1) * vocab_size
        "logits": 48020000,
        "global_step": 1,
    }
    self.assertDictEqual(expected_param_counts, param_counts)

  def _checkOutputs(self, expected_shapes, feed_dict=None):
    """Verifies that the model produces expected outputs.

    Args:
      expected_shapes: A dict mapping Tensor or Tensor name to expected output
        shape.
      feed_dict: Values of Tensors to feed into Session.run().
    """
    # Materialize the keys: a Python 3 dict view cannot be indexed, and a
    # list keeps the fetch order aligned with the returned outputs.
    fetches = list(expected_shapes.keys())

    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      outputs = sess.run(fetches, feed_dict)

    for tensor, output in zip(fetches, outputs):
      expected = expected_shapes[tensor]
      actual = output.shape
      if expected != actual:
        self.fail("Tensor %s has shape %s (expected %s)." % (tensor, actual,
                                                             expected))

  def _checkTrainOrEvalBuild(self, mode):
    """Builds the model in `mode` and verifies parameters and output shapes.

    Args:
      mode: "train" or "eval"; both modes are expected to produce the same
        parameter counts and output shapes.
    """
    model = SkipThoughtsModel(self._model_config, mode=mode)
    model.build()

    self._checkModelParameters()

    expected_shapes = {
        # [batch_size, length]
        model.encode_ids: (128, 15),
        model.decode_pre_ids: (128, 15),
        model.decode_post_ids: (128, 15),
        model.encode_mask: (128, 15),
        model.decode_pre_mask: (128, 15),
        model.decode_post_mask: (128, 15),
        # [batch_size, length, word_embedding_dim]
        model.encode_emb: (128, 15, 620),
        model.decode_pre_emb: (128, 15, 620),
        model.decode_post_emb: (128, 15, 620),
        # [batch_size, encoder_dim]
        model.thought_vectors: (128, 2400),
        # [batch_size * length]
        model.target_cross_entropy_losses[0]: (1920,),
        model.target_cross_entropy_losses[1]: (1920,),
        # [batch_size * length]
        model.target_cross_entropy_loss_weights[0]: (1920,),
        model.target_cross_entropy_loss_weights[1]: (1920,),
        # Scalar
        model.total_loss: (),
    }
    self._checkOutputs(expected_shapes)

  def testBuildForTraining(self):
    self._checkTrainOrEvalBuild("train")

  def testBuildForEval(self):
    self._checkTrainOrEvalBuild("eval")

  def testBuildForEncode(self):
    model = SkipThoughtsModel(self._model_config, mode="encode")
    model.build()

    # Test feeding a batch of word embeddings to get skip thought vectors.
    encode_emb = np.random.rand(64, 15, 620)
    encode_mask = np.ones((64, 15), dtype=np.int64)
    feed_dict = {model.encode_emb: encode_emb, model.encode_mask: encode_mask}
    expected_shapes = {
        # [batch_size, encoder_dim]
        model.thought_vectors: (64, 2400),
    }
    self._checkOutputs(expected_shapes, feed_dict)
# When executed as a script, hand control to the TensorFlow test runner.
if __name__ == "__main__":
  tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tracks training progress via per-word perplexity.
This script should be run concurrently with training so that summaries show up
in TensorBoard.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import os.path
import time
import numpy as np
import tensorflow as tf
from skip_thoughts import configuration
from skip_thoughts import skip_thoughts_model
# Command-line flags of the evaluation script. The three string flags default
# to None; main() raises ValueError when any of them is left unset.
FLAGS = tf.flags.FLAGS

tf.flags.DEFINE_string("input_file_pattern", None,
                       "File pattern of sharded TFRecord input files.")
tf.flags.DEFINE_string("checkpoint_dir", None,
                       "Directory containing model checkpoints.")
tf.flags.DEFINE_string("eval_dir", None, "Directory to write event logs to.")

tf.flags.DEFINE_integer("eval_interval_secs", 600,
                        "Interval between evaluation runs.")
tf.flags.DEFINE_integer("num_eval_examples", 50000,
                        "Number of examples for evaluation.")

tf.flags.DEFINE_integer("min_global_step", 100,
                        "Minimum global step to run evaluation.")

# Emit INFO-level log messages.
tf.logging.set_verbosity(tf.logging.INFO)
def evaluate_model(sess, losses, weights, num_batches, global_step,
                   summary_writer, summary_op):
  """Computes perplexity-per-word over the evaluation dataset.

  Summaries and perplexity-per-word are written out to the eval directory.
  Perplexity is exp(sum(losses * weights) / sum(weights)) accumulated over
  num_batches evaluations of `losses` and `weights`.

  Args:
    sess: Session object.
    losses: A Tensor of any shape; the target cross entropy losses for the
      current batch.
    weights: A Tensor of weights corresponding to losses.
    num_batches: Integer; the number of evaluation batches.
    global_step: Integer; global step of the model checkpoint.
    summary_writer: Instance of SummaryWriter.
    summary_op: Op for generating model summaries.
  """
  # Log model summaries on a single batch.
  summary_str = sess.run(summary_op)
  summary_writer.add_summary(summary_str, global_step)

  start_time = time.time()
  sum_losses = 0.0
  sum_weights = 0.0
  # range (not xrange) so the loop also runs under Python 3.
  for i in range(num_batches):
    batch_losses, batch_weights = sess.run([losses, weights])
    sum_losses += np.sum(batch_losses * batch_weights)
    sum_weights += np.sum(batch_weights)
    # Report progress on the first batch and every 100 batches after it.
    if not i % 100:
      tf.logging.info("Computed losses for %d of %d batches.", i + 1,
                      num_batches)
  eval_time = time.time() - start_time

  perplexity = math.exp(sum_losses / sum_weights)
  tf.logging.info("Perplexity = %f (%.2f sec)", perplexity, eval_time)

  # Log perplexity to the SummaryWriter.
  summary = tf.Summary()
  value = summary.value.add()
  value.simple_value = perplexity
  value.tag = "perplexity"
  summary_writer.add_summary(summary, global_step)

  # Write the Events file to the eval directory.
  summary_writer.flush()
  tf.logging.info("Finished processing evaluation at global step %d.",
                  global_step)
def run_once(model, losses, weights, saver, summary_writer, summary_op):
  """Runs one evaluation pass over the most recent checkpoint.

  The pass is skipped when FLAGS.checkpoint_dir holds no checkpoint or when
  the checkpoint's global step is below FLAGS.min_global_step.

  Args:
    model: Instance of SkipThoughtsModel; the model to evaluate.
    losses: Tensor; the target cross entropy losses for the current batch.
    weights: A Tensor of weights corresponding to losses.
    saver: Instance of tf.train.Saver for restoring model Variables.
    summary_writer: Instance of FileWriter.
    summary_op: Op for generating model summaries.
  """
  checkpoint = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
  if not checkpoint:
    tf.logging.info("Skipping evaluation. No checkpoint found in: %s",
                    FLAGS.checkpoint_dir)
    return

  with tf.Session() as sess:
    # Restore the variables and read back the step the checkpoint was saved at.
    tf.logging.info("Loading model from checkpoint: %s", checkpoint)
    saver.restore(sess, checkpoint)
    step = tf.train.global_step(sess, model.global_step.name)
    tf.logging.info("Successfully loaded %s at global step = %d.",
                    os.path.basename(checkpoint), step)

    if step < FLAGS.min_global_step:
      tf.logging.info("Skipping evaluation. Global step = %d < %d", step,
                      FLAGS.min_global_step)
      return

    # Input queues need their runner threads before any batch can be read.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)

    # Round up so that at least num_eval_examples examples are evaluated.
    examples_per_batch = model.config.batch_size
    num_eval_batches = int(
        math.ceil(FLAGS.num_eval_examples / examples_per_batch))

    try:
      evaluate_model(sess, losses, weights, num_eval_batches, step,
                     summary_writer, summary_op)
    # NOTE(review): confirm tf.InvalidArgumentError is exported at top level in
    # the targeted TensorFlow version; the documented location is tf.errors.
    except tf.InvalidArgumentError:
      tf.logging.error(
          "Evaluation raised InvalidArgumentError (e.g. due to Nans).")
    finally:
      # Always shut the queue runner threads down, even after a failure.
      coord.request_stop()
      coord.join(threads, stop_grace_period_secs=10)
def main(unused_argv):
  """Builds the eval graph and evaluates the latest checkpoint in a loop.

  Raises:
    ValueError: If --input_file_pattern, --checkpoint_dir or --eval_dir is
      not set.
  """
  if not FLAGS.input_file_pattern:
    raise ValueError("--input_file_pattern is required.")
  if not FLAGS.checkpoint_dir:
    raise ValueError("--checkpoint_dir is required.")
  if not FLAGS.eval_dir:
    raise ValueError("--eval_dir is required.")

  # Event logs go here; make sure the directory exists.
  output_dir = FLAGS.eval_dir
  if not tf.gfile.IsDirectory(output_dir):
    tf.logging.info("Creating eval directory: %s", output_dir)
    tf.gfile.MakeDirs(output_dir)

  graph = tf.Graph()
  with graph.as_default():
    # Evaluation reads the input in order, without shuffling.
    eval_config = configuration.model_config(
        input_file_pattern=FLAGS.input_file_pattern,
        input_queue_capacity=FLAGS.num_eval_examples,
        shuffle_input_data=False)
    model = skip_thoughts_model.SkipThoughtsModel(eval_config, mode="eval")
    model.build()

    # Both decoders contribute to the perplexity, so join their outputs.
    losses = tf.concat(model.target_cross_entropy_losses, 0)
    weights = tf.concat(model.target_cross_entropy_loss_weights, 0)

    saver = tf.train.Saver()
    summary_op = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(output_dir)

    # No ops may be added to the graph from here on.
    graph.finalize()

    # One evaluation per eval_interval_secs, forever.
    while True:
      cycle_start = time.time()
      timestamp = time.strftime("%Y-%m-%d-%H:%M:%S", time.localtime())
      tf.logging.info("Starting evaluation at " + timestamp)
      run_once(model, losses, weights, saver, summary_writer, summary_op)
      # Sleep only for whatever is left of the interval after the run.
      remaining = cycle_start + FLAGS.eval_interval_secs - time.time()
      if remaining > 0:
        time.sleep(remaining)
# When executed as a script, let tf.app.run() start the program.
if __name__ == "__main__":
  tf.app.run()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Train the skip-thoughts model."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from skip_thoughts import configuration
from skip_thoughts import skip_thoughts_model
# Command-line flags of the training script. Both flags default to None;
# main() raises ValueError when either of them is left unset.
FLAGS = tf.flags.FLAGS

tf.flags.DEFINE_string("input_file_pattern", None,
                       "File pattern of sharded TFRecord files containing "
                       "tf.Example protos.")
tf.flags.DEFINE_string("train_dir", None,
                       "Directory for saving and loading checkpoints.")

# Emit INFO-level log messages.
tf.logging.set_verbosity(tf.logging.INFO)
def _setup_learning_rate(config, global_step):
  """Creates the learning rate Tensor, decayed exponentially if configured.

  Args:
    config: Object containing learning rate configuration parameters.
    global_step: Tensor; the global step.

  Returns:
    learning_rate: Tensor; an exponentially decayed learning rate when
      config.learning_rate_decay_factor is positive, otherwise a constant.
  """
  decay_enabled = config.learning_rate_decay_factor > 0
  if not decay_enabled:
    return tf.constant(config.learning_rate)

  return tf.train.exponential_decay(
      learning_rate=float(config.learning_rate),
      global_step=global_step,
      decay_steps=config.learning_rate_decay_steps,
      decay_rate=config.learning_rate_decay_factor,
      staircase=False)
def main(unused_argv):
  """Builds the training graph and runs the training loop.

  Raises:
    ValueError: If --input_file_pattern or --train_dir is not set.
  """
  if not FLAGS.input_file_pattern:
    raise ValueError("--input_file_pattern is required.")
  if not FLAGS.train_dir:
    raise ValueError("--train_dir is required.")

  model_settings = configuration.model_config(
      input_file_pattern=FLAGS.input_file_pattern)
  train_settings = configuration.training_config()

  tf.logging.info("Building training graph.")
  graph = tf.Graph()
  with graph.as_default():
    model = skip_thoughts_model.SkipThoughtsModel(model_settings, mode="train")
    model.build()

    # Adam with an optionally decayed learning rate and gradient clipping.
    optimizer = tf.train.AdamOptimizer(
        _setup_learning_rate(train_settings, model.global_step))
    train_tensor = tf.contrib.slim.learning.create_train_op(
        total_loss=model.total_loss,
        optimizer=optimizer,
        global_step=model.global_step,
        clip_gradient_norm=train_settings.clip_gradient_norm)

    saver = tf.train.Saver()

  # The graph is passed explicitly, so this can run outside the graph context.
  tf.contrib.slim.learning.train(
      train_op=train_tensor,
      logdir=FLAGS.train_dir,
      graph=graph,
      global_step=model.global_step,
      number_of_steps=train_settings.number_of_steps,
      save_summaries_secs=train_settings.save_summaries_secs,
      saver=saver,
      save_interval_secs=train_settings.save_model_secs)
# When executed as a script, let tf.app.run() start the program.
if __name__ == "__main__":
  tf.app.run()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Compute an expanded vocabulary of embeddings using a word2vec model.
This script loads the word embeddings from a trained skip-thoughts model and
from a trained word2vec model (typically with a larger vocabulary). It trains a
linear regression model without regularization to learn a linear mapping from
the word2vec embedding space to the skip-thoughts embedding space. The model is
then applied to all words in the word2vec vocabulary, yielding vectors in the
skip-thoughts word embedding space for the union of the two vocabularies.
The linear regression task is to learn a parameter matrix W to minimize
|| X - Y * W ||^2,
where X is a matrix of skip-thoughts embeddings of shape [num_words, dim1],
Y is a matrix of word2vec embeddings of shape [num_words, dim2], and W is a
matrix of shape [dim2, dim1].
This is based on the "Translation Matrix" method from the paper:
"Exploiting Similarities among Languages for Machine Translation"
Tomas Mikolov, Quoc V. Le, Ilya Sutskever
https://arxiv.org/abs/1309.4168
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import os.path
import gensim.models
import numpy as np
import sklearn.linear_model
import tensorflow as tf
# Command-line flags of the vocabulary expansion script; all default to None.
FLAGS = tf.flags.FLAGS

tf.flags.DEFINE_string("skip_thoughts_model", None,
                       "Checkpoint file or directory containing a checkpoint "
                       "file.")
tf.flags.DEFINE_string("skip_thoughts_vocab", None,
                       "Path to vocabulary file containing a list of newline-"
                       "separated words where the word id is the "
                       "corresponding 0-based index in the file.")
tf.flags.DEFINE_string("word2vec_model", None,
                       "File containing a word2vec model in binary format.")
tf.flags.DEFINE_string("output_dir", None, "Output directory.")

# Emit INFO-level log messages.
tf.logging.set_verbosity(tf.logging.INFO)
def _load_skip_thoughts_embeddings(checkpoint_path):
  """Reads the "word_embedding" tensor out of a skip-thoughts checkpoint.

  Args:
    checkpoint_path: Either a checkpoint file, or a directory from which the
      latest checkpoint is selected.

  Returns:
    word_embedding: A numpy array of shape [vocab_size, embedding_dim].

  Raises:
    ValueError: If checkpoint_path is a directory that holds no checkpoint.
  """
  # Start from the assumption that we were handed a checkpoint file, and only
  # resolve further when the path turns out to be a directory.
  resolved_path = checkpoint_path
  if tf.gfile.IsDirectory(checkpoint_path):
    resolved_path = tf.train.latest_checkpoint(checkpoint_path)
    if not resolved_path:
      raise ValueError("No checkpoint file found in %s" % checkpoint_path)

  tf.logging.info("Loading skip-thoughts embedding matrix from %s",
                  resolved_path)
  embedding_matrix = tf.train.NewCheckpointReader(resolved_path).get_tensor(
      "word_embedding")
  tf.logging.info("Loaded skip-thoughts embedding matrix of shape %s",
                  embedding_matrix.shape)
  return embedding_matrix
def _load_vocabulary(filename):
  """Loads a vocabulary file.

  Each line of the file holds one word; the word's id is the 0-based index of
  its line.

  Args:
    filename: Path to text file containing newline-separated words.

  Returns:
    vocab: An OrderedDict mapping each word (as unicode text) to its word id,
      in file order.

  Raises:
    AssertionError: If the same word appears on more than one line.
  """
  tf.logging.info("Reading vocabulary from %s", filename)
  vocab = collections.OrderedDict()
  with tf.gfile.GFile(filename, mode="r") as f:
    for i, line in enumerate(f):
      # Lines arrive as bytes on Python 2 but as text on Python 3, where
      # calling .decode() would raise AttributeError. as_text() accepts both
      # and decodes bytes as UTF-8, so the result is unicode either way.
      word = tf.compat.as_text(line).strip()
      assert word not in vocab, "Attempting to add word twice: %s" % word
      vocab[word] = i
  tf.logging.info("Read vocabulary of size %d", len(vocab))
  return vocab
def _expand_vocabulary(skip_thoughts_emb, skip_thoughts_vocab, word2vec):
  """Projects word2vec vectors into the skip-thoughts embedding space.

  A linear regression is fitted on the words present in both vocabularies
  (word2vec vector -> skip-thoughts vector) and then applied to the word2vec
  vocabulary. Words already known to skip-thoughts keep their original
  embeddings.

  Args:
    skip_thoughts_emb: A numpy array of shape [skip_thoughts_vocab_size,
      skip_thoughts_embedding_dim].
    skip_thoughts_vocab: A dictionary of word to id.
    word2vec: An instance of gensim.models.Word2Vec.

  Returns:
    combined_emb: An OrderedDict mapping words to embedding vectors.
  """
  # Collect the overlap of the two vocabularies, in word2vec iteration order.
  tf.logging.info("Finding shared words")
  shared_words = []
  for word in word2vec.vocab:
    if word in skip_thoughts_vocab:
      shared_words.append(word)

  # Build the paired training matrices: regression targets come from
  # skip-thoughts, regression inputs from word2vec.
  tf.logging.info("Selecting embeddings for %d shared words", len(shared_words))
  shared_ids = [skip_thoughts_vocab[word] for word in shared_words]
  shared_st_emb = skip_thoughts_emb[shared_ids]
  shared_w2v_emb = word2vec[shared_words]

  # Fit the word2vec -> skip-thoughts linear map.
  tf.logging.info("Training linear regression model")
  model = sklearn.linear_model.LinearRegression()
  model.fit(shared_w2v_emb, shared_st_emb)

  tf.logging.info("Creating embeddings for expanded vocabuary")
  combined_emb = collections.OrderedDict()

  # First pass: every word2vec word is projected through the fitted model.
  for word in word2vec.vocab:
    if "_" in word:
      # Underscores stand in for spaces (multi-word phrases); skip those.
      continue
    projected = model.predict(word2vec[word].reshape(1, -1))
    combined_emb[word] = projected.reshape(-1)

  # Second pass: the original skip-thoughts vectors take precedence and
  # overwrite any projected vector stored for the same word.
  for word, word_id in skip_thoughts_vocab.items():
    combined_emb[word] = skip_thoughts_emb[word_id]

  tf.logging.info("Created expanded vocabulary of %d words", len(combined_emb))
  return combined_emb
def main(unused_argv):
  """Runs vocabulary expansion and writes vocab.txt and embeddings.npy.

  Args:
    unused_argv: Positional command-line arguments passed by tf.app.run();
      ignored.

  Raises:
    ValueError: If any of the required flags is missing.
  """
  if not FLAGS.skip_thoughts_model:
    raise ValueError("--skip_thoughts_model is required.")
  if not FLAGS.skip_thoughts_vocab:
    raise ValueError("--skip_thoughts_vocab is required.")
  if not FLAGS.word2vec_model:
    raise ValueError("--word2vec_model is required.")
  if not FLAGS.output_dir:
    raise ValueError("--output_dir is required.")

  if not tf.gfile.IsDirectory(FLAGS.output_dir):
    tf.gfile.MakeDirs(FLAGS.output_dir)

  # Load the skip-thoughts embeddings and vocabulary.
  skip_thoughts_emb = _load_skip_thoughts_embeddings(FLAGS.skip_thoughts_model)
  skip_thoughts_vocab = _load_vocabulary(FLAGS.skip_thoughts_vocab)

  # Load the Word2Vec model.
  # NOTE(review): newer gensim releases appear to expose this loader on
  # KeyedVectors instead -- confirm against the installed gensim version.
  word2vec = gensim.models.Word2Vec.load_word2vec_format(
      FLAGS.word2vec_model, binary=True)

  # Run vocabulary expansion.
  embedding_map = _expand_vocabulary(skip_thoughts_emb, skip_thoughts_vocab,
                                     word2vec)

  # Save the output. keys()/values() are materialised into lists because on
  # Python 3 they are dict views; np.array() of a view would produce a 0-d
  # object array rather than a [num_words, dim] matrix. Both lists follow the
  # same (insertion) order, so row i of the matrix belongs to line i of
  # vocab.txt.
  vocab = list(embedding_map.keys())
  vocab_file = os.path.join(FLAGS.output_dir, "vocab.txt")
  with tf.gfile.GFile(vocab_file, "w") as f:
    f.write("\n".join(vocab))
  tf.logging.info("Wrote vocabulary file to %s", vocab_file)

  embeddings = np.array(list(embedding_map.values()))
  embeddings_file = os.path.join(FLAGS.output_dir, "embeddings.npy")
  np.save(embeddings_file, embeddings)
  tf.logging.info("Wrote embeddings file to %s", embeddings_file)


if __name__ == "__main__":
  tf.app.run()
# Description:
# Contains files for loading, training and evaluating TF-Slim-based models.
package(default_visibility = [":internal"])
package(default_visibility = [
":internal",
"//domain_adaptation:__subpackages__",
])
licenses(["notice"]) # Apache 2.0
......
......@@ -13,7 +13,7 @@ converting them
to TensorFlow's native TFRecord format and reading them in using TF-Slim's
data reading and queueing utilities. You can easily train any model on any of
these datasets, as we demonstrate below. We've also included a
[jupyter notebook](https://github.com/tensorflow/models/blob/master/slim/slim_walkthough.ipynb),
[jupyter notebook](https://github.com/tensorflow/models/blob/master/slim/slim_walkthrough.ipynb),
which provides working examples of how to use TF-Slim for image classification.
## Contacts
......@@ -41,23 +41,9 @@ prerequisite packages.
## Installing latest version of TF-slim
As of 8/28/16, the latest [stable release of TF](https://www.tensorflow.org/versions/r0.10/get_started/os_setup.html#pip-installation)
is r0.10, which contains most of TF-Slim but not some later additions. To obtain the
latest version, you must install the most recent nightly build of
TensorFlow. You can find the latest nightly binaries at
[TensorFlow Installation](https://github.com/tensorflow/tensorflow#installation)
in the section that reads "People who are a little more adventurous can
also try our nightly binaries". Copy the link address that corresponds to
the appropriate machine architecture and python version, and pip install
it. For example:
```shell
export TF_BINARY_URL=https://ci.tensorflow.org/view/Nightly/job/nightly-matrix-cpu/TF_BUILD_CONTAINER_TYPE=CPU,TF_BUILD_IS_OPT=OPT,TF_BUILD_IS_PIP=PIP,TF_BUILD_PYTHON_VERSION=PYTHON2,label=cpu-slave/lastSuccessfulBuild/artifact/pip_test/whl/tensorflow-0.10.0rc0-cp27-none-linux_x86_64.whl
sudo pip install --upgrade $TF_BINARY_URL
```
To test this has worked, execute the following command; it should run
without raising any errors.
TF-Slim is available as `tf.contrib.slim` via TensorFlow 1.0. To test that your
installation is working, execute the following command; it should run without
raising any errors.
```
python -c "import tensorflow.contrib.slim as slim; eval = slim.evaluation.evaluate_once"
......@@ -140,7 +126,7 @@ You can use the same script to create the mnist and cifar10 datasets.
However, for ImageNet, you have to follow the instructions
[here](https://github.com/tensorflow/models/blob/master/inception/README.md#getting-started).
Note that you first have to sign up for an account at image-net.org.
Also, the download can take several hours, and uses about 500MB.
Also, the download can take several hours, and could use up to 500GB.
## Creating a TF-Slim Dataset Descriptor.
......@@ -192,12 +178,12 @@ image classification dataset.
In the table below, we list each model, the corresponding
TensorFlow model file, the link to the model checkpoint, and the top 1 and top 5
accuracy (on the imagenet test set).
Note that the VGG and ResNet parameters have been converted from their original
Note that the VGG and ResNet V1 parameters have been converted from their original
caffe formats
([here](https://github.com/BVLC/caffe/wiki/Model-Zoo#models-used-by-the-vgg-team-in-ilsvrc-2014)
and
[here](https://github.com/KaimingHe/deep-residual-networks)),
whereas the Inception parameters have been trained internally at
whereas the Inception and ResNet V2 parameters have been trained internally at
Google. Also be aware that these accuracies were computed by evaluating using a
single image crop. Some academic papers report higher accuracy by using multiple
crops at multiple scales.
......@@ -209,12 +195,19 @@ Model | TF-Slim File | Checkpoint | Top-1 Accuracy| Top-5 Accuracy |
[Inception V3](http://arxiv.org/abs/1512.00567)|[Code](https://github.com/tensorflow/models/blob/master/slim/nets/inception_v3.py)|[inception_v3_2016_08_28.tar.gz](http://download.tensorflow.org/models/inception_v3_2016_08_28.tar.gz)|78.0|93.9|
[Inception V4](http://arxiv.org/abs/1602.07261)|[Code](https://github.com/tensorflow/models/blob/master/slim/nets/inception_v4.py)|[inception_v4_2016_09_09.tar.gz](http://download.tensorflow.org/models/inception_v4_2016_09_09.tar.gz)|80.2|95.2|
[Inception-ResNet-v2](http://arxiv.org/abs/1602.07261)|[Code](https://github.com/tensorflow/models/blob/master/slim/nets/inception_resnet_v2.py)|[inception_resnet_v2.tar.gz](http://download.tensorflow.org/models/inception_resnet_v2_2016_08_30.tar.gz)|80.4|95.3|
[ResNet 50](https://arxiv.org/abs/1512.03385)|[Code](https://github.com/tensorflow/models/blob/master/slim/nets/resnet_v1.py)|[resnet_v1_50.tar.gz](http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz)|75.2|92.2|
[ResNet 101](https://arxiv.org/abs/1512.03385)|[Code](https://github.com/tensorflow/models/blob/master/slim/nets/resnet_v1.py)|[resnet_v1_101.tar.gz](http://download.tensorflow.org/models/resnet_v1_101_2016_08_28.tar.gz)|76.4|92.9|
[ResNet 152](https://arxiv.org/abs/1512.03385)|[Code](https://github.com/tensorflow/models/blob/master/slim/nets/resnet_v1.py)|[resnet_v1_152.tar.gz](http://download.tensorflow.org/models/resnet_v1_152_2016_08_28.tar.gz)|76.8|93.2|
[ResNet V1 50](https://arxiv.org/abs/1512.03385)|[Code](https://github.com/tensorflow/models/blob/master/slim/nets/resnet_v1.py)|[resnet_v1_50.tar.gz](http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz)|75.2|92.2|
[ResNet V1 101](https://arxiv.org/abs/1512.03385)|[Code](https://github.com/tensorflow/models/blob/master/slim/nets/resnet_v1.py)|[resnet_v1_101.tar.gz](http://download.tensorflow.org/models/resnet_v1_101_2016_08_28.tar.gz)|76.4|92.9|
[ResNet V1 152](https://arxiv.org/abs/1512.03385)|[Code](https://github.com/tensorflow/models/blob/master/slim/nets/resnet_v1.py)|[resnet_v1_152.tar.gz](http://download.tensorflow.org/models/resnet_v1_152_2016_08_28.tar.gz)|76.8|93.2|
[ResNet V2 50](https://arxiv.org/abs/1603.05027)^|[Code](https://github.com/tensorflow/models/blob/master/slim/nets/resnet_v2.py)|[resnet_v2_50.tar.gz](http://download.tensorflow.org/models/resnet_v2_50_2017_04_14.tar.gz)|75.6|92.8|
[ResNet V2 101](https://arxiv.org/abs/1603.05027)^|[Code](https://github.com/tensorflow/models/blob/master/slim/nets/resnet_v2.py)|[resnet_v2_101.tar.gz](http://download.tensorflow.org/models/resnet_v2_101_2017_04_14.tar.gz)|77.0|93.7|
[ResNet V2 152](https://arxiv.org/abs/1603.05027)^|[Code](https://github.com/tensorflow/models/blob/master/slim/nets/resnet_v2.py)|[resnet_v2_152.tar.gz](http://download.tensorflow.org/models/resnet_v2_152_2017_04_14.tar.gz)|77.8|94.1|
[VGG 16](http://arxiv.org/abs/1409.1556.pdf)|[Code](https://github.com/tensorflow/models/blob/master/slim/nets/vgg.py)|[vgg_16.tar.gz](http://download.tensorflow.org/models/vgg_16_2016_08_28.tar.gz)|71.5|89.8|
[VGG 19](http://arxiv.org/abs/1409.1556.pdf)|[Code](https://github.com/tensorflow/models/blob/master/slim/nets/vgg.py)|[vgg_19.tar.gz](http://download.tensorflow.org/models/vgg_19_2016_08_28.tar.gz)|71.1|89.8|
^ ResNet V2 models use Inception pre-processing and input image size of 299 (use
`--preprocessing_name inception --eval_image_size 299` when using
`eval_image_classifier.py`). Performance numbers for ResNet V2 models are
reported on ImageNet validation set.
Here is an example of how to download the Inception V3 checkpoint:
......@@ -303,8 +296,8 @@ $ python train_image_classifier.py \
--dataset_split_name=train \
--model_name=inception_v3 \
--checkpoint_path=${CHECKPOINT_PATH} \
--checkpoint_exclude_scopes=InceptionV3/Logits,InceptionV3/AuxLogits/Logits \
--trainable_scopes=InceptionV3/Logits,InceptionV3/AuxLogits/Logits
--checkpoint_exclude_scopes=InceptionV3/Logits,InceptionV3/AuxLogits \
--trainable_scopes=InceptionV3/Logits,InceptionV3/AuxLogits
```
......@@ -358,10 +351,10 @@ following error:
```bash
InvalidArgumentError: Assign requires shapes of both tensors to match. lhs shape= [1001] rhs shape= [1000]
```
This is due to the fact that the VGG and ResNet final layers have only 1000
This is due to the fact that the VGG and ResNet V1 final layers have only 1000
outputs rather than 1001.
To fix this issue, you can set the `--labels_offsets=1` flag. This results in
To fix this issue, you can set the `--labels_offset=1` flag. This results in
the ImageNet labels being shifted down by one:
......
......@@ -15,7 +15,7 @@
"""Provides data for the Cifar10 dataset.
The dataset scripts used to create the dataset can be found at:
tensorflow/models/slim/data/create_cifar10_dataset.py
tensorflow/models/slim/datasets/download_and_convert_cifar10.py
"""
from __future__ import absolute_import
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment