"docs/vscode:/vscode.git/clone" did not exist on "2f4eb4ac2f48a597825d3631a840afd855fe6b39"
Commit b7523ee5 authored by Ivan Bogatyy's avatar Ivan Bogatyy
Browse files
parents 66723d7d 2c6d74b7
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Converts a set of text files to TFRecord format with Example protos.
Each Example proto in the output contains the following fields:
decode_pre: list of int64 ids corresponding to the "previous" sentence.
encode: list of int64 ids corresponding to the "current" sentence.
decode_post: list of int64 ids corresponding to the "post" sentence.
In addition, the following files are generated:
vocab.txt: List of "<word> <id>" pairs, where <id> is the integer
encoding of <word> in the Example protos.
word_counts.txt: List of "<word> <count>" pairs, where <count> is the number
of occurrences of <word> in the input files.
The vocabulary of word ids is constructed from the --num_words most frequent
words. All other words get the <unk> word id.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import os
import numpy as np
import tensorflow as tf
from skip_thoughts.data import special_words
FLAGS = tf.flags.FLAGS
tf.flags.DEFINE_string("input_files", None,
"Comma-separated list of globs matching the input "
"files. The format of the input files is assumed to be "
"a list of newline-separated sentences, where each "
"sentence is already tokenized.")
tf.flags.DEFINE_string("vocab_file", "",
"(Optional) existing vocab file. Otherwise, a new vocab "
"file is created and written to the output directory. "
"The file format is a list of newline-separated words, "
"where the word id is the corresponding 0-based index "
"in the file.")
tf.flags.DEFINE_string("output_dir", None, "Output directory.")
tf.flags.DEFINE_integer("train_output_shards", 100,
"Number of output shards for the training set.")
tf.flags.DEFINE_integer("validation_output_shards", 1,
"Number of output shards for the validation set.")
tf.flags.DEFINE_integer("num_validation_sentences", 50000,
"Number of output shards for the validation set.")
tf.flags.DEFINE_integer("num_words", 20000,
"Number of words to include in the output.")
tf.flags.DEFINE_integer("max_sentences", 0,
"If > 0, the maximum number of sentences to output.")
tf.flags.DEFINE_integer("max_sentence_length", 30,
"If > 0, exclude sentences whose encode, decode_pre OR"
"decode_post sentence exceeds this length.")
tf.flags.DEFINE_boolean("add_eos", True,
"Whether to add end-of-sentence ids to the output.")
tf.logging.set_verbosity(tf.logging.INFO)
def _build_vocabulary(input_files):
"""Loads or builds the model vocabulary.
Args:
input_files: List of pre-tokenized input .txt files.
Returns:
vocab: A dictionary of word to id.
"""
if FLAGS.vocab_file:
tf.logging.info("Loading existing vocab file.")
vocab = collections.OrderedDict()
with tf.gfile.GFile(FLAGS.vocab_file, mode="r") as f:
for i, line in enumerate(f):
word = line.decode("utf-8").strip()
assert word not in vocab, "Attempting to add word twice: %s" % word
vocab[word] = i
tf.logging.info("Read vocab of size %d from %s",
len(vocab), FLAGS.vocab_file)
return vocab
tf.logging.info("Creating vocabulary.")
num = 0
wordcount = collections.Counter()
for input_file in input_files:
tf.logging.info("Processing file: %s", input_file)
for sentence in tf.gfile.FastGFile(input_file):
wordcount.update(sentence.split())
num += 1
if num % 1000000 == 0:
tf.logging.info("Processed %d sentences", num)
tf.logging.info("Processed %d sentences total", num)
words = list(wordcount.keys())
freqs = list(wordcount.values())
sorted_indices = np.argsort(freqs)[::-1]
vocab = collections.OrderedDict()
vocab[special_words.EOS] = special_words.EOS_ID
vocab[special_words.UNK] = special_words.UNK_ID
for w_id, w_index in enumerate(sorted_indices[0:FLAGS.num_words - 2]):
vocab[words[w_index]] = w_id + 2 # 0: EOS, 1: UNK.
tf.logging.info("Created vocab with %d words", len(vocab))
vocab_file = os.path.join(FLAGS.output_dir, "vocab.txt")
with tf.gfile.FastGFile(vocab_file, "w") as f:
f.write("\n".join(vocab.keys()))
tf.logging.info("Wrote vocab file to %s", vocab_file)
word_counts_file = os.path.join(FLAGS.output_dir, "word_counts.txt")
with tf.gfile.FastGFile(word_counts_file, "w") as f:
for i in sorted_indices:
f.write("%s %d\n" % (words[i], freqs[i]))
tf.logging.info("Wrote word counts file to %s", word_counts_file)
return vocab
def _int64_feature(value):
"""Helper for creating an Int64 Feature."""
return tf.train.Feature(int64_list=tf.train.Int64List(
value=[int(v) for v in value]))
def _sentence_to_ids(sentence, vocab):
"""Helper for converting a sentence (list of words) to a list of ids."""
ids = [vocab.get(w, special_words.UNK_ID) for w in sentence]
if FLAGS.add_eos:
ids.append(special_words.EOS_ID)
return ids
def _create_serialized_example(predecessor, current, successor, vocab):
"""Helper for creating a serialized Example proto."""
example = tf.train.Example(features=tf.train.Features(feature={
"decode_pre": _int64_feature(_sentence_to_ids(predecessor, vocab)),
"encode": _int64_feature(_sentence_to_ids(current, vocab)),
"decode_post": _int64_feature(_sentence_to_ids(successor, vocab)),
}))
return example.SerializeToString()
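# Editor's sketch (hypothetical helper, not used by the pipeline): parse a
# serialized proto back into Python lists to inspect the three id fields
# described in the module docstring.
def _example_fields(serialized):
  """Returns a dict mapping feature name to its list of int64 ids."""
  example = tf.train.Example()
  example.ParseFromString(serialized)
  return {name: list(feature.int64_list.value)
          for name, feature in example.features.feature.items()}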
def _process_input_file(filename, vocab, stats):
"""Processes the sentences in an input file.
Args:
filename: Path to a pre-tokenized input .txt file.
vocab: A dictionary of word to id.
stats: A Counter object for statistics.
Returns:
processed: A list of serialized Example protos
"""
tf.logging.info("Processing input file: %s", filename)
processed = []
predecessor = None # Predecessor sentence (list of words).
current = None # Current sentence (list of words).
successor = None # Successor sentence (list of words).
for successor_str in tf.gfile.FastGFile(filename):
stats.update(["sentences_seen"])
successor = successor_str.split()
# The first 2 sentences per file will be skipped.
if predecessor and current and successor:
stats.update(["sentences_considered"])
# Note that we are going to insert <EOS> later, so we only allow
# sentences with strictly less than max_sentence_length to pass.
if FLAGS.max_sentence_length and (
len(predecessor) >= FLAGS.max_sentence_length or len(current) >=
FLAGS.max_sentence_length or len(successor) >=
FLAGS.max_sentence_length):
stats.update(["sentences_too_long"])
else:
serialized = _create_serialized_example(predecessor, current, successor,
vocab)
processed.append(serialized)
stats.update(["sentences_output"])
predecessor = current
current = successor
sentences_seen = stats["sentences_seen"]
sentences_output = stats["sentences_output"]
if sentences_seen and sentences_seen % 100000 == 0:
tf.logging.info("Processed %d sentences (%d output)", sentences_seen,
sentences_output)
if FLAGS.max_sentences and sentences_output >= FLAGS.max_sentences:
break
tf.logging.info("Completed processing file %s", filename)
return processed
def _write_shard(filename, dataset, indices):
"""Writes a TFRecord shard."""
with tf.python_io.TFRecordWriter(filename) as writer:
for j in indices:
writer.write(dataset[j])
def _write_dataset(name, dataset, indices, num_shards):
"""Writes a sharded TFRecord dataset.
Args:
name: Name of the dataset (e.g. "train").
dataset: List of serialized Example protos.
indices: List of indices of 'dataset' to be written.
num_shards: The number of output shards.
"""
tf.logging.info("Writing dataset %s", name)
borders = np.int32(np.linspace(0, len(indices), num_shards + 1))
for i in range(num_shards):
filename = os.path.join(FLAGS.output_dir, "%s-%.5d-of-%.5d" % (name, i,
num_shards))
shard_indices = indices[borders[i]:borders[i + 1]]
_write_shard(filename, dataset, shard_indices)
tf.logging.info("Wrote dataset indices [%d, %d) to output shard %s",
borders[i], borders[i + 1], filename)
tf.logging.info("Finished writing %d sentences in dataset %s.",
len(indices), name)
def main(unused_argv):
if not FLAGS.input_files:
raise ValueError("--input_files is required.")
if not FLAGS.output_dir:
raise ValueError("--output_dir is required.")
if not tf.gfile.IsDirectory(FLAGS.output_dir):
tf.gfile.MakeDirs(FLAGS.output_dir)
input_files = []
for pattern in FLAGS.input_files.split(","):
match = tf.gfile.Glob(pattern)
if not match:
raise ValueError("Found no files matching %s" % pattern)
input_files.extend(match)
tf.logging.info("Found %d input files.", len(input_files))
vocab = _build_vocabulary(input_files)
tf.logging.info("Generating dataset.")
stats = collections.Counter()
dataset = []
for filename in input_files:
dataset.extend(_process_input_file(filename, vocab, stats))
if FLAGS.max_sentences and stats["sentences_output"] >= FLAGS.max_sentences:
break
tf.logging.info("Generated dataset with %d sentences.", len(dataset))
for k, v in stats.items():
tf.logging.info("%s: %d", k, v)
tf.logging.info("Shuffling dataset.")
np.random.seed(123)
shuffled_indices = np.random.permutation(len(dataset))
val_indices = shuffled_indices[:FLAGS.num_validation_sentences]
train_indices = shuffled_indices[FLAGS.num_validation_sentences:]
_write_dataset("train", dataset, train_indices, FLAGS.train_output_shards)
_write_dataset("validation", dataset, val_indices,
FLAGS.validation_output_shards)
if __name__ == "__main__":
tf.app.run()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Special word constants.
NOTE: The ids of the EOS and UNK constants should not be modified. It is assumed
that these always occupy the first two ids.
"""
# End of sentence.
EOS = "<eos>"
EOS_ID = 0
# Unknown.
UNK = "<unk>"
UNK_ID = 1
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Manager class for loading and encoding with multiple skip-thoughts models.
If multiple models are loaded at once then the encode() function returns the
concatenation of the outputs of each model.
Example usage:
manager = EncoderManager()
manager.load_model(model_config_1, vocabulary_file_1, embedding_matrix_file_1,
checkpoint_path_1)
manager.load_model(model_config_2, vocabulary_file_2, embedding_matrix_file_2,
checkpoint_path_2)
encodings = manager.encode(data)
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import numpy as np
import tensorflow as tf
from skip_thoughts import skip_thoughts_encoder
class EncoderManager(object):
"""Manager class for loading and encoding with skip-thoughts models."""
def __init__(self):
self.encoders = []
self.sessions = []
def load_model(self, model_config, vocabulary_file, embedding_matrix_file,
checkpoint_path):
"""Loads a skip-thoughts model.
Args:
model_config: Object containing parameters for building the model.
vocabulary_file: Path to vocabulary file containing a list of newline-
separated words where the word id is the corresponding 0-based index in
the file.
embedding_matrix_file: Path to a serialized numpy array of shape
[vocab_size, embedding_dim].
checkpoint_path: SkipThoughtsModel checkpoint file or a directory
containing a checkpoint file.
"""
tf.logging.info("Reading vocabulary from %s", vocabulary_file)
with tf.gfile.GFile(vocabulary_file, mode="r") as f:
lines = list(f.readlines())
reverse_vocab = [line.decode("utf-8").strip() for line in lines]
tf.logging.info("Loaded vocabulary with %d words.", len(reverse_vocab))
tf.logging.info("Loading embedding matrix from %s", embedding_matrix_file)
# Note: tf.gfile.GFile doesn't work here because np.load() calls f.seek()
# with 3 arguments.
with open(embedding_matrix_file, "rb") as f:
embedding_matrix = np.load(f)
tf.logging.info("Loaded embedding matrix with shape %s",
embedding_matrix.shape)
word_embeddings = collections.OrderedDict(
zip(reverse_vocab, embedding_matrix))
g = tf.Graph()
with g.as_default():
encoder = skip_thoughts_encoder.SkipThoughtsEncoder(word_embeddings)
restore_model = encoder.build_graph_from_config(model_config,
checkpoint_path)
sess = tf.Session(graph=g)
restore_model(sess)
self.encoders.append(encoder)
self.sessions.append(sess)
def encode(self,
data,
use_norm=True,
verbose=False,
batch_size=128,
use_eos=False):
"""Encodes a sequence of sentences as skip-thought vectors.
Args:
data: A list of input strings.
use_norm: If True, normalize output skip-thought vectors to unit L2 norm.
verbose: Whether to log every batch.
batch_size: Batch size for the RNN encoders.
use_eos: If True, append the end-of-sentence word to each input sentence.
Returns:
thought_vectors: A list of numpy arrays corresponding to 'data'.
Raises:
ValueError: If called before calling load_model.
"""
if not self.encoders:
raise ValueError(
"Must call load_model at least once before calling encode.")
encoded = []
for encoder, sess in zip(self.encoders, self.sessions):
encoded.append(
np.array(
encoder.encode(
sess,
data,
use_norm=use_norm,
verbose=verbose,
batch_size=batch_size,
use_eos=use_eos)))
return np.concatenate(encoded, axis=1)
def close(self):
"""Closes the active TensorFlow Sessions."""
for sess in self.sessions:
sess.close()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Script to evaluate a skip-thoughts model.
This script can evaluate a model with a unidirectional encoder ("uni-skip" in
the paper); or a model with a bidirectional encoder ("bi-skip"); or the
combination of a model with a unidirectional encoder and a model with a
bidirectional encoder ("combine-skip").
The uni-skip model (if it exists) is specified by the flags
--uni_vocab_file, --uni_embeddings_file, --uni_checkpoint_path.
The bi-skip model (if it exists) is specified by the flags
--bi_vocab_file, --bi_embeddings_file, --bi_checkpoint_path.
The evaluation tasks have different running times. SICK may take 5-10 minutes.
MSRP, TREC and CR may take 20-60 minutes. SUBJ, MPQA and MR may take 2+ hours.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from skipthoughts import eval_classification
from skipthoughts import eval_msrp
from skipthoughts import eval_sick
from skipthoughts import eval_trec
import tensorflow as tf
from skip_thoughts import configuration
from skip_thoughts import encoder_manager
FLAGS = tf.flags.FLAGS
tf.flags.DEFINE_string("eval_task", "CR",
"Name of the evaluation task to run. Available tasks: "
"MR, CR, SUBJ, MPQA, SICK, MSRP, TREC.")
tf.flags.DEFINE_string("data_dir", None, "Directory containing training data.")
tf.flags.DEFINE_string("uni_vocab_file", None,
"Path to vocabulary file containing a list of newline-"
"separated words where the word id is the "
"corresponding 0-based index in the file.")
tf.flags.DEFINE_string("bi_vocab_file", None,
"Path to vocabulary file containing a list of newline-"
"separated words where the word id is the "
"corresponding 0-based index in the file.")
tf.flags.DEFINE_string("uni_embeddings_file", None,
"Path to serialized numpy array of shape "
"[vocab_size, embedding_dim].")
tf.flags.DEFINE_string("bi_embeddings_file", None,
"Path to serialized numpy array of shape "
"[vocab_size, embedding_dim].")
tf.flags.DEFINE_string("uni_checkpoint_path", None,
"Checkpoint file or directory containing a checkpoint "
"file.")
tf.flags.DEFINE_string("bi_checkpoint_path", None,
"Checkpoint file or directory containing a checkpoint "
"file.")
tf.logging.set_verbosity(tf.logging.INFO)
def main(unused_argv):
if not FLAGS.data_dir:
raise ValueError("--data_dir is required.")
encoder = encoder_manager.EncoderManager()
# Maybe load unidirectional encoder.
if FLAGS.uni_checkpoint_path:
print("Loading unidirectional model...")
uni_config = configuration.model_config()
encoder.load_model(uni_config, FLAGS.uni_vocab_file,
FLAGS.uni_embeddings_file, FLAGS.uni_checkpoint_path)
# Maybe load bidirectional encoder.
if FLAGS.bi_checkpoint_path:
print("Loading bidirectional model...")
bi_config = configuration.model_config(bidirectional_encoder=True)
encoder.load_model(bi_config, FLAGS.bi_vocab_file, FLAGS.bi_embeddings_file,
FLAGS.bi_checkpoint_path)
if FLAGS.eval_task in ["MR", "CR", "SUBJ", "MPQA"]:
eval_classification.eval_nested_kfold(
encoder, FLAGS.eval_task, FLAGS.data_dir, use_nb=False)
elif FLAGS.eval_task == "SICK":
eval_sick.evaluate(encoder, evaltest=True, loc=FLAGS.data_dir)
elif FLAGS.eval_task == "MSRP":
eval_msrp.evaluate(
encoder, evalcv=True, evaltest=True, use_feats=True, loc=FLAGS.data_dir)
elif FLAGS.eval_task == "TREC":
eval_trec.evaluate(encoder, evalcv=True, evaltest=True, loc=FLAGS.data_dir)
else:
raise ValueError("Unrecognized eval_task: %s" % FLAGS.eval_task)
encoder.close()
if __name__ == "__main__":
tf.app.run()
package(default_visibility = ["//skip_thoughts:internal"])
licenses(["notice"]) # Apache 2.0
exports_files(["LICENSE"])
py_library(
name = "input_ops",
srcs = ["input_ops.py"],
srcs_version = "PY2AND3",
)
py_library(
name = "gru_cell",
srcs = ["gru_cell.py"],
srcs_version = "PY2AND3",
)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""GRU cell implementation for the skip-thought vectors model."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
_layer_norm = tf.contrib.layers.layer_norm
class LayerNormGRUCell(tf.contrib.rnn.RNNCell):
"""GRU cell with layer normalization.
The layer normalization implementation is based on:
https://arxiv.org/abs/1607.06450.
"Layer Normalization"
Jimmy Lei Ba, Jamie Ryan Kiros, Geoffrey E. Hinton
"""
def __init__(self,
num_units,
w_initializer,
u_initializer,
b_initializer,
activation=tf.nn.tanh):
"""Initializes the cell.
Args:
num_units: Number of cell units.
w_initializer: Initializer for the "W" (input) parameter matrices.
u_initializer: Initializer for the "U" (recurrent) parameter matrices.
b_initializer: Initializer for the "b" (bias) parameter vectors.
activation: Cell activation function.
"""
self._num_units = num_units
self._w_initializer = w_initializer
self._u_initializer = u_initializer
self._b_initializer = b_initializer
self._activation = activation
@property
def state_size(self):
return self._num_units
@property
def output_size(self):
return self._num_units
def _w_h_initializer(self):
"""Returns an initializer for the "W_h" parameter matrix.
See equation (23) in the paper. The "W_h" parameter matrix is the
concatenation of two parameter submatrices. The matrix returned is
[U_z, U_r].
Returns:
A Tensor with shape [num_units, 2 * num_units] as described above.
"""
def _initializer(shape, dtype=tf.float32, partition_info=None):
num_units = self._num_units
assert shape == [num_units, 2 * num_units]
u_z = self._u_initializer([num_units, num_units], dtype, partition_info)
u_r = self._u_initializer([num_units, num_units], dtype, partition_info)
return tf.concat([u_z, u_r], 1)
return _initializer
def _w_x_initializer(self, input_dim):
"""Returns an initializer for the "W_x" parameter matrix.
See equation (23) in the paper. The "W_x" parameter matrix is the
concatenation of two parameter submatrices. The matrix returned is
[W_z, W_r].
Args:
input_dim: The dimension of the cell inputs.
Returns:
A Tensor with shape [input_dim, 2 * num_units] as described above.
"""
def _initializer(shape, dtype=tf.float32, partition_info=None):
num_units = self._num_units
assert shape == [input_dim, 2 * num_units]
w_z = self._w_initializer([input_dim, num_units], dtype, partition_info)
w_r = self._w_initializer([input_dim, num_units], dtype, partition_info)
return tf.concat([w_z, w_r], 1)
return _initializer
def __call__(self, inputs, state, scope=None):
"""GRU cell with layer normalization."""
input_dim = inputs.get_shape().as_list()[1]
num_units = self._num_units
with tf.variable_scope(scope or "gru_cell"):
with tf.variable_scope("gates"):
w_h = tf.get_variable(
"w_h", [num_units, 2 * num_units],
initializer=self._w_h_initializer())
w_x = tf.get_variable(
"w_x", [input_dim, 2 * num_units],
initializer=self._w_x_initializer(input_dim))
z_and_r = (_layer_norm(tf.matmul(state, w_h), scope="layer_norm/w_h") +
_layer_norm(tf.matmul(inputs, w_x), scope="layer_norm/w_x"))
z, r = tf.split(tf.sigmoid(z_and_r), 2, 1)
with tf.variable_scope("candidate"):
w = tf.get_variable(
"w", [input_dim, num_units], initializer=self._w_initializer)
u = tf.get_variable(
"u", [num_units, num_units], initializer=self._u_initializer)
h_hat = (r * _layer_norm(tf.matmul(state, u), scope="layer_norm/u") +
_layer_norm(tf.matmul(inputs, w), scope="layer_norm/w"))
new_h = (1 - z) * state + z * self._activation(h_hat)
return new_h, new_h
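# Editor's sketch (illustrative, not part of the model): how the cell can be
# wired into tf.nn.dynamic_rnn; the initializers here are stand-ins for the
# ones supplied by the model in skip_thoughts_model.py.
def _example_cell_usage(inputs, sequence_length, num_units=1024):
  """Runs LayerNormGRUCell over a batch of embedded sequences.

  Assumes `inputs` is a float32 Tensor of shape [batch, padded_length, emb_dim]
  and `sequence_length` is an int32 Tensor of shape [batch].
  """
  cell = LayerNormGRUCell(
      num_units,
      w_initializer=tf.random_uniform_initializer(-0.1, 0.1),
      u_initializer=tf.orthogonal_initializer(),
      b_initializer=tf.constant_initializer(0.0))
  _, final_state = tf.nn.dynamic_rnn(
      cell, inputs, sequence_length=sequence_length, dtype=tf.float32)
  return final_state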
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Input ops."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import tensorflow as tf
# A SentenceBatch is a pair of Tensors:
# ids: Batch of input sentences represented as sequences of word ids: an int64
# Tensor with shape [batch_size, padded_length].
# mask: Boolean mask distinguishing real words (1) from padded words (0): an
# int32 Tensor with shape [batch_size, padded_length].
SentenceBatch = collections.namedtuple("SentenceBatch", ("ids", "mask"))
def parse_example_batch(serialized):
"""Parses a batch of tf.Example protos.
Args:
serialized: A 1-D string Tensor; a batch of serialized tf.Example protos.
Returns:
encode: A SentenceBatch of encode sentences.
decode_pre: A SentenceBatch of "previous" sentences to decode.
decode_post: A SentenceBatch of "post" sentences to decode.
"""
features = tf.parse_example(
serialized,
features={
"encode": tf.VarLenFeature(dtype=tf.int64),
"decode_pre": tf.VarLenFeature(dtype=tf.int64),
"decode_post": tf.VarLenFeature(dtype=tf.int64),
})
def _sparse_to_batch(sparse):
ids = tf.sparse_tensor_to_dense(sparse) # Padding with zeroes.
mask = tf.sparse_to_dense(sparse.indices, sparse.dense_shape,
tf.ones_like(sparse.values, dtype=tf.int32))
return SentenceBatch(ids=ids, mask=mask)
output_names = ("encode", "decode_pre", "decode_post")
return tuple(_sparse_to_batch(features[x]) for x in output_names)
def prefetch_input_data(reader,
file_pattern,
shuffle,
capacity,
num_reader_threads=1):
"""Prefetches string values from disk into an input queue.
Args:
reader: Instance of tf.ReaderBase.
file_pattern: Comma-separated list of file patterns (e.g.
"/tmp/train_data-?????-of-00100", where '?' acts as a wildcard that
matches any character).
shuffle: Boolean; whether to randomly shuffle the input data.
capacity: Queue capacity (number of records).
num_reader_threads: Number of reader threads feeding into the queue.
Returns:
A Queue containing prefetched string values.
"""
data_files = []
for pattern in file_pattern.split(","):
data_files.extend(tf.gfile.Glob(pattern))
if not data_files:
tf.logging.fatal("Found no input files matching %s", file_pattern)
else:
tf.logging.info("Prefetching values from %d files matching %s",
len(data_files), file_pattern)
filename_queue = tf.train.string_input_producer(
data_files, shuffle=shuffle, capacity=16, name="filename_queue")
if shuffle:
min_after_dequeue = int(0.6 * capacity)
values_queue = tf.RandomShuffleQueue(
capacity=capacity,
min_after_dequeue=min_after_dequeue,
dtypes=[tf.string],
shapes=[[]],
name="random_input_queue")
else:
values_queue = tf.FIFOQueue(
capacity=capacity,
dtypes=[tf.string],
shapes=[[]],
name="fifo_input_queue")
enqueue_ops = []
for _ in range(num_reader_threads):
_, value = reader.read(filename_queue)
enqueue_ops.append(values_queue.enqueue([value]))
tf.train.queue_runner.add_queue_runner(
tf.train.queue_runner.QueueRunner(values_queue, enqueue_ops))
tf.summary.scalar("queue/%s/fraction_of_%d_full" % (values_queue.name,
capacity),
tf.cast(values_queue.size(), tf.float32) * (1.0 / capacity))
return values_queue
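# Editor's sketch (illustrative values): wiring the prefetch queue into the
# example parser, mirroring how SkipThoughtsModel.build_inputs consumes these
# ops during training.
def _example_pipeline(file_pattern, batch_size=128):
  """Returns (encode, decode_pre, decode_post) SentenceBatches."""
  reader = tf.TFRecordReader()
  values_queue = prefetch_input_data(
      reader, file_pattern, shuffle=True, capacity=640000)
  serialized = values_queue.dequeue_many(batch_size)
  return parse_example_batch(serialized)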
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Class for encoding text using a trained SkipThoughtsModel.
Example usage:
g = tf.Graph()
with g.as_default():
encoder = SkipThoughtsEncoder(embeddings)
restore_fn = encoder.build_graph_from_config(model_config, checkpoint_path)
with tf.Session(graph=g) as sess:
restore_fn(sess)
skip_thought_vectors = encoder.encode(sess, data)
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os.path
import nltk
import nltk.tokenize
import numpy as np
import tensorflow as tf
from skip_thoughts import skip_thoughts_model
from skip_thoughts.data import special_words
def _pad(seq, target_len):
"""Pads a sequence of word embeddings up to the target length.
Args:
seq: Sequence of word embeddings.
target_len: Desired padded sequence length.
Returns:
embeddings: Input sequence padded with zero embeddings up to the target
length.
mask: A 0/1 vector with zeros corresponding to padded embeddings.
Raises:
ValueError: If len(seq) is not in the interval (0, target_len].
"""
seq_len = len(seq)
if seq_len <= 0 or seq_len > target_len:
raise ValueError("Expected 0 < len(seq) <= %d, got %d" % (target_len,
seq_len))
emb_dim = seq[0].shape[0]
padded_seq = np.zeros(shape=(target_len, emb_dim), dtype=seq[0].dtype)
mask = np.zeros(shape=(target_len,), dtype=np.int8)
for i in range(seq_len):
padded_seq[i] = seq[i]
mask[i] = 1
return padded_seq, mask
def _batch_and_pad(sequences):
"""Batches and pads sequences of word embeddings into a 2D array.
Args:
sequences: A list of batch_size sequences of word embeddings.
Returns:
embeddings: A numpy array with shape [batch_size, padded_length, emb_dim].
mask: A numpy 0/1 array with shape [batch_size, padded_length] with zeros
corresponding to padded elements.
"""
batch_embeddings = []
batch_mask = []
batch_len = max([len(seq) for seq in sequences])
for seq in sequences:
embeddings, mask = _pad(seq, batch_len)
batch_embeddings.append(embeddings)
batch_mask.append(mask)
return np.array(batch_embeddings), np.array(batch_mask)
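# Illustration (editor's note): for two embedding sequences of lengths 2 and 3,
# _batch_and_pad returns embeddings of shape [2, 3, emb_dim] and the mask
# [[1, 1, 0], [1, 1, 1]].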
class SkipThoughtsEncoder(object):
"""Skip-thoughts sentence encoder."""
def __init__(self, embeddings):
"""Initializes the encoder.
Args:
embeddings: Dictionary of word to embedding vector (1D numpy array).
"""
self._sentence_detector = nltk.data.load("tokenizers/punkt/english.pickle")
self._embeddings = embeddings
def _create_restore_fn(self, checkpoint_path, saver):
"""Creates a function that restores a model from checkpoint.
Args:
checkpoint_path: Checkpoint file or a directory containing a checkpoint
file.
saver: Saver for restoring variables from the checkpoint file.
Returns:
restore_fn: A function such that restore_fn(sess) loads model variables
from the checkpoint file.
Raises:
ValueError: If checkpoint_path does not refer to a checkpoint file or a
directory containing a checkpoint file.
"""
if tf.gfile.IsDirectory(checkpoint_path):
latest_checkpoint = tf.train.latest_checkpoint(checkpoint_path)
if not latest_checkpoint:
raise ValueError("No checkpoint file found in: %s" % checkpoint_path)
checkpoint_path = latest_checkpoint
def _restore_fn(sess):
tf.logging.info("Loading model from checkpoint: %s", checkpoint_path)
saver.restore(sess, checkpoint_path)
tf.logging.info("Successfully loaded checkpoint: %s",
os.path.basename(checkpoint_path))
return _restore_fn
def build_graph_from_config(self, model_config, checkpoint_path):
"""Builds the inference graph from a configuration object.
Args:
model_config: Object containing configuration for building the model.
checkpoint_path: Checkpoint file or a directory containing a checkpoint
file.
Returns:
restore_fn: A function such that restore_fn(sess) loads model variables
from the checkpoint file.
"""
tf.logging.info("Building model.")
model = skip_thoughts_model.SkipThoughtsModel(model_config, mode="encode")
model.build()
saver = tf.train.Saver()
return self._create_restore_fn(checkpoint_path, saver)
def build_graph_from_proto(self, graph_def_file, saver_def_file,
checkpoint_path):
"""Builds the inference graph from serialized GraphDef and SaverDef protos.
Args:
graph_def_file: File containing a serialized GraphDef proto.
saver_def_file: File containing a serialized SaverDef proto.
checkpoint_path: Checkpoint file or a directory containing a checkpoint
file.
Returns:
restore_fn: A function such that restore_fn(sess) loads model variables
from the checkpoint file.
"""
# Load the Graph.
tf.logging.info("Loading GraphDef from file: %s", graph_def_file)
graph_def = tf.GraphDef()
with tf.gfile.FastGFile(graph_def_file, "rb") as f:
graph_def.ParseFromString(f.read())
tf.import_graph_def(graph_def, name="")
# Load the Saver.
tf.logging.info("Loading SaverDef from file: %s", saver_def_file)
saver_def = tf.train.SaverDef()
with tf.gfile.FastGFile(saver_def_file, "rb") as f:
saver_def.ParseFromString(f.read())
saver = tf.train.Saver(saver_def=saver_def)
return self._create_restore_fn(checkpoint_path, saver)
def _tokenize(self, item):
"""Tokenizes an input string into a list of words."""
tokenized = []
for s in self._sentence_detector.tokenize(item):
tokenized.extend(nltk.tokenize.word_tokenize(s))
return tokenized
def _word_to_embedding(self, w):
"""Returns the embedding of a word."""
return self._embeddings.get(w, self._embeddings[special_words.UNK])
def _preprocess(self, data, use_eos):
"""Preprocesses text for the encoder.
Args:
data: A list of input strings.
use_eos: Whether to append the end-of-sentence word to each sentence.
Returns:
embeddings: A list of word embedding sequences corresponding to the input
strings.
"""
preprocessed_data = []
for item in data:
tokenized = self._tokenize(item)
if use_eos:
tokenized.append(special_words.EOS)
preprocessed_data.append([self._word_to_embedding(w) for w in tokenized])
return preprocessed_data
def encode(self,
sess,
data,
use_norm=True,
verbose=True,
batch_size=128,
use_eos=False):
"""Encodes a sequence of sentences as skip-thought vectors.
Args:
sess: TensorFlow Session.
data: A list of input strings.
use_norm: Whether to normalize skip-thought vectors to unit L2 norm.
verbose: Whether to log every batch.
batch_size: Batch size for the encoder.
use_eos: Whether to append the end-of-sentence word to each input
sentence.
Returns:
thought_vectors: A list of numpy arrays corresponding to the skip-thought
encodings of sentences in 'data'.
"""
data = self._preprocess(data, use_eos)
thought_vectors = []
batch_indices = np.arange(0, len(data), batch_size)
for batch, start_index in enumerate(batch_indices):
if verbose:
tf.logging.info("Batch %d / %d.", batch, len(batch_indices))
embeddings, mask = _batch_and_pad(
data[start_index:start_index + batch_size])
feed_dict = {
"encode_emb:0": embeddings,
"encode_mask:0": mask,
}
thought_vectors.extend(
sess.run("encoder/thought_vectors:0", feed_dict=feed_dict))
if use_norm:
thought_vectors = [v / np.linalg.norm(v) for v in thought_vectors]
return thought_vectors
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Skip-Thoughts model for learning sentence vectors.
The model is based on the paper:
"Skip-Thought Vectors"
Ryan Kiros, Yukun Zhu, Ruslan Salakhutdinov, Richard S. Zemel,
Antonio Torralba, Raquel Urtasun, Sanja Fidler.
https://papers.nips.cc/paper/5950-skip-thought-vectors.pdf
Layer normalization is applied based on the paper:
"Layer Normalization"
Jimmy Lei Ba, Jamie Ryan Kiros, Geoffrey E. Hinton
https://arxiv.org/abs/1607.06450
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from skip_thoughts.ops import gru_cell
from skip_thoughts.ops import input_ops
def random_orthonormal_initializer(shape, dtype=tf.float32,
partition_info=None): # pylint: disable=unused-argument
"""Variable initializer that produces a random orthonormal matrix."""
if len(shape) != 2 or shape[0] != shape[1]:
raise ValueError("Expecting square shape, got %s" % shape)
_, u, _ = tf.svd(tf.random_normal(shape, dtype=dtype), full_matrices=True)
return u
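# Editor's note: the left singular vectors of a square Gaussian random matrix
# form an orthonormal basis, so `u` above satisfies u * u^T = I (up to
# numerical error), which is the orthonormal initialization the recurrent
# weights require.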
class SkipThoughtsModel(object):
"""Skip-thoughts model."""
def __init__(self, config, mode="train", input_reader=None):
"""Basic setup. The actual TensorFlow graph is constructed in build().
Args:
config: Object containing configuration parameters.
mode: "train", "eval" or "encode".
input_reader: Subclass of tf.ReaderBase for reading the input serialized
tf.Example protocol buffers. Defaults to TFRecordReader.
Raises:
ValueError: If mode is invalid.
"""
if mode not in ["train", "eval", "encode"]:
raise ValueError("Unrecognized mode: %s" % mode)
self.config = config
self.mode = mode
self.reader = input_reader if input_reader else tf.TFRecordReader()
# Initializer used for non-recurrent weights.
self.uniform_initializer = tf.random_uniform_initializer(
minval=-self.config.uniform_init_scale,
maxval=self.config.uniform_init_scale)
# Input sentences represented as sequences of word ids. "encode" is the
# source sentence, "decode_pre" is the previous sentence and "decode_post"
# is the next sentence.
# Each is an int64 Tensor with shape [batch_size, padded_length].
self.encode_ids = None
self.decode_pre_ids = None
self.decode_post_ids = None
# Boolean masks distinguishing real words (1) from padded words (0).
# Each is an int32 Tensor with shape [batch_size, padded_length].
self.encode_mask = None
self.decode_pre_mask = None
self.decode_post_mask = None
# Input sentences represented as sequences of word embeddings.
# Each is a float32 Tensor with shape [batch_size, padded_length, emb_dim].
self.encode_emb = None
self.decode_pre_emb = None
self.decode_post_emb = None
# The output from the sentence encoder.
# A float32 Tensor with shape [batch_size, num_gru_units].
self.thought_vectors = None
# The cross entropy losses and corresponding weights of the decoders. Used
# for evaluation.
self.target_cross_entropy_losses = []
self.target_cross_entropy_loss_weights = []
# The total loss to optimize.
self.total_loss = None
def build_inputs(self):
"""Builds the ops for reading input data.
Outputs:
self.encode_ids
self.decode_pre_ids
self.decode_post_ids
self.encode_mask
self.decode_pre_mask
self.decode_post_mask
"""
if self.mode == "encode":
# Word embeddings are fed from an external vocabulary which has possibly
# been expanded (see vocabulary_expansion.py).
encode_ids = None
decode_pre_ids = None
decode_post_ids = None
encode_mask = tf.placeholder(tf.int8, (None, None), name="encode_mask")
decode_pre_mask = None
decode_post_mask = None
else:
# Prefetch serialized tf.Example protos.
input_queue = input_ops.prefetch_input_data(
self.reader,
self.config.input_file_pattern,
shuffle=self.config.shuffle_input_data,
capacity=self.config.input_queue_capacity,
num_reader_threads=self.config.num_input_reader_threads)
# Deserialize a batch.
serialized = input_queue.dequeue_many(self.config.batch_size)
encode, decode_pre, decode_post = input_ops.parse_example_batch(
serialized)
encode_ids = encode.ids
decode_pre_ids = decode_pre.ids
decode_post_ids = decode_post.ids
encode_mask = encode.mask
decode_pre_mask = decode_pre.mask
decode_post_mask = decode_post.mask
self.encode_ids = encode_ids
self.decode_pre_ids = decode_pre_ids
self.decode_post_ids = decode_post_ids
self.encode_mask = encode_mask
self.decode_pre_mask = decode_pre_mask
self.decode_post_mask = decode_post_mask
def build_word_embeddings(self):
"""Builds the word embeddings.
Inputs:
self.encode_ids
self.decode_pre_ids
self.decode_post_ids
Outputs:
self.encode_emb
self.decode_pre_emb
self.decode_post_emb
"""
if self.mode == "encode":
# Word embeddings are fed from an external vocabulary which has possibly
# been expanded (see vocabulary_expansion.py).
encode_emb = tf.placeholder(tf.float32, (
None, None, self.config.word_embedding_dim), "encode_emb")
# No sequences to decode.
decode_pre_emb = None
decode_post_emb = None
else:
word_emb = tf.get_variable(
name="word_embedding",
shape=[self.config.vocab_size, self.config.word_embedding_dim],
initializer=self.uniform_initializer)
encode_emb = tf.nn.embedding_lookup(word_emb, self.encode_ids)
decode_pre_emb = tf.nn.embedding_lookup(word_emb, self.decode_pre_ids)
decode_post_emb = tf.nn.embedding_lookup(word_emb, self.decode_post_ids)
self.encode_emb = encode_emb
self.decode_pre_emb = decode_pre_emb
self.decode_post_emb = decode_post_emb
def _initialize_gru_cell(self, num_units):
"""Initializes a GRU cell.
The Variables of the GRU cell are initialized in a way that exactly matches
the skip-thoughts paper: recurrent weights are initialized from random
orthonormal matrices and non-recurrent weights are initialized from random
uniform matrices.
Args:
num_units: Number of output units.
Returns:
cell: An instance of RNNCell with variable initializers that match the
skip-thoughts paper.
"""
return gru_cell.LayerNormGRUCell(
num_units,
w_initializer=self.uniform_initializer,
u_initializer=random_orthonormal_initializer,
b_initializer=tf.constant_initializer(0.0))
def build_encoder(self):
"""Builds the sentence encoder.
Inputs:
self.encode_emb
self.encode_mask
Outputs:
self.thought_vectors
Raises:
ValueError: if config.bidirectional_encoder is True and config.encoder_dim
is odd.
"""
with tf.variable_scope("encoder") as scope:
length = tf.to_int32(tf.reduce_sum(self.encode_mask, 1), name="length")
if self.config.bidirectional_encoder:
if self.config.encoder_dim % 2:
raise ValueError(
"encoder_dim must be even when using a bidirectional encoder.")
num_units = self.config.encoder_dim // 2
cell_fw = self._initialize_gru_cell(num_units) # Forward encoder
cell_bw = self._initialize_gru_cell(num_units) # Backward encoder
_, states = tf.nn.bidirectional_dynamic_rnn(
cell_fw=cell_fw,
cell_bw=cell_bw,
inputs=self.encode_emb,
sequence_length=length,
dtype=tf.float32,
scope=scope)
thought_vectors = tf.concat(states, 1, name="thought_vectors")
else:
cell = self._initialize_gru_cell(self.config.encoder_dim)
_, state = tf.nn.dynamic_rnn(
cell=cell,
inputs=self.encode_emb,
sequence_length=length,
dtype=tf.float32,
scope=scope)
# Use an identity operation to name the Tensor in the Graph.
thought_vectors = tf.identity(state, name="thought_vectors")
self.thought_vectors = thought_vectors
def _build_decoder(self, name, embeddings, targets, mask, initial_state,
reuse_logits):
"""Builds a sentence decoder.
Args:
name: Decoder name.
embeddings: Batch of sentences to decode; a float32 Tensor with shape
[batch_size, padded_length, emb_dim].
targets: Batch of target word ids; an int64 Tensor with shape
[batch_size, padded_length].
mask: A 0/1 Tensor with shape [batch_size, padded_length].
initial_state: Initial state of the GRU. A float32 Tensor with shape
[batch_size, num_gru_units].
reuse_logits: Whether to reuse the logits weights.
"""
# Decoder RNN.
cell = self._initialize_gru_cell(self.config.encoder_dim)
with tf.variable_scope(name) as scope:
# Add a padding word at the start of each sentence (to correspond to the
# prediction of the first word) and remove the last word.
decoder_input = tf.pad(
embeddings[:, :-1, :], [[0, 0], [1, 0], [0, 0]], name="input")
length = tf.reduce_sum(mask, 1, name="length")
decoder_output, _ = tf.nn.dynamic_rnn(
cell=cell,
inputs=decoder_input,
sequence_length=length,
initial_state=initial_state,
scope=scope)
# Stack batch vertically.
decoder_output = tf.reshape(decoder_output, [-1, self.config.encoder_dim])
targets = tf.reshape(targets, [-1])
weights = tf.to_float(tf.reshape(mask, [-1]))
# Logits.
with tf.variable_scope("logits", reuse=reuse_logits) as scope:
logits = tf.contrib.layers.fully_connected(
inputs=decoder_output,
num_outputs=self.config.vocab_size,
activation_fn=None,
weights_initializer=self.uniform_initializer,
scope=scope)
losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
labels=targets, logits=logits)
batch_loss = tf.reduce_sum(losses * weights)
tf.losses.add_loss(batch_loss)
tf.summary.scalar("losses/" + name, batch_loss)
self.target_cross_entropy_losses.append(losses)
self.target_cross_entropy_loss_weights.append(weights)
def build_decoders(self):
"""Builds the sentence decoders.
Inputs:
self.decode_pre_emb
self.decode_post_emb
self.decode_pre_ids
self.decode_post_ids
self.decode_pre_mask
self.decode_post_mask
self.thought_vectors
Outputs:
self.target_cross_entropy_losses
self.target_cross_entropy_loss_weights
"""
if self.mode != "encode":
# Pre-sentence decoder.
self._build_decoder("decoder_pre", self.decode_pre_emb,
self.decode_pre_ids, self.decode_pre_mask,
self.thought_vectors, False)
# Post-sentence decoder. Logits weights are reused.
self._build_decoder("decoder_post", self.decode_post_emb,
self.decode_post_ids, self.decode_post_mask,
self.thought_vectors, True)
def build_loss(self):
"""Builds the loss Tensor.
Outputs:
self.total_loss
"""
if self.mode != "encode":
total_loss = tf.losses.get_total_loss()
tf.summary.scalar("losses/total", total_loss)
self.total_loss = total_loss
def build_global_step(self):
"""Builds the global step Tensor.
Outputs:
self.global_step
"""
self.global_step = tf.contrib.framework.create_global_step()
def build(self):
"""Creates all ops for training, evaluation or encoding."""
self.build_inputs()
self.build_word_embeddings()
self.build_encoder()
self.build_decoders()
self.build_loss()
self.build_global_step()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for tensorflow_models.skip_thoughts.skip_thoughts_model."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
import tensorflow as tf
from skip_thoughts import configuration
from skip_thoughts import skip_thoughts_model
class SkipThoughtsModel(skip_thoughts_model.SkipThoughtsModel):
"""Subclass of SkipThoughtsModel without the disk I/O."""
def build_inputs(self):
if self.mode == "encode":
# Encode mode doesn't read from disk, so defer to parent.
return super(SkipThoughtsModel, self).build_inputs()
else:
# Replace disk I/O with random Tensors.
self.encode_ids = tf.random_uniform(
[self.config.batch_size, 15],
minval=0,
maxval=self.config.vocab_size,
dtype=tf.int64)
self.decode_pre_ids = tf.random_uniform(
[self.config.batch_size, 15],
minval=0,
maxval=self.config.vocab_size,
dtype=tf.int64)
self.decode_post_ids = tf.random_uniform(
[self.config.batch_size, 15],
minval=0,
maxval=self.config.vocab_size,
dtype=tf.int64)
self.encode_mask = tf.ones_like(self.encode_ids)
self.decode_pre_mask = tf.ones_like(self.decode_pre_ids)
self.decode_post_mask = tf.ones_like(self.decode_post_ids)
class SkipThoughtsModelTest(tf.test.TestCase):
def setUp(self):
super(SkipThoughtsModelTest, self).setUp()
self._model_config = configuration.model_config()
def _countModelParameters(self):
"""Counts the number of parameters in the model at top level scope."""
counter = {}
for v in tf.global_variables():
name = v.op.name.split("/")[0]
num_params = v.get_shape().num_elements()
if not num_params:
self.fail("Could not infer num_elements from Variable %s" % v.op.name)
counter[name] = counter.get(name, 0) + num_params
return counter
def _checkModelParameters(self):
"""Verifies the number of parameters in the model."""
param_counts = self._countModelParameters()
expected_param_counts = {
# vocab_size * embedding_size
"word_embedding": 12400000,
# GRU Cells
"encoder": 21772800,
"decoder_pre": 21772800,
"decoder_post": 21772800,
# (encoder_dim + 1) * vocab_size
"logits": 48020000,
"global_step": 1,
}
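# Editor's note: these counts follow from the shapes asserted below
# (vocab_size=20000, word_embedding_dim=620, encoder_dim=2400). For example,
# word_embedding = 20000 * 620 = 12,400,000; logits = (2400 + 1) * 20000 =
# 48,020,000; each GRU scope holds w_h (2400x4800) + w_x (620x4800) +
# u (2400x2400) + w (620x2400) weights plus layer-norm scales and offsets
# (28,800), totalling 21,772,800.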
self.assertDictEqual(expected_param_counts, param_counts)
def _checkOutputs(self, expected_shapes, feed_dict=None):
"""Verifies that the model produces expected outputs.
Args:
expected_shapes: A dict mapping Tensor or Tensor name to expected output
shape.
feed_dict: Values of Tensors to feed into Session.run().
"""
fetches = list(expected_shapes.keys())
with self.test_session() as sess:
sess.run(tf.global_variables_initializer())
outputs = sess.run(fetches, feed_dict)
for index, output in enumerate(outputs):
tensor = fetches[index]
expected = expected_shapes[tensor]
actual = output.shape
if expected != actual:
self.fail("Tensor %s has shape %s (expected %s)." % (tensor, actual,
expected))
def testBuildForTraining(self):
model = SkipThoughtsModel(self._model_config, mode="train")
model.build()
self._checkModelParameters()
expected_shapes = {
# [batch_size, length]
model.encode_ids: (128, 15),
model.decode_pre_ids: (128, 15),
model.decode_post_ids: (128, 15),
model.encode_mask: (128, 15),
model.decode_pre_mask: (128, 15),
model.decode_post_mask: (128, 15),
# [batch_size, length, word_embedding_dim]
model.encode_emb: (128, 15, 620),
model.decode_pre_emb: (128, 15, 620),
model.decode_post_emb: (128, 15, 620),
# [batch_size, encoder_dim]
model.thought_vectors: (128, 2400),
# [batch_size * length]
model.target_cross_entropy_losses[0]: (1920,),
model.target_cross_entropy_losses[1]: (1920,),
# [batch_size * length]
model.target_cross_entropy_loss_weights[0]: (1920,),
model.target_cross_entropy_loss_weights[1]: (1920,),
# Scalar
model.total_loss: (),
}
self._checkOutputs(expected_shapes)
def testBuildForEval(self):
model = SkipThoughtsModel(self._model_config, mode="eval")
model.build()
self._checkModelParameters()
expected_shapes = {
# [batch_size, length]
model.encode_ids: (128, 15),
model.decode_pre_ids: (128, 15),
model.decode_post_ids: (128, 15),
model.encode_mask: (128, 15),
model.decode_pre_mask: (128, 15),
model.decode_post_mask: (128, 15),
# [batch_size, length, word_embedding_dim]
model.encode_emb: (128, 15, 620),
model.decode_pre_emb: (128, 15, 620),
model.decode_post_emb: (128, 15, 620),
# [batch_size, encoder_dim]
model.thought_vectors: (128, 2400),
# [batch_size * length]
model.target_cross_entropy_losses[0]: (1920,),
model.target_cross_entropy_losses[1]: (1920,),
# [batch_size * length]
model.target_cross_entropy_loss_weights[0]: (1920,),
model.target_cross_entropy_loss_weights[1]: (1920,),
# Scalar
model.total_loss: (),
}
self._checkOutputs(expected_shapes)
def testBuildForEncode(self):
model = SkipThoughtsModel(self._model_config, mode="encode")
model.build()
# Test feeding a batch of word embeddings to get skip thought vectors.
encode_emb = np.random.rand(64, 15, 620)
encode_mask = np.ones((64, 15), dtype=np.int64)
feed_dict = {model.encode_emb: encode_emb, model.encode_mask: encode_mask}
expected_shapes = {
# [batch_size, encoder_dim]
model.thought_vectors: (64, 2400),
}
self._checkOutputs(expected_shapes, feed_dict)
if __name__ == "__main__":
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tracks training progress via per-word perplexity.
This script should be run concurrently with training so that summaries show up
in TensorBoard.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import os.path
import time
import numpy as np
import tensorflow as tf
from skip_thoughts import configuration
from skip_thoughts import skip_thoughts_model
FLAGS = tf.flags.FLAGS
tf.flags.DEFINE_string("input_file_pattern", None,
"File pattern of sharded TFRecord input files.")
tf.flags.DEFINE_string("checkpoint_dir", None,
"Directory containing model checkpoints.")
tf.flags.DEFINE_string("eval_dir", None, "Directory to write event logs to.")
tf.flags.DEFINE_integer("eval_interval_secs", 600,
"Interval between evaluation runs.")
tf.flags.DEFINE_integer("num_eval_examples", 50000,
"Number of examples for evaluation.")
tf.flags.DEFINE_integer("min_global_step", 100,
"Minimum global step to run evaluation.")
tf.logging.set_verbosity(tf.logging.INFO)
def evaluate_model(sess, losses, weights, num_batches, global_step,
summary_writer, summary_op):
"""Computes perplexity-per-word over the evaluation dataset.
Summaries and perplexity-per-word are written out to the eval directory.
Args:
sess: Session object.
losses: A Tensor of any shape; the target cross entropy losses for the
current batch.
weights: A Tensor of weights corresponding to losses.
num_batches: Integer; the number of evaluation batches.
global_step: Integer; global step of the model checkpoint.
summary_writer: Instance of SummaryWriter.
summary_op: Op for generating model summaries.
"""
# Log model summaries on a single batch.
summary_str = sess.run(summary_op)
summary_writer.add_summary(summary_str, global_step)
start_time = time.time()
sum_losses = 0.0
sum_weights = 0.0
for i in range(num_batches):
batch_losses, batch_weights = sess.run([losses, weights])
sum_losses += np.sum(batch_losses * batch_weights)
sum_weights += np.sum(batch_weights)
if not i % 100:
tf.logging.info("Computed losses for %d of %d batches.", i + 1,
num_batches)
eval_time = time.time() - start_time
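# Perplexity per word: exponentiate the weighted average cross-entropy, i.e.
# exp(total loss summed over target words / total number of target words).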
perplexity = math.exp(sum_losses / sum_weights)
tf.logging.info("Perplexity = %f (%.2f sec)", perplexity, eval_time)
# Log perplexity to the SummaryWriter.
summary = tf.Summary()
value = summary.value.add()
value.simple_value = perplexity
value.tag = "perplexity"
summary_writer.add_summary(summary, global_step)
# Write the Events file to the eval directory.
summary_writer.flush()
tf.logging.info("Finished processing evaluation at global step %d.",
global_step)
def run_once(model, losses, weights, saver, summary_writer, summary_op):
"""Evaluates the latest model checkpoint.
Args:
model: Instance of SkipThoughtsModel; the model to evaluate.
losses: Tensor; the target cross entropy losses for the current batch.
weights: A Tensor of weights corresponding to losses.
saver: Instance of tf.train.Saver for restoring model Variables.
summary_writer: Instance of FileWriter.
summary_op: Op for generating model summaries.
"""
model_path = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
if not model_path:
tf.logging.info("Skipping evaluation. No checkpoint found in: %s",
FLAGS.checkpoint_dir)
return
with tf.Session() as sess:
# Load model from checkpoint.
tf.logging.info("Loading model from checkpoint: %s", model_path)
saver.restore(sess, model_path)
global_step = tf.train.global_step(sess, model.global_step.name)
tf.logging.info("Successfully loaded %s at global step = %d.",
os.path.basename(model_path), global_step)
if global_step < FLAGS.min_global_step:
tf.logging.info("Skipping evaluation. Global step = %d < %d", global_step,
FLAGS.min_global_step)
return
# Start the queue runners.
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(coord=coord)
num_eval_batches = int(
math.ceil(FLAGS.num_eval_examples / model.config.batch_size))
# Run evaluation on the latest checkpoint.
try:
evaluate_model(sess, losses, weights, num_eval_batches, global_step,
summary_writer, summary_op)
except tf.InvalidArgumentError:
tf.logging.error(
"Evaluation raised InvalidArgumentError (e.g. due to Nans).")
finally:
coord.request_stop()
coord.join(threads, stop_grace_period_secs=10)
def main(unused_argv):
if not FLAGS.input_file_pattern:
raise ValueError("--input_file_pattern is required.")
if not FLAGS.checkpoint_dir:
raise ValueError("--checkpoint_dir is required.")
if not FLAGS.eval_dir:
raise ValueError("--eval_dir is required.")
# Create the evaluation directory if it doesn't exist.
eval_dir = FLAGS.eval_dir
if not tf.gfile.IsDirectory(eval_dir):
tf.logging.info("Creating eval directory: %s", eval_dir)
tf.gfile.MakeDirs(eval_dir)
g = tf.Graph()
with g.as_default():
# Build the model for evaluation.
model_config = configuration.model_config(
input_file_pattern=FLAGS.input_file_pattern,
input_queue_capacity=FLAGS.num_eval_examples,
shuffle_input_data=False)
model = skip_thoughts_model.SkipThoughtsModel(model_config, mode="eval")
model.build()
losses = tf.concat(model.target_cross_entropy_losses, 0)
weights = tf.concat(model.target_cross_entropy_loss_weights, 0)
# Create the Saver to restore model Variables.
saver = tf.train.Saver()
# Create the summary operation and the summary writer.
summary_op = tf.summary.merge_all()
summary_writer = tf.summary.FileWriter(eval_dir)
g.finalize()
# Run a new evaluation run every eval_interval_secs.
while True:
start = time.time()
tf.logging.info("Starting evaluation at " + time.strftime(
"%Y-%m-%d-%H:%M:%S", time.localtime()))
run_once(model, losses, weights, saver, summary_writer, summary_op)
time_to_next_eval = start + FLAGS.eval_interval_secs - time.time()
if time_to_next_eval > 0:
time.sleep(time_to_next_eval)
if __name__ == "__main__":
tf.app.run()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Train the skip-thoughts model."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
from skip_thoughts import configuration
from skip_thoughts import skip_thoughts_model
FLAGS = tf.flags.FLAGS
tf.flags.DEFINE_string("input_file_pattern", None,
"File pattern of sharded TFRecord files containing "
"tf.Example protos.")
tf.flags.DEFINE_string("train_dir", None,
"Directory for saving and loading checkpoints.")
tf.logging.set_verbosity(tf.logging.INFO)
def _setup_learning_rate(config, global_step):
"""Sets up the learning rate with optional exponential decay.
Args:
config: Object containing learning rate configuration parameters.
global_step: Tensor; the global step.
Returns:
learning_rate: Tensor; the learning rate, with exponential decay applied if configured.
"""
if config.learning_rate_decay_factor > 0:
learning_rate = tf.train.exponential_decay(
learning_rate=float(config.learning_rate),
global_step=global_step,
decay_steps=config.learning_rate_decay_steps,
decay_rate=config.learning_rate_decay_factor,
staircase=False)
else:
learning_rate = tf.constant(config.learning_rate)
return learning_rate
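# Illustrative sketch (not part of the original file): with staircase=False,
# tf.train.exponential_decay above follows this closed-form schedule.
def _example_decayed_learning_rate(base_rate, global_step, decay_steps,
                                   decay_rate):
  """Returns base_rate * decay_rate ** (global_step / decay_steps)."""
  return base_rate * decay_rate ** (float(global_step) / float(decay_steps))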
def main(unused_argv):
if not FLAGS.input_file_pattern:
raise ValueError("--input_file_pattern is required.")
if not FLAGS.train_dir:
raise ValueError("--train_dir is required.")
model_config = configuration.model_config(
input_file_pattern=FLAGS.input_file_pattern)
training_config = configuration.training_config()
tf.logging.info("Building training graph.")
g = tf.Graph()
with g.as_default():
model = skip_thoughts_model.SkipThoughtsModel(model_config, mode="train")
model.build()
learning_rate = _setup_learning_rate(training_config, model.global_step)
optimizer = tf.train.AdamOptimizer(learning_rate)
train_tensor = tf.contrib.slim.learning.create_train_op(
total_loss=model.total_loss,
optimizer=optimizer,
global_step=model.global_step,
clip_gradient_norm=training_config.clip_gradient_norm)
saver = tf.train.Saver()
tf.contrib.slim.learning.train(
train_op=train_tensor,
logdir=FLAGS.train_dir,
graph=g,
global_step=model.global_step,
number_of_steps=training_config.number_of_steps,
save_summaries_secs=training_config.save_summaries_secs,
saver=saver,
save_interval_secs=training_config.save_model_secs)
if __name__ == "__main__":
tf.app.run()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Compute an expanded vocabulary of embeddings using a word2vec model.
This script loads the word embeddings from a trained skip-thoughts model and
from a trained word2vec model (typically with a larger vocabulary). It trains a
linear regression model without regularization to learn a linear mapping from
the word2vec embedding space to the skip-thoughts embedding space. The model is
then applied to all words in the word2vec vocabulary, yielding vectors in the
skip-thoughts word embedding space for the union of the two vocabularies.
The linear regression task is to learn a parameter matrix W to minimize
|| X - Y * W ||^2,
where X is a matrix of skip-thoughts embeddings of shape [num_words, dim1],
Y is a matrix of word2vec embeddings of shape [num_words, dim2], and W is a
matrix of shape [dim2, dim1].
This is based on the "Translation Matrix" method from the paper:
"Exploiting Similarities among Languages for Machine Translation"
Tomas Mikolov, Quoc V. Le, Ilya Sutskever
https://arxiv.org/abs/1309.4168
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import os.path
import gensim.models
import numpy as np
import sklearn.linear_model
import tensorflow as tf
FLAGS = tf.flags.FLAGS
tf.flags.DEFINE_string("skip_thoughts_model", None,
"Checkpoint file or directory containing a checkpoint "
"file.")
tf.flags.DEFINE_string("skip_thoughts_vocab", None,
"Path to vocabulary file containing a list of newline-"
"separated words where the word id is the "
"corresponding 0-based index in the file.")
tf.flags.DEFINE_string("word2vec_model", None,
"File containing a word2vec model in binary format.")
tf.flags.DEFINE_string("output_dir", None, "Output directory.")
tf.logging.set_verbosity(tf.logging.INFO)
def _load_skip_thoughts_embeddings(checkpoint_path):
"""Loads the embedding matrix from a skip-thoughts model checkpoint.
Args:
checkpoint_path: Model checkpoint file or directory containing a checkpoint
file.
Returns:
word_embedding: A numpy array of shape [vocab_size, embedding_dim].
Raises:
ValueError: If no checkpoint file matches checkpoint_path.
"""
if tf.gfile.IsDirectory(checkpoint_path):
checkpoint_file = tf.train.latest_checkpoint(checkpoint_path)
if not checkpoint_file:
raise ValueError("No checkpoint file found in %s" % checkpoint_path)
else:
checkpoint_file = checkpoint_path
tf.logging.info("Loading skip-thoughts embedding matrix from %s",
checkpoint_file)
reader = tf.train.NewCheckpointReader(checkpoint_file)
word_embedding = reader.get_tensor("word_embedding")
tf.logging.info("Loaded skip-thoughts embedding matrix of shape %s",
word_embedding.shape)
return word_embedding
def _load_vocabulary(filename):
"""Loads a vocabulary file.
Args:
filename: Path to text file containing newline-separated words.
Returns:
vocab: A dictionary mapping word to word id.
"""
tf.logging.info("Reading vocabulary from %s", filename)
vocab = collections.OrderedDict()
with tf.gfile.GFile(filename, mode="r") as f:
for i, line in enumerate(f):
word = line.decode("utf-8").strip()
assert word not in vocab, "Attempting to add word twice: %s" % word
vocab[word] = i
tf.logging.info("Read vocabulary of size %d", len(vocab))
return vocab
def _expand_vocabulary(skip_thoughts_emb, skip_thoughts_vocab, word2vec):
"""Runs vocabulary expansion on a skip-thoughts model using a word2vec model.
Args:
skip_thoughts_emb: A numpy array of shape [skip_thoughts_vocab_size,
skip_thoughts_embedding_dim].
skip_thoughts_vocab: A dictionary of word to id.
word2vec: An instance of gensim.models.Word2Vec.
Returns:
combined_emb: A dictionary mapping words to embedding vectors.
"""
# Find words shared between the two vocabularies.
tf.logging.info("Finding shared words")
shared_words = [w for w in word2vec.vocab if w in skip_thoughts_vocab]
# Select embedding vectors for shared words.
tf.logging.info("Selecting embeddings for %d shared words", len(shared_words))
shared_st_emb = skip_thoughts_emb[[
skip_thoughts_vocab[w] for w in shared_words
]]
shared_w2v_emb = word2vec[shared_words]
# Train a linear regression model on the shared embedding vectors.
tf.logging.info("Training linear regression model")
model = sklearn.linear_model.LinearRegression()
model.fit(shared_w2v_emb, shared_st_emb)
# Create the expanded vocabulary.
tf.logging.info("Creating embeddings for expanded vocabuary")
combined_emb = collections.OrderedDict()
for w in word2vec.vocab:
# Ignore words with underscores (spaces).
if "_" not in w:
w_emb = model.predict(word2vec[w].reshape(1, -1))
combined_emb[w] = w_emb.reshape(-1)
for w in skip_thoughts_vocab:
combined_emb[w] = skip_thoughts_emb[skip_thoughts_vocab[w]]
tf.logging.info("Created expanded vocabulary of %d words", len(combined_emb))
return combined_emb
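# Illustrative sketch (not part of the original file): the regression above
# solves the docstring's min_W ||X - Y * W||^2. A NumPy-only equivalent on
# random data (sklearn's LinearRegression additionally fits an intercept):
def _example_translation_matrix(num_words=1000, dim_w2v=300, dim_st=620):
  """Fits W by least squares and maps one word2vec vector into skip-thoughts space."""
  y = np.random.randn(num_words, dim_w2v)  # word2vec embeddings (shared words).
  x = np.random.randn(num_words, dim_st)  # skip-thoughts embeddings (shared words).
  w = np.linalg.lstsq(y, x)[0]  # W has shape [dim_w2v, dim_st].
  return np.dot(y[0], w)  # Mapped embedding for the first shared word.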
def main(unused_argv):
if not FLAGS.skip_thoughts_model:
raise ValueError("--skip_thoughts_model is required.")
if not FLAGS.skip_thoughts_vocab:
raise ValueError("--skip_thoughts_vocab is required.")
if not FLAGS.word2vec_model:
raise ValueError("--word2vec_model is required.")
if not FLAGS.output_dir:
raise ValueError("--output_dir is required.")
if not tf.gfile.IsDirectory(FLAGS.output_dir):
tf.gfile.MakeDirs(FLAGS.output_dir)
# Load the skip-thoughts embeddings and vocabulary.
skip_thoughts_emb = _load_skip_thoughts_embeddings(FLAGS.skip_thoughts_model)
skip_thoughts_vocab = _load_vocabulary(FLAGS.skip_thoughts_vocab)
# Load the Word2Vec model.
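# Note: newer gensim releases expose this loader as
# gensim.models.KeyedVectors.load_word2vec_format.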
word2vec = gensim.models.Word2Vec.load_word2vec_format(
FLAGS.word2vec_model, binary=True)
# Run vocabulary expansion.
embedding_map = _expand_vocabulary(skip_thoughts_emb, skip_thoughts_vocab,
word2vec)
# Save the output.
vocab = embedding_map.keys()
vocab_file = os.path.join(FLAGS.output_dir, "vocab.txt")
with tf.gfile.GFile(vocab_file, "w") as f:
f.write("\n".join(vocab))
tf.logging.info("Wrote vocabulary file to %s", vocab_file)
embeddings = np.array(embedding_map.values())
embeddings_file = os.path.join(FLAGS.output_dir, "embeddings.npy")
np.save(embeddings_file, embeddings)
tf.logging.info("Wrote embeddings file to %s", embeddings_file)
if __name__ == "__main__":
tf.app.run()
@@ -18,7 +18,7 @@ The following table shows their accuracy on Universal
 Dependencies test sets for different types of annotations.
 Language | No. tokens | POS | fPOS | Morph | UAS | LAS
--------- | :--: | :--: | :--: | :--: | :--: | :--: | :--:
+-------- | :--: | :--: | :--: | :--: | :--: | :--:
 Ancient_Greek-PROIEL | 18502 | 97.14% | 96.97% | 89.77% | 78.74% | 73.15%
 Ancient_Greek | 25251 | 93.22% | 84.22% | 90.01% | 68.98% | 62.07%
 Arabic | 28268 | 95.65% | 91.03% | 91.23% | 81.49% | 75.82%
...
@@ -166,7 +166,7 @@ class Seq2SeqAttentionModel(object):
 hps.num_hidden,
 initializer=tf.random_uniform_initializer(-0.1, 0.1, seed=113),
 state_is_tuple=False)
-(emb_encoder_inputs, fw_state, _) = tf.nn.bidirectional_rnn(
+(emb_encoder_inputs, fw_state, _) = tf.contrib.rnn.static_bidirectional_rnn(
 cell_fw, cell_bw, emb_encoder_inputs, dtype=tf.float32,
 sequence_length=article_lens)
 encoder_outputs = emb_encoder_inputs
@@ -200,7 +200,7 @@ class Seq2SeqAttentionModel(object):
 # During decoding, follow up _dec_in_state are fed from beam_search.
 # dec_out_state are stored by beam_search for next step feeding.
 initial_state_attention = (hps.mode == 'decode')
-decoder_outputs, self._dec_out_state = tf.nn.seq2seq.attention_decoder(
+decoder_outputs, self._dec_out_state = tf.contrib.legacy_seq2seq.attention_decoder(
 emb_decoder_inputs, self._dec_in_state, self._enc_top_states,
 cell, num_heads=1, loop_function=loop_function,
 initial_state_attention=initial_state_attention)
@@ -234,7 +234,7 @@ class Seq2SeqAttentionModel(object):
 self._loss = seq2seq_lib.sampled_sequence_loss(
 decoder_outputs, targets, loss_weights, sampled_loss_func)
 else:
-self._loss = tf.nn.seq2seq.sequence_loss(
+self._loss = tf.contrib.legacy_seq2seq.sequence_loss(
 model_outputs, targets, loss_weights)
 tf.summary.scalar('loss', tf.minimum(12.0, self._loss))
...
@@ -123,7 +123,7 @@ y_logits = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
 # %% Define loss/eval/training functions
 cross_entropy = tf.reduce_mean(
-tf.nn.softmax_cross_entropy_with_logits(logits=y_logits, targets=y))
+tf.nn.softmax_cross_entropy_with_logits(logits=y_logits, labels=y))
 opt = tf.train.AdamOptimizer()
 optimizer = opt.minimize(cross_entropy)
 grads = opt.compute_gradients(cross_entropy, [b_fc_loc2])
...
# Tutorial Models

-This repository contains models referenced to from the [TensorFlow tutorials](https://www.tensorflow.org/tutorials/). We recommend installing TensorFlow from the [nightly builds](https://github.com/tensorflow/tensorflow#installation) rather than the r0.12 release before running these models.
+This folder contains models referenced to from the [TensorFlow tutorials](https://www.tensorflow.org/tutorials/).
@@ -100,7 +100,7 @@ class Seq2SeqModel(object):
 b = tf.get_variable("proj_b", [self.target_vocab_size], dtype=dtype)
 output_projection = (w, b)
-def sampled_loss(inputs, labels):
+def sampled_loss(labels, inputs):
 labels = tf.reshape(labels, [-1, 1])
 # We need to compute the sampled_softmax_loss using 32bit floats to
 # avoid numerical instabilities.
...