Commit 748eceae authored by Marianne Linhares Monteiro, committed by GitHub

Merge branch 'master' into cifar10_experiment

parents 40e906d2 ed65b632
#!/bin/bash
# This script prepares the various different versions of MobileNet models for
# use in a mobile application. If you don't specify your own trained checkpoint
# file, it will download pretrained checkpoints for ImageNet. You'll also need
# to have a copy of the TensorFlow source code to run some of the commands.
# By default it will be looked for in ../tensorflow, but you can set the
# TENSORFLOW_PATH environment variable before calling the script if your source
# is in a different location.
# The main slim/nets/mobilenet_v1.md description has more details about the
# model. The main points are that it comes in four size versions (1.0, 0.75,
# 0.50, and 0.25), which control the number of parameters and hence the file
# size of the model, and four input image sizes (224, 192, 160, or 128
# pixels), which affect the amount of computation needed and the latency.
# Here's an example generating a frozen model from pretrained weights:
#
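# (Illustrative invocation; substitute the name this script is saved as.)
#
#   ./build_mobilenet.sh 1.0 224
#
# With no checkpoint path given, this downloads the pretrained ImageNet
# checkpoint and produces the width-1.0, 224-pixel frozen model.
#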
set -e
print_usage () {
echo "Creates a frozen mobilenet model suitable for mobile use"
echo "Usage:"
echo "$0 <mobilenet version> <input size> [checkpoint path]"
}
MOBILENET_VERSION=$1
IMAGE_SIZE=$2
CHECKPOINT=$3
if [[ ${MOBILENET_VERSION} = "1.0" ]]; then
SLIM_NAME=mobilenet_v1
elif [[ ${MOBILENET_VERSION} = "0.75" ]]; then
SLIM_NAME=mobilenet_v1_075
elif [[ ${MOBILENET_VERSION} = "0.50" ]]; then
SLIM_NAME=mobilenet_v1_050
elif [[ ${MOBILENET_VERSION} = "0.25" ]]; then
SLIM_NAME=mobilenet_v1_025
else
echo "Bad mobilenet version, should be one of 1.0, 0.75, 0.50, or 0.25"
print_usage
exit 1
fi
if [[ ${IMAGE_SIZE} != "224" ]] && [[ ${IMAGE_SIZE} != "192" ]] && [[ ${IMAGE_SIZE} != "160" ]] && [[ ${IMAGE_SIZE} != "128" ]]; then
echo "Bad input image size, should be one of 224, 192, 160, or 128"
print_usage
exit 1
fi
if [[ -z "${TENSORFLOW_PATH}" ]]; then
TENSORFLOW_PATH=../tensorflow
fi
if [[ ! -d ${TENSORFLOW_PATH} ]]; then
echo "TensorFlow source folder not found. You should download the source and then set"
echo "the TENSORFLOW_PATH environment variable to point to it, like this:"
echo "export TENSORFLOW_PATH=/my/path/to/tensorflow"
print_usage
exit 1
fi
MODEL_FOLDER=/tmp/mobilenet_v1_${MOBILENET_VERSION}_${IMAGE_SIZE}
if [[ -d ${MODEL_FOLDER} ]]; then
echo "Model folder ${MODEL_FOLDER} already exists!"
echo "If you want to overwrite it, then 'rm -rf ${MODEL_FOLDER}' first."
print_usage
exit 1
fi
mkdir ${MODEL_FOLDER}
if [[ ${CHECKPOINT} = "" ]]; then
echo "*******"
echo "Downloading pretrained weights"
echo "*******"
curl "http://download.tensorflow.org/models/mobilenet_v1_${MOBILENET_VERSION}_${IMAGE_SIZE}_2017_06_14.tar.gz" \
-o ${MODEL_FOLDER}/checkpoints.tar.gz
tar xzf ${MODEL_FOLDER}/checkpoints.tar.gz --directory ${MODEL_FOLDER}
CHECKPOINT=${MODEL_FOLDER}/mobilenet_v1_${MOBILENET_VERSION}_${IMAGE_SIZE}.ckpt
fi
echo "*******"
echo "Exporting graph architecture to ${MODEL_FOLDER}/unfrozen_graph.pb"
echo "*******"
bazel run slim:export_inference_graph -- \
--model_name=${SLIM_NAME} --image_size=${IMAGE_SIZE} --logtostderr \
--output_file=${MODEL_FOLDER}/unfrozen_graph.pb --dataset_dir=${MODEL_FOLDER}
cd ${TENSORFLOW_PATH}
echo "*******"
echo "Freezing graph to ${MODEL_FOLDER}/frozen_graph.pb"
echo "*******"
bazel run tensorflow/python/tools:freeze_graph -- \
--input_graph=${MODEL_FOLDER}/unfrozen_graph.pb \
--input_checkpoint=${CHECKPOINT} \
--input_binary=true --output_graph=${MODEL_FOLDER}/frozen_graph.pb \
--output_node_names=MobilenetV1/Predictions/Reshape_1
echo "Quantizing weights to ${MODEL_FOLDER}/quantized_graph.pb"
bazel run tensorflow/tools/graph_transforms:transform_graph -- \
--in_graph=${MODEL_FOLDER}/frozen_graph.pb \
--out_graph=${MODEL_FOLDER}/quantized_graph.pb \
--inputs=input --outputs=MobilenetV1/Predictions/Reshape_1 \
--transforms='fold_constants fold_batch_norms quantize_weights'
echo "*******"
echo "Running label_image using the graph"
echo "*******"
bazel build tensorflow/examples/label_image:label_image
bazel-bin/tensorflow/examples/label_image/label_image \
--input_layer=input --output_layer=MobilenetV1/Predictions/Reshape_1 \
--graph=${MODEL_FOLDER}/quantized_graph.pb --input_mean=-127 --input_std=127 \
--image=tensorflow/examples/label_image/data/grace_hopper.jpg \
--input_width=${IMAGE_SIZE} --input_height=${IMAGE_SIZE} --labels=${MODEL_FOLDER}/labels.txt
echo "*******"
echo "Saved graphs to ${MODEL_FOLDER}/frozen_graph.pb and ${MODEL_FOLDER}/quantized_graph.pb"
echo "*******"
#!/bin/bash
#
# This script performs the following operations:
# 1. Downloads the Flowers dataset
# 2. Fine-tunes an Inception Resnet V2 model on the Flowers training set.
# 3. Evaluates the model on the Flowers validation set.
#
# Usage:
# cd slim
# ./slim/scripts/finetune_inception_resnet_v2_on_flowers.sh
set -e
# Where the pre-trained Inception Resnet V2 checkpoint is saved to.
PRETRAINED_CHECKPOINT_DIR=/tmp/checkpoints
# Name of the model to fine-tune.
MODEL_NAME=inception_resnet_v2
# Where the training (fine-tuned) checkpoint and logs will be saved to.
TRAIN_DIR=/tmp/flowers-models/${MODEL_NAME}
# Where the dataset is saved to.
DATASET_DIR=/tmp/flowers
# Download the pre-trained checkpoint.
if [ ! -d "$PRETRAINED_CHECKPOINT_DIR" ]; then
mkdir ${PRETRAINED_CHECKPOINT_DIR}
fi
if [ ! -f ${PRETRAINED_CHECKPOINT_DIR}/${MODEL_NAME}.ckpt ]; then
wget http://download.tensorflow.org/models/inception_resnet_v2_2016_08_30.tar.gz
tar -xvf inception_resnet_v2_2016_08_30.tar.gz
mv inception_resnet_v2.ckpt ${PRETRAINED_CHECKPOINT_DIR}/${MODEL_NAME}.ckpt
rm inception_resnet_v2_2016_08_30.tar.gz
fi
# Download the dataset
python download_and_convert_data.py \
--dataset_name=flowers \
--dataset_dir=${DATASET_DIR}
# Fine-tune only the new layers for 1000 steps.
python train_image_classifier.py \
--train_dir=${TRAIN_DIR} \
--dataset_name=flowers \
--dataset_split_name=train \
--dataset_dir=${DATASET_DIR} \
--model_name=${MODEL_NAME} \
--checkpoint_path=${PRETRAINED_CHECKPOINT_DIR}/${MODEL_NAME}.ckpt \
--checkpoint_exclude_scopes=InceptionResnetV2/Logits,InceptionResnetV2/AuxLogits \
--trainable_scopes=InceptionResnetV2/Logits,InceptionResnetV2/AuxLogits \
--max_number_of_steps=1000 \
--batch_size=32 \
--learning_rate=0.01 \
--learning_rate_decay_type=fixed \
--save_interval_secs=60 \
--save_summaries_secs=60 \
--log_every_n_steps=10 \
--optimizer=rmsprop \
--weight_decay=0.00004
# Run evaluation.
python eval_image_classifier.py \
--checkpoint_path=${TRAIN_DIR} \
--eval_dir=${TRAIN_DIR} \
--dataset_name=flowers \
--dataset_split_name=validation \
--dataset_dir=${DATASET_DIR} \
--model_name=${MODEL_NAME}
# Fine-tune all layers for 500 steps.
python train_image_classifier.py \
--train_dir=${TRAIN_DIR}/all \
--dataset_name=flowers \
--dataset_split_name=train \
--dataset_dir=${DATASET_DIR} \
--model_name=${MODEL_NAME} \
--checkpoint_path=${TRAIN_DIR} \
--max_number_of_steps=500 \
--batch_size=32 \
--learning_rate=0.0001 \
--learning_rate_decay_type=fixed \
--save_interval_secs=60 \
--save_summaries_secs=60 \
--log_every_n_steps=10 \
--optimizer=rmsprop \
--weight_decay=0.00004
# Run evaluation.
python eval_image_classifier.py \
--checkpoint_path=${TRAIN_DIR}/all \
--eval_dir=${TRAIN_DIR}/all \
--dataset_name=flowers \
--dataset_split_name=validation \
--dataset_dir=${DATASET_DIR} \
--model_name=${MODEL_NAME}
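After both stages finish, training progress can be watched with TensorBoard pointed at ${TRAIN_DIR}, and the checkpoints in ${TRAIN_DIR} and ${TRAIN_DIR}/all can be inspected to confirm what was saved. A minimal sketch (not part of the script; paths assume the defaults above):

import tensorflow as tf

for train_dir in ['/tmp/flowers-models/inception_resnet_v2',
                  '/tmp/flowers-models/inception_resnet_v2/all']:
  ckpt = tf.train.latest_checkpoint(train_dir)
  reader = tf.train.NewCheckpointReader(ckpt)
  var_shapes = reader.get_variable_to_shape_map()
  print(train_dir, '->', ckpt, '(%d variables)' % len(var_shapes))
  # The new logits scopes are the only trainable variables in the first stage.
  print(sorted(name for name in var_shapes if 'Logits' in name)[:5])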
@@ -7,7 +7,7 @@
 #
 # Usage:
 # cd slim
-# ./slim/scripts/finetune_inceptionv3_on_flowers.sh
+# ./slim/scripts/finetune_inception_v3_on_flowers.sh
 set -e
 # Where the pre-trained InceptionV3 checkpoint is saved to.
...
@@ -7,7 +7,7 @@
 #
 # Usage:
 # cd slim
-# ./scripts/train_cifar_net_on_mnist.sh
+# ./scripts/train_cifarnet_on_cifar10.sh
 set -e
 # Where the checkpoint and logs will be saved to.
...
@@ -818,7 +818,7 @@
 "import tensorflow as tf\n",
 "\n",
 "try:\n",
-"  import urllib2\n",
+"  import urllib2 as urllib\n",
 "except ImportError:\n",
 "  import urllib.request as urllib\n",
 "\n",
...
@@ -89,6 +89,7 @@ py_library(
     srcs = ["network_units.py"],
     deps = [
         ":dragnn_ops",
+        "//syntaxnet:syntaxnet_ops",
         "//syntaxnet/util:check",
         "//syntaxnet/util:pyregistry",
         "@org_tensorflow//tensorflow:tensorflow_py",
...
@@ -15,9 +15,11 @@
 """Basic network units used in assembling DRAGNN graphs."""
-from abc import ABCMeta
-from abc import abstractmethod
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import abc
 import tensorflow as tf
 from tensorflow.python.ops import nn
@@ -25,6 +27,7 @@ from tensorflow.python.ops import tensor_array_ops as ta
 from tensorflow.python.platform import tf_logging as logging
 from dragnn.python import dragnn_ops
+from syntaxnet import syntaxnet_ops
 from syntaxnet.util import check
 from syntaxnet.util import registry
@@ -135,11 +138,11 @@ def add_embeddings(channel_id, feature_spec, seed=None):
     raise RuntimeError('vocab resource contains more than one part:\n%s',
                        str(feature_spec.vocab))
   seed1, seed2 = tf.get_seed(seed)
-  embeddings = dragnn_ops.dragnn_embedding_initializer(
-      embedding_input=feature_spec.pretrained_embedding_matrix.part[0]
-      .file_pattern,
-      vocab=feature_spec.vocab.part[0].file_pattern,
-      scaling_coefficient=1.0,
+  embeddings = syntaxnet_ops.word_embedding_initializer(
+      vectors=feature_spec.pretrained_embedding_matrix.part[0].file_pattern,
+      vocabulary=feature_spec.vocab.part[0].file_pattern,
+      num_special_embeddings=1,
+      embedding_init=1.0,
       seed=seed1,
       seed2=seed2)
   return tf.get_variable(name, initializer=tf.reshape(embeddings, shape))
@@ -626,7 +629,7 @@ class NetworkUnitInterface(object):
     layers (list): List of Layer objects to track network layers that should
       be written to Tensors during training and inference.
   """
-  __metaclass__ = ABCMeta  # required for @abstractmethod
+  __metaclass__ = abc.ABCMeta  # required for @abstractmethod
   def __init__(self, component, init_layers=None, init_context_layers=None):
     """Initializes parameters for embedding matrices.
@@ -738,7 +741,7 @@ class NetworkUnitInterface(object):
           [attention_hidden_layer_size, component.num_actions],
           initializer=tf.random_normal_initializer(stddev=1e-4)))
-  @abstractmethod
+  @abc.abstractmethod
   def create(self,
              fixed_embeddings,
              linked_embeddings,
...
@@ -747,6 +747,15 @@ py_library(
     data = [":parser_ops.so"],
 )
+py_library(
+    name = "syntaxnet_ops",
+    srcs = ["syntaxnet_ops.py"],
+    deps = [
+        ":parser_ops",
+        ":load_parser_ops_py",
+    ],
+)
 py_library(
     name = "graph_builder",
     srcs = ["graph_builder.py"],
...
@@ -247,7 +247,10 @@ weights: vector of weight extracted from the SparseFeatures proto.
 REGISTER_OP("WordEmbeddingInitializer")
     .Output("word_embeddings: float")
     .Attr("vectors: string")
-    .Attr("task_context: string")
+    .Attr("task_context: string = ''")
+    .Attr("vocabulary: string = ''")
+    .Attr("cache_vectors_locally: bool = true")
+    .Attr("num_special_embeddings: int = 3")
     .Attr("embedding_init: float = 1.0")
     .Attr("seed: int = 0")
     .Attr("seed2: int = 0")
@@ -255,9 +258,17 @@ REGISTER_OP("WordEmbeddingInitializer")
 Reads word embeddings from an sstable of dist_belief.TokenEmbedding protos for
 every word specified in a text vocabulary file.
-word_embeddings: a tensor containing word embeddings from the specified sstable.
-vectors: path to recordio of word embedding vectors.
-task_context: file path at which to read the task context.
+word_embeddings: a tensor containing word embeddings from the specified table.
+vectors: path to TF record file of word embedding vectors.
+task_context: file path at which to read the task context, for its "word-map"
+  input. Exactly one of `task_context` or `vocabulary` must be specified.
+vocabulary: path to vocabulary file, which contains one unique word per line, in
+  order. Exactly one of `task_context` or `vocabulary` must be specified.
+cache_vectors_locally: Whether to cache the vectors file to a local temp file
+  before parsing it. This greatly reduces initialization time when the vectors
+  are stored remotely, but requires that "/tmp" has sufficient space.
+num_special_embeddings: Number of special embeddings to allocate, in addition to
+  those allocated for real words.
 embedding_init: embedding vectors that are not found in the input sstable are
   initialized randomly from a normal distribution with zero mean and
   std dev = embedding_init / sqrt(embedding_size).
...
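For reference, the expanded op can be driven directly from Python through the wrapper module added in this commit; a minimal sketch (the file paths are placeholders), consistent with the attribute documentation above and the tests added below:

import tensorflow as tf
from syntaxnet import syntaxnet_ops

with tf.Session():
  embeddings = syntaxnet_ops.word_embedding_initializer(
      vectors='/path/to/token_embeddings.tfrecord',  # TokenEmbedding records
      vocabulary='/path/to/vocabulary.txt',          # one unique word per line
      num_special_embeddings=3,                      # the default, shown explicitly
      embedding_init=1.0)
  # Rows = vocabulary size + num_special_embeddings; columns = embedding dim.
  print(tf.shape(embeddings).eval())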
@@ -34,9 +34,11 @@ limitations under the License.
 #include "tensorflow/core/framework/op_kernel.h"
 #include "tensorflow/core/framework/tensor.h"
 #include "tensorflow/core/framework/tensor_shape.h"
+#include "tensorflow/core/lib/core/errors.h"
 #include "tensorflow/core/lib/core/status.h"
 #include "tensorflow/core/lib/io/table.h"
 #include "tensorflow/core/lib/io/table_options.h"
+#include "tensorflow/core/lib/strings/str_util.h"
 #include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/env.h"
@@ -439,14 +441,18 @@ class WordEmbeddingInitializer : public OpKernel {
  public:
  explicit WordEmbeddingInitializer(OpKernelConstruction *context)
      : OpKernel(context) {
-    string file_path, data;
-    OP_REQUIRES_OK(context, context->GetAttr("task_context", &file_path));
-    OP_REQUIRES_OK(context, ReadFileToString(tensorflow::Env::Default(),
-                                             file_path, &data));
-    OP_REQUIRES(context,
-                TextFormat::ParseFromString(data, task_context_.mutable_spec()),
-                InvalidArgument("Could not parse task context at ", file_path));
+    OP_REQUIRES_OK(context,
+                   context->GetAttr("task_context", &task_context_path_));
+    OP_REQUIRES_OK(context, context->GetAttr("vocabulary", &vocabulary_path_));
+    OP_REQUIRES(
+        context, task_context_path_.empty() != vocabulary_path_.empty(),
+        InvalidArgument(
+            "Exactly one of task_context or vocabulary must be specified"));
     OP_REQUIRES_OK(context, context->GetAttr("vectors", &vectors_path_));
+    OP_REQUIRES_OK(context, context->GetAttr("cache_vectors_locally",
+                                             &cache_vectors_locally_));
+    OP_REQUIRES_OK(context, context->GetAttr("num_special_embeddings",
+                                             &num_special_embeddings_));
     OP_REQUIRES_OK(context,
                    context->GetAttr("embedding_init", &embedding_init_));
@@ -462,43 +468,117 @@ class WordEmbeddingInitializer : public OpKernel {
   }
   void Compute(OpKernelContext *context) override {
-    // Loads words from vocabulary with mapping to ids.
-    string path = TaskContext::InputFile(*task_context_.GetInput("word-map"));
-    const TermFrequencyMap *word_map =
-        SharedStoreUtils::GetWithDefaultName<TermFrequencyMap>(path, 0, 0);
-    unordered_map<string, int64> vocab;
-    for (int i = 0; i < word_map->Size(); ++i) {
-      vocab[word_map->GetTerm(i)] = i;
-    }
+    std::unordered_map<string, int64> vocab;
+    OP_REQUIRES_OK(context, LoadVocabulary(&vocab));
-    // Creates a reader pointing to a local copy of the vectors recordio.
-    string tmp_vectors_path;
-    OP_REQUIRES_OK(context, CopyToTmpPath(vectors_path_, &tmp_vectors_path));
-    ProtoRecordReader reader(tmp_vectors_path);
+    string vectors_path = vectors_path_;
+    if (cache_vectors_locally_) {
+      OP_REQUIRES_OK(context, CopyToTmpPath(vectors_path_, &vectors_path));
+    }
+    ProtoRecordReader reader(vectors_path);
-    // Loads the embedding vectors into a matrix.
+    // Load the embedding vectors into a matrix. Since the |embedding_matrix|
+    // output cannot be allocated until the embedding dimension is known, delay
+    // allocation until the first iteration of the loop.
     Tensor *embedding_matrix = nullptr;
     TokenEmbedding embedding;
     while (reader.Read(&embedding) == tensorflow::Status::OK()) {
       if (embedding_matrix == nullptr) {
-        const int embedding_size = embedding.vector().values_size();
-        OP_REQUIRES_OK(
-            context, context->allocate_output(
-                         0, TensorShape({word_map->Size() + 3, embedding_size}),
-                         &embedding_matrix));
-        auto matrix = embedding_matrix->matrix<float>();
-        Eigen::internal::NormalRandomGenerator<float> prng(seed_);
-        matrix =
-            matrix.random(prng) * (embedding_init_ / sqrtf(embedding_size));
+        OP_REQUIRES_OK(context,
+                       InitRandomEmbeddingMatrix(vocab, embedding, context,
                                                  &embedding_matrix));
       }
       if (vocab.find(embedding.token()) != vocab.end()) {
         SetNormalizedRow(embedding.vector(), vocab[embedding.token()],
                          embedding_matrix);
       }
     }
+    // The vectors file might not contain any embeddings (perhaps due to read
+    // errors), in which case the |embedding_matrix| output is never allocated.
+    // Signal this error early instead of letting downstream ops complain about
+    // a missing input.
+    OP_REQUIRES(
+        context, embedding_matrix != nullptr,
+        InvalidArgument(tensorflow::strings::StrCat(
+            "found no pretrained embeddings in vectors=", vectors_path_,
+            " vocabulary=", vocabulary_path_, " vocab_size=", vocab.size())));
   }
  private:
+  // Loads the vocabulary from the task context or vocabulary.
+  tensorflow::Status LoadVocabulary(
+      std::unordered_map<string, int64> *vocabulary) const {
+    if (!task_context_path_.empty()) {
+      return LoadVocabularyFromTaskContext(vocabulary);
+    } else {
+      return LoadVocabularyFromFile(vocabulary);
+    }
+  }
+  // Loads the |vocabulary| from the "word-map" input of the task context at
+  // |task_context_path_|, or returns non-OK on error.
+  tensorflow::Status LoadVocabularyFromTaskContext(
+      std::unordered_map<string, int64> *vocabulary) const {
+    vocabulary->clear();
+    string textproto;
+    TF_RETURN_IF_ERROR(ReadFileToString(tensorflow::Env::Default(),
+                                        task_context_path_, &textproto));
+    TaskContext task_context;
+    if (!TextFormat::ParseFromString(textproto, task_context.mutable_spec())) {
+      return InvalidArgument("Could not parse task context at ",
+                             task_context_path_);
+    }
+    const string path =
+        TaskContext::InputFile(*task_context.GetInput("word-map"));
+    const TermFrequencyMap *word_map =
+        SharedStoreUtils::GetWithDefaultName<TermFrequencyMap>(path, 0, 0);
+    for (int i = 0; i < word_map->Size(); ++i) {
+      (*vocabulary)[word_map->GetTerm(i)] = i;
+    }
+    return tensorflow::Status::OK();
+  }
+  // Loads the |vocabulary| from the |vocabulary_path_| file, which contains one
+  // word per line in order, or returns non-OK on error.
+  tensorflow::Status LoadVocabularyFromFile(
+      std::unordered_map<string, int64> *vocabulary) const {
+    vocabulary->clear();
+    string text;
+    TF_RETURN_IF_ERROR(
+        ReadFileToString(tensorflow::Env::Default(), vocabulary_path_, &text));
+    // Chomp a trailing newline, if any, to avoid producing a spurious empty
+    // term at the end of the vocabulary file.
+    if (!text.empty() && text.back() == '\n') text.pop_back();
+    for (const string &line : tensorflow::str_util::Split(text, "\n")) {
+      if (vocabulary->find(line) != vocabulary->end()) {
+        return InvalidArgument("Vocabulary file at ", vocabulary_path_,
+                               " contains multiple instances of term: ", line);
+      }
+      const int64 index = vocabulary->size();
+      (*vocabulary)[line] = index;
+    }
+    return tensorflow::Status::OK();
+  }
+  // Allocates the |embedding_matrix| based on the |vocabulary| and |embedding|
+  // and initializes it to random values, or returns non-OK on error.
+  tensorflow::Status InitRandomEmbeddingMatrix(
+      const std::unordered_map<string, int64> &vocabulary,
+      const TokenEmbedding &embedding, OpKernelContext *context,
      Tensor **embedding_matrix) const {
+    const int rows = vocabulary.size() + num_special_embeddings_;
+    const int columns = embedding.vector().values_size();
+    TF_RETURN_IF_ERROR(context->allocate_output(0, TensorShape({rows, columns}),
                                                 embedding_matrix));
+    auto matrix = (*embedding_matrix)->matrix<float>();
+    Eigen::internal::NormalRandomGenerator<float> prng(seed_);
+    matrix = matrix.random(prng) * (embedding_init_ / sqrtf(columns));
+    return tensorflow::Status::OK();
+  }
   // Sets embedding_matrix[row] to a normalized version of the given vector.
   void SetNormalizedRow(const TokenEmbedding::Vector &vector, const int row,
                         Tensor *embedding_matrix) {
@@ -547,8 +627,15 @@ class WordEmbeddingInitializer : public OpKernel {
     }
   }
-  // Task context used to configure this op.
-  TaskContext task_context_;
+  // Path to the task context or vocabulary. Exactly one must be specified.
+  string task_context_path_;
+  string vocabulary_path_;
+  // Whether to cache the vectors to a local temp file, to reduce I/O latency.
+  bool cache_vectors_locally_ = true;
+  // Number of special embeddings to allocate.
+  int num_special_embeddings_ = 3;
   // Seed for random initialization.
   uint64 seed_ = 0;
...
@@ -15,6 +15,7 @@
 """Tests for reader_ops."""
+# pylint: disable=no-name-in-module,unused-import,g-bad-import-order,maybe-no-member,no-member,g-importing-member
 import os.path
 import numpy as np
@@ -29,7 +30,6 @@ from syntaxnet import graph_builder
 from syntaxnet import sparse_pb2
 from syntaxnet.ops import gen_parser_ops
 FLAGS = tf.app.flags.FLAGS
 if not hasattr(FLAGS, 'test_srcdir'):
   FLAGS.test_srcdir = ''
@@ -220,6 +220,76 @@ class ParsingReaderOpsTest(test_util.TensorFlowTestCase):
     self.assertEqual(tf.shape(embeddings2)[1].eval(), 3)
     self.assertAllEqual(embeddings1.eval(), embeddings2.eval())
+  def testWordEmbeddingInitializerFailIfNeitherTaskContextOrVocabulary(self):
+    with self.test_session():
+      with self.assertRaises(Exception):
+        gen_parser_ops.word_embedding_initializer(vectors='/dev/null').eval()
+  def testWordEmbeddingInitializerFailIfBothTaskContextAndVocabulary(self):
+    with self.test_session():
+      with self.assertRaises(Exception):
+        gen_parser_ops.word_embedding_initializer(
+            vectors='/dev/null',
+            task_context='/dev/null',
+            vocabulary='/dev/null').eval()
+  def testWordEmbeddingInitializerVocabularyFile(self):
+    records_path = os.path.join(FLAGS.test_tmpdir, 'records3')
+    writer = tf.python_io.TFRecordWriter(records_path)
+    writer.write(self._token_embedding('a', [1, 2, 3]))
+    writer.write(self._token_embedding('b', [2, 3, 4]))
+    writer.write(self._token_embedding('c', [3, 4, 5]))
+    writer.write(self._token_embedding('d', [4, 5, 6]))
+    writer.write(self._token_embedding('e', [5, 6, 7]))
+    del writer
+    vocabulary_path = os.path.join(FLAGS.test_tmpdir, 'vocabulary3')
+    with open(vocabulary_path, 'w') as vocabulary_file:
+      vocabulary_file.write('a\nc\ne\nx\n')  # 'x' not in pretrained embeddings
+    # Enumerate a variety of configurations.
+    for cache_vectors_locally in [False, True]:
+      for num_special_embeddings in [None, 1, 2, 5]:  # None = use default of 3
+        with self.test_session():
+          embeddings = gen_parser_ops.word_embedding_initializer(
+              vectors=records_path,
+              vocabulary=vocabulary_path,
+              cache_vectors_locally=cache_vectors_locally,
+              num_special_embeddings=num_special_embeddings)
+          # Expect 4 embeddings from the vocabulary plus special embeddings.
+          expected_num_embeddings = 4 + (num_special_embeddings or 3)
+          self.assertAllEqual([expected_num_embeddings, 3],
+                              tf.shape(embeddings).eval())
+          # The first 3 embeddings should be pretrained.
+          norm_a = (1.0 + 4.0 + 9.0) ** 0.5
+          norm_c = (9.0 + 16.0 + 25.0) ** 0.5
+          norm_e = (25.0 + 36.0 + 49.0) ** 0.5
+          self.assertAllClose([[1.0 / norm_a, 2.0 / norm_a, 3.0 / norm_a],
+                               [3.0 / norm_c, 4.0 / norm_c, 5.0 / norm_c],
+                               [5.0 / norm_e, 6.0 / norm_e, 7.0 / norm_e]],
+                              embeddings[:3].eval())
+  def testWordEmbeddingInitializerVocabularyFileWithDuplicates(self):
+    records_path = os.path.join(FLAGS.test_tmpdir, 'records4')
+    writer = tf.python_io.TFRecordWriter(records_path)
+    writer.write(self._token_embedding('a', [1, 2, 3]))
+    writer.write(self._token_embedding('b', [2, 3, 4]))
+    writer.write(self._token_embedding('c', [3, 4, 5]))
+    writer.write(self._token_embedding('d', [4, 5, 6]))
+    writer.write(self._token_embedding('e', [5, 6, 7]))
+    del writer
+    vocabulary_path = os.path.join(FLAGS.test_tmpdir, 'vocabulary4')
+    with open(vocabulary_path, 'w') as vocabulary_file:
+      vocabulary_file.write('a\nc\ne\nx\ny\nx')  # 'x' duplicated
+    with self.test_session():
+      with self.assertRaises(Exception):
+        gen_parser_ops.word_embedding_initializer(
+            vectors=records_path, vocabulary=vocabulary_path).eval()
 if __name__ == '__main__':
   googletest.main()
# Copyright 2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Imports the SyntaxNet ops and their C++ implementations."""
from syntaxnet.ops.gen_parser_ops import * # pylint: disable=wildcard-import
import syntaxnet.load_parser_ops
@@ -8,7 +8,7 @@ The Spatial Transformer Network [1] allows the spatial manipulation of data with
 ### API
-A Spatial Transformer Network implemented in Tensorflow 0.7 and based on [2].
+A Spatial Transformer Network implemented in Tensorflow 1.0 and based on [2].
 #### How to use
...
@@ -11,8 +11,8 @@ Code in this directory focuses on how to use TensorFlow Estimators to train and
 2. Download the CIFAR-10 dataset.
 ```shell
-curl -o cifar-10-python.tar.gz https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
-tar xzf cifar-10-python.tar.gz
+curl -o cifar-10-binary.tar.gz https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz
+tar xzf cifar-10-binary.tar.gz
 ```
 <b>How to run:</b>
@@ -20,12 +20,13 @@ tar xzf cifar-10-python.tar.gz
 ```shell
 # After running the above commands, you should see the following in the folder
 # where the data is downloaded.
-$ ls -R cifar-10-batches-py
+$ ls -R cifar-10-batches-bin
 cifar-10-batches-bin:
 batches.meta data_batch_1 data_batch_2 data_batch_3
 data_batch_4 data_batch_5 readme.html test_batch
 # Run the model on CPU only. After training, it runs the evaluation.
 $ python cifar10_main.py --data_dir=/prefix/to/downloaded/data/cifar-10-batches-bin \
     --model_dir=/tmp/cifar10 \
...
@@ -179,6 +179,7 @@ class ExamplesPerSecondHook(session_run_hook.SessionRunHook):
 class GpuParamServerDeviceSetter(object):
   """Used with tf.device() to place variables on the least loaded GPU.
   A common use for this class is to pass a list of GPU devices, e.g. ['gpu:0',
   'gpu:1','gpu:2'], as ps_devices. When each variable is placed, it will be
   placed on the least loaded gpu. All other Ops, which will be the computation
...
@@ -28,10 +28,7 @@ import tensorflow as tf
 def _read_words(filename):
   with tf.gfile.GFile(filename, "r") as f:
-    if sys.version_info[0] >= 3:
-      return f.read().replace("\n", "<eos>").split()
-    else:
-      return f.read().decode("utf-8").replace("\n", "<eos>").split()
+    return f.read().decode("utf-8").replace("\n", "<eos>").split()
 def _build_vocab(filename):
...