Commit dff0f0c1 authored by Alexander Gorban's avatar Alexander Gorban
Browse files

Merge branch 'master' of github.com:tensorflow/models

parents da341f70 36203f09
...@@ -199,7 +199,9 @@ def stack_blocks_dense(net, blocks, output_stride=None, ...@@ -199,7 +199,9 @@ def stack_blocks_dense(net, blocks, output_stride=None,
def resnet_arg_scope(weight_decay=0.0001, def resnet_arg_scope(weight_decay=0.0001,
batch_norm_decay=0.997, batch_norm_decay=0.997,
batch_norm_epsilon=1e-5, batch_norm_epsilon=1e-5,
batch_norm_scale=True): batch_norm_scale=True,
activation_fn=tf.nn.relu,
use_batch_norm=True):
"""Defines the default ResNet arg scope. """Defines the default ResNet arg scope.
TODO(gpapan): The batch-normalization related default values above are TODO(gpapan): The batch-normalization related default values above are
...@@ -215,6 +217,8 @@ def resnet_arg_scope(weight_decay=0.0001, ...@@ -215,6 +217,8 @@ def resnet_arg_scope(weight_decay=0.0001,
normalizing activations by their variance in batch normalization. normalizing activations by their variance in batch normalization.
batch_norm_scale: If True, uses an explicit `gamma` multiplier to scale the batch_norm_scale: If True, uses an explicit `gamma` multiplier to scale the
activations in the batch normalization layer. activations in the batch normalization layer.
activation_fn: The activation function which is used in ResNet.
use_batch_norm: Whether or not to use batch normalization.
Returns: Returns:
An `arg_scope` to use for the resnet models. An `arg_scope` to use for the resnet models.
...@@ -230,8 +234,8 @@ def resnet_arg_scope(weight_decay=0.0001, ...@@ -230,8 +234,8 @@ def resnet_arg_scope(weight_decay=0.0001,
[slim.conv2d], [slim.conv2d],
weights_regularizer=slim.l2_regularizer(weight_decay), weights_regularizer=slim.l2_regularizer(weight_decay),
weights_initializer=slim.variance_scaling_initializer(), weights_initializer=slim.variance_scaling_initializer(),
activation_fn=tf.nn.relu, activation_fn=activation_fn,
normalizer_fn=slim.batch_norm, normalizer_fn=slim.batch_norm if use_batch_norm else None,
normalizer_params=batch_norm_params): normalizer_params=batch_norm_params):
with slim.arg_scope([slim.batch_norm], **batch_norm_params): with slim.arg_scope([slim.batch_norm], **batch_norm_params):
# The following implies padding='SAME' for pool1, which makes feature # The following implies padding='SAME' for pool1, which makes feature
......
...@@ -66,8 +66,14 @@ slim = tf.contrib.slim ...@@ -66,8 +66,14 @@ slim = tf.contrib.slim
@slim.add_arg_scope @slim.add_arg_scope
def bottleneck(inputs, depth, depth_bottleneck, stride, rate=1, def bottleneck(inputs,
outputs_collections=None, scope=None): depth,
depth_bottleneck,
stride,
rate=1,
outputs_collections=None,
scope=None,
use_bounded_activations=False):
"""Bottleneck residual unit variant with BN after convolutions. """Bottleneck residual unit variant with BN after convolutions.
This is the original residual unit proposed in [1]. See Fig. 1(a) of [2] for This is the original residual unit proposed in [1]. See Fig. 1(a) of [2] for
...@@ -86,6 +92,8 @@ def bottleneck(inputs, depth, depth_bottleneck, stride, rate=1, ...@@ -86,6 +92,8 @@ def bottleneck(inputs, depth, depth_bottleneck, stride, rate=1,
rate: An integer, rate for atrous convolution. rate: An integer, rate for atrous convolution.
outputs_collections: Collection to add the ResNet unit output. outputs_collections: Collection to add the ResNet unit output.
scope: Optional variable_scope. scope: Optional variable_scope.
use_bounded_activations: Whether or not to use bounded activations. Bounded
activations better lend themselves to quantized inference.
Returns: Returns:
The ResNet unit's output. The ResNet unit's output.
...@@ -95,8 +103,12 @@ def bottleneck(inputs, depth, depth_bottleneck, stride, rate=1, ...@@ -95,8 +103,12 @@ def bottleneck(inputs, depth, depth_bottleneck, stride, rate=1,
if depth == depth_in: if depth == depth_in:
shortcut = resnet_utils.subsample(inputs, stride, 'shortcut') shortcut = resnet_utils.subsample(inputs, stride, 'shortcut')
else: else:
shortcut = slim.conv2d(inputs, depth, [1, 1], stride=stride, shortcut = slim.conv2d(
activation_fn=None, scope='shortcut') inputs,
depth, [1, 1],
stride=stride,
activation_fn=tf.nn.relu6 if use_bounded_activations else None,
scope='shortcut')
residual = slim.conv2d(inputs, depth_bottleneck, [1, 1], stride=1, residual = slim.conv2d(inputs, depth_bottleneck, [1, 1], stride=1,
scope='conv1') scope='conv1')
...@@ -105,7 +117,12 @@ def bottleneck(inputs, depth, depth_bottleneck, stride, rate=1, ...@@ -105,7 +117,12 @@ def bottleneck(inputs, depth, depth_bottleneck, stride, rate=1,
residual = slim.conv2d(residual, depth, [1, 1], stride=1, residual = slim.conv2d(residual, depth, [1, 1], stride=1,
activation_fn=None, scope='conv3') activation_fn=None, scope='conv3')
output = tf.nn.relu(shortcut + residual) if use_bounded_activations:
# Use clip_by_value to simulate bandpass activation.
residual = tf.clip_by_value(residual, -6.0, 6.0)
output = tf.nn.relu6(shortcut + residual)
else:
output = tf.nn.relu(shortcut + residual)
return slim.utils.collect_named_outputs(outputs_collections, return slim.utils.collect_named_outputs(outputs_collections,
sc.original_name_scope, sc.original_name_scope,
...@@ -119,7 +136,7 @@ def resnet_v1(inputs, ...@@ -119,7 +136,7 @@ def resnet_v1(inputs,
global_pool=True, global_pool=True,
output_stride=None, output_stride=None,
include_root_block=True, include_root_block=True,
spatial_squeeze=False, spatial_squeeze=True,
reuse=None, reuse=None,
scope=None): scope=None):
"""Generator for v1 ResNet models. """Generator for v1 ResNet models.
......
...@@ -251,6 +251,7 @@ class ResnetCompleteNetworkTest(tf.test.TestCase): ...@@ -251,6 +251,7 @@ class ResnetCompleteNetworkTest(tf.test.TestCase):
global_pool=True, global_pool=True,
output_stride=None, output_stride=None,
include_root_block=True, include_root_block=True,
spatial_squeeze=True,
reuse=None, reuse=None,
scope='resnet_v1_small'): scope='resnet_v1_small'):
"""A shallow and thin ResNet v1 for faster tests.""" """A shallow and thin ResNet v1 for faster tests."""
...@@ -266,6 +267,7 @@ class ResnetCompleteNetworkTest(tf.test.TestCase): ...@@ -266,6 +267,7 @@ class ResnetCompleteNetworkTest(tf.test.TestCase):
global_pool=global_pool, global_pool=global_pool,
output_stride=output_stride, output_stride=output_stride,
include_root_block=include_root_block, include_root_block=include_root_block,
spatial_squeeze=spatial_squeeze,
reuse=reuse, reuse=reuse,
scope=scope) scope=scope)
...@@ -276,6 +278,7 @@ class ResnetCompleteNetworkTest(tf.test.TestCase): ...@@ -276,6 +278,7 @@ class ResnetCompleteNetworkTest(tf.test.TestCase):
with slim.arg_scope(resnet_utils.resnet_arg_scope()): with slim.arg_scope(resnet_utils.resnet_arg_scope()):
logits, end_points = self._resnet_small(inputs, num_classes, logits, end_points = self._resnet_small(inputs, num_classes,
global_pool=global_pool, global_pool=global_pool,
spatial_squeeze=False,
scope='resnet') scope='resnet')
self.assertTrue(logits.op.name.startswith('resnet/logits')) self.assertTrue(logits.op.name.startswith('resnet/logits'))
self.assertListEqual(logits.get_shape().as_list(), [2, 1, 1, num_classes]) self.assertListEqual(logits.get_shape().as_list(), [2, 1, 1, num_classes])
...@@ -307,6 +310,7 @@ class ResnetCompleteNetworkTest(tf.test.TestCase): ...@@ -307,6 +310,7 @@ class ResnetCompleteNetworkTest(tf.test.TestCase):
with slim.arg_scope(resnet_utils.resnet_arg_scope()): with slim.arg_scope(resnet_utils.resnet_arg_scope()):
_, end_points = self._resnet_small(inputs, num_classes, _, end_points = self._resnet_small(inputs, num_classes,
global_pool=global_pool, global_pool=global_pool,
spatial_squeeze=False,
scope='resnet') scope='resnet')
endpoint_to_shape = { endpoint_to_shape = {
'resnet/block1': [2, 41, 41, 4], 'resnet/block1': [2, 41, 41, 4],
...@@ -325,6 +329,7 @@ class ResnetCompleteNetworkTest(tf.test.TestCase): ...@@ -325,6 +329,7 @@ class ResnetCompleteNetworkTest(tf.test.TestCase):
_, end_points = self._resnet_small(inputs, num_classes, _, end_points = self._resnet_small(inputs, num_classes,
global_pool=global_pool, global_pool=global_pool,
include_root_block=False, include_root_block=False,
spatial_squeeze=False,
scope='resnet') scope='resnet')
endpoint_to_shape = { endpoint_to_shape = {
'resnet/block1': [2, 64, 64, 4], 'resnet/block1': [2, 64, 64, 4],
...@@ -345,6 +350,7 @@ class ResnetCompleteNetworkTest(tf.test.TestCase): ...@@ -345,6 +350,7 @@ class ResnetCompleteNetworkTest(tf.test.TestCase):
num_classes, num_classes,
global_pool=global_pool, global_pool=global_pool,
output_stride=output_stride, output_stride=output_stride,
spatial_squeeze=False,
scope='resnet') scope='resnet')
endpoint_to_shape = { endpoint_to_shape = {
'resnet/block1': [2, 41, 41, 4], 'resnet/block1': [2, 41, 41, 4],
...@@ -391,6 +397,7 @@ class ResnetCompleteNetworkTest(tf.test.TestCase): ...@@ -391,6 +397,7 @@ class ResnetCompleteNetworkTest(tf.test.TestCase):
with slim.arg_scope(resnet_utils.resnet_arg_scope()): with slim.arg_scope(resnet_utils.resnet_arg_scope()):
logits, _ = self._resnet_small(inputs, num_classes, logits, _ = self._resnet_small(inputs, num_classes,
global_pool=global_pool, global_pool=global_pool,
spatial_squeeze=False,
scope='resnet') scope='resnet')
self.assertTrue(logits.op.name.startswith('resnet/logits')) self.assertTrue(logits.op.name.startswith('resnet/logits'))
self.assertListEqual(logits.get_shape().as_list(), self.assertListEqual(logits.get_shape().as_list(),
......
...@@ -115,7 +115,7 @@ def resnet_v2(inputs, ...@@ -115,7 +115,7 @@ def resnet_v2(inputs,
global_pool=True, global_pool=True,
output_stride=None, output_stride=None,
include_root_block=True, include_root_block=True,
spatial_squeeze=False, spatial_squeeze=True,
reuse=None, reuse=None,
scope=None): scope=None):
"""Generator for v2 (preactivation) ResNet models. """Generator for v2 (preactivation) ResNet models.
...@@ -251,7 +251,7 @@ def resnet_v2_50(inputs, ...@@ -251,7 +251,7 @@ def resnet_v2_50(inputs,
is_training=True, is_training=True,
global_pool=True, global_pool=True,
output_stride=None, output_stride=None,
spatial_squeeze=False, spatial_squeeze=True,
reuse=None, reuse=None,
scope='resnet_v2_50'): scope='resnet_v2_50'):
"""ResNet-50 model of [1]. See resnet_v2() for arg and return description.""" """ResNet-50 model of [1]. See resnet_v2() for arg and return description."""
...@@ -273,7 +273,7 @@ def resnet_v2_101(inputs, ...@@ -273,7 +273,7 @@ def resnet_v2_101(inputs,
is_training=True, is_training=True,
global_pool=True, global_pool=True,
output_stride=None, output_stride=None,
spatial_squeeze=False, spatial_squeeze=True,
reuse=None, reuse=None,
scope='resnet_v2_101'): scope='resnet_v2_101'):
"""ResNet-101 model of [1]. See resnet_v2() for arg and return description.""" """ResNet-101 model of [1]. See resnet_v2() for arg and return description."""
...@@ -295,7 +295,7 @@ def resnet_v2_152(inputs, ...@@ -295,7 +295,7 @@ def resnet_v2_152(inputs,
is_training=True, is_training=True,
global_pool=True, global_pool=True,
output_stride=None, output_stride=None,
spatial_squeeze=False, spatial_squeeze=True,
reuse=None, reuse=None,
scope='resnet_v2_152'): scope='resnet_v2_152'):
"""ResNet-152 model of [1]. See resnet_v2() for arg and return description.""" """ResNet-152 model of [1]. See resnet_v2() for arg and return description."""
...@@ -317,7 +317,7 @@ def resnet_v2_200(inputs, ...@@ -317,7 +317,7 @@ def resnet_v2_200(inputs,
is_training=True, is_training=True,
global_pool=True, global_pool=True,
output_stride=None, output_stride=None,
spatial_squeeze=False, spatial_squeeze=True,
reuse=None, reuse=None,
scope='resnet_v2_200'): scope='resnet_v2_200'):
"""ResNet-200 model of [2]. See resnet_v2() for arg and return description.""" """ResNet-200 model of [2]. See resnet_v2() for arg and return description."""
......
...@@ -251,6 +251,7 @@ class ResnetCompleteNetworkTest(tf.test.TestCase): ...@@ -251,6 +251,7 @@ class ResnetCompleteNetworkTest(tf.test.TestCase):
global_pool=True, global_pool=True,
output_stride=None, output_stride=None,
include_root_block=True, include_root_block=True,
spatial_squeeze=True,
reuse=None, reuse=None,
scope='resnet_v2_small'): scope='resnet_v2_small'):
"""A shallow and thin ResNet v2 for faster tests.""" """A shallow and thin ResNet v2 for faster tests."""
...@@ -266,6 +267,7 @@ class ResnetCompleteNetworkTest(tf.test.TestCase): ...@@ -266,6 +267,7 @@ class ResnetCompleteNetworkTest(tf.test.TestCase):
global_pool=global_pool, global_pool=global_pool,
output_stride=output_stride, output_stride=output_stride,
include_root_block=include_root_block, include_root_block=include_root_block,
spatial_squeeze=spatial_squeeze,
reuse=reuse, reuse=reuse,
scope=scope) scope=scope)
...@@ -276,6 +278,7 @@ class ResnetCompleteNetworkTest(tf.test.TestCase): ...@@ -276,6 +278,7 @@ class ResnetCompleteNetworkTest(tf.test.TestCase):
with slim.arg_scope(resnet_utils.resnet_arg_scope()): with slim.arg_scope(resnet_utils.resnet_arg_scope()):
logits, end_points = self._resnet_small(inputs, num_classes, logits, end_points = self._resnet_small(inputs, num_classes,
global_pool=global_pool, global_pool=global_pool,
spatial_squeeze=False,
scope='resnet') scope='resnet')
self.assertTrue(logits.op.name.startswith('resnet/logits')) self.assertTrue(logits.op.name.startswith('resnet/logits'))
self.assertListEqual(logits.get_shape().as_list(), [2, 1, 1, num_classes]) self.assertListEqual(logits.get_shape().as_list(), [2, 1, 1, num_classes])
...@@ -307,6 +310,7 @@ class ResnetCompleteNetworkTest(tf.test.TestCase): ...@@ -307,6 +310,7 @@ class ResnetCompleteNetworkTest(tf.test.TestCase):
with slim.arg_scope(resnet_utils.resnet_arg_scope()): with slim.arg_scope(resnet_utils.resnet_arg_scope()):
_, end_points = self._resnet_small(inputs, num_classes, _, end_points = self._resnet_small(inputs, num_classes,
global_pool=global_pool, global_pool=global_pool,
spatial_squeeze=False,
scope='resnet') scope='resnet')
endpoint_to_shape = { endpoint_to_shape = {
'resnet/block1': [2, 41, 41, 4], 'resnet/block1': [2, 41, 41, 4],
...@@ -325,6 +329,7 @@ class ResnetCompleteNetworkTest(tf.test.TestCase): ...@@ -325,6 +329,7 @@ class ResnetCompleteNetworkTest(tf.test.TestCase):
_, end_points = self._resnet_small(inputs, num_classes, _, end_points = self._resnet_small(inputs, num_classes,
global_pool=global_pool, global_pool=global_pool,
include_root_block=False, include_root_block=False,
spatial_squeeze=False,
scope='resnet') scope='resnet')
endpoint_to_shape = { endpoint_to_shape = {
'resnet/block1': [2, 64, 64, 4], 'resnet/block1': [2, 64, 64, 4],
...@@ -345,6 +350,7 @@ class ResnetCompleteNetworkTest(tf.test.TestCase): ...@@ -345,6 +350,7 @@ class ResnetCompleteNetworkTest(tf.test.TestCase):
num_classes, num_classes,
global_pool=global_pool, global_pool=global_pool,
output_stride=output_stride, output_stride=output_stride,
spatial_squeeze=False,
scope='resnet') scope='resnet')
endpoint_to_shape = { endpoint_to_shape = {
'resnet/block1': [2, 41, 41, 4], 'resnet/block1': [2, 41, 41, 4],
...@@ -393,6 +399,7 @@ class ResnetCompleteNetworkTest(tf.test.TestCase): ...@@ -393,6 +399,7 @@ class ResnetCompleteNetworkTest(tf.test.TestCase):
with slim.arg_scope(resnet_utils.resnet_arg_scope()): with slim.arg_scope(resnet_utils.resnet_arg_scope()):
logits, _ = self._resnet_small(inputs, num_classes, logits, _ = self._resnet_small(inputs, num_classes,
global_pool=global_pool, global_pool=global_pool,
spatial_squeeze=False,
scope='resnet') scope='resnet')
self.assertTrue(logits.op.name.startswith('resnet/logits')) self.assertTrue(logits.op.name.startswith('resnet/logits'))
self.assertListEqual(logits.get_shape().as_list(), self.assertListEqual(logits.get_shape().as_list(),
......
...@@ -87,8 +87,9 @@ def vgg_a(inputs, ...@@ -87,8 +87,9 @@ def vgg_a(inputs,
fc_conv_padding: the type of padding to use for the fully connected layer fc_conv_padding: the type of padding to use for the fully connected layer
that is implemented as a convolutional layer. Use 'SAME' padding if you that is implemented as a convolutional layer. Use 'SAME' padding if you
are applying the network in a fully convolutional manner and want to are applying the network in a fully convolutional manner and want to
get a prediction map downsampled by a factor of 32 as an output. Otherwise, get a prediction map downsampled by a factor of 32 as an output.
the output prediction map will be (input / 32) - 6 in case of 'VALID' padding. Otherwise, the output prediction map will be (input / 32) - 6 in case of
'VALID' padding.
Returns: Returns:
the last op containing the log predictions and end_points dict. the last op containing the log predictions and end_points dict.
...@@ -152,8 +153,9 @@ def vgg_16(inputs, ...@@ -152,8 +153,9 @@ def vgg_16(inputs,
fc_conv_padding: the type of padding to use for the fully connected layer fc_conv_padding: the type of padding to use for the fully connected layer
that is implemented as a convolutional layer. Use 'SAME' padding if you that is implemented as a convolutional layer. Use 'SAME' padding if you
are applying the network in a fully convolutional manner and want to are applying the network in a fully convolutional manner and want to
get a prediction map downsampled by a factor of 32 as an output. Otherwise, get a prediction map downsampled by a factor of 32 as an output.
the output prediction map will be (input / 32) - 6 in case of 'VALID' padding. Otherwise, the output prediction map will be (input / 32) - 6 in case of
'VALID' padding.
Returns: Returns:
the last op containing the log predictions and end_points dict. the last op containing the log predictions and end_points dict.
...@@ -217,8 +219,10 @@ def vgg_19(inputs, ...@@ -217,8 +219,10 @@ def vgg_19(inputs,
fc_conv_padding: the type of padding to use for the fully connected layer fc_conv_padding: the type of padding to use for the fully connected layer
that is implemented as a convolutional layer. Use 'SAME' padding if you that is implemented as a convolutional layer. Use 'SAME' padding if you
are applying the network in a fully convolutional manner and want to are applying the network in a fully convolutional manner and want to
get a prediction map downsampled by a factor of 32 as an output. Otherwise, get a prediction map downsampled by a factor of 32 as an output.
the output prediction map will be (input / 32) - 6 in case of 'VALID' padding. Otherwise, the output prediction map will be (input / 32) - 6 in case of
'VALID' padding.
Returns: Returns:
the last op containing the log predictions and end_points dict. the last op containing the log predictions and end_points dict.
......
...@@ -57,6 +57,11 @@ def get_preprocessing(name, is_training=False): ...@@ -57,6 +57,11 @@ def get_preprocessing(name, is_training=False):
'resnet_v1_50': vgg_preprocessing, 'resnet_v1_50': vgg_preprocessing,
'resnet_v1_101': vgg_preprocessing, 'resnet_v1_101': vgg_preprocessing,
'resnet_v1_152': vgg_preprocessing, 'resnet_v1_152': vgg_preprocessing,
'resnet_v1_200': vgg_preprocessing,
'resnet_v2_50': vgg_preprocessing,
'resnet_v2_101': vgg_preprocessing,
'resnet_v2_152': vgg_preprocessing,
'resnet_v2_200': vgg_preprocessing,
'vgg': vgg_preprocessing, 'vgg': vgg_preprocessing,
'vgg_a': vgg_preprocessing, 'vgg_a': vgg_preprocessing,
'vgg_16': vgg_preprocessing, 'vgg_16': vgg_preprocessing,
......
#!/bin/bash
# This script prepares the various different versions of MobileNet models for
# use in a mobile application. If you don't specify your own trained checkpoint
# file, it will download pretrained checkpoints for ImageNet. You'll also need
# to have a copy of the TensorFlow source code to run some of the commands,
# by default it will be looked for in ../tensorflow, but you can set the
# TENSORFLOW_PATH environment variable before calling the script if your source
# is in a different location.
# The main slim/nets/mobilenet_v1.md description has more details about the
# model, but the main points are that it comes in four size versions, 1.0, 0.75,
# 0.50, and 0.25, which controls the number of parameters and so the file size
# of the model, and the input image size, which can be 224, 192, 160, or 128
# pixels, and affects the amount of computation needed, and the latency.
set -e

print_usage () {
  echo "Creates a frozen mobilenet model suitable for mobile use"
  echo "Usage:"
  echo "$0 <mobilenet version> <input size> [checkpoint path]"
}

MOBILENET_VERSION=$1
IMAGE_SIZE=$2
CHECKPOINT=$3

# Map the public depth-multiplier version onto the slim model name.
case "${MOBILENET_VERSION}" in
  1.0)  SLIM_NAME=mobilenet_v1 ;;
  0.75) SLIM_NAME=mobilenet_v1_075 ;;
  0.50) SLIM_NAME=mobilenet_v1_050 ;;
  0.25) SLIM_NAME=mobilenet_v1_025 ;;
  *)
    echo "Bad mobilenet version, should be one of 1.0, 0.75, 0.50, or 0.25"
    print_usage
    exit 1
    ;;
esac

# Only the four image sizes the pretrained checkpoints were built for are valid.
case "${IMAGE_SIZE}" in
  224|192|160|128) ;;
  *)
    echo "Bad input image size, should be one of 224, 192, 160, or 128"
    print_usage
    exit 1
    ;;
esac

# BUG FIX: the original used `[[ ${TENSORFLOW_PATH} -eq "" ]]`, an *arithmetic*
# comparison in which any non-numeric path evaluates to 0, making the test
# always true and silently discarding a user-supplied TENSORFLOW_PATH.
if [[ -z "${TENSORFLOW_PATH}" ]]; then
  TENSORFLOW_PATH=../tensorflow
fi
if [[ ! -d "${TENSORFLOW_PATH}" ]]; then
  echo "TensorFlow source folder not found. You should download the source and then set"
  echo "the TENSORFLOW_PATH environment variable to point to it, like this:"
  echo "export TENSORFLOW_PATH=/my/path/to/tensorflow"
  print_usage
  exit 1
fi

MODEL_FOLDER=/tmp/mobilenet_v1_${MOBILENET_VERSION}_${IMAGE_SIZE}
# Refuse to clobber an existing output folder; make the user delete it first.
if [[ -d "${MODEL_FOLDER}" ]]; then
  echo "Model folder ${MODEL_FOLDER} already exists!"
  echo "If you want to overwrite it, then 'rm -rf ${MODEL_FOLDER}' first."
  print_usage
  exit 1
fi
mkdir "${MODEL_FOLDER}"

# No checkpoint argument given: fetch the pretrained ImageNet weights.
if [[ -z "${CHECKPOINT}" ]]; then
  echo "*******"
  echo "Downloading pretrained weights"
  echo "*******"
  curl "http://download.tensorflow.org/models/mobilenet_v1_${MOBILENET_VERSION}_${IMAGE_SIZE}_2017_06_14.tar.gz" \
    -o "${MODEL_FOLDER}/checkpoints.tar.gz"
  tar xzf "${MODEL_FOLDER}/checkpoints.tar.gz" --directory "${MODEL_FOLDER}"
  CHECKPOINT=${MODEL_FOLDER}/mobilenet_v1_${MOBILENET_VERSION}_${IMAGE_SIZE}.ckpt
fi

echo "*******"
echo "Exporting graph architecture to ${MODEL_FOLDER}/unfrozen_graph.pb"
echo "*******"
bazel run slim:export_inference_graph -- \
  --model_name="${SLIM_NAME}" --image_size="${IMAGE_SIZE}" --logtostderr \
  --output_file="${MODEL_FOLDER}/unfrozen_graph.pb" --dataset_dir="${MODEL_FOLDER}"

# BUG FIX: the original hard-coded `cd ../tensorflow`, ignoring a user-supplied
# TENSORFLOW_PATH even though the script advertises that variable.
cd "${TENSORFLOW_PATH}"

echo "*******"
echo "Freezing graph to ${MODEL_FOLDER}/frozen_graph.pb"
echo "*******"
bazel run tensorflow/python/tools:freeze_graph -- \
  --input_graph="${MODEL_FOLDER}/unfrozen_graph.pb" \
  --input_checkpoint="${CHECKPOINT}" \
  --input_binary=true --output_graph="${MODEL_FOLDER}/frozen_graph.pb" \
  --output_node_names=MobilenetV1/Predictions/Reshape_1

echo "Quantizing weights to ${MODEL_FOLDER}/quantized_graph.pb"
bazel run tensorflow/tools/graph_transforms:transform_graph -- \
  --in_graph="${MODEL_FOLDER}/frozen_graph.pb" \
  --out_graph="${MODEL_FOLDER}/quantized_graph.pb" \
  --inputs=input --outputs=MobilenetV1/Predictions/Reshape_1 \
  --transforms='fold_constants fold_batch_norms quantize_weights'

echo "*******"
echo "Running label_image using the graph"
echo "*******"
bazel build tensorflow/examples/label_image:label_image
bazel-bin/tensorflow/examples/label_image/label_image \
  --input_layer=input --output_layer=MobilenetV1/Predictions/Reshape_1 \
  --graph="${MODEL_FOLDER}/quantized_graph.pb" --input_mean=-127 --input_std=127 \
  --image=tensorflow/examples/label_image/data/grace_hopper.jpg \
  --input_width="${IMAGE_SIZE}" --input_height="${IMAGE_SIZE}" --labels="${MODEL_FOLDER}/labels.txt"

echo "*******"
echo "Saved graphs to ${MODEL_FOLDER}/frozen_graph.pb and ${MODEL_FOLDER}/quantized_graph.pb"
echo "*******"
#!/bin/bash
#
# This script performs the following operations:
# 1. Downloads the Flowers dataset
# 2. Fine-tunes an Inception Resnet V2 model on the Flowers training set.
# 3. Evaluates the model on the Flowers validation set.
#
# Usage:
# cd slim
# ./slim/scripts/finetune_inception_resnet_v2_on_flowers.sh
set -e

# Where the pre-trained Inception Resnet V2 checkpoint is saved to.
PRETRAINED_CHECKPOINT_DIR=/tmp/checkpoints

# Name of the model architecture to fine-tune.
# (The original script repeated the checkpoint-directory comment here.)
MODEL_NAME=inception_resnet_v2

# Where the training (fine-tuned) checkpoint and logs will be saved to.
TRAIN_DIR=/tmp/flowers-models/${MODEL_NAME}

# Where the dataset is saved to.
DATASET_DIR=/tmp/flowers

# Download the pre-trained checkpoint if it is not already present.
# `mkdir -p` creates parent directories and is a no-op when the directory
# exists, so no separate existence test is needed.
mkdir -p "${PRETRAINED_CHECKPOINT_DIR}"
if [ ! -f "${PRETRAINED_CHECKPOINT_DIR}/${MODEL_NAME}.ckpt" ]; then
  wget http://download.tensorflow.org/models/inception_resnet_v2_2016_08_30.tar.gz
  tar -xvf inception_resnet_v2_2016_08_30.tar.gz
  mv inception_resnet_v2.ckpt "${PRETRAINED_CHECKPOINT_DIR}/${MODEL_NAME}.ckpt"
  rm inception_resnet_v2_2016_08_30.tar.gz
fi

# Download the dataset
python download_and_convert_data.py \
  --dataset_name=flowers \
  --dataset_dir="${DATASET_DIR}"

# Fine-tune only the new layers for 1000 steps.
python train_image_classifier.py \
  --train_dir="${TRAIN_DIR}" \
  --dataset_name=flowers \
  --dataset_split_name=train \
  --dataset_dir="${DATASET_DIR}" \
  --model_name="${MODEL_NAME}" \
  --checkpoint_path="${PRETRAINED_CHECKPOINT_DIR}/${MODEL_NAME}.ckpt" \
  --checkpoint_exclude_scopes=InceptionResnetV2/Logits,InceptionResnetV2/AuxLogits \
  --trainable_scopes=InceptionResnetV2/Logits,InceptionResnetV2/AuxLogits \
  --max_number_of_steps=1000 \
  --batch_size=32 \
  --learning_rate=0.01 \
  --learning_rate_decay_type=fixed \
  --save_interval_secs=60 \
  --save_summaries_secs=60 \
  --log_every_n_steps=10 \
  --optimizer=rmsprop \
  --weight_decay=0.00004

# Run evaluation.
python eval_image_classifier.py \
  --checkpoint_path="${TRAIN_DIR}" \
  --eval_dir="${TRAIN_DIR}" \
  --dataset_name=flowers \
  --dataset_split_name=validation \
  --dataset_dir="${DATASET_DIR}" \
  --model_name="${MODEL_NAME}"

# Fine-tune all the new layers for 500 steps.
python train_image_classifier.py \
  --train_dir="${TRAIN_DIR}/all" \
  --dataset_name=flowers \
  --dataset_split_name=train \
  --dataset_dir="${DATASET_DIR}" \
  --model_name="${MODEL_NAME}" \
  --checkpoint_path="${TRAIN_DIR}" \
  --max_number_of_steps=500 \
  --batch_size=32 \
  --learning_rate=0.0001 \
  --learning_rate_decay_type=fixed \
  --save_interval_secs=60 \
  --save_summaries_secs=60 \
  --log_every_n_steps=10 \
  --optimizer=rmsprop \
  --weight_decay=0.00004

# Run evaluation.
python eval_image_classifier.py \
  --checkpoint_path="${TRAIN_DIR}/all" \
  --eval_dir="${TRAIN_DIR}/all" \
  --dataset_name=flowers \
  --dataset_split_name=validation \
  --dataset_dir="${DATASET_DIR}" \
  --model_name="${MODEL_NAME}"
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
# #
# Usage: # Usage:
# cd slim # cd slim
# ./slim/scripts/finetune_inceptionv3_on_flowers.sh # ./slim/scripts/finetune_inception_v3_on_flowers.sh
set -e set -e
# Where the pre-trained InceptionV3 checkpoint is saved to. # Where the pre-trained InceptionV3 checkpoint is saved to.
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
# #
# Usage: # Usage:
# cd slim # cd slim
# ./scripts/train_cifar_net_on_mnist.sh # ./scripts/train_cifarnet_on_cifar10.sh
set -e set -e
# Where the checkpoint and logs will be saved to. # Where the checkpoint and logs will be saved to.
......
...@@ -818,7 +818,7 @@ ...@@ -818,7 +818,7 @@
"import tensorflow as tf\n", "import tensorflow as tf\n",
"\n", "\n",
"try:\n", "try:\n",
" import urllib2\n", " import urllib2 as urllib\n",
"except ImportError:\n", "except ImportError:\n",
" import urllib.request as urllib\n", " import urllib.request as urllib\n",
"\n", "\n",
......
...@@ -89,6 +89,7 @@ py_library( ...@@ -89,6 +89,7 @@ py_library(
srcs = ["network_units.py"], srcs = ["network_units.py"],
deps = [ deps = [
":dragnn_ops", ":dragnn_ops",
"//syntaxnet:syntaxnet_ops",
"//syntaxnet/util:check", "//syntaxnet/util:check",
"//syntaxnet/util:pyregistry", "//syntaxnet/util:pyregistry",
"@org_tensorflow//tensorflow:tensorflow_py", "@org_tensorflow//tensorflow:tensorflow_py",
......
...@@ -15,9 +15,11 @@ ...@@ -15,9 +15,11 @@
"""Basic network units used in assembling DRAGNN graphs.""" """Basic network units used in assembling DRAGNN graphs."""
from abc import ABCMeta from __future__ import absolute_import
from abc import abstractmethod from __future__ import division
from __future__ import print_function
import abc
import tensorflow as tf import tensorflow as tf
from tensorflow.python.ops import nn from tensorflow.python.ops import nn
...@@ -25,6 +27,7 @@ from tensorflow.python.ops import tensor_array_ops as ta ...@@ -25,6 +27,7 @@ from tensorflow.python.ops import tensor_array_ops as ta
from tensorflow.python.platform import tf_logging as logging from tensorflow.python.platform import tf_logging as logging
from dragnn.python import dragnn_ops from dragnn.python import dragnn_ops
from syntaxnet import syntaxnet_ops
from syntaxnet.util import check from syntaxnet.util import check
from syntaxnet.util import registry from syntaxnet.util import registry
...@@ -135,11 +138,11 @@ def add_embeddings(channel_id, feature_spec, seed=None): ...@@ -135,11 +138,11 @@ def add_embeddings(channel_id, feature_spec, seed=None):
raise RuntimeError('vocab resource contains more than one part:\n%s', raise RuntimeError('vocab resource contains more than one part:\n%s',
str(feature_spec.vocab)) str(feature_spec.vocab))
seed1, seed2 = tf.get_seed(seed) seed1, seed2 = tf.get_seed(seed)
embeddings = dragnn_ops.dragnn_embedding_initializer( embeddings = syntaxnet_ops.word_embedding_initializer(
embedding_input=feature_spec.pretrained_embedding_matrix.part[0] vectors=feature_spec.pretrained_embedding_matrix.part[0].file_pattern,
.file_pattern, vocabulary=feature_spec.vocab.part[0].file_pattern,
vocab=feature_spec.vocab.part[0].file_pattern, num_special_embeddings=1,
scaling_coefficient=1.0, embedding_init=1.0,
seed=seed1, seed=seed1,
seed2=seed2) seed2=seed2)
return tf.get_variable(name, initializer=tf.reshape(embeddings, shape)) return tf.get_variable(name, initializer=tf.reshape(embeddings, shape))
...@@ -626,7 +629,7 @@ class NetworkUnitInterface(object): ...@@ -626,7 +629,7 @@ class NetworkUnitInterface(object):
layers (list): List of Layer objects to track network layers that should layers (list): List of Layer objects to track network layers that should
be written to Tensors during training and inference. be written to Tensors during training and inference.
""" """
__metaclass__ = ABCMeta # required for @abstractmethod __metaclass__ = abc.ABCMeta # required for @abstractmethod
def __init__(self, component, init_layers=None, init_context_layers=None): def __init__(self, component, init_layers=None, init_context_layers=None):
"""Initializes parameters for embedding matrices. """Initializes parameters for embedding matrices.
...@@ -738,7 +741,7 @@ class NetworkUnitInterface(object): ...@@ -738,7 +741,7 @@ class NetworkUnitInterface(object):
[attention_hidden_layer_size, component.num_actions], [attention_hidden_layer_size, component.num_actions],
initializer=tf.random_normal_initializer(stddev=1e-4))) initializer=tf.random_normal_initializer(stddev=1e-4)))
@abstractmethod @abc.abstractmethod
def create(self, def create(self,
fixed_embeddings, fixed_embeddings,
linked_embeddings, linked_embeddings,
......
...@@ -747,6 +747,15 @@ py_library( ...@@ -747,6 +747,15 @@ py_library(
data = [":parser_ops.so"], data = [":parser_ops.so"],
) )
py_library(
name = "syntaxnet_ops",
srcs = ["syntaxnet_ops.py"],
deps = [
":parser_ops",
":load_parser_ops_py",
],
)
py_library( py_library(
name = "graph_builder", name = "graph_builder",
srcs = ["graph_builder.py"], srcs = ["graph_builder.py"],
......
...@@ -247,7 +247,10 @@ weights: vector of weight extracted from the SparseFeatures proto. ...@@ -247,7 +247,10 @@ weights: vector of weight extracted from the SparseFeatures proto.
REGISTER_OP("WordEmbeddingInitializer") REGISTER_OP("WordEmbeddingInitializer")
.Output("word_embeddings: float") .Output("word_embeddings: float")
.Attr("vectors: string") .Attr("vectors: string")
.Attr("task_context: string") .Attr("task_context: string = ''")
.Attr("vocabulary: string = ''")
.Attr("cache_vectors_locally: bool = true")
.Attr("num_special_embeddings: int = 3")
.Attr("embedding_init: float = 1.0") .Attr("embedding_init: float = 1.0")
.Attr("seed: int = 0") .Attr("seed: int = 0")
.Attr("seed2: int = 0") .Attr("seed2: int = 0")
...@@ -255,9 +258,17 @@ REGISTER_OP("WordEmbeddingInitializer") ...@@ -255,9 +258,17 @@ REGISTER_OP("WordEmbeddingInitializer")
Reads word embeddings from an sstable of dist_belief.TokenEmbedding protos for Reads word embeddings from an sstable of dist_belief.TokenEmbedding protos for
every word specified in a text vocabulary file. every word specified in a text vocabulary file.
word_embeddings: a tensor containing word embeddings from the specified sstable. word_embeddings: a tensor containing word embeddings from the specified table.
vectors: path to recordio of word embedding vectors. vectors: path to TF record file of word embedding vectors.
task_context: file path at which to read the task context. task_context: file path at which to read the task context, for its "word-map"
input. Exactly one of `task_context` or `vocabulary` must be specified.
vocabulary: path to vocabulary file, which contains one unique word per line, in
order. Exactly one of `task_context` or `vocabulary` must be specified.
cache_vectors_locally: Whether to cache the vectors file to a local temp file
before parsing it. This greatly reduces initialization time when the vectors
are stored remotely, but requires that "/tmp" has sufficient space.
num_special_embeddings: Number of special embeddings to allocate, in addition to
those allocated for real words.
embedding_init: embedding vectors that are not found in the input sstable are embedding_init: embedding vectors that are not found in the input sstable are
initialized randomly from a normal distribution with zero mean and initialized randomly from a normal distribution with zero mean and
std dev = embedding_init / sqrt(embedding_size). std dev = embedding_init / sqrt(embedding_size).
......
...@@ -34,9 +34,11 @@ limitations under the License. ...@@ -34,9 +34,11 @@ limitations under the License.
#include "tensorflow/core/framework/op_kernel.h" #include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/tensor.h" #include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_shape.h" #include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/status.h" #include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/lib/io/table.h" #include "tensorflow/core/lib/io/table.h"
#include "tensorflow/core/lib/io/table_options.h" #include "tensorflow/core/lib/io/table_options.h"
#include "tensorflow/core/lib/strings/str_util.h"
#include "tensorflow/core/lib/strings/stringprintf.h" #include "tensorflow/core/lib/strings/stringprintf.h"
#include "tensorflow/core/platform/env.h" #include "tensorflow/core/platform/env.h"
...@@ -439,14 +441,18 @@ class WordEmbeddingInitializer : public OpKernel { ...@@ -439,14 +441,18 @@ class WordEmbeddingInitializer : public OpKernel {
public: public:
explicit WordEmbeddingInitializer(OpKernelConstruction *context) explicit WordEmbeddingInitializer(OpKernelConstruction *context)
: OpKernel(context) { : OpKernel(context) {
string file_path, data; OP_REQUIRES_OK(context,
OP_REQUIRES_OK(context, context->GetAttr("task_context", &file_path)); context->GetAttr("task_context", &task_context_path_));
OP_REQUIRES_OK(context, ReadFileToString(tensorflow::Env::Default(), OP_REQUIRES_OK(context, context->GetAttr("vocabulary", &vocabulary_path_));
file_path, &data)); OP_REQUIRES(
OP_REQUIRES(context, context, task_context_path_.empty() != vocabulary_path_.empty(),
TextFormat::ParseFromString(data, task_context_.mutable_spec()), InvalidArgument(
InvalidArgument("Could not parse task context at ", file_path)); "Exactly one of task_context or vocabulary must be specified"));
OP_REQUIRES_OK(context, context->GetAttr("vectors", &vectors_path_)); OP_REQUIRES_OK(context, context->GetAttr("vectors", &vectors_path_));
OP_REQUIRES_OK(context, context->GetAttr("cache_vectors_locally",
&cache_vectors_locally_));
OP_REQUIRES_OK(context, context->GetAttr("num_special_embeddings",
&num_special_embeddings_));
OP_REQUIRES_OK(context, OP_REQUIRES_OK(context,
context->GetAttr("embedding_init", &embedding_init_)); context->GetAttr("embedding_init", &embedding_init_));
...@@ -462,43 +468,117 @@ class WordEmbeddingInitializer : public OpKernel { ...@@ -462,43 +468,117 @@ class WordEmbeddingInitializer : public OpKernel {
} }
void Compute(OpKernelContext *context) override { void Compute(OpKernelContext *context) override {
// Loads words from vocabulary with mapping to ids. std::unordered_map<string, int64> vocab;
string path = TaskContext::InputFile(*task_context_.GetInput("word-map")); OP_REQUIRES_OK(context, LoadVocabulary(&vocab));
const TermFrequencyMap *word_map =
SharedStoreUtils::GetWithDefaultName<TermFrequencyMap>(path, 0, 0);
unordered_map<string, int64> vocab;
for (int i = 0; i < word_map->Size(); ++i) {
vocab[word_map->GetTerm(i)] = i;
}
// Creates a reader pointing to a local copy of the vectors recordio. string vectors_path = vectors_path_;
string tmp_vectors_path; if (cache_vectors_locally_) {
OP_REQUIRES_OK(context, CopyToTmpPath(vectors_path_, &tmp_vectors_path)); OP_REQUIRES_OK(context, CopyToTmpPath(vectors_path_, &vectors_path));
ProtoRecordReader reader(tmp_vectors_path); }
ProtoRecordReader reader(vectors_path);
// Loads the embedding vectors into a matrix. // Load the embedding vectors into a matrix. Since the |embedding_matrix|
// output cannot be allocated until the embedding dimension is known, delay
// allocation until the first iteration of the loop.
Tensor *embedding_matrix = nullptr; Tensor *embedding_matrix = nullptr;
TokenEmbedding embedding; TokenEmbedding embedding;
while (reader.Read(&embedding) == tensorflow::Status::OK()) { while (reader.Read(&embedding) == tensorflow::Status::OK()) {
if (embedding_matrix == nullptr) { if (embedding_matrix == nullptr) {
const int embedding_size = embedding.vector().values_size(); OP_REQUIRES_OK(context,
OP_REQUIRES_OK( InitRandomEmbeddingMatrix(vocab, embedding, context,
context, context->allocate_output( &embedding_matrix));
0, TensorShape({word_map->Size() + 3, embedding_size}),
&embedding_matrix));
auto matrix = embedding_matrix->matrix<float>();
Eigen::internal::NormalRandomGenerator<float> prng(seed_);
matrix =
matrix.random(prng) * (embedding_init_ / sqrtf(embedding_size));
} }
if (vocab.find(embedding.token()) != vocab.end()) { if (vocab.find(embedding.token()) != vocab.end()) {
SetNormalizedRow(embedding.vector(), vocab[embedding.token()], SetNormalizedRow(embedding.vector(), vocab[embedding.token()],
embedding_matrix); embedding_matrix);
} }
} }
// The vectors file might not contain any embeddings (perhaps due to read
// errors), in which case the |embedding_matrix| output is never allocated.
// Signal this error early instead of letting downstream ops complain about
// a missing input.
OP_REQUIRES(
context, embedding_matrix != nullptr,
InvalidArgument(tensorflow::strings::StrCat(
"found no pretrained embeddings in vectors=", vectors_path_,
" vocabulary=", vocabulary_path_, " vocab_size=", vocab.size())));
} }
private: private:
// Loads the vocabulary from the task context or vocabulary.
tensorflow::Status LoadVocabulary(
std::unordered_map<string, int64> *vocabulary) const {
if (!task_context_path_.empty()) {
return LoadVocabularyFromTaskContext(vocabulary);
} else {
return LoadVocabularyFromFile(vocabulary);
}
}
// Loads the |vocabulary| from the "word-map" input of the task context at
// |task_context_path_|, or returns non-OK on error.
tensorflow::Status LoadVocabularyFromTaskContext(
std::unordered_map<string, int64> *vocabulary) const {
vocabulary->clear();
string textproto;
TF_RETURN_IF_ERROR(ReadFileToString(tensorflow::Env::Default(),
task_context_path_, &textproto));
TaskContext task_context;
if (!TextFormat::ParseFromString(textproto, task_context.mutable_spec())) {
return InvalidArgument("Could not parse task context at ",
task_context_path_);
}
const string path =
TaskContext::InputFile(*task_context.GetInput("word-map"));
const TermFrequencyMap *word_map =
SharedStoreUtils::GetWithDefaultName<TermFrequencyMap>(path, 0, 0);
for (int i = 0; i < word_map->Size(); ++i) {
(*vocabulary)[word_map->GetTerm(i)] = i;
}
return tensorflow::Status::OK();
}
// Loads the |vocabulary| from the |vocabulary_path_| file, which contains one
// word per line in order, or returns non-OK on error.
tensorflow::Status LoadVocabularyFromFile(
std::unordered_map<string, int64> *vocabulary) const {
vocabulary->clear();
string text;
TF_RETURN_IF_ERROR(
ReadFileToString(tensorflow::Env::Default(), vocabulary_path_, &text));
// Chomp a trailing newline, if any, to avoid producing a spurious empty
// term at the end of the vocabulary file.
if (!text.empty() && text.back() == '\n') text.pop_back();
for (const string &line : tensorflow::str_util::Split(text, "\n")) {
if (vocabulary->find(line) != vocabulary->end()) {
return InvalidArgument("Vocabulary file at ", vocabulary_path_,
" contains multiple instances of term: ", line);
}
const int64 index = vocabulary->size();
(*vocabulary)[line] = index;
}
return tensorflow::Status::OK();
}
// Allocates the |embedding_matrix| based on the |vocabulary| and |embedding|
// and initializes it to random values, or returns non-OK on error.
tensorflow::Status InitRandomEmbeddingMatrix(
const std::unordered_map<string, int64> &vocabulary,
const TokenEmbedding &embedding, OpKernelContext *context,
Tensor **embedding_matrix) const {
const int rows = vocabulary.size() + num_special_embeddings_;
const int columns = embedding.vector().values_size();
TF_RETURN_IF_ERROR(context->allocate_output(0, TensorShape({rows, columns}),
embedding_matrix));
auto matrix = (*embedding_matrix)->matrix<float>();
Eigen::internal::NormalRandomGenerator<float> prng(seed_);
matrix = matrix.random(prng) * (embedding_init_ / sqrtf(columns));
return tensorflow::Status::OK();
}
// Sets embedding_matrix[row] to a normalized version of the given vector. // Sets embedding_matrix[row] to a normalized version of the given vector.
void SetNormalizedRow(const TokenEmbedding::Vector &vector, const int row, void SetNormalizedRow(const TokenEmbedding::Vector &vector, const int row,
Tensor *embedding_matrix) { Tensor *embedding_matrix) {
...@@ -547,8 +627,15 @@ class WordEmbeddingInitializer : public OpKernel { ...@@ -547,8 +627,15 @@ class WordEmbeddingInitializer : public OpKernel {
} }
} }
// Task context used to configure this op. // Path to the task context or vocabulary. Exactly one must be specified.
TaskContext task_context_; string task_context_path_;
string vocabulary_path_;
// Whether to cache the vectors to a local temp file, to reduce I/O latency.
bool cache_vectors_locally_ = true;
// Number of special embeddings to allocate.
int num_special_embeddings_ = 3;
// Seed for random initialization. // Seed for random initialization.
uint64 seed_ = 0; uint64 seed_ = 0;
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
"""Tests for reader_ops.""" """Tests for reader_ops."""
# pylint: disable=no-name-in-module,unused-import,g-bad-import-order,maybe-no-member,no-member,g-importing-member
import os.path import os.path
import numpy as np import numpy as np
...@@ -29,7 +30,6 @@ from syntaxnet import graph_builder ...@@ -29,7 +30,6 @@ from syntaxnet import graph_builder
from syntaxnet import sparse_pb2 from syntaxnet import sparse_pb2
from syntaxnet.ops import gen_parser_ops from syntaxnet.ops import gen_parser_ops
FLAGS = tf.app.flags.FLAGS FLAGS = tf.app.flags.FLAGS
if not hasattr(FLAGS, 'test_srcdir'): if not hasattr(FLAGS, 'test_srcdir'):
FLAGS.test_srcdir = '' FLAGS.test_srcdir = ''
...@@ -220,6 +220,76 @@ class ParsingReaderOpsTest(test_util.TensorFlowTestCase): ...@@ -220,6 +220,76 @@ class ParsingReaderOpsTest(test_util.TensorFlowTestCase):
self.assertEqual(tf.shape(embeddings2)[1].eval(), 3) self.assertEqual(tf.shape(embeddings2)[1].eval(), 3)
self.assertAllEqual(embeddings1.eval(), embeddings2.eval()) self.assertAllEqual(embeddings1.eval(), embeddings2.eval())
def testWordEmbeddingInitializerFailIfNeitherTaskContextOrVocabulary(self):
with self.test_session():
with self.assertRaises(Exception):
gen_parser_ops.word_embedding_initializer(vectors='/dev/null').eval()
def testWordEmbeddingInitializerFailIfBothTaskContextAndVocabulary(self):
with self.test_session():
with self.assertRaises(Exception):
gen_parser_ops.word_embedding_initializer(
vectors='/dev/null',
task_context='/dev/null',
vocabulary='/dev/null').eval()
def testWordEmbeddingInitializerVocabularyFile(self):
records_path = os.path.join(FLAGS.test_tmpdir, 'records3')
writer = tf.python_io.TFRecordWriter(records_path)
writer.write(self._token_embedding('a', [1, 2, 3]))
writer.write(self._token_embedding('b', [2, 3, 4]))
writer.write(self._token_embedding('c', [3, 4, 5]))
writer.write(self._token_embedding('d', [4, 5, 6]))
writer.write(self._token_embedding('e', [5, 6, 7]))
del writer
vocabulary_path = os.path.join(FLAGS.test_tmpdir, 'vocabulary3')
with open(vocabulary_path, 'w') as vocabulary_file:
vocabulary_file.write('a\nc\ne\nx\n') # 'x' not in pretrained embeddings
# Enumerate a variety of configurations.
for cache_vectors_locally in [False, True]:
for num_special_embeddings in [None, 1, 2, 5]: # None = use default of 3
with self.test_session():
embeddings = gen_parser_ops.word_embedding_initializer(
vectors=records_path,
vocabulary=vocabulary_path,
cache_vectors_locally=cache_vectors_locally,
num_special_embeddings=num_special_embeddings)
# Expect 4 embeddings from the vocabulary plus special embeddings.
expected_num_embeddings = 4 + (num_special_embeddings or 3)
self.assertAllEqual([expected_num_embeddings, 3],
tf.shape(embeddings).eval())
# The first 3 embeddings should be pretrained.
norm_a = (1.0 + 4.0 + 9.0) ** 0.5
norm_c = (9.0 + 16.0 + 25.0) ** 0.5
norm_e = (25.0 + 36.0 + 49.0) ** 0.5
self.assertAllClose([[1.0 / norm_a, 2.0 / norm_a, 3.0 / norm_a],
[3.0 / norm_c, 4.0 / norm_c, 5.0 / norm_c],
[5.0 / norm_e, 6.0 / norm_e, 7.0 / norm_e]],
embeddings[:3].eval())
def testWordEmbeddingInitializerVocabularyFileWithDuplicates(self):
records_path = os.path.join(FLAGS.test_tmpdir, 'records4')
writer = tf.python_io.TFRecordWriter(records_path)
writer.write(self._token_embedding('a', [1, 2, 3]))
writer.write(self._token_embedding('b', [2, 3, 4]))
writer.write(self._token_embedding('c', [3, 4, 5]))
writer.write(self._token_embedding('d', [4, 5, 6]))
writer.write(self._token_embedding('e', [5, 6, 7]))
del writer
vocabulary_path = os.path.join(FLAGS.test_tmpdir, 'vocabulary4')
with open(vocabulary_path, 'w') as vocabulary_file:
vocabulary_file.write('a\nc\ne\nx\ny\nx') # 'x' duplicated
with self.test_session():
with self.assertRaises(Exception):
gen_parser_ops.word_embedding_initializer(
vectors=records_path, vocabulary=vocabulary_path).eval()
if __name__ == '__main__': if __name__ == '__main__':
googletest.main() googletest.main()
# Copyright 2017 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Imports the SyntaxNet ops and their C++ implementations."""
from syntaxnet.ops.gen_parser_ops import * # pylint: disable=wildcard-import
import syntaxnet.load_parser_ops
...@@ -8,7 +8,7 @@ The Spatial Transformer Network [1] allows the spatial manipulation of data with ...@@ -8,7 +8,7 @@ The Spatial Transformer Network [1] allows the spatial manipulation of data with
### API ### API
A Spatial Transformer Network implemented in Tensorflow 0.7 and based on [2]. A Spatial Transformer Network implemented in Tensorflow 1.0 and based on [2].
#### How to use #### How to use
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment