Unverified Commit f8e854b5 authored by Sergii Khomenko's avatar Sergii Khomenko Committed by GitHub
Browse files

Merge branch 'master' into dataset

parents 52c7c53e 31adae53
# TensorFlow Official Models # TensorFlow Official Models
The TensorFlow official models are a collection of example models that use TensorFlow's high-level APIs. They are intended to be well-maintained, tested, and kept up to date with the latest stable TensorFlow API. They should also be reasonably optimized for fast performance while still being easy to read. The TensorFlow official models are a collection of example models that use TensorFlow's high-level APIs. They are intended to be well-maintained, tested, and kept up to date with the latest TensorFlow API. They should also be reasonably optimized for fast performance while still being easy to read.
The master branch of the models are **in development**, and they target the [nightly binaries](https://github.com/tensorflow/tensorflow#installation) built from the [master branch of TensorFlow](https://github.com/tensorflow/tensorflow/tree/master).
**Stable versions** of the official models targeting releases of TensorFlow are available as tagged branches or [downloadable releases](https://github.com/tensorflow/models/releases). Model repository version numbers match the target TensorFlow release, such that [branch r1.4.0](https://github.com/tensorflow/models/tree/r1.4.0) and [release v1.4.0](https://github.com/tensorflow/models/releases/tag/v1.4.0) are compatible with [TensorFlow v1.4.0](https://github.com/tensorflow/tensorflow/releases/tag/v1.4.0).
If you are on a version of TensorFlow earlier than v1.4, please [update your installation](https://www.tensorflow.org/install/).
Currently the models are compatible with TensorFlow 1.4. If you are on an earlier version please [update your installation](https://www.tensorflow.org/install/).
--- ---
......
...@@ -89,15 +89,14 @@ def dataset(directory, images_file, labels_file): ...@@ -89,15 +89,14 @@ def dataset(directory, images_file, labels_file):
image = tf.reshape(image, [784]) image = tf.reshape(image, [784])
return image / 255.0 return image / 255.0
def one_hot_label(label): def decode_label(label):
label = tf.decode_raw(label, tf.uint8) # tf.string -> tf.uint8 label = tf.decode_raw(label, tf.uint8) # tf.string -> [tf.uint8]
label = tf.reshape(label, []) # label is a scalar return tf.to_int32(label)
return tf.one_hot(label, 10)
images = tf.data.FixedLengthRecordDataset( images = tf.data.FixedLengthRecordDataset(
images_file, 28 * 28, header_bytes=16).map(decode_image) images_file, 28 * 28, header_bytes=16).map(decode_image)
labels = tf.data.FixedLengthRecordDataset( labels = tf.data.FixedLengthRecordDataset(
labels_file, 1, header_bytes=8).map(one_hot_label) labels_file, 1, header_bytes=8).map(decode_label)
return tf.data.Dataset.zip((images, labels)) return tf.data.Dataset.zip((images, labels))
......
...@@ -96,10 +96,15 @@ def model_fn(features, labels, mode, params): ...@@ -96,10 +96,15 @@ def model_fn(features, labels, mode, params):
}) })
if mode == tf.estimator.ModeKeys.TRAIN: if mode == tf.estimator.ModeKeys.TRAIN:
optimizer = tf.train.AdamOptimizer(learning_rate=1e-4) optimizer = tf.train.AdamOptimizer(learning_rate=1e-4)
# If we are running multi-GPU, we need to wrap the optimizer.
if params.get('multi_gpu'):
optimizer = tf.contrib.estimator.TowerOptimizer(optimizer)
logits = model(image, training=True) logits = model(image, training=True)
loss = tf.losses.softmax_cross_entropy(onehot_labels=labels, logits=logits) loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
accuracy = tf.metrics.accuracy( accuracy = tf.metrics.accuracy(
labels=tf.argmax(labels, axis=1), predictions=tf.argmax(logits, axis=1)) labels=labels, predictions=tf.argmax(logits, axis=1))
# Name the accuracy tensor 'train_accuracy' to demonstrate the # Name the accuracy tensor 'train_accuracy' to demonstrate the
# LoggingTensorHook. # LoggingTensorHook.
tf.identity(accuracy[1], name='train_accuracy') tf.identity(accuracy[1], name='train_accuracy')
...@@ -110,28 +115,65 @@ def model_fn(features, labels, mode, params): ...@@ -110,28 +115,65 @@ def model_fn(features, labels, mode, params):
train_op=optimizer.minimize(loss, tf.train.get_or_create_global_step())) train_op=optimizer.minimize(loss, tf.train.get_or_create_global_step()))
if mode == tf.estimator.ModeKeys.EVAL: if mode == tf.estimator.ModeKeys.EVAL:
logits = model(image, training=False) logits = model(image, training=False)
loss = tf.losses.softmax_cross_entropy(onehot_labels=labels, logits=logits) loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
return tf.estimator.EstimatorSpec( return tf.estimator.EstimatorSpec(
mode=tf.estimator.ModeKeys.EVAL, mode=tf.estimator.ModeKeys.EVAL,
loss=loss, loss=loss,
eval_metric_ops={ eval_metric_ops={
'accuracy': 'accuracy':
tf.metrics.accuracy( tf.metrics.accuracy(
labels=tf.argmax(labels, axis=1), labels=labels,
predictions=tf.argmax(logits, axis=1)), predictions=tf.argmax(logits, axis=1)),
}) })
def validate_batch_size_for_multi_gpu(batch_size):
"""For multi-gpu, batch-size must be a multiple of the number of
available GPUs.
Note that this should eventually be handled by replicate_model_fn
directly. Multi-GPU support is currently experimental, however,
so doing the work here until that feature is in place.
"""
from tensorflow.python.client import device_lib
local_device_protos = device_lib.list_local_devices()
num_gpus = sum([1 for d in local_device_protos if d.device_type == 'GPU'])
if not num_gpus:
raise ValueError('Multi-GPU mode was specified, but no GPUs '
'were found. To use CPU, run without --multi_gpu.')
remainder = batch_size % num_gpus
if remainder:
err = ('When running with multiple GPUs, batch size '
'must be a multiple of the number of available GPUs. '
'Found {} GPUs with a batch size of {}; try --batch_size={} instead.'
).format(num_gpus, batch_size, batch_size - remainder)
raise ValueError(err)
def main(unused_argv): def main(unused_argv):
model_function = model_fn
if FLAGS.multi_gpu:
validate_batch_size_for_multi_gpu(FLAGS.batch_size)
# There are two steps required if using multi-GPU: (1) wrap the model_fn,
# and (2) wrap the optimizer. The first happens here, and (2) happens
# in the model_fn itself when the optimizer is defined.
model_function = tf.contrib.estimator.replicate_model_fn(
model_fn, loss_reduction=tf.losses.Reduction.MEAN)
data_format = FLAGS.data_format data_format = FLAGS.data_format
if data_format is None: if data_format is None:
data_format = ('channels_first' data_format = ('channels_first'
if tf.test.is_built_with_cuda() else 'channels_last') if tf.test.is_built_with_cuda() else 'channels_last')
mnist_classifier = tf.estimator.Estimator( mnist_classifier = tf.estimator.Estimator(
model_fn=model_fn, model_fn=model_function,
model_dir=FLAGS.model_dir, model_dir=FLAGS.model_dir,
params={ params={
'data_format': data_format 'data_format': data_format,
'multi_gpu': FLAGS.multi_gpu
}) })
# Train the model # Train the model
...@@ -169,39 +211,52 @@ def main(unused_argv): ...@@ -169,39 +211,52 @@ def main(unused_argv):
mnist_classifier.export_savedmodel(FLAGS.export_dir, input_fn) mnist_classifier.export_savedmodel(FLAGS.export_dir, input_fn)
if __name__ == '__main__': class MNISTArgParser(argparse.ArgumentParser):
parser = argparse.ArgumentParser()
parser.add_argument( def __init__(self):
'--batch_size', super(MNISTArgParser, self).__init__()
type=int,
default=100,
help='Number of images to process in a batch')
parser.add_argument(
'--data_dir',
type=str,
default='/tmp/mnist_data',
help='Path to directory containing the MNIST dataset')
parser.add_argument(
'--model_dir',
type=str,
default='/tmp/mnist_model',
help='The directory where the model will be stored.')
parser.add_argument(
'--train_epochs', type=int, default=40, help='Number of epochs to train.')
parser.add_argument(
'--data_format',
type=str,
default=None,
choices=['channels_first', 'channels_last'],
help='A flag to override the data format used in the model. channels_first '
'provides a performance boost on GPU but is not always compatible '
'with CPU. If left unspecified, the data format will be chosen '
'automatically based on whether TensorFlow was built for CPU or GPU.')
parser.add_argument(
'--export_dir',
type=str,
help='The directory where the exported SavedModel will be stored.')
self.add_argument(
'--multi_gpu', action='store_true',
help='If set, run across all available GPUs.')
self.add_argument(
'--batch_size',
type=int,
default=100,
help='Number of images to process in a batch')
self.add_argument(
'--data_dir',
type=str,
default='/tmp/mnist_data',
help='Path to directory containing the MNIST dataset')
self.add_argument(
'--model_dir',
type=str,
default='/tmp/mnist_model',
help='The directory where the model will be stored.')
self.add_argument(
'--train_epochs',
type=int,
default=40,
help='Number of epochs to train.')
self.add_argument(
'--data_format',
type=str,
default=None,
choices=['channels_first', 'channels_last'],
help='A flag to override the data format used in the model. '
'channels_first provides a performance boost on GPU but is not always '
'compatible with CPU. If left unspecified, the data format will be '
'chosen automatically based on whether TensorFlow was built for CPU or '
'GPU.')
self.add_argument(
'--export_dir',
type=str,
help='The directory where the exported SavedModel will be stored.')
if __name__ == '__main__':
parser = MNISTArgParser()
tf.logging.set_verbosity(tf.logging.INFO) tf.logging.set_verbosity(tf.logging.INFO)
FLAGS, unparsed = parser.parse_known_args() FLAGS, unparsed = parser.parse_known_args()
tf.app.run(main=main, argv=[sys.argv[0]] + unparsed) tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
...@@ -27,8 +27,8 @@ BATCH_SIZE = 100 ...@@ -27,8 +27,8 @@ BATCH_SIZE = 100
def dummy_input_fn(): def dummy_input_fn():
image = tf.random_uniform([BATCH_SIZE, 784]) image = tf.random_uniform([BATCH_SIZE, 784])
labels = tf.random_uniform([BATCH_SIZE], maxval=9, dtype=tf.int32) labels = tf.random_uniform([BATCH_SIZE, 1], maxval=9, dtype=tf.int32)
return image, tf.one_hot(labels, 10) return image, labels
def make_estimator(): def make_estimator():
...@@ -62,11 +62,12 @@ class Tests(tf.test.TestCase): ...@@ -62,11 +62,12 @@ class Tests(tf.test.TestCase):
self.assertEqual(predictions['probabilities'].shape, (10,)) self.assertEqual(predictions['probabilities'].shape, (10,))
self.assertEqual(predictions['classes'].shape, ()) self.assertEqual(predictions['classes'].shape, ())
def mnist_model_fn_helper(self, mode): def mnist_model_fn_helper(self, mode, multi_gpu=False):
features, labels = dummy_input_fn() features, labels = dummy_input_fn()
image_count = features.shape[0] image_count = features.shape[0]
spec = mnist.model_fn(features, labels, mode, { spec = mnist.model_fn(features, labels, mode, {
'data_format': 'channels_last' 'data_format': 'channels_last',
'multi_gpu': multi_gpu
}) })
if mode == tf.estimator.ModeKeys.PREDICT: if mode == tf.estimator.ModeKeys.PREDICT:
...@@ -91,6 +92,9 @@ class Tests(tf.test.TestCase): ...@@ -91,6 +92,9 @@ class Tests(tf.test.TestCase):
def test_mnist_model_fn_train_mode(self): def test_mnist_model_fn_train_mode(self):
self.mnist_model_fn_helper(tf.estimator.ModeKeys.TRAIN) self.mnist_model_fn_helper(tf.estimator.ModeKeys.TRAIN)
def test_mnist_model_fn_train_mode_multi_gpu(self):
self.mnist_model_fn_helper(tf.estimator.ModeKeys.TRAIN, multi_gpu=True)
def test_mnist_model_fn_eval_mode(self): def test_mnist_model_fn_eval_mode(self):
self.mnist_model_fn_helper(tf.estimator.ModeKeys.EVAL) self.mnist_model_fn_helper(tf.estimator.ModeKeys.EVAL)
......
...@@ -50,7 +50,7 @@ FLAGS = tf.flags.FLAGS ...@@ -50,7 +50,7 @@ FLAGS = tf.flags.FLAGS
def metric_fn(labels, logits): def metric_fn(labels, logits):
accuracy = tf.metrics.accuracy( accuracy = tf.metrics.accuracy(
labels=tf.argmax(labels, axis=1), predictions=tf.argmax(logits, axis=1)) labels=labels, predictions=tf.argmax(logits, axis=1))
return {"accuracy": accuracy} return {"accuracy": accuracy}
...@@ -64,7 +64,7 @@ def model_fn(features, labels, mode, params): ...@@ -64,7 +64,7 @@ def model_fn(features, labels, mode, params):
model = mnist.Model("channels_last") model = mnist.Model("channels_last")
logits = model(image, training=(mode == tf.estimator.ModeKeys.TRAIN)) logits = model(image, training=(mode == tf.estimator.ModeKeys.TRAIN))
loss = tf.losses.softmax_cross_entropy(onehot_labels=labels, logits=logits) loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
if mode == tf.estimator.ModeKeys.TRAIN: if mode == tf.estimator.ModeKeys.TRAIN:
learning_rate = tf.train.exponential_decay( learning_rate = tf.train.exponential_decay(
......
...@@ -18,63 +18,33 @@ from __future__ import absolute_import ...@@ -18,63 +18,33 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import argparse
import os import os
import sys import sys
import tensorflow as tf import tensorflow as tf
import resnet_model import resnet_model
import resnet_shared
parser = argparse.ArgumentParser()
# Basic model parameters.
parser.add_argument('--data_dir', type=str, default='/tmp/cifar10_data',
help='The path to the CIFAR-10 data directory.')
parser.add_argument('--model_dir', type=str, default='/tmp/cifar10_model',
help='The directory where the model will be stored.')
parser.add_argument('--resnet_size', type=int, default=32,
help='The size of the ResNet model to use.')
parser.add_argument('--train_epochs', type=int, default=250,
help='The number of epochs to train.')
parser.add_argument('--epochs_per_eval', type=int, default=10,
help='The number of epochs to run in between evaluations.')
parser.add_argument('--batch_size', type=int, default=128,
help='The number of images per batch.')
parser.add_argument(
'--data_format', type=str, default=None,
choices=['channels_first', 'channels_last'],
help='A flag to override the data format used in the model. channels_first '
'provides a performance boost on GPU but is not always compatible '
'with CPU. If left unspecified, the data format will be chosen '
'automatically based on whether TensorFlow was built for CPU or GPU.')
_HEIGHT = 32 _HEIGHT = 32
_WIDTH = 32 _WIDTH = 32
_DEPTH = 3 _NUM_CHANNELS = 3
_DEFAULT_IMAGE_BYTES = _HEIGHT * _WIDTH * _NUM_CHANNELS
_NUM_CLASSES = 10 _NUM_CLASSES = 10
_NUM_DATA_FILES = 5 _NUM_DATA_FILES = 5
# We use a weight decay of 0.0002, which performs better than the 0.0001 that
# was originally suggested.
_WEIGHT_DECAY = 2e-4
_MOMENTUM = 0.9
_NUM_IMAGES = { _NUM_IMAGES = {
'train': 50000, 'train': 50000,
'validation': 10000, 'validation': 10000,
} }
###############################################################################
# Data processing
###############################################################################
def record_dataset(filenames): def record_dataset(filenames):
"""Returns an input pipeline Dataset from `filenames`.""" """Returns an input pipeline Dataset from `filenames`."""
record_bytes = _HEIGHT * _WIDTH * _DEPTH + 1 record_bytes = _DEFAULT_IMAGE_BYTES + 1
return tf.data.FixedLengthRecordDataset(filenames, record_bytes) return tf.data.FixedLengthRecordDataset(filenames, record_bytes)
...@@ -100,8 +70,7 @@ def parse_record(raw_record): ...@@ -100,8 +70,7 @@ def parse_record(raw_record):
# Every record consists of a label followed by the image, with a fixed number # Every record consists of a label followed by the image, with a fixed number
# of bytes for each. # of bytes for each.
label_bytes = 1 label_bytes = 1
image_bytes = _HEIGHT * _WIDTH * _DEPTH record_bytes = label_bytes + _DEFAULT_IMAGE_BYTES
record_bytes = label_bytes + image_bytes
# Convert bytes to a vector of uint8 that is record_bytes long. # Convert bytes to a vector of uint8 that is record_bytes long.
record_vector = tf.decode_raw(raw_record, tf.uint8) record_vector = tf.decode_raw(raw_record, tf.uint8)
...@@ -113,8 +82,8 @@ def parse_record(raw_record): ...@@ -113,8 +82,8 @@ def parse_record(raw_record):
# The remaining bytes after the label represent the image, which we reshape # The remaining bytes after the label represent the image, which we reshape
# from [depth * height * width] to [depth, height, width]. # from [depth * height * width] to [depth, height, width].
depth_major = tf.reshape( depth_major = tf.reshape(record_vector[label_bytes:record_bytes],
record_vector[label_bytes:record_bytes], [_DEPTH, _HEIGHT, _WIDTH]) [_NUM_CHANNELS, _HEIGHT, _WIDTH])
# Convert from [depth, height, width] to [height, width, depth], and cast as # Convert from [depth, height, width] to [height, width, depth], and cast as
# float32. # float32.
...@@ -131,7 +100,7 @@ def preprocess_image(image, is_training): ...@@ -131,7 +100,7 @@ def preprocess_image(image, is_training):
image, _HEIGHT + 8, _WIDTH + 8) image, _HEIGHT + 8, _WIDTH + 8)
# Randomly crop a [_HEIGHT, _WIDTH] section of the image. # Randomly crop a [_HEIGHT, _WIDTH] section of the image.
image = tf.random_crop(image, [_HEIGHT, _WIDTH, _DEPTH]) image = tf.random_crop(image, [_HEIGHT, _WIDTH, _NUM_CHANNELS])
# Randomly flip the image horizontally. # Randomly flip the image horizontally.
image = tf.image.random_flip_left_right(image) image = tf.image.random_flip_left_right(image)
...@@ -180,116 +149,81 @@ def input_fn(is_training, data_dir, batch_size, num_epochs=1): ...@@ -180,116 +149,81 @@ def input_fn(is_training, data_dir, batch_size, num_epochs=1):
return images, labels return images, labels
def cifar10_model_fn(features, labels, mode, params): ###############################################################################
"""Model function for CIFAR-10.""" # Running the model
tf.summary.image('images', features, max_outputs=6) ###############################################################################
class Cifar10Model(resnet_model.Model):
network = resnet_model.cifar10_resnet_v2_generator(
params['resnet_size'], _NUM_CLASSES, params['data_format']) def __init__(self, resnet_size, data_format=None):
"""These are the parameters that work for CIFAR-10 data.
inputs = tf.reshape(features, [-1, _HEIGHT, _WIDTH, _DEPTH]) """
logits = network(inputs, mode == tf.estimator.ModeKeys.TRAIN) if resnet_size % 6 != 2:
raise ValueError('resnet_size must be 6n + 2:', resnet_size)
predictions = {
'classes': tf.argmax(logits, axis=1), num_blocks = (resnet_size - 2) // 6
'probabilities': tf.nn.softmax(logits, name='softmax_tensor')
} super(Cifar10Model, self).__init__(
resnet_size=resnet_size,
if mode == tf.estimator.ModeKeys.PREDICT: num_classes=_NUM_CLASSES,
return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions) num_filters=16,
kernel_size=3,
# Calculate loss, which includes softmax cross entropy and L2 regularization. conv_stride=1,
cross_entropy = tf.losses.softmax_cross_entropy( first_pool_size=None,
logits=logits, onehot_labels=labels) first_pool_stride=None,
second_pool_size=8,
# Create a tensor named cross_entropy for logging purposes. second_pool_stride=1,
tf.identity(cross_entropy, name='cross_entropy') block_fn=resnet_model.building_block,
tf.summary.scalar('cross_entropy', cross_entropy) block_sizes=[num_blocks] * 3,
block_strides=[1, 2, 2],
# Add weight decay to the loss. final_size=64,
loss = cross_entropy + _WEIGHT_DECAY * tf.add_n( data_format=data_format)
[tf.nn.l2_loss(v) for v in tf.trainable_variables()])
if mode == tf.estimator.ModeKeys.TRAIN:
# Scale the learning rate linearly with the batch size. When the batch size
# is 128, the learning rate should be 0.1.
initial_learning_rate = 0.1 * params['batch_size'] / 128
batches_per_epoch = _NUM_IMAGES['train'] / params['batch_size']
global_step = tf.train.get_or_create_global_step()
# Multiply the learning rate by 0.1 at 100, 150, and 200 epochs.
boundaries = [int(batches_per_epoch * epoch) for epoch in [100, 150, 200]]
values = [initial_learning_rate * decay for decay in [1, 0.1, 0.01, 0.001]]
learning_rate = tf.train.piecewise_constant(
tf.cast(global_step, tf.int32), boundaries, values)
# Create a tensor named learning_rate for logging purposes
tf.identity(learning_rate, name='learning_rate')
tf.summary.scalar('learning_rate', learning_rate)
optimizer = tf.train.MomentumOptimizer(
learning_rate=learning_rate,
momentum=_MOMENTUM)
# Batch norm requires update ops to be added as a dependency to the train_op
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
train_op = optimizer.minimize(loss, global_step)
else:
train_op = None
accuracy = tf.metrics.accuracy(
tf.argmax(labels, axis=1), predictions['classes'])
metrics = {'accuracy': accuracy}
# Create a tensor named train_accuracy for logging purposes
tf.identity(accuracy[1], name='train_accuracy')
tf.summary.scalar('train_accuracy', accuracy[1])
return tf.estimator.EstimatorSpec( def cifar10_model_fn(features, labels, mode, params):
mode=mode, """Model function for CIFAR-10."""
predictions=predictions, features = tf.reshape(features, [-1, _HEIGHT, _WIDTH, _NUM_CHANNELS])
loss=loss,
train_op=train_op, learning_rate_fn = resnet_shared.learning_rate_with_decay(
eval_metric_ops=metrics) batch_size=params['batch_size'], batch_denom=128,
num_images=_NUM_IMAGES['train'], boundary_epochs=[100, 150, 200],
decay_rates=[1, 0.1, 0.01, 0.001])
# We use a weight decay of 0.0002, which performs better
# than the 0.0001 that was originally suggested.
weight_decay = 2e-4
# Empirical testing showed that including batch_normalization variables
# in the calculation of regularized loss helped validation accuracy
# for the CIFAR-10 dataset, perhaps because the regularization prevents
# overfitting on the small data set. We therefore include all vars when
# regularizing and computing loss during training.
def loss_filter_fn(name):
return True
return resnet_shared.resnet_model_fn(features, labels, mode, Cifar10Model,
resnet_size=params['resnet_size'],
weight_decay=weight_decay,
learning_rate_fn=learning_rate_fn,
momentum=0.9,
data_format=params['data_format'],
loss_filter_fn=loss_filter_fn)
def main(unused_argv): def main(unused_argv):
# Using the Winograd non-fused algorithms provides a small performance boost. resnet_shared.resnet_main(FLAGS, cifar10_model_fn, input_fn)
os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'
# Set up a RunConfig to only save checkpoints once per training cycle.
run_config = tf.estimator.RunConfig().replace(save_checkpoints_secs=1e9)
cifar_classifier = tf.estimator.Estimator(
model_fn=cifar10_model_fn, model_dir=FLAGS.model_dir, config=run_config,
params={
'resnet_size': FLAGS.resnet_size,
'data_format': FLAGS.data_format,
'batch_size': FLAGS.batch_size,
})
for _ in range(FLAGS.train_epochs // FLAGS.epochs_per_eval):
tensors_to_log = {
'learning_rate': 'learning_rate',
'cross_entropy': 'cross_entropy',
'train_accuracy': 'train_accuracy'
}
logging_hook = tf.train.LoggingTensorHook(
tensors=tensors_to_log, every_n_iter=100)
cifar_classifier.train(
input_fn=lambda: input_fn(
True, FLAGS.data_dir, FLAGS.batch_size, FLAGS.epochs_per_eval),
hooks=[logging_hook])
# Evaluate the model and print results
eval_results = cifar_classifier.evaluate(
input_fn=lambda: input_fn(False, FLAGS.data_dir, FLAGS.batch_size))
print(eval_results)
if __name__ == '__main__': if __name__ == '__main__':
tf.logging.set_verbosity(tf.logging.INFO) tf.logging.set_verbosity(tf.logging.INFO)
parser = resnet_shared.ResnetArgParser()
# Set defaults that are reasonable for this model.
parser.set_defaults(data_dir='/tmp/cifar10_data',
model_dir='/tmp/cifar10_model',
resnet_size=32,
train_epochs=250,
epochs_per_eval=10,
batch_size=128)
FLAGS, unparsed = parser.parse_known_args() FLAGS, unparsed = parser.parse_known_args()
tf.app.run(argv=[sys.argv[0]] + unparsed) tf.app.run(argv=[sys.argv[0]] + unparsed)
...@@ -18,55 +18,18 @@ from __future__ import absolute_import ...@@ -18,55 +18,18 @@ from __future__ import absolute_import
from __future__ import division from __future__ import division
from __future__ import print_function from __future__ import print_function
import argparse
import os import os
import sys import sys
import tensorflow as tf import tensorflow as tf
import resnet_model import resnet_model
import resnet_shared
import vgg_preprocessing import vgg_preprocessing
parser = argparse.ArgumentParser()
parser.add_argument(
'--data_dir', type=str, default='',
help='The directory where the ImageNet input data is stored.')
parser.add_argument(
'--model_dir', type=str, default='/tmp/resnet_model',
help='The directory where the model will be stored.')
parser.add_argument(
'--resnet_size', type=int, default=50, choices=[18, 34, 50, 101, 152, 200],
help='The size of the ResNet model to use.')
parser.add_argument(
'--train_epochs', type=int, default=100,
help='The number of epochs to use for training.')
parser.add_argument(
'--epochs_per_eval', type=int, default=1,
help='The number of training epochs to run between evaluations.')
parser.add_argument(
'--batch_size', type=int, default=32,
help='Batch size for training and evaluation.')
parser.add_argument(
'--data_format', type=str, default=None,
choices=['channels_first', 'channels_last'],
help='A flag to override the data format used in the model. channels_first '
'provides a performance boost on GPU but is not always compatible '
'with CPU. If left unspecified, the data format will be chosen '
'automatically based on whether TensorFlow was built for CPU or GPU.')
_DEFAULT_IMAGE_SIZE = 224 _DEFAULT_IMAGE_SIZE = 224
_NUM_CHANNELS = 3 _NUM_CHANNELS = 3
_LABEL_CLASSES = 1001 _NUM_CLASSES = 1001
_MOMENTUM = 0.9
_WEIGHT_DECAY = 1e-4
_NUM_IMAGES = { _NUM_IMAGES = {
'train': 1281167, 'train': 1281167,
...@@ -77,6 +40,9 @@ _FILE_SHUFFLE_BUFFER = 1024 ...@@ -77,6 +40,9 @@ _FILE_SHUFFLE_BUFFER = 1024
_SHUFFLE_BUFFER = 1500 _SHUFFLE_BUFFER = 1500
###############################################################################
# Data processing
###############################################################################
def filenames(is_training, data_dir): def filenames(is_training, data_dir):
"""Return filenames for dataset.""" """Return filenames for dataset."""
if is_training: if is_training:
...@@ -89,7 +55,7 @@ def filenames(is_training, data_dir): ...@@ -89,7 +55,7 @@ def filenames(is_training, data_dir):
for i in range(128)] for i in range(128)]
def record_parser(value, is_training): def parse_record(raw_record, is_training):
"""Parse an ImageNet record from `value`.""" """Parse an ImageNet record from `value`."""
keys_to_features = { keys_to_features = {
'image/encoded': 'image/encoded':
...@@ -112,7 +78,7 @@ def record_parser(value, is_training): ...@@ -112,7 +78,7 @@ def record_parser(value, is_training):
tf.VarLenFeature(dtype=tf.int64), tf.VarLenFeature(dtype=tf.int64),
} }
parsed = tf.parse_single_example(value, keys_to_features) parsed = tf.parse_single_example(raw_record, keys_to_features)
image = tf.image.decode_image( image = tf.image.decode_image(
tf.reshape(parsed['image/encoded'], shape=[]), tf.reshape(parsed['image/encoded'], shape=[]),
...@@ -129,18 +95,19 @@ def record_parser(value, is_training): ...@@ -129,18 +95,19 @@ def record_parser(value, is_training):
tf.reshape(parsed['image/class/label'], shape=[]), tf.reshape(parsed['image/class/label'], shape=[]),
dtype=tf.int32) dtype=tf.int32)
return image, tf.one_hot(label, _LABEL_CLASSES) return image, tf.one_hot(label, _NUM_CLASSES)
def input_fn(is_training, data_dir, batch_size, num_epochs=1): def input_fn(is_training, data_dir, batch_size, num_epochs=1):
"""Input function which provides batches for train or eval.""" """Input function which provides batches for train or eval."""
dataset = tf.data.Dataset.from_tensor_slices(filenames(is_training, data_dir)) dataset = tf.data.Dataset.from_tensor_slices(
filenames(is_training, data_dir))
if is_training: if is_training:
dataset = dataset.shuffle(buffer_size=_FILE_SHUFFLE_BUFFER) dataset = dataset.shuffle(buffer_size=_FILE_SHUFFLE_BUFFER)
dataset = dataset.flat_map(tf.data.TFRecordDataset) dataset = dataset.flat_map(tf.data.TFRecordDataset)
dataset = dataset.map(lambda value: record_parser(value, is_training), dataset = dataset.map(lambda value: parse_record(value, is_training),
num_parallel_calls=5) num_parallel_calls=5)
dataset = dataset.prefetch(batch_size) dataset = dataset.prefetch(batch_size)
...@@ -159,120 +126,86 @@ def input_fn(is_training, data_dir, batch_size, num_epochs=1): ...@@ -159,120 +126,86 @@ def input_fn(is_training, data_dir, batch_size, num_epochs=1):
return images, labels return images, labels
def resnet_model_fn(features, labels, mode, params): ###############################################################################
"""Our model_fn for ResNet to be used with our Estimator.""" # Running the model
tf.summary.image('images', features, max_outputs=6) ###############################################################################
class ImagenetModel(resnet_model.Model):
network = resnet_model.imagenet_resnet_v2( def __init__(self, resnet_size, data_format=None):
params['resnet_size'], _LABEL_CLASSES, params['data_format']) """These are the parameters that work for Imagenet data.
logits = network( """
inputs=features, is_training=(mode == tf.estimator.ModeKeys.TRAIN))
# For bigger models, we want to use "bottleneck" layers
predictions = { if resnet_size < 50:
'classes': tf.argmax(logits, axis=1), block_fn = resnet_model.building_block
'probabilities': tf.nn.softmax(logits, name='softmax_tensor') final_size = 512
else:
block_fn = resnet_model.bottleneck_block
final_size = 2048
super(ImagenetModel, self).__init__(
resnet_size=resnet_size,
num_classes=_NUM_CLASSES,
num_filters=64,
kernel_size=7,
conv_stride=2,
first_pool_size=3,
first_pool_stride=2,
second_pool_size=7,
second_pool_stride=1,
block_fn=block_fn,
block_sizes=_get_block_sizes(resnet_size),
block_strides=[1, 2, 2, 2],
final_size=final_size,
data_format=data_format)
def _get_block_sizes(resnet_size):
"""The number of block layers used for the Resnet model varies according
to the size of the model. This helper grabs the layer set we want, throwing
an error if a non-standard size has been selected.
"""
choices = {
18: [2, 2, 2, 2],
34: [3, 4, 6, 3],
50: [3, 4, 6, 3],
101: [3, 4, 23, 3],
152: [3, 8, 36, 3],
200: [3, 24, 36, 3]
} }
if mode == tf.estimator.ModeKeys.PREDICT: try:
return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions) return choices[resnet_size]
except KeyError:
# Calculate loss, which includes softmax cross entropy and L2 regularization. err = ('Could not find layers for selected Resnet size.\n'
cross_entropy = tf.losses.softmax_cross_entropy( 'Size received: {}; sizes allowed: {}.'.format(
logits=logits, onehot_labels=labels) resnet_size, choices.keys()))
raise ValueError(err)
# Create a tensor named cross_entropy for logging purposes.
tf.identity(cross_entropy, name='cross_entropy')
tf.summary.scalar('cross_entropy', cross_entropy)
# Add weight decay to the loss. We exclude the batch norm variables because
# doing so leads to a small improvement in accuracy.
loss = cross_entropy + _WEIGHT_DECAY * tf.add_n(
[tf.nn.l2_loss(v) for v in tf.trainable_variables()
if 'batch_normalization' not in v.name])
if mode == tf.estimator.ModeKeys.TRAIN:
# Scale the learning rate linearly with the batch size. When the batch size
# is 256, the learning rate should be 0.1.
initial_learning_rate = 0.1 * params['batch_size'] / 256
batches_per_epoch = _NUM_IMAGES['train'] / params['batch_size']
global_step = tf.train.get_or_create_global_step()
# Multiply the learning rate by 0.1 at 30, 60, 80, and 90 epochs.
boundaries = [
int(batches_per_epoch * epoch) for epoch in [30, 60, 80, 90]]
values = [
initial_learning_rate * decay for decay in [1, 0.1, 0.01, 1e-3, 1e-4]]
learning_rate = tf.train.piecewise_constant(
tf.cast(global_step, tf.int32), boundaries, values)
# Create a tensor named learning_rate for logging purposes.
tf.identity(learning_rate, name='learning_rate')
tf.summary.scalar('learning_rate', learning_rate)
optimizer = tf.train.MomentumOptimizer(
learning_rate=learning_rate,
momentum=_MOMENTUM)
# Batch norm requires update_ops to be added as a train_op dependency.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
train_op = optimizer.minimize(loss, global_step)
else:
train_op = None
accuracy = tf.metrics.accuracy(
tf.argmax(labels, axis=1), predictions['classes'])
metrics = {'accuracy': accuracy}
# Create a tensor named train_accuracy for logging purposes. def imagenet_model_fn(features, labels, mode, params):
tf.identity(accuracy[1], name='train_accuracy') """Our model_fn for ResNet to be used with our Estimator."""
tf.summary.scalar('train_accuracy', accuracy[1]) learning_rate_fn = resnet_shared.learning_rate_with_decay(
batch_size=params['batch_size'], batch_denom=256,
num_images=_NUM_IMAGES['train'], boundary_epochs=[30, 60, 80, 90],
decay_rates=[1, 0.1, 0.01, 0.001, 1e-4])
return tf.estimator.EstimatorSpec( return resnet_shared.resnet_model_fn(features, labels, mode, ImagenetModel,
mode=mode, resnet_size=params['resnet_size'],
predictions=predictions, weight_decay=1e-4,
loss=loss, learning_rate_fn=learning_rate_fn,
train_op=train_op, momentum=0.9,
eval_metric_ops=metrics) data_format=params['data_format'],
loss_filter_fn=None)
def main(unused_argv): def main(unused_argv):
# Using the Winograd non-fused algorithms provides a small performance boost. resnet_shared.resnet_main(FLAGS, imagenet_model_fn, input_fn)
os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'
# Set up a RunConfig to only save checkpoints once per training cycle.
run_config = tf.estimator.RunConfig().replace(save_checkpoints_secs=1e9)
resnet_classifier = tf.estimator.Estimator(
model_fn=resnet_model_fn, model_dir=FLAGS.model_dir, config=run_config,
params={
'resnet_size': FLAGS.resnet_size,
'data_format': FLAGS.data_format,
'batch_size': FLAGS.batch_size,
})
for _ in range(FLAGS.train_epochs // FLAGS.epochs_per_eval):
tensors_to_log = {
'learning_rate': 'learning_rate',
'cross_entropy': 'cross_entropy',
'train_accuracy': 'train_accuracy'
}
logging_hook = tf.train.LoggingTensorHook(
tensors=tensors_to_log, every_n_iter=100)
print('Starting a training cycle.')
resnet_classifier.train(
input_fn=lambda: input_fn(
True, FLAGS.data_dir, FLAGS.batch_size, FLAGS.epochs_per_eval),
hooks=[logging_hook])
print('Starting to evaluate.')
eval_results = resnet_classifier.evaluate(
input_fn=lambda: input_fn(False, FLAGS.data_dir, FLAGS.batch_size))
print(eval_results)
if __name__ == '__main__': if __name__ == '__main__':
tf.logging.set_verbosity(tf.logging.INFO) tf.logging.set_verbosity(tf.logging.INFO)
parser = resnet_shared.ResnetArgParser(
resnet_size_choices=[18, 34, 50, 101, 152, 200])
FLAGS, unparsed = parser.parse_known_args() FLAGS, unparsed = parser.parse_known_args()
tf.app.run(argv=[sys.argv[0]] + unparsed) tf.app.run(argv=[sys.argv[0]] + unparsed)
...@@ -22,7 +22,6 @@ import unittest ...@@ -22,7 +22,6 @@ import unittest
import tensorflow as tf import tensorflow as tf
import imagenet_main import imagenet_main
import resnet_model
tf.logging.set_verbosity(tf.logging.ERROR) tf.logging.set_verbosity(tf.logging.ERROR)
...@@ -35,7 +34,9 @@ class BaseTest(tf.test.TestCase): ...@@ -35,7 +34,9 @@ class BaseTest(tf.test.TestCase):
def tensor_shapes_helper(self, resnet_size, with_gpu=False): def tensor_shapes_helper(self, resnet_size, with_gpu=False):
"""Checks the tensor shapes after each phase of the ResNet model.""" """Checks the tensor shapes after each phase of the ResNet model."""
def reshape(shape): def reshape(shape):
"""Returns the expected dimensions depending on if a GPU is being used.""" """Returns the expected dimensions depending on if a
GPU is being used.
"""
# If a GPU is used for the test, the shape is returned (already in NCHW # If a GPU is used for the test, the shape is returned (already in NCHW
# form). When GPU is not used, the shape is converted to NHWC. # form). When GPU is not used, the shape is converted to NHWC.
if with_gpu: if with_gpu:
...@@ -46,11 +47,11 @@ class BaseTest(tf.test.TestCase): ...@@ -46,11 +47,11 @@ class BaseTest(tf.test.TestCase):
with graph.as_default(), self.test_session( with graph.as_default(), self.test_session(
use_gpu=with_gpu, force_gpu=with_gpu): use_gpu=with_gpu, force_gpu=with_gpu):
model = resnet_model.imagenet_resnet_v2( model = imagenet_main.ImagenetModel(
resnet_size, 456, resnet_size,
data_format='channels_first' if with_gpu else 'channels_last') data_format='channels_first' if with_gpu else 'channels_last')
inputs = tf.random_uniform([1, 224, 224, 3]) inputs = tf.random_uniform([1, 224, 224, 3])
output = model(inputs, is_training=True) output = model(inputs, training=True)
initial_conv = graph.get_tensor_by_name('initial_conv:0') initial_conv = graph.get_tensor_by_name('initial_conv:0')
max_pool = graph.get_tensor_by_name('initial_max_pool:0') max_pool = graph.get_tensor_by_name('initial_max_pool:0')
...@@ -79,8 +80,8 @@ class BaseTest(tf.test.TestCase): ...@@ -79,8 +80,8 @@ class BaseTest(tf.test.TestCase):
self.assertAllEqual(block_layer4.shape, reshape((1, 2048, 7, 7))) self.assertAllEqual(block_layer4.shape, reshape((1, 2048, 7, 7)))
self.assertAllEqual(avg_pool.shape, reshape((1, 2048, 1, 1))) self.assertAllEqual(avg_pool.shape, reshape((1, 2048, 1, 1)))
self.assertAllEqual(dense.shape, (1, 456)) self.assertAllEqual(dense.shape, (1, _LABEL_CLASSES))
self.assertAllEqual(output.shape, (1, 456)) self.assertAllEqual(output.shape, (1, _LABEL_CLASSES))
def test_tensor_shapes_resnet_18(self): def test_tensor_shapes_resnet_18(self):
self.tensor_shapes_helper(18) self.tensor_shapes_helper(18)
...@@ -140,7 +141,7 @@ class BaseTest(tf.test.TestCase): ...@@ -140,7 +141,7 @@ class BaseTest(tf.test.TestCase):
tf.train.create_global_step() tf.train.create_global_step()
features, labels = self.input_fn() features, labels = self.input_fn()
spec = imagenet_main.resnet_model_fn( spec = imagenet_main.imagenet_model_fn(
features, labels, mode, { features, labels, mode, {
'resnet_size': 50, 'resnet_size': 50,
'data_format': 'channels_last', 'data_format': 'channels_last',
......
...@@ -38,14 +38,14 @@ _BATCH_NORM_DECAY = 0.997 ...@@ -38,14 +38,14 @@ _BATCH_NORM_DECAY = 0.997
_BATCH_NORM_EPSILON = 1e-5 _BATCH_NORM_EPSILON = 1e-5
def batch_norm_relu(inputs, is_training, data_format): def batch_norm_relu(inputs, training, data_format):
"""Performs a batch normalization followed by a ReLU.""" """Performs a batch normalization followed by a ReLU."""
# We set fused=True for a significant performance boost. See # We set fused=True for a significant performance boost. See
# https://www.tensorflow.org/performance/performance_guide#common_fused_ops # https://www.tensorflow.org/performance/performance_guide#common_fused_ops
inputs = tf.layers.batch_normalization( inputs = tf.layers.batch_normalization(
inputs=inputs, axis=1 if data_format == 'channels_first' else 3, inputs=inputs, axis=1 if data_format == 'channels_first' else 3,
momentum=_BATCH_NORM_DECAY, epsilon=_BATCH_NORM_EPSILON, center=True, momentum=_BATCH_NORM_DECAY, epsilon=_BATCH_NORM_EPSILON, center=True,
scale=True, training=is_training, fused=True) scale=True, training=training, fused=True)
inputs = tf.nn.relu(inputs) inputs = tf.nn.relu(inputs)
return inputs return inputs
...@@ -91,7 +91,7 @@ def conv2d_fixed_padding(inputs, filters, kernel_size, strides, data_format): ...@@ -91,7 +91,7 @@ def conv2d_fixed_padding(inputs, filters, kernel_size, strides, data_format):
data_format=data_format) data_format=data_format)
def building_block(inputs, filters, is_training, projection_shortcut, strides, def building_block(inputs, filters, training, projection_shortcut, strides,
data_format): data_format):
"""Standard building block for residual networks with BN before convolutions. """Standard building block for residual networks with BN before convolutions.
...@@ -99,10 +99,10 @@ def building_block(inputs, filters, is_training, projection_shortcut, strides, ...@@ -99,10 +99,10 @@ def building_block(inputs, filters, is_training, projection_shortcut, strides,
inputs: A tensor of size [batch, channels, height_in, width_in] or inputs: A tensor of size [batch, channels, height_in, width_in] or
[batch, height_in, width_in, channels] depending on data_format. [batch, height_in, width_in, channels] depending on data_format.
filters: The number of filters for the convolutions. filters: The number of filters for the convolutions.
is_training: A Boolean for whether the model is in training or inference training: A Boolean for whether the model is in training or inference
mode. Needed for batch normalization. mode. Needed for batch normalization.
projection_shortcut: The function to use for projection shortcuts (typically projection_shortcut: The function to use for projection shortcuts
a 1x1 convolution when downsampling the input). (typically a 1x1 convolution when downsampling the input).
strides: The block's stride. If greater than 1, this block will ultimately strides: The block's stride. If greater than 1, this block will ultimately
downsample the input. downsample the input.
data_format: The input format ('channels_last' or 'channels_first'). data_format: The input format ('channels_last' or 'channels_first').
...@@ -111,7 +111,7 @@ def building_block(inputs, filters, is_training, projection_shortcut, strides, ...@@ -111,7 +111,7 @@ def building_block(inputs, filters, is_training, projection_shortcut, strides,
The output tensor of the block. The output tensor of the block.
""" """
shortcut = inputs shortcut = inputs
inputs = batch_norm_relu(inputs, is_training, data_format) inputs = batch_norm_relu(inputs, training, data_format)
# The projection shortcut should come after the first batch norm and ReLU # The projection shortcut should come after the first batch norm and ReLU
# since it performs a 1x1 convolution. # since it performs a 1x1 convolution.
...@@ -122,7 +122,7 @@ def building_block(inputs, filters, is_training, projection_shortcut, strides, ...@@ -122,7 +122,7 @@ def building_block(inputs, filters, is_training, projection_shortcut, strides,
inputs=inputs, filters=filters, kernel_size=3, strides=strides, inputs=inputs, filters=filters, kernel_size=3, strides=strides,
data_format=data_format) data_format=data_format)
inputs = batch_norm_relu(inputs, is_training, data_format) inputs = batch_norm_relu(inputs, training, data_format)
inputs = conv2d_fixed_padding( inputs = conv2d_fixed_padding(
inputs=inputs, filters=filters, kernel_size=3, strides=1, inputs=inputs, filters=filters, kernel_size=3, strides=1,
data_format=data_format) data_format=data_format)
...@@ -130,19 +130,19 @@ def building_block(inputs, filters, is_training, projection_shortcut, strides, ...@@ -130,19 +130,19 @@ def building_block(inputs, filters, is_training, projection_shortcut, strides,
return inputs + shortcut return inputs + shortcut
def bottleneck_block(inputs, filters, is_training, projection_shortcut, def bottleneck_block(inputs, filters, training, projection_shortcut,
strides, data_format): strides, data_format):
"""Bottleneck block variant for residual networks with BN before convolutions. """Bottleneck block variant for residual networks with BN before convolutions.
Args: Args:
inputs: A tensor of size [batch, channels, height_in, width_in] or inputs: A tensor of size [batch, channels, height_in, width_in] or
[batch, height_in, width_in, channels] depending on data_format. [batch, height_in, width_in, channels] depending on data_format.
filters: The number of filters for the first two convolutions. Note that the filters: The number of filters for the first two convolutions. Note
third and final convolution will use 4 times as many filters. that the third and final convolution will use 4 times as many filters.
is_training: A Boolean for whether the model is in training or inference training: A Boolean for whether the model is in training or inference
mode. Needed for batch normalization. mode. Needed for batch normalization.
projection_shortcut: The function to use for projection shortcuts (typically projection_shortcut: The function to use for projection shortcuts
a 1x1 convolution when downsampling the input). (typically a 1x1 convolution when downsampling the input).
strides: The block's stride. If greater than 1, this block will ultimately strides: The block's stride. If greater than 1, this block will ultimately
downsample the input. downsample the input.
data_format: The input format ('channels_last' or 'channels_first'). data_format: The input format ('channels_last' or 'channels_first').
...@@ -151,7 +151,7 @@ def bottleneck_block(inputs, filters, is_training, projection_shortcut, ...@@ -151,7 +151,7 @@ def bottleneck_block(inputs, filters, is_training, projection_shortcut,
The output tensor of the block. The output tensor of the block.
""" """
shortcut = inputs shortcut = inputs
inputs = batch_norm_relu(inputs, is_training, data_format) inputs = batch_norm_relu(inputs, training, data_format)
# The projection shortcut should come after the first batch norm and ReLU # The projection shortcut should come after the first batch norm and ReLU
# since it performs a 1x1 convolution. # since it performs a 1x1 convolution.
...@@ -162,12 +162,12 @@ def bottleneck_block(inputs, filters, is_training, projection_shortcut, ...@@ -162,12 +162,12 @@ def bottleneck_block(inputs, filters, is_training, projection_shortcut,
inputs=inputs, filters=filters, kernel_size=1, strides=1, inputs=inputs, filters=filters, kernel_size=1, strides=1,
data_format=data_format) data_format=data_format)
inputs = batch_norm_relu(inputs, is_training, data_format) inputs = batch_norm_relu(inputs, training, data_format)
inputs = conv2d_fixed_padding( inputs = conv2d_fixed_padding(
inputs=inputs, filters=filters, kernel_size=3, strides=strides, inputs=inputs, filters=filters, kernel_size=3, strides=strides,
data_format=data_format) data_format=data_format)
inputs = batch_norm_relu(inputs, is_training, data_format) inputs = batch_norm_relu(inputs, training, data_format)
inputs = conv2d_fixed_padding( inputs = conv2d_fixed_padding(
inputs=inputs, filters=4 * filters, kernel_size=1, strides=1, inputs=inputs, filters=4 * filters, kernel_size=1, strides=1,
data_format=data_format) data_format=data_format)
...@@ -175,7 +175,7 @@ def bottleneck_block(inputs, filters, is_training, projection_shortcut, ...@@ -175,7 +175,7 @@ def bottleneck_block(inputs, filters, is_training, projection_shortcut,
return inputs + shortcut return inputs + shortcut
def block_layer(inputs, filters, block_fn, blocks, strides, is_training, name, def block_layer(inputs, filters, block_fn, blocks, strides, training, name,
data_format): data_format):
"""Creates one layer of blocks for the ResNet model. """Creates one layer of blocks for the ResNet model.
...@@ -188,7 +188,7 @@ def block_layer(inputs, filters, block_fn, blocks, strides, is_training, name, ...@@ -188,7 +188,7 @@ def block_layer(inputs, filters, block_fn, blocks, strides, is_training, name,
blocks: The number of blocks contained in the layer. blocks: The number of blocks contained in the layer.
strides: The stride to use for the first convolution of the layer. If strides: The stride to use for the first convolution of the layer. If
greater than 1, this layer will ultimately downsample the input. greater than 1, this layer will ultimately downsample the input.
is_training: Either True or False, whether we are currently training the training: Either True or False, whether we are currently training the
model. Needed for batch norm. model. Needed for batch norm.
name: A string name for the tensor output of the block layer. name: A string name for the tensor output of the block layer.
data_format: The input format ('channels_last' or 'channels_first'). data_format: The input format ('channels_last' or 'channels_first').
...@@ -205,162 +205,116 @@ def block_layer(inputs, filters, block_fn, blocks, strides, is_training, name, ...@@ -205,162 +205,116 @@ def block_layer(inputs, filters, block_fn, blocks, strides, is_training, name,
data_format=data_format) data_format=data_format)
# Only the first block per block_layer uses projection_shortcut and strides # Only the first block per block_layer uses projection_shortcut and strides
inputs = block_fn(inputs, filters, is_training, projection_shortcut, strides, inputs = block_fn(inputs, filters, training, projection_shortcut, strides,
data_format) data_format)
for _ in range(1, blocks): for _ in range(1, blocks):
inputs = block_fn(inputs, filters, is_training, None, 1, data_format) inputs = block_fn(inputs, filters, training, None, 1, data_format)
return tf.identity(inputs, name) return tf.identity(inputs, name)
def cifar10_resnet_v2_generator(resnet_size, num_classes, data_format=None): class Model(object):
"""Generator for CIFAR-10 ResNet v2 models. """Base class for building the Resnet v2 Model.
Args:
resnet_size: A single integer for the size of the ResNet model.
num_classes: The number of possible classes for image classification.
data_format: The input format ('channels_last', 'channels_first', or None).
If set to None, the format is dependent on whether a GPU is available.
Returns:
The model function that takes in `inputs` and `is_training` and
returns the output tensor of the ResNet model.
Raises:
ValueError: If `resnet_size` is invalid.
""" """
if resnet_size % 6 != 2:
raise ValueError('resnet_size must be 6n + 2:', resnet_size)
num_blocks = (resnet_size - 2) // 6
if data_format is None: def __init__(self, resnet_size, num_classes, num_filters, kernel_size,
data_format = ( conv_stride, first_pool_size, first_pool_stride,
'channels_first' if tf.test.is_built_with_cuda() else 'channels_last') second_pool_size, second_pool_stride, block_fn, block_sizes,
block_strides, final_size, data_format=None):
def model(inputs, is_training): """Creates a model for classifying an image.
"""Constructs the ResNet model given the inputs."""
if data_format == 'channels_first': Args:
resnet_size: A single integer for the size of the ResNet model.
num_classes: The number of classes used as labels.
num_filters: The number of filters to use for the first block layer
of the model. This number is then doubled for each subsequent block
layer.
kernel_size: The kernel size to use for convolution.
conv_stride: stride size for the initial convolutional layer
first_pool_size: Pool size to be used for the first pooling layer.
If none, the first pooling layer is skipped.
first_pool_stride: stride size for the first pooling layer. Not used
if first_pool_size is None.
second_pool_size: Pool size to be used for the second pooling layer.
second_pool_stride: stride size for the final pooling layer
block_fn: Which block layer function should be used? Pass in one of
the two functions defined above: building_block or bottleneck_block
block_sizes: A list containing n values, where n is the number of sets of
block layers desired. Each value should be the number of blocks in the
i-th set.
block_strides: List of integers representing the desired stride size for
each of the sets of block layers. Should be same length as block_sizes.
final_size: The expected size of the model after the second pooling.
data_format: Input format ('channels_last', 'channels_first', or None).
If set to None, the format is dependent on whether a GPU is available.
"""
self.resnet_size = resnet_size
if not data_format:
data_format = (
'channels_first' if tf.test.is_built_with_cuda() else 'channels_last')
self.data_format = data_format
self.num_classes = num_classes
self.num_filters = num_filters
self.kernel_size = kernel_size
self.conv_stride = conv_stride
self.first_pool_size = first_pool_size
self.first_pool_stride = first_pool_stride
self.second_pool_size = second_pool_size
self.second_pool_stride = second_pool_stride
self.block_fn = block_fn
self.block_sizes = block_sizes
self.block_strides = block_strides
self.final_size = final_size
def __call__(self, inputs, training):
"""Add operations to classify a batch of input images.
Args:
inputs: A Tensor representing a batch of input images.
training: A boolean. Set to True to add operations required only when
training the classifier.
Returns:
A logits Tensor with shape [<batch_size>, self.num_classes].
"""
if self.data_format == 'channels_first':
# Convert the inputs from channels_last (NHWC) to channels_first (NCHW). # Convert the inputs from channels_last (NHWC) to channels_first (NCHW).
# This provides a large performance boost on GPU. See # This provides a large performance boost on GPU. See
# https://www.tensorflow.org/performance/performance_guide#data_formats # https://www.tensorflow.org/performance/performance_guide#data_formats
inputs = tf.transpose(inputs, [0, 3, 1, 2]) inputs = tf.transpose(inputs, [0, 3, 1, 2])
inputs = conv2d_fixed_padding( inputs = conv2d_fixed_padding(
inputs=inputs, filters=16, kernel_size=3, strides=1, inputs=inputs, filters=self.num_filters, kernel_size=self.kernel_size,
data_format=data_format) strides=self.conv_stride, data_format=self.data_format)
inputs = tf.identity(inputs, 'initial_conv') inputs = tf.identity(inputs, 'initial_conv')
inputs = block_layer( if self.first_pool_size:
inputs=inputs, filters=16, block_fn=building_block, blocks=num_blocks, inputs = tf.layers.max_pooling2d(
strides=1, is_training=is_training, name='block_layer1', inputs=inputs, pool_size=self.first_pool_size,
data_format=data_format) strides=self.first_pool_stride, padding='SAME',
inputs = block_layer( data_format=self.data_format)
inputs=inputs, filters=32, block_fn=building_block, blocks=num_blocks, inputs = tf.identity(inputs, 'initial_max_pool')
strides=2, is_training=is_training, name='block_layer2',
data_format=data_format) for i, num_blocks in enumerate(self.block_sizes):
inputs = block_layer( num_filters = self.num_filters * (2**i)
inputs=inputs, filters=64, block_fn=building_block, blocks=num_blocks, inputs = block_layer(
strides=2, is_training=is_training, name='block_layer3', inputs=inputs, filters=num_filters, block_fn=self.block_fn,
data_format=data_format) blocks=num_blocks, strides=self.block_strides[i],
training=training, name='block_layer{}'.format(i + 1),
inputs = batch_norm_relu(inputs, is_training, data_format) data_format=self.data_format)
inputs = batch_norm_relu(inputs, training, self.data_format)
inputs = tf.layers.average_pooling2d( inputs = tf.layers.average_pooling2d(
inputs=inputs, pool_size=8, strides=1, padding='VALID', inputs=inputs, pool_size=self.second_pool_size,
data_format=data_format) strides=self.second_pool_stride, padding='VALID',
data_format=self.data_format)
inputs = tf.identity(inputs, 'final_avg_pool') inputs = tf.identity(inputs, 'final_avg_pool')
inputs = tf.reshape(inputs, [-1, 64])
inputs = tf.layers.dense(inputs=inputs, units=num_classes)
inputs = tf.identity(inputs, 'final_dense')
return inputs
return model
def imagenet_resnet_v2_generator(block_fn, layers, num_classes,
data_format=None):
"""Generator for ImageNet ResNet v2 models.
Args:
block_fn: The block to use within the model, either `building_block` or
`bottleneck_block`.
layers: A length-4 array denoting the number of blocks to include in each
layer. Each layer consists of blocks that take inputs of the same size.
num_classes: The number of possible classes for image classification.
data_format: The input format ('channels_last', 'channels_first', or None).
If set to None, the format is dependent on whether a GPU is available.
Returns:
The model function that takes in `inputs` and `is_training` and
returns the output tensor of the ResNet model.
"""
if data_format is None:
data_format = (
'channels_first' if tf.test.is_built_with_cuda() else 'channels_last')
def model(inputs, is_training):
"""Constructs the ResNet model given the inputs."""
if data_format == 'channels_first':
# Convert the inputs from channels_last (NHWC) to channels_first (NCHW).
# This provides a large performance boost on GPU. See
# https://www.tensorflow.org/performance/performance_guide#data_formats
inputs = tf.transpose(inputs, [0, 3, 1, 2])
inputs = conv2d_fixed_padding(
inputs=inputs, filters=64, kernel_size=7, strides=2,
data_format=data_format)
inputs = tf.identity(inputs, 'initial_conv')
inputs = tf.layers.max_pooling2d(
inputs=inputs, pool_size=3, strides=2, padding='SAME',
data_format=data_format)
inputs = tf.identity(inputs, 'initial_max_pool')
inputs = block_layer(
inputs=inputs, filters=64, block_fn=block_fn, blocks=layers[0],
strides=1, is_training=is_training, name='block_layer1',
data_format=data_format)
inputs = block_layer(
inputs=inputs, filters=128, block_fn=block_fn, blocks=layers[1],
strides=2, is_training=is_training, name='block_layer2',
data_format=data_format)
inputs = block_layer(
inputs=inputs, filters=256, block_fn=block_fn, blocks=layers[2],
strides=2, is_training=is_training, name='block_layer3',
data_format=data_format)
inputs = block_layer(
inputs=inputs, filters=512, block_fn=block_fn, blocks=layers[3],
strides=2, is_training=is_training, name='block_layer4',
data_format=data_format)
inputs = batch_norm_relu(inputs, is_training, data_format) inputs = tf.reshape(inputs, [-1, self.final_size])
inputs = tf.layers.average_pooling2d( inputs = tf.layers.dense(inputs=inputs, units=self.num_classes)
inputs=inputs, pool_size=7, strides=1, padding='VALID',
data_format=data_format)
inputs = tf.identity(inputs, 'final_avg_pool')
inputs = tf.reshape(inputs,
[-1, 512 if block_fn is building_block else 2048])
inputs = tf.layers.dense(inputs=inputs, units=num_classes)
inputs = tf.identity(inputs, 'final_dense') inputs = tf.identity(inputs, 'final_dense')
return inputs return inputs
return model
def imagenet_resnet_v2(resnet_size, num_classes, data_format=None):
"""Returns the ResNet model for a given size and number of output classes."""
model_params = {
18: {'block': building_block, 'layers': [2, 2, 2, 2]},
34: {'block': building_block, 'layers': [3, 4, 6, 3]},
50: {'block': bottleneck_block, 'layers': [3, 4, 6, 3]},
101: {'block': bottleneck_block, 'layers': [3, 4, 23, 3]},
152: {'block': bottleneck_block, 'layers': [3, 8, 36, 3]},
200: {'block': bottleneck_block, 'layers': [3, 24, 36, 3]}
}
if resnet_size not in model_params:
raise ValueError('Not a valid resnet_size:', resnet_size)
params = model_params[resnet_size]
return imagenet_resnet_v2_generator(
params['block'], params['layers'], num_classes, data_format)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functions for running Resnet that are shared across datasets."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import os
import tensorflow as tf
def learning_rate_with_decay(
    batch_size, batch_denom, num_images, boundary_epochs, decay_rates):
  """Get a learning rate that decays step-wise as training progresses.

  Args:
    batch_size: the number of examples processed in each training batch.
    batch_denom: this value will be used to scale the base learning rate.
      `0.1 * batch size` is divided by this number, such that when
      batch_denom == batch_size, the initial learning rate will be 0.1.
    num_images: total number of images that will be used for training.
    boundary_epochs: list of ints representing the epochs at which we
      decay the learning rate.
    decay_rates: list of floats representing the decay rates to be used
      for scaling the learning rate. Should have exactly one more element
      than boundary_epochs: one rate for each interval between boundaries
      (including before the first and after the last boundary).

  Returns:
    Returns a function that takes a single argument - the number of batches
    trained so far (global_step)- and returns the learning rate to be used
    for training the next batch.

  Raises:
    ValueError: if decay_rates does not have exactly one more element than
      boundary_epochs.
  """
  # n boundaries partition the step axis into n + 1 intervals, so
  # tf.train.piecewise_constant requires exactly n + 1 values. Validate
  # eagerly here for a clear error instead of a cryptic graph-time failure.
  if len(decay_rates) != len(boundary_epochs) + 1:
    raise ValueError(
        'decay_rates must have exactly one more element than '
        'boundary_epochs; got %d decay_rates for %d boundary_epochs.'
        % (len(decay_rates), len(boundary_epochs)))

  initial_learning_rate = 0.1 * batch_size / batch_denom
  batches_per_epoch = num_images / batch_size

  # Convert the boundary epochs into global-step boundaries, and scale the
  # initial learning rate by each decay factor.
  boundaries = [int(batches_per_epoch * epoch) for epoch in boundary_epochs]
  vals = [initial_learning_rate * decay for decay in decay_rates]

  def learning_rate_fn(global_step):
    global_step = tf.cast(global_step, tf.int32)
    return tf.train.piecewise_constant(global_step, boundaries, vals)

  return learning_rate_fn
def resnet_model_fn(features, labels, mode, model_class,
                    resnet_size, weight_decay, learning_rate_fn, momentum,
                    data_format, loss_filter_fn=None):
  """Shared functionality for different resnet model_fns.

  Initializes the ResnetModel representing the model layers
  and uses that model to build the necessary EstimatorSpecs for
  the `mode` in question. For training, this means building losses,
  the optimizer, and the train op that get passed into the EstimatorSpec.
  For evaluation and prediction, the EstimatorSpec is returned without
  a train op, but with the necessary parameters for the given mode.

  Args:
    features: tensor representing input images
    labels: tensor representing class labels for all input images
    mode: current estimator mode; should be one of
      `tf.estimator.ModeKeys.TRAIN`, `EVALUATE`, `PREDICT`
    model_class: a class representing a TensorFlow model that has a __call__
      function. We assume here that this is a subclass of ResnetModel.
    resnet_size: A single integer for the size of the ResNet model.
    weight_decay: weight decay loss rate used to regularize learned variables.
    learning_rate_fn: function that returns the current learning rate given
      the current global_step
    momentum: momentum term used for optimization
    data_format: Input format ('channels_last', 'channels_first', or None).
      If set to None, the format is dependent on whether a GPU is available.
    loss_filter_fn: function that takes a string variable name and returns
      True if the var should be included in loss calculation, and False
      otherwise. If None, batch_normalization variables will be excluded
      from the loss.

  Returns:
    EstimatorSpec parameterized according to the input params and the
    current mode.
  """

  # Generate a summary node for the images
  tf.summary.image('images', features, max_outputs=6)

  model = model_class(resnet_size, data_format)
  # The model only runs in training mode (second argument True) for TRAIN;
  # EVAL and PREDICT take the inference path (e.g. batch norm uses the
  # moving averages rather than batch statistics).
  logits = model(features, mode == tf.estimator.ModeKeys.TRAIN)

  predictions = {
      'classes': tf.argmax(logits, axis=1),
      'probabilities': tf.nn.softmax(logits, name='softmax_tensor')
  }

  if mode == tf.estimator.ModeKeys.PREDICT:
    # Prediction needs neither a loss nor a train op; return early.
    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

  # Calculate loss, which includes softmax cross entropy and L2 regularization.
  cross_entropy = tf.losses.softmax_cross_entropy(
      logits=logits, onehot_labels=labels)

  # Create a tensor named cross_entropy for logging purposes.
  tf.identity(cross_entropy, name='cross_entropy')
  tf.summary.scalar('cross_entropy', cross_entropy)

  # If no loss_filter_fn is passed, assume we want the default behavior,
  # which is that batch_normalization variables are excluded from loss.
  if not loss_filter_fn:
    def loss_filter_fn(name):
      return 'batch_normalization' not in name

  # Add weight decay to the loss.
  loss = cross_entropy + weight_decay * tf.add_n(
      [tf.nn.l2_loss(v) for v in tf.trainable_variables()
       if loss_filter_fn(v.name)])

  if mode == tf.estimator.ModeKeys.TRAIN:
    global_step = tf.train.get_or_create_global_step()

    learning_rate = learning_rate_fn(global_step)

    # Create a tensor named learning_rate for logging purposes
    tf.identity(learning_rate, name='learning_rate')
    tf.summary.scalar('learning_rate', learning_rate)

    optimizer = tf.train.MomentumOptimizer(
        learning_rate=learning_rate,
        momentum=momentum)

    # Batch norm requires update ops to be added as a dependency to train_op
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
      train_op = optimizer.minimize(loss, global_step)
  else:
    train_op = None

  accuracy = tf.metrics.accuracy(
      tf.argmax(labels, axis=1), predictions['classes'])
  metrics = {'accuracy': accuracy}

  # Create a tensor named train_accuracy for logging purposes
  # accuracy is a (value, update_op) pair; accuracy[1] is the update op,
  # which also yields the current metric value when evaluated.
  tf.identity(accuracy[1], name='train_accuracy')
  tf.summary.scalar('train_accuracy', accuracy[1])

  return tf.estimator.EstimatorSpec(
      mode=mode,
      predictions=predictions,
      loss=loss,
      train_op=train_op,
      eval_metric_ops=metrics)
def resnet_main(flags, model_function, input_function):
  """Runs alternating train/evaluate cycles for a ResNet Estimator.

  Args:
    flags: parsed command-line flags (see ResnetArgParser); must provide
      model_dir, resnet_size, data_format, batch_size, train_epochs,
      epochs_per_eval and data_dir.
    model_function: a model_fn compatible with tf.estimator.Estimator.
    input_function: input_fn called as input_function(is_training, data_dir,
      batch_size[, num_epochs]) returning the (features, labels) input.
  """
  # Using the Winograd non-fused algorithms provides a small performance boost.
  os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'

  # Set up a RunConfig to only save checkpoints once per training cycle.
  run_config = tf.estimator.RunConfig().replace(save_checkpoints_secs=1e9)
  classifier = tf.estimator.Estimator(
      model_fn=model_function, model_dir=flags.model_dir, config=run_config,
      params={
          'resnet_size': flags.resnet_size,
          'data_format': flags.data_format,
          'batch_size': flags.batch_size,
      })

  # The tensor-name mapping to log is loop-invariant; build it once.
  # These names match the tf.identity nodes created in resnet_model_fn.
  tensors_to_log = {
      'learning_rate': 'learning_rate',
      'cross_entropy': 'cross_entropy',
      'train_accuracy': 'train_accuracy'
  }

  for _ in range(flags.train_epochs // flags.epochs_per_eval):
    # Recreate the hook each cycle so its internal timing state is fresh.
    logging_hook = tf.train.LoggingTensorHook(
        tensors=tensors_to_log, every_n_iter=100)

    print('Starting a training cycle.')
    classifier.train(
        input_fn=lambda: input_function(
            True, flags.data_dir, flags.batch_size, flags.epochs_per_eval),
        hooks=[logging_hook])

    print('Starting to evaluate.')
    # Evaluate the model and print results
    eval_results = classifier.evaluate(input_fn=lambda: input_function(
        False, flags.data_dir, flags.batch_size))
    print(eval_results)
class ResnetArgParser(argparse.ArgumentParser):
  """Command-line parser for configuring and running a ResNet model.

  Args:
    resnet_size_choices: optional sequence restricting the legal values of
      --resnet_size; None (the default) accepts any integer.
  """

  def __init__(self, resnet_size_choices=None):
    super(ResnetArgParser, self).__init__()

    # (flag, add_argument keyword arguments), registered in order so the
    # --help output keeps the original layout.
    argument_specs = (
        ('--data_dir', dict(
            type=str, default='/tmp/resnet_data',
            help='The directory where the input data is stored.')),
        ('--model_dir', dict(
            type=str, default='/tmp/resnet_model',
            help='The directory where the model will be stored.')),
        ('--resnet_size', dict(
            type=int, default=50, choices=resnet_size_choices,
            help='The size of the ResNet model to use.')),
        ('--train_epochs', dict(
            type=int, default=100,
            help='The number of epochs to use for training.')),
        ('--epochs_per_eval', dict(
            type=int, default=1,
            help='The number of training epochs to run between evaluations.')),
        ('--batch_size', dict(
            type=int, default=32,
            help='Batch size for training and evaluation.')),
        ('--data_format', dict(
            type=str, default=None,
            choices=['channels_first', 'channels_last'],
            help='A flag to override the data format used in the model. '
                 'channels_first provides a performance boost on GPU but '
                 'is not always compatible with CPU. If left unspecified, '
                 'the data format will be chosen automatically based on '
                 'whether TensorFlow was built for CPU or GPU.')),
    )
    for flag, options in argument_specs:
      self.add_argument(flag, **options)
...@@ -13,7 +13,7 @@ ...@@ -13,7 +13,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# ============================================================================== # ==============================================================================
# #
# DO NOT MODIFY THIS FILE. Add tests to be executed in test_models.sh # DO NOT MODIFY THIS FILE. Add tests to be executed in test_models.sh
# Usage: docker_test.sh [--docker-image <DOCKER_IMG_NAME>] # Usage: docker_test.sh [--docker-image <DOCKER_IMG_NAME>]
# #
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
# --docker-image flag), the default latest tensorflow docker # --docker-image flag), the default latest tensorflow docker
# will be used. # will be used.
# #
# The script obeys the following required environment variables unless superceded by # The script obeys the following required environment variables unless superceded by
# the docker image flag: # the docker image flag:
# PYTHON_VERSION: (PYTHON2 | PYTHON3) # PYTHON_VERSION: (PYTHON2 | PYTHON3)
...@@ -35,9 +35,9 @@ EXIT=0 ...@@ -35,9 +35,9 @@ EXIT=0
export WORKSPACE=${PWD} export WORKSPACE=${PWD}
if [ "$PYTHON_VERSION" = "PYTHON3" ]; then if [ "$PYTHON_VERSION" = "PYTHON3" ]; then
DOCKER_IMG_NAME="tensorflow/tensorflow:1.4.0-py3" DOCKER_IMG_NAME="tensorflow/tensorflow:nightly-py3"
else else
DOCKER_IMG_NAME="tensorflow/tensorflow:1.4.0" DOCKER_IMG_NAME="tensorflow/tensorflow:nightly"
if [ "$PYTHON_VERSION" != "PYTHON2" ]; then if [ "$PYTHON_VERSION" != "PYTHON2" ]; then
echo "WARNING: Python version was not specified. Using Python2 by default." echo "WARNING: Python version was not specified. Using Python2 by default."
sleep 5 sleep 5
...@@ -56,6 +56,9 @@ fi ...@@ -56,6 +56,9 @@ fi
# Specify which test is to be run # Specify which test is to be run
COMMAND="./official/testing/test_models.sh" COMMAND="./official/testing/test_models.sh"
# Check the recency of the desired image
${DOCKER_BINARY} pull ${DOCKER_IMG_NAME}
# RUN # RUN
${DOCKER_BINARY} run \ ${DOCKER_BINARY} run \
-v ${WORKSPACE}:/workspace \ -v ${WORKSPACE}:/workspace \
......
...@@ -16,7 +16,8 @@ ...@@ -16,7 +16,8 @@
"""Utilities for processing depth images. """Utilities for processing depth images.
""" """
import numpy as np import numpy as np
import src.rotation_utils as ru import src.rotation_utils as ru
import src.utils as utils
def get_camera_matrix(width, height, fov): def get_camera_matrix(width, height, fov):
"""Returns a camera matrix from image size and fov.""" """Returns a camera matrix from image size and fov."""
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
"""Utilities for manipulating files. """Utilities for manipulating files.
""" """
import os import os
import numpy as np
import PIL import PIL
from tensorflow.python.platform import gfile from tensorflow.python.platform import gfile
import cv2 import cv2
...@@ -33,7 +34,7 @@ def write_image(image_path, rgb): ...@@ -33,7 +34,7 @@ def write_image(image_path, rgb):
f.write(img_str) f.write(img_str)
def read_image(image_path, type='rgb'): def read_image(image_path, type='rgb'):
with fopen(file_name, 'r') as f: with fopen(image_path, 'r') as f:
I = PIL.Image.open(f) I = PIL.Image.open(f)
II = np.array(I) II = np.array(I)
if type == 'rgb': if type == 'rgb':
......
...@@ -19,19 +19,21 @@ import skimage.morphology ...@@ -19,19 +19,21 @@ import skimage.morphology
import numpy as np import numpy as np
import networkx as nx import networkx as nx
import itertools import itertools
import logging
from datasets.nav_env import get_path_ids
import graph_tool as gt import graph_tool as gt
import graph_tool.topology import graph_tool.topology
import graph_tool.generation import graph_tool.generation
import src.utils as utils import src.utils as utils
# Compute shortest path from all nodes to or from all source nodes # Compute shortest path from all nodes to or from all source nodes
def get_distance_node_list(gtG, source_nodes, direction, weights=None): def get_distance_node_list(gtG, source_nodes, direction, weights=None):
gtG_ = gt.Graph(gtG) gtG_ = gt.Graph(gtG)
v = gtG_.add_vertex() v = gtG_.add_vertex()
if weights is not None: if weights is not None:
weights = gtG_.edge_properties[weights] weights = gtG_.edge_properties[weights]
for s in source_nodes: for s in source_nodes:
e = gtG_.add_edge(s, int(v)) e = gtG_.add_edge(s, int(v))
if weights is not None: if weights is not None:
...@@ -109,12 +111,12 @@ def convert_traversible_to_graph(traversible, ff_cost=1., fo_cost=1., ...@@ -109,12 +111,12 @@ def convert_traversible_to_graph(traversible, ff_cost=1., fo_cost=1.,
for i, e in enumerate(g.edges()): for i, e in enumerate(g.edges()):
edge_wts[e] = edge_wts[e] * wts[i] edge_wts[e] = edge_wts[e] * wts[i]
# d = edge_wts.get_array()*1. # d = edge_wts.get_array()*1.
# edge_wts.get_array()[:] = d*wts # edge_wts.get_array()[:] = d*wts
return g, nodes return g, nodes
def label_nodes_with_class(nodes_xyt, class_maps, pix): def label_nodes_with_class(nodes_xyt, class_maps, pix):
""" """
Returns: Returns:
class_maps__: one-hot class_map for each class. class_maps__: one-hot class_map for each class.
node_class_label: one-hot class_map for each class, nodes_xyt.shape[0] x n_classes node_class_label: one-hot class_map for each class, nodes_xyt.shape[0] x n_classes
""" """
...@@ -136,7 +138,7 @@ def label_nodes_with_class(nodes_xyt, class_maps, pix): ...@@ -136,7 +138,7 @@ def label_nodes_with_class(nodes_xyt, class_maps, pix):
class_maps_one_hot = np.zeros(class_maps.shape, dtype=np.bool) class_maps_one_hot = np.zeros(class_maps.shape, dtype=np.bool)
node_class_label_one_hot = np.zeros((node_class_label.shape[0], class_maps.shape[2]), dtype=np.bool) node_class_label_one_hot = np.zeros((node_class_label.shape[0], class_maps.shape[2]), dtype=np.bool)
for i in range(class_maps.shape[2]): for i in range(class_maps.shape[2]):
class_maps_one_hot[:,:,i] = class_maps__ == i class_maps_one_hot[:,:,i] = class_maps__ == i
node_class_label_one_hot[:,i] = node_class_label == i node_class_label_one_hot[:,i] = node_class_label == i
return class_maps_one_hot, node_class_label_one_hot return class_maps_one_hot, node_class_label_one_hot
...@@ -467,7 +469,7 @@ def rng_next_goal(start_node_ids, batch_size, gtG, rng, max_dist, ...@@ -467,7 +469,7 @@ def rng_next_goal(start_node_ids, batch_size, gtG, rng, max_dist,
if compute_path: if compute_path:
path = get_path_ids(start_node_ids[i], end_node_ids[i], pred_map) path = get_path_ids(start_node_ids[i], end_node_ids[i], pred_map)
paths.append(path) paths.append(path)
return start_node_ids, end_node_ids, dists, pred_maps, paths return start_node_ids, end_node_ids, dists, pred_maps, paths
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
""" """
import copy import copy
import skimage.morphology import skimage.morphology
import logging
import numpy as np import numpy as np
import scipy.ndimage import scipy.ndimage
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
......
<font size=4><b>Deep Learning with Differential Privacy</b></font> <font size=4><b>Deep Learning with Differential Privacy</b></font>
Open Sourced By: Xin Pan (xpan@google.com, github: panyx0718) Open Sourced By: Xin Pan
### Introduction for [dp_sgd/README.md](dp_sgd/README.md) ### Introduction for [dp_sgd/README.md](dp_sgd/README.md)
......
...@@ -93,6 +93,7 @@ import sys ...@@ -93,6 +93,7 @@ import sys
import threading import threading
import numpy as np import numpy as np
import six
import tensorflow as tf import tensorflow as tf
tf.app.flags.DEFINE_string('train_directory', '/tmp/', tf.app.flags.DEFINE_string('train_directory', '/tmp/',
...@@ -170,6 +171,8 @@ def _float_feature(value): ...@@ -170,6 +171,8 @@ def _float_feature(value):
def _bytes_feature(value): def _bytes_feature(value):
"""Wrapper for inserting bytes features into Example proto.""" """Wrapper for inserting bytes features into Example proto."""
if isinstance(value, six.string_types):
value = six.binary_type(value, encoding='utf-8')
return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value])) return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
...@@ -312,7 +315,7 @@ def _process_image(filename, coder): ...@@ -312,7 +315,7 @@ def _process_image(filename, coder):
width: integer, image width in pixels. width: integer, image width in pixels.
""" """
# Read the image file. # Read the image file.
with tf.gfile.FastGFile(filename, 'r') as f: with tf.gfile.FastGFile(filename, 'rb') as f:
image_data = f.read() image_data = f.read()
# Clean the dirty data. # Clean the dirty data.
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
<b>Authors:</b> <b>Authors:</b>
Oriol Vinyals (vinyals@google.com, github: OriolVinyals), Oriol Vinyals (vinyals@google.com, github: OriolVinyals),
Xin Pan (xpan@google.com, github: panyx0718) Xin Pan
<b>Paper Authors:</b> <b>Paper Authors:</b>
......
...@@ -8,7 +8,7 @@ This is an implementation based on my understanding, with small ...@@ -8,7 +8,7 @@ This is an implementation based on my understanding, with small
variations. It doesn't necessarily represent the paper published variations. It doesn't necessarily represent the paper published
by the original authors. by the original authors.
Authors: Xin Pan (Github: panyx0718), Anelia Angelova Authors: Xin Pan, Anelia Angelova
<b>Results:</b> <b>Results:</b>
......
<font size=4><b>Reproduced ResNet on CIFAR-10 and CIFAR-100 dataset.</b></font> <font size=4><b>Reproduced ResNet on CIFAR-10 and CIFAR-100 dataset.</b></font>
contact: panyx0718 (xpan@google.com) Xin Pan
<b>Dataset:</b> <b>Dataset:</b>
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment