Merge branch 'master' into undefined-v-in-python3

70629555 · cclauss · GitHub · 459f6ec1 · 31adae53 · 70629555
Unverified commit 70629555, authored Feb 02, 2018 by cclauss; committed by GitHub on Feb 02, 2018.
13 changed files
--- a/official/mnist/mnist.py
+++ b/official/mnist/mnist.py
@@ -122,7 +122,7 @@ def model_fn(features, labels, mode, params):
        eval_metric_ops={
            'accuracy':
                tf.metrics.accuracy(
-                    labels=tf.argmax(labels, axis=1),
+                    labels=labels,
                    predictions=tf.argmax(logits, axis=1)),
        })


--- a/official/resnet/cifar10_main.py
+++ b/official/resnet/cifar10_main.py
@@ -18,63 +18,33 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

-import argparse
 import os
 import sys

 import tensorflow as tf

 import resnet_model
-
-parser = argparse.ArgumentParser()
-
-# Basic model parameters.
-parser.add_argument('--data_dir', type=str, default='/tmp/cifar10_data',
-                    help='The path to the CIFAR-10 data directory.')
-
-parser.add_argument('--model_dir', type=str, default='/tmp/cifar10_model',
-                    help='The directory where the model will be stored.')
-
-parser.add_argument('--resnet_size', type=int, default=32,
-                    help='The size of the ResNet model to use.')
-
-parser.add_argument('--train_epochs', type=int, default=250,
-                    help='The number of epochs to train.')
-
-parser.add_argument('--epochs_per_eval', type=int, default=10,
-                    help='The number of epochs to run in between evaluations.')
-
-parser.add_argument('--batch_size', type=int, default=128,
-                    help='The number of images per batch.')
-
-parser.add_argument(
-    '--data_format', type=str, default=None,
-    choices=['channels_first', 'channels_last'],
-    help='A flag to override the data format used in the model. channels_first '
-         'provides a performance boost on GPU but is not always compatible '
-         'with CPU. If left unspecified, the data format will be chosen '
-         'automatically based on whether TensorFlow was built for CPU or GPU.')
+import resnet_shared

 _HEIGHT = 32
 _WIDTH = 32
-_DEPTH = 3
+_NUM_CHANNELS = 3
+_DEFAULT_IMAGE_BYTES = _HEIGHT * _WIDTH * _NUM_CHANNELS
 _NUM_CLASSES = 10
 _NUM_DATA_FILES = 5

-# We use a weight decay of 0.0002, which performs better than the 0.0001 that
-# was originally suggested.
-_WEIGHT_DECAY = 2e-4
-_MOMENTUM = 0.9
-
 _NUM_IMAGES = {
    'train': 50000,
    'validation': 10000,
 }


+###############################################################################
+# Data processing
+###############################################################################
 def record_dataset(filenames):
  """Returns an input pipeline Dataset from `filenames`."""
-  record_bytes = _HEIGHT * _WIDTH * _DEPTH + 1
+  record_bytes = _DEFAULT_IMAGE_BYTES + 1
  return tf.data.FixedLengthRecordDataset(filenames, record_bytes)


@@ -100,8 +70,7 @@ def parse_record(raw_record):
  # Every record consists of a label followed by the image, with a fixed number
  # of bytes for each.
  label_bytes = 1
-  image_bytes = _HEIGHT * _WIDTH * _DEPTH
-  record_bytes = label_bytes + image_bytes
+  record_bytes = label_bytes + _DEFAULT_IMAGE_BYTES

  # Convert bytes to a vector of uint8 that is record_bytes long.
  record_vector = tf.decode_raw(raw_record, tf.uint8)
@@ -113,8 +82,8 @@ def parse_record(raw_record):

  # The remaining bytes after the label represent the image, which we reshape
  # from [depth * height * width] to [depth, height, width].
-  depth_major = tf.reshape(
-      record_vector[label_bytes:record_bytes], [_DEPTH, _HEIGHT, _WIDTH])
+  depth_major = tf.reshape(record_vector[label_bytes:record_bytes],
+                           [_NUM_CHANNELS, _HEIGHT, _WIDTH])

  # Convert from [depth, height, width] to [height, width, depth], and cast as
  # float32.
@@ -131,7 +100,7 @@ def preprocess_image(image, is_training):
        image, _HEIGHT + 8, _WIDTH + 8)

    # Randomly crop a [_HEIGHT, _WIDTH] section of the image.
-    image = tf.random_crop(image, [_HEIGHT, _WIDTH, _DEPTH])
+    image = tf.random_crop(image, [_HEIGHT, _WIDTH, _NUM_CHANNELS])

    # Randomly flip the image horizontally.
    image = tf.image.random_flip_left_right(image)
@@ -180,116 +149,81 @@ def input_fn(is_training, data_dir, batch_size, num_epochs=1):
  return images, labels


-def cifar10_model_fn(features, labels, mode, params):
-  """Model function for CIFAR-10."""
-  tf.summary.image('images', features, max_outputs=6)
-
-  network = resnet_model.cifar10_resnet_v2_generator(
-      params['resnet_size'], _NUM_CLASSES, params['data_format'])
-
-  inputs = tf.reshape(features, [-1, _HEIGHT, _WIDTH, _DEPTH])
-  logits = network(inputs, mode == tf.estimator.ModeKeys.TRAIN)
-
-  predictions = {
-      'classes': tf.argmax(logits, axis=1),
-      'probabilities': tf.nn.softmax(logits, name='softmax_tensor')
-  }
-
-  if mode == tf.estimator.ModeKeys.PREDICT:
-    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
-
-  # Calculate loss, which includes softmax cross entropy and L2 regularization.
-  cross_entropy = tf.losses.softmax_cross_entropy(
-      logits=logits, onehot_labels=labels)
-
-  # Create a tensor named cross_entropy for logging purposes.
-  tf.identity(cross_entropy, name='cross_entropy')
-  tf.summary.scalar('cross_entropy', cross_entropy)
-
-  # Add weight decay to the loss.
-  loss = cross_entropy + _WEIGHT_DECAY * tf.add_n(
-      [tf.nn.l2_loss(v) for v in tf.trainable_variables()])
-
-  if mode == tf.estimator.ModeKeys.TRAIN:
-    # Scale the learning rate linearly with the batch size. When the batch size
-    # is 128, the learning rate should be 0.1.
-    initial_learning_rate = 0.1 * params['batch_size'] / 128
-    batches_per_epoch = _NUM_IMAGES['train'] / params['batch_size']
-    global_step = tf.train.get_or_create_global_step()
-
-    # Multiply the learning rate by 0.1 at 100, 150, and 200 epochs.
-    boundaries = [int(batches_per_epoch * epoch) for epoch in [100, 150, 200]]
-    values = [initial_learning_rate * decay for decay in [1, 0.1, 0.01, 0.001]]
-    learning_rate = tf.train.piecewise_constant(
-        tf.cast(global_step, tf.int32), boundaries, values)
-
-    # Create a tensor named learning_rate for logging purposes
-    tf.identity(learning_rate, name='learning_rate')
-    tf.summary.scalar('learning_rate', learning_rate)
-
-    optimizer = tf.train.MomentumOptimizer(
-        learning_rate=learning_rate,
-        momentum=_MOMENTUM)
-
-    # Batch norm requires update ops to be added as a dependency to the train_op
-    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
-    with tf.control_dependencies(update_ops):
-      train_op = optimizer.minimize(loss, global_step)
-  else:
-    train_op = None
-
-  accuracy = tf.metrics.accuracy(
-      tf.argmax(labels, axis=1), predictions['classes'])
-  metrics = {'accuracy': accuracy}
+###############################################################################
+# Running the model
+###############################################################################
+class Cifar10Model(resnet_model.Model):
+  """resnet_model.Model preset with the CIFAR-10 parameters passed below."""
+  def __init__(self, resnet_size, data_format=None):
+    """Raises ValueError unless resnet_size has the form 6n + 2."""
+    # n (resnet_size = 6n + 2) is the block count used for each of 3 layers.
+    if resnet_size % 6 != 2:
+      raise ValueError('resnet_size must be 6n + 2: {}'.format(resnet_size))
+
+    num_blocks = (resnet_size - 2) // 6
+
+    super(Cifar10Model, self).__init__(
+        resnet_size=resnet_size,
+        num_classes=_NUM_CLASSES,
+        num_filters=16,
+        kernel_size=3,
+        conv_stride=1,
+        first_pool_size=None,
+        first_pool_stride=None,
+        second_pool_size=8,
+        second_pool_stride=1,
+        block_fn=resnet_model.building_block,
+        block_sizes=[num_blocks] * 3,
+        block_strides=[1, 2, 2],
+        final_size=64,
+        data_format=data_format)

-  # Create a tensor named train_accuracy for logging purposes
-  tf.identity(accuracy[1], name='train_accuracy')
-  tf.summary.scalar('train_accuracy', accuracy[1])

-  return tf.estimator.EstimatorSpec(
-      mode=mode,
-      predictions=predictions,
-      loss=loss,
-      train_op=train_op,
-      eval_metric_ops=metrics)
+def cifar10_model_fn(features, labels, mode, params):
+  """Model function for CIFAR-10."""
+  features = tf.reshape(features, [-1, _HEIGHT, _WIDTH, _NUM_CHANNELS])
+
+  learning_rate_fn = resnet_shared.learning_rate_with_decay(
+      batch_size=params['batch_size'], batch_denom=128,
+      num_images=_NUM_IMAGES['train'], boundary_epochs=[100, 150, 200],
+      decay_rates=[1, 0.1, 0.01, 0.001])
+
+  # We use a weight decay of 0.0002, which performs better
+  # than the 0.0001 that was originally suggested.
+  weight_decay = 2e-4
+
+  # Empirical testing showed that including batch_normalization variables
+  # in the calculation of regularized loss helped validation accuracy
+  # for the CIFAR-10 dataset, perhaps because the regularization prevents
+  # overfitting on the small data set. We therefore include all vars when
+  # regularizing and computing loss during training.
+  def loss_filter_fn(name):
+    return True
+
+  return resnet_shared.resnet_model_fn(features, labels, mode, Cifar10Model,
+                                       resnet_size=params['resnet_size'],
+                                       weight_decay=weight_decay,
+                                       learning_rate_fn=learning_rate_fn,
+                                       momentum=0.9,
+                                       data_format=params['data_format'],
+                                       loss_filter_fn=loss_filter_fn)


 def main(unused_argv):
-  # Using the Winograd non-fused algorithms provides a small performance boost.
-  os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'
-
-  # Set up a RunConfig to only save checkpoints once per training cycle.
-  run_config = tf.estimator.RunConfig().replace(save_checkpoints_secs=1e9)
-  cifar_classifier = tf.estimator.Estimator(
-      model_fn=cifar10_model_fn, model_dir=FLAGS.model_dir, config=run_config,
-      params={
-          'resnet_size': FLAGS.resnet_size,
-          'data_format': FLAGS.data_format,
-          'batch_size': FLAGS.batch_size,
-      })
-
-  for _ in range(FLAGS.train_epochs // FLAGS.epochs_per_eval):
-    tensors_to_log = {
-        'learning_rate': 'learning_rate',
-        'cross_entropy': 'cross_entropy',
-        'train_accuracy': 'train_accuracy'
-    }
-
-    logging_hook = tf.train.LoggingTensorHook(
-        tensors=tensors_to_log, every_n_iter=100)
-
-    cifar_classifier.train(
-        input_fn=lambda: input_fn(
-            True, FLAGS.data_dir, FLAGS.batch_size, FLAGS.epochs_per_eval),
-        hooks=[logging_hook])
-
-    # Evaluate the model and print results
-    eval_results = cifar_classifier.evaluate(
-        input_fn=lambda: input_fn(False, FLAGS.data_dir, FLAGS.batch_size))
-    print(eval_results)
+  resnet_shared.resnet_main(FLAGS, cifar10_model_fn, input_fn)


 if __name__ == '__main__':
  tf.logging.set_verbosity(tf.logging.INFO)
+
+  parser = resnet_shared.ResnetArgParser()
+  # Set defaults that are reasonable for this model.
+  parser.set_defaults(data_dir='/tmp/cifar10_data',
+                      model_dir='/tmp/cifar10_model',
+                      resnet_size=32,
+                      train_epochs=250,
+                      epochs_per_eval=10,
+                      batch_size=128)
+
  FLAGS, unparsed = parser.parse_known_args()
  tf.app.run(argv=[sys.argv[0]] + unparsed)
--- a/official/resnet/imagenet_main.py
+++ b/official/resnet/imagenet_main.py
@@ -18,55 +18,18 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function

-import argparse
 import os
 import sys

 import tensorflow as tf

 import resnet_model
+import resnet_shared
 import vgg_preprocessing

-parser = argparse.ArgumentParser()
-
-parser.add_argument(
-    '--data_dir', type=str, default='',
-    help='The directory where the ImageNet input data is stored.')
-
-parser.add_argument(
-    '--model_dir', type=str, default='/tmp/resnet_model',
-    help='The directory where the model will be stored.')
-
-parser.add_argument(
-    '--resnet_size', type=int, default=50, choices=[18, 34, 50, 101, 152, 200],
-    help='The size of the ResNet model to use.')
-
-parser.add_argument(
-    '--train_epochs', type=int, default=100,
-    help='The number of epochs to use for training.')
-
-parser.add_argument(
-    '--epochs_per_eval', type=int, default=1,
-    help='The number of training epochs to run between evaluations.')
-
-parser.add_argument(
-    '--batch_size', type=int, default=32,
-    help='Batch size for training and evaluation.')
-
-parser.add_argument(
-    '--data_format', type=str, default=None,
-    choices=['channels_first', 'channels_last'],
-    help='A flag to override the data format used in the model. channels_first '
-         'provides a performance boost on GPU but is not always compatible '
-         'with CPU. If left unspecified, the data format will be chosen '
-         'automatically based on whether TensorFlow was built for CPU or GPU.')
-
 _DEFAULT_IMAGE_SIZE = 224
 _NUM_CHANNELS = 3
-_LABEL_CLASSES = 1001
-
-_MOMENTUM = 0.9
-_WEIGHT_DECAY = 1e-4
+_NUM_CLASSES = 1001

 _NUM_IMAGES = {
    'train': 1281167,
@@ -77,6 +40,9 @@ _FILE_SHUFFLE_BUFFER = 1024
 _SHUFFLE_BUFFER = 1500


+###############################################################################
+# Data processing
+###############################################################################
 def filenames(is_training, data_dir):
  """Return filenames for dataset."""
  if is_training:
@@ -89,7 +55,7 @@ def filenames(is_training, data_dir):
        for i in range(128)]


-def record_parser(value, is_training):
+def parse_record(raw_record, is_training):
  """Parse an ImageNet record from `value`."""
  keys_to_features = {
      'image/encoded':
@@ -112,7 +78,7 @@ def record_parser(value, is_training):
          tf.VarLenFeature(dtype=tf.int64),
  }

-  parsed = tf.parse_single_example(value, keys_to_features)
+  parsed = tf.parse_single_example(raw_record, keys_to_features)

  image = tf.image.decode_image(
      tf.reshape(parsed['image/encoded'], shape=[]),
@@ -129,18 +95,19 @@ def record_parser(value, is_training):
      tf.reshape(parsed['image/class/label'], shape=[]),
      dtype=tf.int32)

-  return image, tf.one_hot(label, _LABEL_CLASSES)
+  return image, tf.one_hot(label, _NUM_CLASSES)


 def input_fn(is_training, data_dir, batch_size, num_epochs=1):
  """Input function which provides batches for train or eval."""
-  dataset = tf.data.Dataset.from_tensor_slices(filenames(is_training, data_dir))
+  dataset = tf.data.Dataset.from_tensor_slices(
+      filenames(is_training, data_dir))

  if is_training:
    dataset = dataset.shuffle(buffer_size=_FILE_SHUFFLE_BUFFER)

  dataset = dataset.flat_map(tf.data.TFRecordDataset)
-  dataset = dataset.map(lambda value: record_parser(value, is_training),
+  dataset = dataset.map(lambda value: parse_record(value, is_training),
                        num_parallel_calls=5)
  dataset = dataset.prefetch(batch_size)

@@ -159,120 +126,86 @@ def input_fn(is_training, data_dir, batch_size, num_epochs=1):
  return images, labels


-def resnet_model_fn(features, labels, mode, params):
-  """Our model_fn for ResNet to be used with our Estimator."""
-  tf.summary.image('images', features, max_outputs=6)
-
-  network = resnet_model.imagenet_resnet_v2(
-      params['resnet_size'], _LABEL_CLASSES, params['data_format'])
-  logits = network(
-      inputs=features, is_training=(mode == tf.estimator.ModeKeys.TRAIN))
-
-  predictions = {
-      'classes': tf.argmax(logits, axis=1),
-      'probabilities': tf.nn.softmax(logits, name='softmax_tensor')
+###############################################################################
+# Running the model
+###############################################################################
+class ImagenetModel(resnet_model.Model):
+  def __init__(self, resnet_size, data_format=None):
+    """These are the parameters that work for Imagenet data.
+    """
+
+    # For bigger models, we want to use "bottleneck" layers
+    if resnet_size < 50:
+      block_fn = resnet_model.building_block
+      final_size = 512
+    else:
+      block_fn = resnet_model.bottleneck_block
+      final_size = 2048
+
+    super(ImagenetModel, self).__init__(
+        resnet_size=resnet_size,
+        num_classes=_NUM_CLASSES,
+        num_filters=64,
+        kernel_size=7,
+        conv_stride=2,
+        first_pool_size=3,
+        first_pool_stride=2,
+        second_pool_size=7,
+        second_pool_stride=1,
+        block_fn=block_fn,
+        block_sizes=_get_block_sizes(resnet_size),
+        block_strides=[1, 2, 2, 2],
+        final_size=final_size,
+        data_format=data_format)
+
+
+def _get_block_sizes(resnet_size):
+  """The number of block layers used for the Resnet model varies according
+  to the size of the model. This helper grabs the layer set we want, throwing
+  an error if a non-standard size has been selected.
+  """
+  choices = {
+      18: [2, 2, 2, 2],
+      34: [3, 4, 6, 3],
+      50: [3, 4, 6, 3],
+      101: [3, 4, 23, 3],
+      152: [3, 8, 36, 3],
+      200: [3, 24, 36, 3]
  }

-  if mode == tf.estimator.ModeKeys.PREDICT:
-    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
-
-  # Calculate loss, which includes softmax cross entropy and L2 regularization.
-  cross_entropy = tf.losses.softmax_cross_entropy(
-      logits=logits, onehot_labels=labels)
-
-  # Create a tensor named cross_entropy for logging purposes.
-  tf.identity(cross_entropy, name='cross_entropy')
-  tf.summary.scalar('cross_entropy', cross_entropy)
-
-  # Add weight decay to the loss. We exclude the batch norm variables because
-  # doing so leads to a small improvement in accuracy.
-  loss = cross_entropy + _WEIGHT_DECAY * tf.add_n(
-      [tf.nn.l2_loss(v) for v in tf.trainable_variables()
-       if 'batch_normalization' not in v.name])
-
-  if mode == tf.estimator.ModeKeys.TRAIN:
-    # Scale the learning rate linearly with the batch size. When the batch size
-    # is 256, the learning rate should be 0.1.
-    initial_learning_rate = 0.1 * params['batch_size'] / 256
-    batches_per_epoch = _NUM_IMAGES['train'] / params['batch_size']
-    global_step = tf.train.get_or_create_global_step()
-
-    # Multiply the learning rate by 0.1 at 30, 60, 80, and 90 epochs.
-    boundaries = [
-        int(batches_per_epoch * epoch) for epoch in [30, 60, 80, 90]]
-    values = [
-        initial_learning_rate * decay for decay in [1, 0.1, 0.01, 1e-3, 1e-4]]
-    learning_rate = tf.train.piecewise_constant(
-        tf.cast(global_step, tf.int32), boundaries, values)
-
-    # Create a tensor named learning_rate for logging purposes.
-    tf.identity(learning_rate, name='learning_rate')
-    tf.summary.scalar('learning_rate', learning_rate)
-
-    optimizer = tf.train.MomentumOptimizer(
-        learning_rate=learning_rate,
-        momentum=_MOMENTUM)
-
-    # Batch norm requires update_ops to be added as a train_op dependency.
-    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
-    with tf.control_dependencies(update_ops):
-      train_op = optimizer.minimize(loss, global_step)
-  else:
-    train_op = None
+  try:
+    return choices[resnet_size]
+  except KeyError:
+    # sorted() yields a plain ordered list on Python 2 and 3 alike.
+    err = ('Could not find layers for selected Resnet size.\nSize received: '
+           '{}; sizes allowed: {}.'.format(resnet_size, sorted(choices)))
+    raise ValueError(err)

-  accuracy = tf.metrics.accuracy(
-      tf.argmax(labels, axis=1), predictions['classes'])
-  metrics = {'accuracy': accuracy}

-  # Create a tensor named train_accuracy for logging purposes.
-  tf.identity(accuracy[1], name='train_accuracy')
-  tf.summary.scalar('train_accuracy', accuracy[1])
+def imagenet_model_fn(features, labels, mode, params):
+  """Our model_fn for ResNet to be used with our Estimator."""
+  learning_rate_fn = resnet_shared.learning_rate_with_decay(
+      batch_size=params['batch_size'], batch_denom=256,
+      num_images=_NUM_IMAGES['train'], boundary_epochs=[30, 60, 80, 90],
+      decay_rates=[1, 0.1, 0.01, 0.001, 1e-4])

-  return tf.estimator.EstimatorSpec(
-      mode=mode,
-      predictions=predictions,
-      loss=loss,
-      train_op=train_op,
-      eval_metric_ops=metrics)
+  return resnet_shared.resnet_model_fn(features, labels, mode, ImagenetModel,
+                                       resnet_size=params['resnet_size'],
+                                       weight_decay=1e-4,
+                                       learning_rate_fn=learning_rate_fn,
+                                       momentum=0.9,
+                                       data_format=params['data_format'],
+                                       loss_filter_fn=None)


 def main(unused_argv):
-  # Using the Winograd non-fused algorithms provides a small performance boost.
-  os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'
-
-  # Set up a RunConfig to only save checkpoints once per training cycle.
-  run_config = tf.estimator.RunConfig().replace(save_checkpoints_secs=1e9)
-  resnet_classifier = tf.estimator.Estimator(
-      model_fn=resnet_model_fn, model_dir=FLAGS.model_dir, config=run_config,
-      params={
-          'resnet_size': FLAGS.resnet_size,
-          'data_format': FLAGS.data_format,
-          'batch_size': FLAGS.batch_size,
-      })
-
-  for _ in range(FLAGS.train_epochs // FLAGS.epochs_per_eval):
-    tensors_to_log = {
-        'learning_rate': 'learning_rate',
-        'cross_entropy': 'cross_entropy',
-        'train_accuracy': 'train_accuracy'
-    }
-
-    logging_hook = tf.train.LoggingTensorHook(
-        tensors=tensors_to_log, every_n_iter=100)
-
-    print('Starting a training cycle.')
-    resnet_classifier.train(
-        input_fn=lambda: input_fn(
-            True, FLAGS.data_dir, FLAGS.batch_size, FLAGS.epochs_per_eval),
-        hooks=[logging_hook])
-
-    print('Starting to evaluate.')
-    eval_results = resnet_classifier.evaluate(
-        input_fn=lambda: input_fn(False, FLAGS.data_dir, FLAGS.batch_size))
-    print(eval_results)
+  resnet_shared.resnet_main(FLAGS, imagenet_model_fn, input_fn)


 if __name__ == '__main__':
  tf.logging.set_verbosity(tf.logging.INFO)
+
+  parser = resnet_shared.ResnetArgParser(
+      resnet_size_choices=[18, 34, 50, 101, 152, 200])
  FLAGS, unparsed = parser.parse_known_args()
  tf.app.run(argv=[sys.argv[0]] + unparsed)
--- a/official/resnet/imagenet_test.py
+++ b/official/resnet/imagenet_test.py
@@ -22,7 +22,6 @@ import unittest
 import tensorflow as tf

 import imagenet_main
-import resnet_model

 tf.logging.set_verbosity(tf.logging.ERROR)

@@ -35,7 +34,9 @@ class BaseTest(tf.test.TestCase):
  def tensor_shapes_helper(self, resnet_size, with_gpu=False):
    """Checks the tensor shapes after each phase of the ResNet model."""
    def reshape(shape):
-      """Returns the expected dimensions depending on if a GPU is being used."""
+      """Returns the expected dimensions depending on if a
+      GPU is being used.
+      """
      # If a GPU is used for the test, the shape is returned (already in NCHW
      # form). When GPU is not used, the shape is converted to NHWC.
      if with_gpu:
@@ -46,11 +47,11 @@ class BaseTest(tf.test.TestCase):

    with graph.as_default(), self.test_session(
        use_gpu=with_gpu, force_gpu=with_gpu):
-      model = resnet_model.imagenet_resnet_v2(
-          resnet_size, 456,
+      model = imagenet_main.ImagenetModel(
+          resnet_size,
          data_format='channels_first' if with_gpu else 'channels_last')
      inputs = tf.random_uniform([1, 224, 224, 3])
-      output = model(inputs, is_training=True)
+      output = model(inputs, training=True)

      initial_conv = graph.get_tensor_by_name('initial_conv:0')
      max_pool = graph.get_tensor_by_name('initial_max_pool:0')
@@ -79,8 +80,8 @@ class BaseTest(tf.test.TestCase):
        self.assertAllEqual(block_layer4.shape, reshape((1, 2048, 7, 7)))
        self.assertAllEqual(avg_pool.shape, reshape((1, 2048, 1, 1)))

-      self.assertAllEqual(dense.shape, (1, 456))
-      self.assertAllEqual(output.shape, (1, 456))
+      self.assertAllEqual(dense.shape, (1, _LABEL_CLASSES))
+      self.assertAllEqual(output.shape, (1, _LABEL_CLASSES))

  def test_tensor_shapes_resnet_18(self):
    self.tensor_shapes_helper(18)
@@ -140,7 +141,7 @@ class BaseTest(tf.test.TestCase):
    tf.train.create_global_step()

    features, labels = self.input_fn()
-    spec = imagenet_main.resnet_model_fn(
+    spec = imagenet_main.imagenet_model_fn(
        features, labels, mode, {
            'resnet_size': 50,
            'data_format': 'channels_last',

--- a/official/resnet/resnet_model.py
+++ b/official/resnet/resnet_model.py
@@ -38,14 +38,14 @@ _BATCH_NORM_DECAY = 0.997
 _BATCH_NORM_EPSILON = 1e-5


-def batch_norm_relu(inputs, is_training, data_format):
+def batch_norm_relu(inputs, training, data_format):
  """Performs a batch normalization followed by a ReLU."""
  # We set fused=True for a significant performance boost. See
  # https://www.tensorflow.org/performance/performance_guide#common_fused_ops
  inputs = tf.layers.batch_normalization(
      inputs=inputs, axis=1 if data_format == 'channels_first' else 3,
      momentum=_BATCH_NORM_DECAY, epsilon=_BATCH_NORM_EPSILON, center=True,
-      scale=True, training=is_training, fused=True)
+      scale=True, training=training, fused=True)
  inputs = tf.nn.relu(inputs)
  return inputs

@@ -91,7 +91,7 @@ def conv2d_fixed_padding(inputs, filters, kernel_size, strides, data_format):
      data_format=data_format)


-def building_block(inputs, filters, is_training, projection_shortcut, strides,
+def building_block(inputs, filters, training, projection_shortcut, strides,
                   data_format):
  """Standard building block for residual networks with BN before convolutions.

@@ -99,10 +99,10 @@ def building_block(inputs, filters, is_training, projection_shortcut, strides,
    inputs: A tensor of size [batch, channels, height_in, width_in] or
      [batch, height_in, width_in, channels] depending on data_format.
    filters: The number of filters for the convolutions.
-    is_training: A Boolean for whether the model is in training or inference
+    training: A Boolean for whether the model is in training or inference
      mode. Needed for batch normalization.
-    projection_shortcut: The function to use for projection shortcuts (typically
-      a 1x1 convolution when downsampling the input).
+    projection_shortcut: The function to use for projection shortcuts
+      (typically a 1x1 convolution when downsampling the input).
    strides: The block's stride. If greater than 1, this block will ultimately
      downsample the input.
    data_format: The input format ('channels_last' or 'channels_first').
@@ -111,7 +111,7 @@ def building_block(inputs, filters, is_training, projection_shortcut, strides,
    The output tensor of the block.
  """
  shortcut = inputs
-  inputs = batch_norm_relu(inputs, is_training, data_format)
+  inputs = batch_norm_relu(inputs, training, data_format)

  # The projection shortcut should come after the first batch norm and ReLU
  # since it performs a 1x1 convolution.
@@ -122,7 +122,7 @@ def building_block(inputs, filters, is_training, projection_shortcut, strides,
      inputs=inputs, filters=filters, kernel_size=3, strides=strides,
      data_format=data_format)

-  inputs = batch_norm_relu(inputs, is_training, data_format)
+  inputs = batch_norm_relu(inputs, training, data_format)
  inputs = conv2d_fixed_padding(
      inputs=inputs, filters=filters, kernel_size=3, strides=1,
      data_format=data_format)
@@ -130,19 +130,19 @@ def building_block(inputs, filters, is_training, projection_shortcut, strides,
  return inputs + shortcut


-def bottleneck_block(inputs, filters, is_training, projection_shortcut,
+def bottleneck_block(inputs, filters, training, projection_shortcut,
                     strides, data_format):
  """Bottleneck block variant for residual networks with BN before convolutions.

  Args:
    inputs: A tensor of size [batch, channels, height_in, width_in] or
      [batch, height_in, width_in, channels] depending on data_format.
-    filters: The number of filters for the first two convolutions. Note that the
-      third and final convolution will use 4 times as many filters.
-    is_training: A Boolean for whether the model is in training or inference
+    filters: The number of filters for the first two convolutions. Note
+      that the third and final convolution will use 4 times as many filters.
+    training: A Boolean for whether the model is in training or inference
      mode. Needed for batch normalization.
-    projection_shortcut: The function to use for projection shortcuts (typically
-      a 1x1 convolution when downsampling the input).
+    projection_shortcut: The function to use for projection shortcuts
+      (typically a 1x1 convolution when downsampling the input).
    strides: The block's stride. If greater than 1, this block will ultimately
      downsample the input.
    data_format: The input format ('channels_last' or 'channels_first').
@@ -151,7 +151,7 @@ def bottleneck_block(inputs, filters, is_training, projection_shortcut,
    The output tensor of the block.
  """
  shortcut = inputs
-  inputs = batch_norm_relu(inputs, is_training, data_format)
+  inputs = batch_norm_relu(inputs, training, data_format)

  # The projection shortcut should come after the first batch norm and ReLU
  # since it performs a 1x1 convolution.
@@ -162,12 +162,12 @@ def bottleneck_block(inputs, filters, is_training, projection_shortcut,
      inputs=inputs, filters=filters, kernel_size=1, strides=1,
      data_format=data_format)

-  inputs = batch_norm_relu(inputs, is_training, data_format)
+  inputs = batch_norm_relu(inputs, training, data_format)
  inputs = conv2d_fixed_padding(
      inputs=inputs, filters=filters, kernel_size=3, strides=strides,
      data_format=data_format)

-  inputs = batch_norm_relu(inputs, is_training, data_format)
+  inputs = batch_norm_relu(inputs, training, data_format)
  inputs = conv2d_fixed_padding(
      inputs=inputs, filters=4 * filters, kernel_size=1, strides=1,
      data_format=data_format)
@@ -175,7 +175,7 @@ def bottleneck_block(inputs, filters, is_training, projection_shortcut,
  return inputs + shortcut


-def block_layer(inputs, filters, block_fn, blocks, strides, is_training, name,
+def block_layer(inputs, filters, block_fn, blocks, strides, training, name,
                data_format):
  """Creates one layer of blocks for the ResNet model.

@@ -188,7 +188,7 @@ def block_layer(inputs, filters, block_fn, blocks, strides, is_training, name,
    blocks: The number of blocks contained in the layer.
    strides: The stride to use for the first convolution of the layer. If
      greater than 1, this layer will ultimately downsample the input.
-    is_training: Either True or False, whether we are currently training the
+    training: Either True or False, whether we are currently training the
      model. Needed for batch norm.
    name: A string name for the tensor output of the block layer.
    data_format: The input format ('channels_last' or 'channels_first').
@@ -205,162 +205,116 @@ def block_layer(inputs, filters, block_fn, blocks, strides, is_training, name,
        data_format=data_format)

  # Only the first block per block_layer uses projection_shortcut and strides
-  inputs = block_fn(inputs, filters, is_training, projection_shortcut, strides,
+  inputs = block_fn(inputs, filters, training, projection_shortcut, strides,
                    data_format)

  for _ in range(1, blocks):
-    inputs = block_fn(inputs, filters, is_training, None, 1, data_format)
+    inputs = block_fn(inputs, filters, training, None, 1, data_format)

  return tf.identity(inputs, name)


-def cifar10_resnet_v2_generator(resnet_size, num_classes, data_format=None):
-  """Generator for CIFAR-10 ResNet v2 models.
-
-  Args:
-    resnet_size: A single integer for the size of the ResNet model.
-    num_classes: The number of possible classes for image classification.
-    data_format: The input format ('channels_last', 'channels_first', or None).
-      If set to None, the format is dependent on whether a GPU is available.
-
-  Returns:
-    The model function that takes in `inputs` and `is_training` and
-    returns the output tensor of the ResNet model.
-
-  Raises:
-    ValueError: If `resnet_size` is invalid.
+class Model(object):
+  """Base class for building the Resnet v2 Model.
  """
-  if resnet_size % 6 != 2:
-    raise ValueError('resnet_size must be 6n + 2:', resnet_size)
-
-  num_blocks = (resnet_size - 2) // 6

-  if data_format is None:
-    data_format = (
-        'channels_first' if tf.test.is_built_with_cuda() else 'channels_last')
-
-  def model(inputs, is_training):
-    """Constructs the ResNet model given the inputs."""
-    if data_format == 'channels_first':
+  def __init__(self, resnet_size, num_classes, num_filters, kernel_size,
+               conv_stride, first_pool_size, first_pool_stride,
+               second_pool_size, second_pool_stride, block_fn, block_sizes,
+               block_strides, final_size, data_format=None):
+    """Creates a model for classifying an image.
+
+    Args:
+      resnet_size: A single integer for the size of the ResNet model.
+      num_classes: The number of classes used as labels.
+      num_filters: The number of filters to use for the first block layer
+        of the model. This number is then doubled for each subsequent block
+        layer.
+      kernel_size: The kernel size to use for convolution.
+      conv_stride: stride size for the initial convolutional layer
+      first_pool_size: Pool size to be used for the first pooling layer.
+        If None, the first pooling layer is skipped.
+      first_pool_stride: stride size for the first pooling layer. Not used
+        if first_pool_size is None.
+      second_pool_size: Pool size to be used for the second pooling layer.
+      second_pool_stride: stride size for the final pooling layer
+      block_fn: Which block layer function should be used? Pass in one of
+        the two functions defined above: building_block or bottleneck_block
+      block_sizes: A list containing n values, where n is the number of sets of
+        block layers desired. Each value should be the number of blocks in the
+        i-th set.
+      block_strides: List of integers representing the desired stride size for
+        each of the sets of block layers. Should be same length as block_sizes.
+      final_size: The expected size of the model after the second pooling.
+      data_format: Input format ('channels_last', 'channels_first', or None).
+        If set to None, the format is dependent on whether a GPU is available.
+    """
+    self.resnet_size = resnet_size
+
+    if not data_format:
+      data_format = (
+          'channels_first' if tf.test.is_built_with_cuda() else 'channels_last')
+
+    self.data_format = data_format
+    self.num_classes = num_classes
+    self.num_filters = num_filters
+    self.kernel_size = kernel_size
+    self.conv_stride = conv_stride
+    self.first_pool_size = first_pool_size
+    self.first_pool_stride = first_pool_stride
+    self.second_pool_size = second_pool_size
+    self.second_pool_stride = second_pool_stride
+    self.block_fn = block_fn
+    self.block_sizes = block_sizes
+    self.block_strides = block_strides
+    self.final_size = final_size
+
+  def __call__(self, inputs, training):
+    """Add operations to classify a batch of input images.
+
+    Args:
+      inputs: A Tensor representing a batch of input images.
+      training: A boolean. Set to True to add operations required only when
+        training the classifier.
+
+    Returns:
+      A logits Tensor with shape [<batch_size>, self.num_classes].
+    """
+
+    if self.data_format == 'channels_first':
      # Convert the inputs from channels_last (NHWC) to channels_first (NCHW).
      # This provides a large performance boost on GPU. See
      # https://www.tensorflow.org/performance/performance_guide#data_formats
      inputs = tf.transpose(inputs, [0, 3, 1, 2])

    inputs = conv2d_fixed_padding(
-        inputs=inputs, filters=16, kernel_size=3, strides=1,
-        data_format=data_format)
+        inputs=inputs, filters=self.num_filters, kernel_size=self.kernel_size,
+        strides=self.conv_stride, data_format=self.data_format)
    inputs = tf.identity(inputs, 'initial_conv')

-    inputs = block_layer(
-        inputs=inputs, filters=16, block_fn=building_block, blocks=num_blocks,
-        strides=1, is_training=is_training, name='block_layer1',
-        data_format=data_format)
-    inputs = block_layer(
-        inputs=inputs, filters=32, block_fn=building_block, blocks=num_blocks,
-        strides=2, is_training=is_training, name='block_layer2',
-        data_format=data_format)
-    inputs = block_layer(
-        inputs=inputs, filters=64, block_fn=building_block, blocks=num_blocks,
-        strides=2, is_training=is_training, name='block_layer3',
-        data_format=data_format)
-
-    inputs = batch_norm_relu(inputs, is_training, data_format)
+    if self.first_pool_size:
+      inputs = tf.layers.max_pooling2d(
+          inputs=inputs, pool_size=self.first_pool_size,
+          strides=self.first_pool_stride, padding='SAME',
+          data_format=self.data_format)
+      inputs = tf.identity(inputs, 'initial_max_pool')
+
+    for i, num_blocks in enumerate(self.block_sizes):
+      num_filters = self.num_filters * (2**i)
+      inputs = block_layer(
+          inputs=inputs, filters=num_filters, block_fn=self.block_fn,
+          blocks=num_blocks, strides=self.block_strides[i],
+          training=training, name='block_layer{}'.format(i + 1),
+          data_format=self.data_format)
+
+    inputs = batch_norm_relu(inputs, training, self.data_format)
    inputs = tf.layers.average_pooling2d(
-        inputs=inputs, pool_size=8, strides=1, padding='VALID',
-        data_format=data_format)
+        inputs=inputs, pool_size=self.second_pool_size,
+        strides=self.second_pool_stride, padding='VALID',
+        data_format=self.data_format)
    inputs = tf.identity(inputs, 'final_avg_pool')
-    inputs = tf.reshape(inputs, [-1, 64])
-    inputs = tf.layers.dense(inputs=inputs, units=num_classes)
-    inputs = tf.identity(inputs, 'final_dense')
-    return inputs
-
-  return model
-
-
-def imagenet_resnet_v2_generator(block_fn, layers, num_classes,
-                                 data_format=None):
-  """Generator for ImageNet ResNet v2 models.
-
-  Args:
-    block_fn: The block to use within the model, either `building_block` or
-      `bottleneck_block`.
-    layers: A length-4 array denoting the number of blocks to include in each
-      layer. Each layer consists of blocks that take inputs of the same size.
-    num_classes: The number of possible classes for image classification.
-    data_format: The input format ('channels_last', 'channels_first', or None).
-      If set to None, the format is dependent on whether a GPU is available.
-
-  Returns:
-    The model function that takes in `inputs` and `is_training` and
-    returns the output tensor of the ResNet model.
-  """
-  if data_format is None:
-    data_format = (
-        'channels_first' if tf.test.is_built_with_cuda() else 'channels_last')
-
-  def model(inputs, is_training):
-    """Constructs the ResNet model given the inputs."""
-    if data_format == 'channels_first':
-      # Convert the inputs from channels_last (NHWC) to channels_first (NCHW).
-      # This provides a large performance boost on GPU. See
-      # https://www.tensorflow.org/performance/performance_guide#data_formats
-      inputs = tf.transpose(inputs, [0, 3, 1, 2])
-
-    inputs = conv2d_fixed_padding(
-        inputs=inputs, filters=64, kernel_size=7, strides=2,
-        data_format=data_format)
-    inputs = tf.identity(inputs, 'initial_conv')
-    inputs = tf.layers.max_pooling2d(
-        inputs=inputs, pool_size=3, strides=2, padding='SAME',
-        data_format=data_format)
-    inputs = tf.identity(inputs, 'initial_max_pool')
-
-    inputs = block_layer(
-        inputs=inputs, filters=64, block_fn=block_fn, blocks=layers[0],
-        strides=1, is_training=is_training, name='block_layer1',
-        data_format=data_format)
-    inputs = block_layer(
-        inputs=inputs, filters=128, block_fn=block_fn, blocks=layers[1],
-        strides=2, is_training=is_training, name='block_layer2',
-        data_format=data_format)
-    inputs = block_layer(
-        inputs=inputs, filters=256, block_fn=block_fn, blocks=layers[2],
-        strides=2, is_training=is_training, name='block_layer3',
-        data_format=data_format)
-    inputs = block_layer(
-        inputs=inputs, filters=512, block_fn=block_fn, blocks=layers[3],
-        strides=2, is_training=is_training, name='block_layer4',
-        data_format=data_format)

-    inputs = batch_norm_relu(inputs, is_training, data_format)
-    inputs = tf.layers.average_pooling2d(
-        inputs=inputs, pool_size=7, strides=1, padding='VALID',
-        data_format=data_format)
-    inputs = tf.identity(inputs, 'final_avg_pool')
-    inputs = tf.reshape(inputs,
-                        [-1, 512 if block_fn is building_block else 2048])
-    inputs = tf.layers.dense(inputs=inputs, units=num_classes)
+    inputs = tf.reshape(inputs, [-1, self.final_size])
+    inputs = tf.layers.dense(inputs=inputs, units=self.num_classes)
    inputs = tf.identity(inputs, 'final_dense')
    return inputs
-
-  return model
-
-
-def imagenet_resnet_v2(resnet_size, num_classes, data_format=None):
-  """Returns the ResNet model for a given size and number of output classes."""
-  model_params = {
-      18: {'block': building_block, 'layers': [2, 2, 2, 2]},
-      34: {'block': building_block, 'layers': [3, 4, 6, 3]},
-      50: {'block': bottleneck_block, 'layers': [3, 4, 6, 3]},
-      101: {'block': bottleneck_block, 'layers': [3, 4, 23, 3]},
-      152: {'block': bottleneck_block, 'layers': [3, 8, 36, 3]},
-      200: {'block': bottleneck_block, 'layers': [3, 24, 36, 3]}
-  }
-
-  if resnet_size not in model_params:
-    raise ValueError('Not a valid resnet_size:', resnet_size)
-
-  params = model_params[resnet_size]
-  return imagenet_resnet_v2_generator(
-      params['block'], params['layers'], num_classes, data_format)
--- a/official/resnet/resnet_shared.py
+++ b/official/resnet/resnet_shared.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Functions for running Resnet that are shared across datasets."""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import argparse
+import os
+
+import tensorflow as tf
+
+
+def learning_rate_with_decay(
+    batch_size, batch_denom, num_images, boundary_epochs, decay_rates):
+  """Get a learning rate that decays step-wise as training progresses.
+
+  Args:
+    batch_size: the number of examples processed in each training batch.
+    batch_denom: this value will be used to scale the base learning rate.
+      `0.1 * batch size` is divided by this number, such that when
+      batch_denom == batch_size, the initial learning rate will be 0.1.
+    num_images: total number of images that will be used for training.
+    boundary_epochs: list of ints representing the epochs at which we
+      decay the learning rate.
+    decay_rates: list of floats representing the decay rates to be used
+      for scaling the learning rate. Each rate is multiplied by the initial
+      learning rate and the results are passed as the `values` argument of
+      `tf.train.piecewise_constant`, which expects one more value than
+      there are boundaries. This list should therefore have one more
+      element than boundary_epochs.
+
+  Returns:
+    Returns a function that takes a single argument - the number of batches
+    trained so far (global_step) - and returns the learning rate to be used
+    for training the next batch.
+  """
+  initial_learning_rate = 0.1 * batch_size / batch_denom
+  # True division (this module imports `division` from __future__), so the
+  # result may be fractional; it is truncated per boundary below.
+  batches_per_epoch = num_images / batch_size
+
+  # Convert each boundary from epochs to a global-step count, and scale the
+  # initial learning rate by each decay rate to get the piecewise values.
+  boundaries = [int(batches_per_epoch * epoch) for epoch in boundary_epochs]
+  vals = [initial_learning_rate * decay for decay in decay_rates]
+
+  def learning_rate_fn(global_step):
+    # Cast so the step has the same integer type as the int boundaries.
+    global_step = tf.cast(global_step, tf.int32)
+    return tf.train.piecewise_constant(global_step, boundaries, vals)
+
+  return learning_rate_fn
+
+
+def resnet_model_fn(features, labels, mode, model_class,
+                    resnet_size, weight_decay, learning_rate_fn, momentum,
+                    data_format, loss_filter_fn=None):
+  """Shared functionality for different resnet model_fns.
+
+  Instantiates `model_class` to build the model layers and uses that model
+  to build the necessary EstimatorSpecs for the `mode` in question. For
+  training, this means building losses, the optimizer, and the train op
+  that get passed into the EstimatorSpec. For evaluation and prediction,
+  the EstimatorSpec is returned without a train op, but with the necessary
+  parameters for the given mode.
+
+  Args:
+    features: tensor representing input images
+    labels: tensor representing class labels for all input images. Used as
+      one-hot labels: passed as `onehot_labels` to the cross-entropy loss
+      and reduced with argmax over axis 1 for the accuracy metric.
+    mode: current estimator mode; should be one of
+      `tf.estimator.ModeKeys.TRAIN`, `EVAL`, `PREDICT`
+    model_class: a class representing a TensorFlow model that has a __call__
+      function. It is constructed here as
+      `model_class(resnet_size, data_format)` and the instance is called as
+      `model(features, training)`.
+    resnet_size: A single integer for the size of the ResNet model.
+    weight_decay: weight decay loss rate used to regularize learned variables.
+    learning_rate_fn: function that returns the current learning rate given
+      the current global_step
+    momentum: momentum term used for optimization
+    data_format: Input format ('channels_last', 'channels_first', or None).
+      If set to None, the format is dependent on whether a GPU is available.
+    loss_filter_fn: function that takes a string variable name and returns
+      True if the var should be included in loss calculation, and False
+      otherwise. If None, batch_normalization variables will be excluded
+      from the loss.
+
+  Returns:
+    EstimatorSpec parameterized according to the input params and the
+    current mode.
+  """
+
+  # Generate a summary node for the images
+  tf.summary.image('images', features, max_outputs=6)
+
+  model = model_class(resnet_size, data_format)
+  # The second argument is the `training` flag: True only in TRAIN mode.
+  logits = model(features, mode == tf.estimator.ModeKeys.TRAIN)
+
+  predictions = {
+      'classes': tf.argmax(logits, axis=1),
+      'probabilities': tf.nn.softmax(logits, name='softmax_tensor')
+  }
+
+  # Prediction needs neither labels nor a loss, so return early.
+  if mode == tf.estimator.ModeKeys.PREDICT:
+    return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
+
+  # Calculate loss, which includes softmax cross entropy and L2 regularization.
+  cross_entropy = tf.losses.softmax_cross_entropy(
+      logits=logits, onehot_labels=labels)
+
+  # Create a tensor named cross_entropy for logging purposes.
+  tf.identity(cross_entropy, name='cross_entropy')
+  tf.summary.scalar('cross_entropy', cross_entropy)
+
+  # If no loss_filter_fn is passed, assume we want the default behavior,
+  # which is that batch_normalization variables are excluded from loss.
+  if not loss_filter_fn:
+    def loss_filter_fn(name):
+      return 'batch_normalization' not in name
+
+  # Add weight decay to the loss.
+  loss = cross_entropy + weight_decay * tf.add_n(
+      [tf.nn.l2_loss(v) for v in tf.trainable_variables()
+       if loss_filter_fn(v.name)])
+
+  if mode == tf.estimator.ModeKeys.TRAIN:
+    global_step = tf.train.get_or_create_global_step()
+
+    learning_rate = learning_rate_fn(global_step)
+
+    # Create a tensor named learning_rate for logging purposes
+    tf.identity(learning_rate, name='learning_rate')
+    tf.summary.scalar('learning_rate', learning_rate)
+
+    optimizer = tf.train.MomentumOptimizer(
+        learning_rate=learning_rate,
+        momentum=momentum)
+
+    # Batch norm requires update ops to be added as a dependency to train_op
+    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
+    with tf.control_dependencies(update_ops):
+      train_op = optimizer.minimize(loss, global_step)
+  else:
+    # Evaluation: no optimizer step is built.
+    train_op = None
+
+  # Labels are one-hot, so convert them to class indices before comparing
+  # them with the predicted classes.
+  accuracy = tf.metrics.accuracy(
+      tf.argmax(labels, axis=1), predictions['classes'])
+  metrics = {'accuracy': accuracy}
+
+  # Create a tensor named train_accuracy for logging purposes
+  # (accuracy[1] is the second element of the pair returned by
+  # tf.metrics.accuracy).
+  tf.identity(accuracy[1], name='train_accuracy')
+  tf.summary.scalar('train_accuracy', accuracy[1])
+
+  return tf.estimator.EstimatorSpec(
+      mode=mode,
+      predictions=predictions,
+      loss=loss,
+      train_op=train_op,
+      eval_metric_ops=metrics)
+
+
+def resnet_main(flags, model_function, input_function):
+  """Runs alternating training and evaluation cycles for a Resnet model.
+
+  Builds a `tf.estimator.Estimator` around `model_function` and then, for
+  each of `flags.train_epochs // flags.epochs_per_eval` cycles, trains and
+  then evaluates, printing the evaluation results.
+
+  Args:
+    flags: object exposing the attributes read here: `model_dir`,
+      `resnet_size`, `data_format`, `batch_size`, `train_epochs`,
+      `epochs_per_eval` and `data_dir`.
+    model_function: passed to the Estimator as its `model_fn`. It receives
+      `resnet_size`, `data_format` and `batch_size` through `params`.
+    input_function: callable invoked as
+      `input_function(True, data_dir, batch_size, epochs_per_eval)` for
+      training and `input_function(False, data_dir, batch_size)` for
+      evaluation. The first argument is presumably an is-training flag and
+      the fourth a number of epochs - confirm against the caller's
+      input function.
+  """
+  # Using the Winograd non-fused algorithms provides a small performance boost.
+  os.environ['TF_ENABLE_WINOGRAD_NONFUSED'] = '1'
+
+  # Set up a RunConfig to only save checkpoints once per training cycle.
+  run_config = tf.estimator.RunConfig().replace(save_checkpoints_secs=1e9)
+  classifier = tf.estimator.Estimator(
+      model_fn=model_function, model_dir=flags.model_dir, config=run_config,
+      params={
+          'resnet_size': flags.resnet_size,
+          'data_format': flags.data_format,
+          'batch_size': flags.batch_size,
+      })
+
+  # Floor division: if train_epochs is not a multiple of epochs_per_eval,
+  # the remainder does not get its own cycle.
+  for _ in range(flags.train_epochs // flags.epochs_per_eval):
+    # Values are the names of tensors to log; the model function must
+    # create tensors with these names.
+    tensors_to_log = {
+        'learning_rate': 'learning_rate',
+        'cross_entropy': 'cross_entropy',
+        'train_accuracy': 'train_accuracy'
+    }
+
+    logging_hook = tf.train.LoggingTensorHook(
+        tensors=tensors_to_log, every_n_iter=100)
+
+    print('Starting a training cycle.')
+    classifier.train(
+        input_fn=lambda: input_function(
+            True, flags.data_dir, flags.batch_size, flags.epochs_per_eval),
+        hooks=[logging_hook])
+
+    print('Starting to evaluate.')
+    # Evaluate the model and print results
+    eval_results = classifier.evaluate(input_fn=lambda: input_function(
+        False, flags.data_dir, flags.batch_size))
+    print(eval_results)
+
+
+class ResnetArgParser(argparse.ArgumentParser):
+  """Arguments for configuring and running a Resnet Model.
+
+  Registers the flags --data_dir, --model_dir, --resnet_size, --train_epochs,
+  --epochs_per_eval, --batch_size and --data_format on construction.
+  """
+
+  def __init__(self, resnet_size_choices=None):
+    """Creates the parser and registers the shared Resnet flags.
+
+    Args:
+      resnet_size_choices: optional container of the values accepted for
+        --resnet_size; passed through unchanged as the argparse `choices`
+        argument. With None, no `choices` restriction is supplied.
+    """
+    super(ResnetArgParser, self).__init__()
+    self.add_argument(
+        '--data_dir', type=str, default='/tmp/resnet_data',
+        help='The directory where the input data is stored.')
+
+    self.add_argument(
+        '--model_dir', type=str, default='/tmp/resnet_model',
+        help='The directory where the model will be stored.')
+
+    self.add_argument(
+        '--resnet_size', type=int, default=50,
+        choices=resnet_size_choices,
+        help='The size of the ResNet model to use.')
+
+    self.add_argument(
+        '--train_epochs', type=int, default=100,
+        help='The number of epochs to use for training.')
+
+    self.add_argument(
+        '--epochs_per_eval', type=int, default=1,
+        help='The number of training epochs to run between evaluations.')
+
+    self.add_argument(
+        '--batch_size', type=int, default=32,
+        help='Batch size for training and evaluation.')
+
+    # default=None leaves the format unset so it can be chosen
+    # automatically, as the help text below describes.
+    self.add_argument(
+        '--data_format', type=str, default=None,
+        choices=['channels_first', 'channels_last'],
+        help='A flag to override the data format used in the model. '
+             'channels_first provides a performance boost on GPU but '
+             'is not always compatible with CPU. If left unspecified, '
+             'the data format will be chosen automatically based on '
+             'whether TensorFlow was built for CPU or GPU.')
--- a/research/differential_privacy/README.md
+++ b/research/differential_privacy/README.md
 <font size=4><b>Deep Learning with Differential Privacy</b></font>

-Open Sourced By: Xin Pan (xpan@google.com, github: panyx0718)
+Open Sourced By: Xin Pan


 ### Introduction for [dp_sgd/README.md](dp_sgd/README.md)

--- a/research/inception/inception/data/build_imagenet_data.py
+++ b/research/inception/inception/data/build_imagenet_data.py
@@ -93,6 +93,7 @@ import sys
 import threading

 import numpy as np
+import six
 import tensorflow as tf

 tf.app.flags.DEFINE_string('train_directory', '/tmp/',

--- a/research/lm_1b/README.md
+++ b/research/lm_1b/README.md
@@ -3,7 +3,7 @@
 <b>Authors:</b>

 Oriol Vinyals (vinyals@google.com, github: OriolVinyals),
-Xin Pan (xpan@google.com, github: panyx0718)
+Xin Pan

 <b>Paper Authors:</b>


--- a/research/next_frame_prediction/README.md
+++ b/research/next_frame_prediction/README.md
@@ -8,7 +8,7 @@ This is an implementation based on my understanding, with small
 variations. It doesn't necessarily represents the paper published
 by the original authors.

-Authors: Xin Pan (Github: panyx0718), Anelia Angelova
+Authors: Xin Pan, Anelia Angelova

 <b>Results:</b>


--- a/research/resnet/README.md
+++ b/research/resnet/README.md
 <font size=4><b>Reproduced ResNet on CIFAR-10 and CIFAR-100 dataset.</b></font>

-contact: panyx0718 (xpan@google.com)
+Xin Pan

 <b>Dataset:</b>


--- a/research/textsum/README.md
+++ b/research/textsum/README.md
@@ -2,7 +2,7 @@ Sequence-to-Sequence with Attention Model for Text Summarization.

 Authors:

-Xin Pan (xpan@google.com, github:panyx0718),
+Xin Pan,
 Peter Liu (peterjliu@google.com, github:peterjliu)

 <b>Introduction</b>

--- a/tutorials/image/cifar10/cifar10.py
+++ b/tutorials/image/cifar10/cifar10.py
@@ -204,7 +204,7 @@ def inference(images):
    kernel = _variable_with_weight_decay('weights',
                                         shape=[5, 5, 3, 64],
                                         stddev=5e-2,
-                                         wd=0.0)
+                                         wd=None)
    conv = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding='SAME')
    biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.0))
    pre_activation = tf.nn.bias_add(conv, biases)
@@ -223,7 +223,7 @@ def inference(images):
    kernel = _variable_with_weight_decay('weights',
                                         shape=[5, 5, 64, 64],
                                         stddev=5e-2,
-                                         wd=0.0)
+                                         wd=None)
    conv = tf.nn.conv2d(norm1, kernel, [1, 1, 1, 1], padding='SAME')
    biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.1))
    pre_activation = tf.nn.bias_add(conv, biases)
@@ -262,7 +262,7 @@ def inference(images):
  # and performs the softmax internally for efficiency.
  with tf.variable_scope('softmax_linear') as scope:
    weights = _variable_with_weight_decay('weights', [192, NUM_CLASSES],
-                                          stddev=1/192.0, wd=0.0)
+                                          stddev=1/192.0, wd=None)
    biases = _variable_on_cpu('biases', [NUM_CLASSES],
                              tf.constant_initializer(0.0))
    softmax_linear = tf.add(tf.matmul(local4, weights), biases, name=scope.name)