Commit f9ac9618 authored by Hongkun Yu, committed by A. Unique TensorFlower

Remove this r1 folder from the master branch in June, 2020.

PiperOrigin-RevId: 317772122
parent d4f5c193
# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Downloads and extracts the binary version of the CIFAR-10 dataset."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import os
import sys
import tarfile
from six.moves import urllib
import tensorflow as tf
DATA_URL = 'https://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz'
parser = argparse.ArgumentParser()
parser.add_argument(
'--data_dir', type=str, default='/tmp/cifar10_data',
help='Directory to download data and extract the tarball')
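# Example invocation (a sketch; adjust the script path to your checkout):
#   python cifar10_download_and_extract.py --data_dir=/tmp/cifar10_data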
def main(_):
"""Download and extract the tarball from Alex's website."""
if not os.path.exists(FLAGS.data_dir):
os.makedirs(FLAGS.data_dir)
filename = DATA_URL.split('/')[-1]
filepath = os.path.join(FLAGS.data_dir, filename)
if not os.path.exists(filepath):
def _progress(count, block_size, total_size):
sys.stdout.write('\r>> Downloading %s %.1f%%' % (
filename, 100.0 * count * block_size / total_size))
sys.stdout.flush()
filepath, _ = urllib.request.urlretrieve(DATA_URL, filepath, _progress)
print()
statinfo = os.stat(filepath)
print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
tarfile.open(filepath, 'r:gz').extractall(FLAGS.data_dir)
if __name__ == '__main__':
FLAGS, unparsed = parser.parse_known_args()
tf.compat.v1.app.run(argv=[sys.argv[0]] + unparsed)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Runs a ResNet model on the CIFAR-10 dataset."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
from absl import app as absl_app
from absl import flags
from absl import logging
from six.moves import range
import tensorflow as tf
from official.r1.resnet import resnet_model
from official.r1.resnet import resnet_run_loop
from official.r1.utils.logs import logger
from official.utils.flags import core as flags_core
HEIGHT = 32
WIDTH = 32
NUM_CHANNELS = 3
_DEFAULT_IMAGE_BYTES = HEIGHT * WIDTH * NUM_CHANNELS
# The record is the image plus a one-byte label
_RECORD_BYTES = _DEFAULT_IMAGE_BYTES + 1
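# For the default 32x32 RGB images this works out to 3 * 32 * 32 + 1 = 3073
# bytes per record.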
NUM_CLASSES = 10
_NUM_DATA_FILES = 5
# TODO(tobyboyd): Change to best practice 45K(train)/5K(val)/10K(test) splits.
NUM_IMAGES = {
'train': 50000,
'validation': 10000,
}
DATASET_NAME = 'CIFAR-10'
###############################################################################
# Data processing
###############################################################################
def get_filenames(is_training, data_dir):
"""Returns a list of filenames."""
assert tf.io.gfile.exists(data_dir), (
'Run cifar10_download_and_extract.py first to download and extract the '
'CIFAR-10 data.')
if is_training:
return [
os.path.join(data_dir, 'data_batch_%d.bin' % i)
for i in range(1, _NUM_DATA_FILES + 1)
]
else:
return [os.path.join(data_dir, 'test_batch.bin')]
def parse_record(raw_record, is_training, dtype):
"""Parse CIFAR-10 image and label from a raw record."""
# Convert bytes to a vector of uint8 that is record_bytes long.
record_vector = tf.io.decode_raw(raw_record, tf.uint8)
# The first byte represents the label, which we convert from uint8 to int32
# and then to one-hot.
label = tf.cast(record_vector[0], tf.int32)
# The remaining bytes after the label represent the image, which we reshape
# from [depth * height * width] to [depth, height, width].
depth_major = tf.reshape(record_vector[1:_RECORD_BYTES],
[NUM_CHANNELS, HEIGHT, WIDTH])
# Convert from [depth, height, width] to [height, width, depth], and cast as
# float32.
image = tf.cast(tf.transpose(a=depth_major, perm=[1, 2, 0]), tf.float32)
image = preprocess_image(image, is_training)
image = tf.cast(image, dtype)
return image, label
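# Illustrative sketch (not part of the original pipeline): the hypothetical
# helper below builds one synthetic CIFAR-10 binary record -- a single label
# byte followed by 3 * 32 * 32 image bytes in channel-major order -- and
# decodes it with `parse_record`, just to document the record layout.
def _example_parse_record():
  """Sketch only; the helper name is hypothetical and unused elsewhere."""
  fake_record = bytes([3]) + bytes(_DEFAULT_IMAGE_BYTES)  # label 3, zero image
  image, label = parse_record(
      tf.constant(fake_record), is_training=False, dtype=tf.float32)
  # image: [32, 32, 3] float32 after standardization; label: scalar int32 == 3.
  return image, label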
def preprocess_image(image, is_training):
"""Preprocess a single image of layout [height, width, depth]."""
if is_training:
# Resize the image to add four extra pixels on each side.
image = tf.image.resize_with_crop_or_pad(
image, HEIGHT + 8, WIDTH + 8)
# Randomly crop a [HEIGHT, WIDTH] section of the image.
image = tf.image.random_crop(image, [HEIGHT, WIDTH, NUM_CHANNELS])
# Randomly flip the image horizontally.
image = tf.image.random_flip_left_right(image)
# Subtract off the mean and divide by the variance of the pixels.
image = tf.image.per_image_standardization(image)
return image
def input_fn(is_training,
data_dir,
batch_size,
num_epochs=1,
dtype=tf.float32,
datasets_num_private_threads=None,
parse_record_fn=parse_record,
input_context=None,
drop_remainder=False):
"""Input function which provides batches for train or eval.
Args:
is_training: A boolean denoting whether the input is for training.
data_dir: The directory containing the input data.
batch_size: The number of samples per batch.
num_epochs: The number of epochs to repeat the dataset.
dtype: Data type to use for images/features
datasets_num_private_threads: Number of private threads for tf.data.
parse_record_fn: Function to use for parsing the records.
input_context: A `tf.distribute.InputContext` object passed in by
`tf.distribute.Strategy`.
drop_remainder: A boolean indicating whether to drop the remainder of the
batches. If True, the batch dimension will be static.
Returns:
A dataset that can be used for iteration.
"""
filenames = get_filenames(is_training, data_dir)
dataset = tf.data.FixedLengthRecordDataset(filenames, _RECORD_BYTES)
if input_context:
logging.info(
'Sharding the dataset: input_pipeline_id=%d num_input_pipelines=%d',
input_context.input_pipeline_id, input_context.num_input_pipelines)
dataset = dataset.shard(input_context.num_input_pipelines,
input_context.input_pipeline_id)
return resnet_run_loop.process_record_dataset(
dataset=dataset,
is_training=is_training,
batch_size=batch_size,
shuffle_buffer=NUM_IMAGES['train'],
parse_record_fn=parse_record_fn,
num_epochs=num_epochs,
dtype=dtype,
datasets_num_private_threads=datasets_num_private_threads,
drop_remainder=drop_remainder
)
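# Illustrative sketch (an assumption, not part of the original training loop):
# under `tf.distribute`, each worker receives a `tf.distribute.InputContext`
# and `input_fn` shards the file dataset accordingly. A hypothetical wrapper
# that a strategy could consume might look like this.
def _example_sharded_input_fn(data_dir, global_batch_size):
  """Sketch only; the helper name is hypothetical and unused elsewhere."""
  def _fn(input_context):
    per_replica_batch_size = input_context.get_per_replica_batch_size(
        global_batch_size)
    return input_fn(is_training=True, data_dir=data_dir,
                    batch_size=per_replica_batch_size,
                    input_context=input_context)
  # A strategy would then consume it via, for example,
  # `strategy.distribute_datasets_from_function(_fn)`.
  return _fn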
def get_synth_input_fn(dtype):
return resnet_run_loop.get_synth_input_fn(
HEIGHT, WIDTH, NUM_CHANNELS, NUM_CLASSES, dtype=dtype)
###############################################################################
# Running the model
###############################################################################
class Cifar10Model(resnet_model.Model):
"""Model class with appropriate defaults for CIFAR-10 data."""
def __init__(self, resnet_size, data_format=None, num_classes=NUM_CLASSES,
resnet_version=resnet_model.DEFAULT_VERSION,
dtype=resnet_model.DEFAULT_DTYPE):
"""These are the parameters that work for CIFAR-10 data.
Args:
resnet_size: The number of convolutional layers needed in the model.
data_format: Either 'channels_first' or 'channels_last', specifying which
data format to use when setting up the model.
num_classes: The number of output classes needed from the model. This
enables users to extend the same model to their own datasets.
resnet_version: Integer representing which version of the ResNet network
to use. See README for details. Valid values: [1, 2]
dtype: The TensorFlow dtype to use for calculations.
Raises:
ValueError: if invalid resnet_size is chosen
"""
if resnet_size % 6 != 2:
raise ValueError('resnet_size must be 6n + 2:', resnet_size)
num_blocks = (resnet_size - 2) // 6
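    # For example, the default ResNet-56 gives num_blocks = (56 - 2) // 6 = 9,
    # i.e. block_sizes == [9, 9, 9]; with two conv layers per building block
    # plus the initial conv and the final dense layer, 6 * 9 + 2 = 56 layers.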
super(Cifar10Model, self).__init__(
resnet_size=resnet_size,
bottleneck=False,
num_classes=num_classes,
num_filters=16,
kernel_size=3,
conv_stride=1,
first_pool_size=None,
first_pool_stride=None,
block_sizes=[num_blocks] * 3,
block_strides=[1, 2, 2],
resnet_version=resnet_version,
data_format=data_format,
dtype=dtype
)
def cifar10_model_fn(features, labels, mode, params):
"""Model function for CIFAR-10."""
features = tf.reshape(features, [-1, HEIGHT, WIDTH, NUM_CHANNELS])
# Learning rate schedule follows arXiv:1512.03385 for ResNet-56 and under.
learning_rate_fn = resnet_run_loop.learning_rate_with_decay(
batch_size=params['batch_size'] * params.get('num_workers', 1),
batch_denom=128, num_images=NUM_IMAGES['train'],
boundary_epochs=[91, 136, 182], decay_rates=[1, 0.1, 0.01, 0.001])
# Weight decay of 2e-4 diverges from 1e-4 decay used in the ResNet paper
# and seems more stable in testing. The difference was nominal for ResNet-56.
weight_decay = 2e-4
# Empirical testing showed that including batch_normalization variables
# in the calculation of regularized loss helped validation accuracy
# for the CIFAR-10 dataset, perhaps because the regularization prevents
# overfitting on the small data set. We therefore include all vars when
# regularizing and computing loss during training.
def loss_filter_fn(_):
return True
return resnet_run_loop.resnet_model_fn(
features=features,
labels=labels,
mode=mode,
model_class=Cifar10Model,
resnet_size=params['resnet_size'],
weight_decay=weight_decay,
learning_rate_fn=learning_rate_fn,
momentum=0.9,
data_format=params['data_format'],
resnet_version=params['resnet_version'],
loss_scale=params['loss_scale'],
loss_filter_fn=loss_filter_fn,
dtype=params['dtype'],
fine_tune=params['fine_tune']
)
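# Illustrative sketch (a hypothetical helper, not called anywhere): the
# piecewise-constant schedule that `learning_rate_with_decay` builds above,
# assuming the run loop's default base learning rate of 0.1 scaled by
# batch_size / 128; the real run loop's step rounding may differ slightly.
def _example_cifar10_lr_schedule(batch_size=128):
  batches_per_epoch = NUM_IMAGES['train'] / batch_size
  initial_lr = 0.1 * batch_size / 128
  boundaries = [int(batches_per_epoch * epoch) for epoch in (91, 136, 182)]
  values = [initial_lr * rate for rate in (1, 0.1, 0.01, 0.001)]
  # With batch_size=128: boundaries ~= [35546, 53125, 71093] steps and
  # values == [0.1, 0.01, 0.001, 0.0001].
  return boundaries, values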
def define_cifar_flags():
resnet_run_loop.define_resnet_flags()
flags.adopt_module_key_flags(resnet_run_loop)
flags_core.set_defaults(data_dir='/tmp/cifar10_data/cifar-10-batches-bin',
model_dir='/tmp/cifar10_model',
resnet_size='56',
train_epochs=182,
epochs_between_evals=10,
batch_size=128,
image_bytes_as_serving_input=False)
def run_cifar(flags_obj):
"""Run ResNet CIFAR-10 training and eval loop.
Args:
flags_obj: An object containing parsed flag values.
Returns:
Dictionary of results. Including final accuracy.
"""
if flags_obj.image_bytes_as_serving_input:
logging.fatal(
'--image_bytes_as_serving_input cannot be set to True for CIFAR. '
'This flag is only applicable to ImageNet.')
return
input_function = (flags_obj.use_synthetic_data and
get_synth_input_fn(flags_core.get_tf_dtype(flags_obj)) or
input_fn)
result = resnet_run_loop.resnet_main(
flags_obj, cifar10_model_fn, input_function, DATASET_NAME,
shape=[HEIGHT, WIDTH, NUM_CHANNELS])
return result
def main(_):
with logger.benchmark_context(flags.FLAGS):
run_cifar(flags.FLAGS)
if __name__ == '__main__':
logging.set_verbosity(logging.INFO)
define_cifar_flags()
absl_app.run(main)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tempfile import mkstemp
from absl import logging
import numpy as np
import tensorflow as tf
from official.r1.resnet import cifar10_main
from official.utils.testing import integration
logging.set_verbosity(logging.ERROR)
_BATCH_SIZE = 128
_HEIGHT = 32
_WIDTH = 32
_NUM_CHANNELS = 3
class BaseTest(tf.test.TestCase):
"""Tests for the Cifar10 version of Resnet.
"""
_num_validation_images = None
@classmethod
def setUpClass(cls): # pylint: disable=invalid-name
super(BaseTest, cls).setUpClass()
tf.compat.v1.disable_eager_execution()
cifar10_main.define_cifar_flags()
def setUp(self):
super(BaseTest, self).setUp()
self._num_validation_images = cifar10_main.NUM_IMAGES['validation']
cifar10_main.NUM_IMAGES['validation'] = 4
def tearDown(self):
super(BaseTest, self).tearDown()
tf.io.gfile.rmtree(self.get_temp_dir())
cifar10_main.NUM_IMAGES['validation'] = self._num_validation_images
def test_dataset_input_fn(self):
fake_data = bytearray()
fake_data.append(7)
for i in range(_NUM_CHANNELS):
for _ in range(_HEIGHT * _WIDTH):
fake_data.append(i)
_, filename = mkstemp(dir=self.get_temp_dir())
data_file = open(filename, 'wb')
data_file.write(fake_data)
data_file.close()
fake_dataset = tf.data.FixedLengthRecordDataset(
filename, cifar10_main._RECORD_BYTES) # pylint: disable=protected-access
fake_dataset = fake_dataset.map(
lambda val: cifar10_main.parse_record(val, False, tf.float32))
image, label = tf.compat.v1.data.make_one_shot_iterator(
fake_dataset).get_next()
self.assertAllEqual(label.shape, ())
self.assertAllEqual(image.shape, (_HEIGHT, _WIDTH, _NUM_CHANNELS))
with self.session() as sess:
image, label = sess.run([image, label])
self.assertEqual(label, 7)
for row in image:
for pixel in row:
self.assertAllClose(pixel, np.array([-1.225, 0., 1.225]), rtol=1e-3)
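  # Why [-1.225, 0., 1.225]: every pixel of the fake image is [0., 1., 2.]
  # across channels, so per_image_standardization subtracts the image mean
  # (1.0) and divides by the per-image stddev sqrt(2/3) ~= 0.8165, giving
  # approximately [-1.2247, 0., 1.2247].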
def cifar10_model_fn_helper(self, mode, resnet_version, dtype):
input_fn = cifar10_main.get_synth_input_fn(dtype)
dataset = input_fn(True, '', _BATCH_SIZE)
iterator = tf.compat.v1.data.make_initializable_iterator(dataset)
features, labels = iterator.get_next()
spec = cifar10_main.cifar10_model_fn(
features, labels, mode, {
'dtype': dtype,
'resnet_size': 32,
'data_format': 'channels_last',
'batch_size': _BATCH_SIZE,
'resnet_version': resnet_version,
'loss_scale': 128 if dtype == tf.float16 else 1,
'fine_tune': False,
})
predictions = spec.predictions
self.assertAllEqual(predictions['probabilities'].shape,
(_BATCH_SIZE, 10))
self.assertEqual(predictions['probabilities'].dtype, tf.float32)
self.assertAllEqual(predictions['classes'].shape, (_BATCH_SIZE,))
self.assertEqual(predictions['classes'].dtype, tf.int64)
if mode != tf.estimator.ModeKeys.PREDICT:
loss = spec.loss
self.assertAllEqual(loss.shape, ())
self.assertEqual(loss.dtype, tf.float32)
if mode == tf.estimator.ModeKeys.EVAL:
eval_metric_ops = spec.eval_metric_ops
self.assertAllEqual(eval_metric_ops['accuracy'][0].shape, ())
self.assertAllEqual(eval_metric_ops['accuracy'][1].shape, ())
self.assertEqual(eval_metric_ops['accuracy'][0].dtype, tf.float32)
self.assertEqual(eval_metric_ops['accuracy'][1].dtype, tf.float32)
def test_cifar10_model_fn_train_mode_v1(self):
self.cifar10_model_fn_helper(tf.estimator.ModeKeys.TRAIN, resnet_version=1,
dtype=tf.float32)
def test_cifar10_model_fn_train_mode_v2(self):
self.cifar10_model_fn_helper(tf.estimator.ModeKeys.TRAIN, resnet_version=2,
dtype=tf.float32)
def test_cifar10_model_fn_eval_mode_v1(self):
self.cifar10_model_fn_helper(tf.estimator.ModeKeys.EVAL, resnet_version=1,
dtype=tf.float32)
def test_cifar10_model_fn_eval_mode_v2(self):
self.cifar10_model_fn_helper(tf.estimator.ModeKeys.EVAL, resnet_version=2,
dtype=tf.float32)
def test_cifar10_model_fn_predict_mode_v1(self):
self.cifar10_model_fn_helper(tf.estimator.ModeKeys.PREDICT,
resnet_version=1, dtype=tf.float32)
def test_cifar10_model_fn_predict_mode_v2(self):
self.cifar10_model_fn_helper(tf.estimator.ModeKeys.PREDICT,
resnet_version=2, dtype=tf.float32)
def _test_cifar10model_shape(self, resnet_version):
batch_size = 135
num_classes = 246
model = cifar10_main.Cifar10Model(32, data_format='channels_last',
num_classes=num_classes,
resnet_version=resnet_version)
fake_input = tf.random.uniform([batch_size, _HEIGHT, _WIDTH, _NUM_CHANNELS])
output = model(fake_input, training=True)
self.assertAllEqual(output.shape, (batch_size, num_classes))
def test_cifar10model_shape_v1(self):
self._test_cifar10model_shape(resnet_version=1)
def test_cifar10model_shape_v2(self):
self._test_cifar10model_shape(resnet_version=2)
def test_cifar10_end_to_end_synthetic_v1(self):
integration.run_synthetic(
main=cifar10_main.run_cifar, tmp_root=self.get_temp_dir(),
extra_flags=['-resnet_version', '1', '-batch_size', '4',
'--max_train_steps', '1']
)
def test_cifar10_end_to_end_synthetic_v2(self):
integration.run_synthetic(
main=cifar10_main.run_cifar, tmp_root=self.get_temp_dir(),
extra_flags=['-resnet_version', '2', '-batch_size', '4',
'--max_train_steps', '1']
)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes Estimator benchmarks and accuracy tests."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import time
from absl import flags
from absl import logging
from absl.testing import flagsaver
import tensorflow as tf
from official.r1.resnet import cifar10_main as cifar_main
from official.r1.resnet import imagenet_main
from official.r1.utils.logs import hooks
from official.utils.flags import core as flags_core
IMAGENET_DATA_DIR_NAME = 'imagenet'
CIFAR_DATA_DIR_NAME = 'cifar-10-batches-bin'
FLAGS = flags.FLAGS
class EstimatorBenchmark(tf.test.Benchmark):
"""Base class to hold methods common to test classes in the module.
Code under test for Estimator models (ResNet50 and ResNet56) reports mostly
the same data and requires the same FLAG setup.
"""
local_flags = None
def __init__(self, output_dir=None, default_flags=None, flag_methods=None):
if not output_dir:
output_dir = '/tmp'
self.output_dir = output_dir
self.default_flags = default_flags or {}
self.flag_methods = flag_methods or {}
def _get_model_dir(self, folder_name):
"""Returns directory to store info, e.g. saved model and event log."""
return os.path.join(self.output_dir, folder_name)
def _setup(self):
"""Sets up and resets flags before each test."""
logging.set_verbosity(logging.INFO)
if EstimatorBenchmark.local_flags is None:
for flag_method in self.flag_methods:
flag_method()
# Loads flags to get defaults to then override. List cannot be empty.
flags.FLAGS(['foo'])
# Overrides flag values with defaults for the class of tests.
for k, v in self.default_flags.items():
setattr(FLAGS, k, v)
saved_flag_values = flagsaver.save_flag_values()
EstimatorBenchmark.local_flags = saved_flag_values
else:
flagsaver.restore_flag_values(EstimatorBenchmark.local_flags)
def _report_benchmark(self,
stats,
wall_time_sec,
top_1_max=None,
top_1_min=None):
"""Report benchmark results by writing to local protobuf file.
Args:
stats: dict returned from estimator models with known entries.
wall_time_sec: the duration of the benchmark execution in seconds.
top_1_max: highest passing level for top_1 accuracy.
top_1_min: lowest passing level for top_1 accuracy.
"""
examples_per_sec_hook = None
for hook in stats['train_hooks']:
if isinstance(hook, hooks.ExamplesPerSecondHook):
examples_per_sec_hook = hook
break
eval_results = stats['eval_results']
metrics = []
if 'accuracy' in eval_results:
metrics.append({'name': 'accuracy_top_1',
'value': float(eval_results['accuracy']),
'min_value': top_1_min,
'max_value': top_1_max})
if 'accuracy_top_5' in eval_results:
metrics.append({'name': 'accuracy_top_5',
'value': float(eval_results['accuracy_top_5'])})
if examples_per_sec_hook:
exp_per_second_list = examples_per_sec_hook.current_examples_per_sec_list
# ExamplesPerSecondHook skips the first 10 steps.
exp_per_sec = sum(exp_per_second_list) / (len(exp_per_second_list))
metrics.append({'name': 'exp_per_second',
'value': exp_per_sec})
flags_str = flags_core.get_nondefault_flags_as_str()
self.report_benchmark(
iters=eval_results.get('global_step', None),
wall_time=wall_time_sec,
metrics=metrics,
extras={'flags': flags_str})
class Resnet50EstimatorAccuracy(EstimatorBenchmark):
"""Benchmark accuracy tests for ResNet50 w/ Estimator."""
def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
"""Benchmark accuracy tests for ResNet50 w/ Estimator.
Args:
output_dir: directory where to output e.g. log files
root_data_dir: directory under which to look for dataset
**kwargs: arbitrary named arguments. This is needed to make the
constructor forward compatible in case PerfZero provides more
named arguments before updating the constructor.
"""
flag_methods = [imagenet_main.define_imagenet_flags]
self.data_dir = os.path.join(root_data_dir, IMAGENET_DATA_DIR_NAME)
super(Resnet50EstimatorAccuracy, self).__init__(
output_dir=output_dir, flag_methods=flag_methods)
def benchmark_graph_8_gpu(self):
"""Test 8 GPUs graph mode."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.data_dir = self.data_dir
FLAGS.batch_size = 128 * 8
FLAGS.train_epochs = 90
FLAGS.epochs_between_evals = 10
FLAGS.model_dir = self._get_model_dir('benchmark_graph_8_gpu')
FLAGS.dtype = 'fp32'
FLAGS.hooks = ['ExamplesPerSecondHook']
self._run_and_report_benchmark()
def benchmark_graph_fp16_8_gpu(self):
"""Test FP16 8 GPUs graph mode."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.data_dir = self.data_dir
FLAGS.batch_size = 256 * 8
FLAGS.train_epochs = 90
FLAGS.epochs_between_evals = 10
FLAGS.model_dir = self._get_model_dir('benchmark_graph_fp16_8_gpu')
FLAGS.dtype = 'fp16'
FLAGS.hooks = ['ExamplesPerSecondHook']
self._run_and_report_benchmark()
def benchmark_graph_fp16_graph_rewrite_8_gpu(self):
"""Test FP16 graph rewrite 8 GPUs graph mode."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.data_dir = self.data_dir
FLAGS.batch_size = 256 * 8
FLAGS.train_epochs = 90
FLAGS.epochs_between_evals = 10
FLAGS.model_dir = self._get_model_dir(
'benchmark_graph_fp16_graph_rewrite_8_gpu')
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
FLAGS.hooks = ['ExamplesPerSecondHook']
self._run_and_report_benchmark()
def _run_and_report_benchmark(self):
start_time_sec = time.time()
stats = imagenet_main.run_imagenet(flags.FLAGS)
wall_time_sec = time.time() - start_time_sec
self._report_benchmark(stats,
wall_time_sec,
top_1_min=0.762,
top_1_max=0.766)
class Resnet50EstimatorBenchmarkBase(EstimatorBenchmark):
"""Base class for benchmarks for ResNet50 using Estimator."""
local_flags = None
def __init__(self, output_dir=None, default_flags=None):
flag_methods = [imagenet_main.define_imagenet_flags]
super(Resnet50EstimatorBenchmarkBase, self).__init__(
output_dir=output_dir,
default_flags=default_flags,
flag_methods=flag_methods)
def _run_and_report_benchmark(self):
start_time_sec = time.time()
stats = imagenet_main.run_imagenet(FLAGS)
wall_time_sec = time.time() - start_time_sec
print(stats)
# Remove values to skip triggering accuracy check.
stats['eval_results'].pop('accuracy', None)
stats['eval_results'].pop('accuracy_top_5', None)
self._report_benchmark(stats, wall_time_sec)
class Resnet50EstimatorBenchmark(Resnet50EstimatorBenchmarkBase):
"""Benchmarks for ResNet50 using Estimator with 1 worker."""
def __init__(self, output_dir=None, default_flags=None):
super(Resnet50EstimatorBenchmark, self).__init__(
output_dir=output_dir,
default_flags=default_flags)
def benchmark_graph_fp16_1_gpu(self):
"""Benchmarks graph fp16 1 gpu."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.model_dir = self._get_model_dir('benchmark_graph_fp16_1_gpu')
FLAGS.batch_size = 128
FLAGS.dtype = 'fp16'
FLAGS.hooks = ['ExamplesPerSecondHook']
self._run_and_report_benchmark()
def benchmark_graph_fp16_1_gpu_tweaked(self):
"""Benchmarks graph fp16 1 gpu tweaked."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.tf_gpu_thread_mode = 'gpu_private'
FLAGS.intra_op_parallelism_threads = 1
FLAGS.model_dir = self._get_model_dir('benchmark_graph_fp16_1_gpu_tweaked')
FLAGS.batch_size = 256
FLAGS.dtype = 'fp16'
FLAGS.hooks = ['ExamplesPerSecondHook']
self._run_and_report_benchmark()
def benchmark_graph_fp16_graph_rewrite_1_gpu_tweaked(self):
"""Benchmarks graph fp16 graph rewrite 1 gpu tweaked."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.tf_gpu_thread_mode = 'gpu_private'
FLAGS.intra_op_parallelism_threads = 1
FLAGS.model_dir = self._get_model_dir(
'benchmark_graph_fp16_graph_rewrite_1_gpu_tweaked')
FLAGS.batch_size = 256
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
FLAGS.hooks = ['ExamplesPerSecondHook']
self._run_and_report_benchmark()
def benchmark_graph_1_gpu(self):
"""Benchmarks graph 1 gpu."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu')
FLAGS.batch_size = 128
FLAGS.dtype = 'fp32'
FLAGS.hooks = ['ExamplesPerSecondHook']
self._run_and_report_benchmark()
def benchmark_graph_8_gpu(self):
"""Benchmarks graph 8 gpus."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.model_dir = self._get_model_dir('benchmark_graph_8_gpu')
FLAGS.batch_size = 128*8
FLAGS.dtype = 'fp32'
FLAGS.hooks = ['ExamplesPerSecondHook']
self._run_and_report_benchmark()
def benchmark_graph_fp16_8_gpu(self):
"""Benchmarks graph fp16 8 gpus."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.model_dir = self._get_model_dir('benchmark_graph_fp16_8_gpu')
FLAGS.batch_size = 256*8
FLAGS.dtype = 'fp16'
FLAGS.hooks = ['ExamplesPerSecondHook']
self._run_and_report_benchmark()
def benchmark_graph_fp16_8_gpu_tweaked(self):
"""Benchmarks graph fp16 8 gpus tweaked."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.tf_gpu_thread_mode = 'gpu_private'
FLAGS.intra_op_parallelism_threads = 1
FLAGS.model_dir = self._get_model_dir('benchmark_graph_fp16_8_gpu_tweaked')
FLAGS.batch_size = 256*8
FLAGS.dtype = 'fp16'
FLAGS.hooks = ['ExamplesPerSecondHook']
self._run_and_report_benchmark()
def benchmark_graph_fp16_graph_rewrite_8_gpu_tweaked(self):
"""Benchmarks graph fp16 graph rewrite 8 gpus tweaked."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.tf_gpu_thread_mode = 'gpu_private'
FLAGS.intra_op_parallelism_threads = 1
FLAGS.model_dir = self._get_model_dir(
'benchmark_graph_fp16_graph_rewrite_8_gpu_tweaked')
FLAGS.batch_size = 256*8
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
FLAGS.hooks = ['ExamplesPerSecondHook']
self._run_and_report_benchmark()
class Resnet50EstimatorBenchmarkSynth(Resnet50EstimatorBenchmark):
"""Resnet50 synthetic benchmark tests."""
def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
def_flags = {}
def_flags['use_synthetic_data'] = True
def_flags['max_train_steps'] = 110
def_flags['train_epochs'] = 1
super(Resnet50EstimatorBenchmarkSynth, self).__init__(
output_dir=output_dir, default_flags=def_flags)
class Resnet50EstimatorBenchmarkReal(Resnet50EstimatorBenchmark):
"""Resnet50 real data benchmark tests."""
def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
def_flags = {}
def_flags['data_dir'] = os.path.join(root_data_dir, IMAGENET_DATA_DIR_NAME)
def_flags['max_train_steps'] = 110
def_flags['train_epochs'] = 1
super(Resnet50EstimatorBenchmarkReal, self).__init__(
output_dir=output_dir, default_flags=def_flags)
class Resnet50MultiWorkerEstimatorBenchmark(Resnet50EstimatorBenchmarkBase):
"""Benchmarks for ResNet50 using Estimator with multiple workers."""
def __init__(self, output_dir=None, default_flags=None):
super(Resnet50MultiWorkerEstimatorBenchmark, self).__init__(
output_dir=output_dir,
default_flags=default_flags)
def benchmark_graph_fp16_8_gpu_ring_tweaked(self):
"""Benchmarks graph fp16 8 gpus with ring collective tweaked."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.distribution_strategy = 'multi_worker_mirrored'
FLAGS.all_reduce_alg = 'ring'
FLAGS.tf_gpu_thread_mode = 'gpu_private'
FLAGS.intra_op_parallelism_threads = 1
FLAGS.datasets_num_private_threads = 32
FLAGS.model_dir = self._get_model_dir(
folder_name='benchmark_graph_fp16_8_gpu_ring_tweaked')
FLAGS.batch_size = 256*8
FLAGS.dtype = 'fp16'
FLAGS.hooks = ['ExamplesPerSecondHook']
self._run_and_report_benchmark()
def benchmark_graph_fp16_8_gpu_nccl_tweaked(self):
"""Benchmarks graph fp16 8 gpus with nccl collective tweaked."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.distribution_strategy = 'multi_worker_mirrored'
FLAGS.all_reduce_alg = 'nccl'
FLAGS.tf_gpu_thread_mode = 'gpu_private'
FLAGS.intra_op_parallelism_threads = 1
FLAGS.datasets_num_private_threads = 32
FLAGS.model_dir = self._get_model_dir(
folder_name='benchmark_graph_fp16_8_gpu_nccl_tweaked')
FLAGS.batch_size = 256*8
FLAGS.dtype = 'fp16'
FLAGS.hooks = ['ExamplesPerSecondHook']
self._run_and_report_benchmark()
class Resnet50MultiWorkerEstimatorBenchmarkSynth(
Resnet50MultiWorkerEstimatorBenchmark):
"""ResNet50, multi-worker, Estimator, synthetic data."""
def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
def_flags = {}
def_flags['use_synthetic_data'] = True
def_flags['max_train_steps'] = 110
def_flags['train_epochs'] = 1
super(Resnet50MultiWorkerEstimatorBenchmarkSynth, self).__init__(
output_dir=output_dir, default_flags=def_flags)
class Resnet56EstimatorAccuracy(EstimatorBenchmark):
"""Accuracy tests for Estimator ResNet56."""
local_flags = None
def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
"""A benchmark class.
Args:
output_dir: directory where to output e.g. log files
root_data_dir: directory under which to look for dataset
**kwargs: arbitrary named arguments. This is needed to make the
constructor forward compatible in case PerfZero provides more
named arguments before updating the constructor.
"""
flag_methods = [cifar_main.define_cifar_flags]
self.data_dir = os.path.join(root_data_dir, CIFAR_DATA_DIR_NAME)
super(Resnet56EstimatorAccuracy, self).__init__(
output_dir=output_dir, flag_methods=flag_methods)
def benchmark_graph_1_gpu(self):
"""Test layers model with Estimator and distribution strategies."""
self._setup()
flags.FLAGS.num_gpus = 1
flags.FLAGS.data_dir = self.data_dir
flags.FLAGS.batch_size = 128
flags.FLAGS.train_epochs = 182
flags.FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu')
flags.FLAGS.resnet_size = 56
flags.FLAGS.dtype = 'fp32'
flags.FLAGS.hooks = ['ExamplesPerSecondHook']
self._run_and_report_benchmark()
def benchmark_graph_fp16_1_gpu(self):
"""Test layers FP16 model with Estimator and distribution strategies."""
self._setup()
flags.FLAGS.num_gpus = 1
flags.FLAGS.data_dir = self.data_dir
flags.FLAGS.batch_size = 128
flags.FLAGS.train_epochs = 182
flags.FLAGS.model_dir = self._get_model_dir('benchmark_graph_fp16_1_gpu')
flags.FLAGS.resnet_size = 56
flags.FLAGS.dtype = 'fp16'
flags.FLAGS.hooks = ['ExamplesPerSecondHook']
self._run_and_report_benchmark()
def benchmark_graph_2_gpu(self):
"""Test layers model with Estimator and dist_strat. 2 GPUs."""
self._setup()
flags.FLAGS.num_gpus = 2
flags.FLAGS.data_dir = self.data_dir
flags.FLAGS.batch_size = 128
flags.FLAGS.train_epochs = 182
flags.FLAGS.model_dir = self._get_model_dir('benchmark_graph_2_gpu')
flags.FLAGS.resnet_size = 56
flags.FLAGS.dtype = 'fp32'
flags.FLAGS.hooks = ['ExamplesPerSecondHook']
self._run_and_report_benchmark()
def benchmark_graph_fp16_2_gpu(self):
"""Test layers FP16 model with Estimator and dist_strat. 2 GPUs."""
self._setup()
flags.FLAGS.num_gpus = 2
flags.FLAGS.data_dir = self.data_dir
flags.FLAGS.batch_size = 128
flags.FLAGS.train_epochs = 182
flags.FLAGS.model_dir = self._get_model_dir('benchmark_graph_fp16_2_gpu')
flags.FLAGS.resnet_size = 56
flags.FLAGS.dtype = 'fp16'
flags.FLAGS.hooks = ['ExamplesPerSecondHook']
self._run_and_report_benchmark()
def unit_test(self):
"""A lightweight test that can finish quickly."""
self._setup()
flags.FLAGS.num_gpus = 1
flags.FLAGS.data_dir = self.data_dir
flags.FLAGS.batch_size = 128
flags.FLAGS.train_epochs = 1
flags.FLAGS.model_dir = self._get_model_dir('unit_test')
flags.FLAGS.resnet_size = 8
flags.FLAGS.dtype = 'fp32'
flags.FLAGS.hooks = ['ExamplesPerSecondHook']
self._run_and_report_benchmark()
def _run_and_report_benchmark(self):
"""Executes benchmark and reports result."""
start_time_sec = time.time()
stats = cifar_main.run_cifar(flags.FLAGS)
wall_time_sec = time.time() - start_time_sec
self._report_benchmark(stats,
wall_time_sec,
top_1_min=0.926,
top_1_max=0.938)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Runs a ResNet model on the ImageNet dataset."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
from absl import app as absl_app
from absl import flags
from absl import logging
from six.moves import range
import tensorflow as tf
from official.r1.resnet import imagenet_preprocessing
from official.r1.resnet import resnet_model
from official.r1.resnet import resnet_run_loop
from official.r1.utils.logs import logger
from official.utils.flags import core as flags_core
DEFAULT_IMAGE_SIZE = 224
NUM_CHANNELS = 3
NUM_CLASSES = 1001
NUM_IMAGES = {
'train': 1281167,
'validation': 50000,
}
_NUM_TRAIN_FILES = 1024
_SHUFFLE_BUFFER = 10000
DATASET_NAME = 'ImageNet'
###############################################################################
# Data processing
###############################################################################
def get_filenames(is_training, data_dir):
"""Return filenames for dataset."""
if is_training:
return [
os.path.join(data_dir, 'train-%05d-of-01024' % i)
for i in range(_NUM_TRAIN_FILES)]
else:
return [
os.path.join(data_dir, 'validation-%05d-of-00128' % i)
for i in range(128)]
def _parse_example_proto(example_serialized):
"""Parses an Example proto containing a training example of an image.
The output of the build_image_data.py image preprocessing script is a dataset
containing serialized Example protocol buffers. Each Example proto contains
the following fields (values are included as examples):
image/height: 462
image/width: 581
image/colorspace: 'RGB'
image/channels: 3
image/class/label: 615
image/class/synset: 'n03623198'
image/class/text: 'knee pad'
image/object/bbox/xmin: 0.1
image/object/bbox/xmax: 0.9
image/object/bbox/ymin: 0.2
image/object/bbox/ymax: 0.6
image/object/bbox/label: 615
image/format: 'JPEG'
image/filename: 'ILSVRC2012_val_00041207.JPEG'
image/encoded: <JPEG encoded string>
Args:
example_serialized: scalar Tensor tf.string containing a serialized
Example protocol buffer.
Returns:
image_buffer: Tensor tf.string containing the contents of a JPEG file.
label: Tensor tf.int32 containing the label.
bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
where each coordinate is [0, 1) and the coordinates are arranged as
[ymin, xmin, ymax, xmax].
"""
# Dense features in Example proto.
feature_map = {
'image/encoded': tf.io.FixedLenFeature([], dtype=tf.string,
default_value=''),
'image/class/label': tf.io.FixedLenFeature([], dtype=tf.int64,
default_value=-1),
'image/class/text': tf.io.FixedLenFeature([], dtype=tf.string,
default_value=''),
}
sparse_float32 = tf.io.VarLenFeature(dtype=tf.float32)
# Sparse features in Example proto.
feature_map.update(
{k: sparse_float32 for k in ['image/object/bbox/xmin',
'image/object/bbox/ymin',
'image/object/bbox/xmax',
'image/object/bbox/ymax']})
features = tf.io.parse_single_example(serialized=example_serialized,
features=feature_map)
label = tf.cast(features['image/class/label'], dtype=tf.int32)
xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0)
ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 0)
xmax = tf.expand_dims(features['image/object/bbox/xmax'].values, 0)
ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 0)
# Note that we impose an ordering of (y, x) just to make life difficult.
bbox = tf.concat([ymin, xmin, ymax, xmax], 0)
# Force the variable number of bounding boxes into the shape
# [1, num_boxes, coords].
bbox = tf.expand_dims(bbox, 0)
bbox = tf.transpose(a=bbox, perm=[0, 2, 1])
return features['image/encoded'], label, bbox
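# Illustrative sketch (a hypothetical helper, not used elsewhere): builds a
# serialized `tf.train.Example` with the minimal fields that
# `_parse_example_proto` reads, which can be handy when unit-testing the
# parser. The image bytes are a placeholder, not a decodable JPEG.
def _example_make_fake_example():
  feature = {
      'image/encoded': tf.train.Feature(
          bytes_list=tf.train.BytesList(value=[b'fake-jpeg-bytes'])),
      'image/class/label': tf.train.Feature(
          int64_list=tf.train.Int64List(value=[615])),
      'image/object/bbox/xmin': tf.train.Feature(
          float_list=tf.train.FloatList(value=[0.1])),
      'image/object/bbox/ymin': tf.train.Feature(
          float_list=tf.train.FloatList(value=[0.2])),
      'image/object/bbox/xmax': tf.train.Feature(
          float_list=tf.train.FloatList(value=[0.9])),
      'image/object/bbox/ymax': tf.train.Feature(
          float_list=tf.train.FloatList(value=[0.6])),
  }
  example = tf.train.Example(features=tf.train.Features(feature=feature))
  return example.SerializeToString()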
def parse_record(raw_record, is_training, dtype):
"""Parses a record containing a training example of an image.
The input record is parsed into a label and image, and the image is passed
through preprocessing steps (cropping, flipping, and so on).
Args:
raw_record: scalar Tensor tf.string containing a serialized
Example protocol buffer.
is_training: A boolean denoting whether the input is for training.
dtype: data type to use for images/features.
Returns:
Tuple with processed image tensor and one-hot-encoded label tensor.
"""
image_buffer, label, bbox = _parse_example_proto(raw_record)
image = imagenet_preprocessing.preprocess_image(
image_buffer=image_buffer,
bbox=bbox,
output_height=DEFAULT_IMAGE_SIZE,
output_width=DEFAULT_IMAGE_SIZE,
num_channels=NUM_CHANNELS,
is_training=is_training)
image = tf.cast(image, dtype)
return image, label
def input_fn(is_training,
data_dir,
batch_size,
num_epochs=1,
dtype=tf.float32,
datasets_num_private_threads=None,
parse_record_fn=parse_record,
input_context=None,
drop_remainder=False,
tf_data_experimental_slack=False):
"""Input function which provides batches for train or eval.
Args:
is_training: A boolean denoting whether the input is for training.
data_dir: The directory containing the input data.
batch_size: The number of samples per batch.
num_epochs: The number of epochs to repeat the dataset.
dtype: Data type to use for images/features
datasets_num_private_threads: Number of private threads for tf.data.
parse_record_fn: Function to use for parsing the records.
input_context: A `tf.distribute.InputContext` object passed in by
`tf.distribute.Strategy`.
drop_remainder: A boolean indicating whether to drop the remainder of the
batches. If True, the batch dimension will be static.
tf_data_experimental_slack: Whether to enable tf.data's
`experimental_slack` option.
Returns:
A dataset that can be used for iteration.
"""
filenames = get_filenames(is_training, data_dir)
dataset = tf.data.Dataset.from_tensor_slices(filenames)
if input_context:
logging.info(
'Sharding the dataset: input_pipeline_id=%d num_input_pipelines=%d',
input_context.input_pipeline_id, input_context.num_input_pipelines)
dataset = dataset.shard(input_context.num_input_pipelines,
input_context.input_pipeline_id)
if is_training:
# Shuffle the input files
dataset = dataset.shuffle(buffer_size=_NUM_TRAIN_FILES)
# Convert to individual records.
# cycle_length = 10 means that up to 10 files will be read and deserialized in
# parallel. You may want to increase this number if you have a large number of
# CPU cores.
dataset = dataset.interleave(
tf.data.TFRecordDataset,
cycle_length=10,
num_parallel_calls=tf.data.experimental.AUTOTUNE)
return resnet_run_loop.process_record_dataset(
dataset=dataset,
is_training=is_training,
batch_size=batch_size,
shuffle_buffer=_SHUFFLE_BUFFER,
parse_record_fn=parse_record_fn,
num_epochs=num_epochs,
dtype=dtype,
datasets_num_private_threads=datasets_num_private_threads,
drop_remainder=drop_remainder,
tf_data_experimental_slack=tf_data_experimental_slack,
)
def get_synth_input_fn(dtype):
return resnet_run_loop.get_synth_input_fn(
DEFAULT_IMAGE_SIZE, DEFAULT_IMAGE_SIZE, NUM_CHANNELS, NUM_CLASSES,
dtype=dtype)
###############################################################################
# Running the model
###############################################################################
class ImagenetModel(resnet_model.Model):
"""Model class with appropriate defaults for Imagenet data."""
def __init__(self, resnet_size, data_format=None, num_classes=NUM_CLASSES,
resnet_version=resnet_model.DEFAULT_VERSION,
dtype=resnet_model.DEFAULT_DTYPE):
"""These are the parameters that work for Imagenet data.
Args:
resnet_size: The number of convolutional layers needed in the model.
data_format: Either 'channels_first' or 'channels_last', specifying which
data format to use when setting up the model.
num_classes: The number of output classes needed from the model. This
enables users to extend the same model to their own datasets.
resnet_version: Integer representing which version of the ResNet network
to use. See README for details. Valid values: [1, 2]
dtype: The TensorFlow dtype to use for calculations.
"""
# For bigger models, we want to use "bottleneck" layers
if resnet_size < 50:
bottleneck = False
else:
bottleneck = True
super(ImagenetModel, self).__init__(
resnet_size=resnet_size,
bottleneck=bottleneck,
num_classes=num_classes,
num_filters=64,
kernel_size=7,
conv_stride=2,
first_pool_size=3,
first_pool_stride=2,
block_sizes=_get_block_sizes(resnet_size),
block_strides=[1, 2, 2, 2],
resnet_version=resnet_version,
data_format=data_format,
dtype=dtype
)
def _get_block_sizes(resnet_size):
"""Retrieve the size of each block_layer in the ResNet model.
The number of block layers used for the Resnet model varies according
to the size of the model. This helper grabs the layer set we want, throwing
an error if a non-standard size has been selected.
Args:
resnet_size: The number of convolutional layers needed in the model.
Returns:
A list of block sizes to use in building the model.
Raises:
ValueError: if an invalid resnet_size is received.
"""
choices = {
18: [2, 2, 2, 2],
34: [3, 4, 6, 3],
50: [3, 4, 6, 3],
101: [3, 4, 23, 3],
152: [3, 8, 36, 3],
200: [3, 24, 36, 3]
}
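  # For example, resnet_size = 50 maps to [3, 4, 6, 3]: with three conv layers
  # per bottleneck block, 3 * (3 + 4 + 6 + 3) + 2 = 50 weighted layers (the
  # extra 2 are the initial conv and the final dense layer). Likewise the
  # non-bottleneck 18 maps to [2, 2, 2, 2], i.e. 2 * 8 + 2 = 18.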
try:
return choices[resnet_size]
except KeyError:
err = ('Could not find layers for selected Resnet size.\n'
'Size received: {}; sizes allowed: {}.'.format(
resnet_size, list(choices.keys())))
raise ValueError(err)
def imagenet_model_fn(features, labels, mode, params):
"""Our model_fn for ResNet to be used with our Estimator."""
# Warmup and higher lr may not be valid for fine tuning with small batches
# and smaller numbers of training images.
if params['fine_tune']:
warmup = False
base_lr = .1
else:
warmup = True
base_lr = .128
learning_rate_fn = resnet_run_loop.learning_rate_with_decay(
batch_size=params['batch_size'] * params.get('num_workers', 1),
batch_denom=256, num_images=NUM_IMAGES['train'],
boundary_epochs=[30, 60, 80, 90], decay_rates=[1, 0.1, 0.01, 0.001, 1e-4],
warmup=warmup, base_lr=base_lr)
return resnet_run_loop.resnet_model_fn(
features=features,
labels=labels,
mode=mode,
model_class=ImagenetModel,
resnet_size=params['resnet_size'],
weight_decay=flags.FLAGS.weight_decay,
learning_rate_fn=learning_rate_fn,
momentum=0.9,
data_format=params['data_format'],
resnet_version=params['resnet_version'],
loss_scale=params['loss_scale'],
loss_filter_fn=None,
dtype=params['dtype'],
fine_tune=params['fine_tune'],
label_smoothing=flags.FLAGS.label_smoothing
)
def define_imagenet_flags():
resnet_run_loop.define_resnet_flags(
resnet_size_choices=['18', '34', '50', '101', '152', '200'],
dynamic_loss_scale=True,
fp16_implementation=True)
flags.adopt_module_key_flags(resnet_run_loop)
flags_core.set_defaults(train_epochs=90)
def run_imagenet(flags_obj):
"""Run ResNet ImageNet training and eval loop.
Args:
flags_obj: An object containing parsed flag values.
Returns:
Dict of results of the run. Contains the keys `eval_results` and
`train_hooks`. `eval_results` contains accuracy (top_1) and
accuracy_top_5. `train_hooks` is a list of the hook instances used during
training.
"""
input_function = (flags_obj.use_synthetic_data and
get_synth_input_fn(flags_core.get_tf_dtype(flags_obj)) or
input_fn)
result = resnet_run_loop.resnet_main(
flags_obj, imagenet_model_fn, input_function, DATASET_NAME,
shape=[DEFAULT_IMAGE_SIZE, DEFAULT_IMAGE_SIZE, NUM_CHANNELS])
return result
def main(_):
with logger.benchmark_context(flags.FLAGS):
run_imagenet(flags.FLAGS)
if __name__ == '__main__':
logging.set_verbosity(logging.INFO)
define_imagenet_flags()
absl_app.run(main)
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides utilities to preprocess images.
Training images are sampled using the provided bounding boxes, and subsequently
cropped to the sampled bounding box. Images are additionally flipped randomly,
then resized to the target output size (without aspect-ratio preservation).
Images used during evaluation are resized (with aspect-ratio preservation) and
centrally cropped.
All images undergo mean color subtraction.
Note that these steps are colloquially referred to as "ResNet preprocessing,"
and they differ from "VGG preprocessing," which does not use bounding boxes
and instead does an aspect-preserving resize followed by random crop during
training. (These both differ from "Inception preprocessing," which introduces
color distortion steps.)
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
_R_MEAN = 123.68
_G_MEAN = 116.78
_B_MEAN = 103.94
_CHANNEL_MEANS = [_R_MEAN, _G_MEAN, _B_MEAN]
# The lower bound for the smallest side of the image for aspect-preserving
# resizing. For example, if an image is 500 x 1000, it will be resized to
# _RESIZE_MIN x (_RESIZE_MIN * 2).
_RESIZE_MIN = 256
def _decode_crop_and_flip(image_buffer, bbox, num_channels):
"""Crops the given image to a random part of the image, and randomly flips.
We use the fused decode_and_crop op, which performs better than the two ops
used separately in series, but note that this requires that the image be
passed in as an un-decoded string Tensor.
Args:
image_buffer: scalar string Tensor representing the raw JPEG image buffer.
bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
where each coordinate is [0, 1) and the coordinates are arranged as
[ymin, xmin, ymax, xmax].
num_channels: Integer depth of the image buffer for decoding.
Returns:
3-D tensor with cropped image.
"""
# A large fraction of image datasets contain a human-annotated bounding box
# delineating the region of the image containing the object of interest. We
# choose to create a new bounding box for the object which is a randomly
# distorted version of the human-annotated bounding box that obeys an
# allowed range of aspect ratios, sizes and overlap with the human-annotated
# bounding box. If no box is supplied, then we assume the bounding box is
# the entire image.
sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box(
tf.image.extract_jpeg_shape(image_buffer),
bounding_boxes=bbox,
min_object_covered=0.1,
aspect_ratio_range=[0.75, 1.33],
area_range=[0.05, 1.0],
max_attempts=100,
use_image_if_no_bounding_boxes=True)
bbox_begin, bbox_size, _ = sample_distorted_bounding_box
# Reassemble the bounding box in the format the crop op requires.
offset_y, offset_x, _ = tf.unstack(bbox_begin)
target_height, target_width, _ = tf.unstack(bbox_size)
crop_window = tf.stack([offset_y, offset_x, target_height, target_width])
# Use the fused decode and crop op here, which is faster than each in series.
cropped = tf.image.decode_and_crop_jpeg(
image_buffer, crop_window, channels=num_channels)
# Flip to add a little more random distortion in.
cropped = tf.image.random_flip_left_right(cropped)
return cropped
def _central_crop(image, crop_height, crop_width):
"""Performs central crops of the given image list.
Args:
image: a 3-D image tensor
crop_height: the height of the image following the crop.
crop_width: the width of the image following the crop.
Returns:
3-D tensor with cropped image.
"""
shape = tf.shape(input=image)
height, width = shape[0], shape[1]
amount_to_be_cropped_h = (height - crop_height)
crop_top = amount_to_be_cropped_h // 2
amount_to_be_cropped_w = (width - crop_width)
crop_left = amount_to_be_cropped_w // 2
return tf.slice(
image, [crop_top, crop_left, 0], [crop_height, crop_width, -1])
def _mean_image_subtraction(image, means, num_channels):
"""Subtracts the given means from each image channel.
For example:
means = [123.68, 116.779, 103.939]
image = _mean_image_subtraction(image, means, num_channels=3)
Note that the rank of `image` must be known.
Args:
image: a tensor of size [height, width, C].
means: a C-vector of values to subtract from each channel.
num_channels: number of color channels in the image that will be distorted.
Returns:
the centered image.
Raises:
ValueError: If the rank of `image` is unknown, if `image` has a rank other
than three or if the number of channels in `image` doesn't match the
number of values in `means`.
"""
if image.get_shape().ndims != 3:
raise ValueError('Input must be of size [height, width, C>0]')
if len(means) != num_channels:
raise ValueError('len(means) must match the number of channels')
# We have a 1-D tensor of means; convert to 3-D.
# Note(b/130245863): we explicitly call `broadcast` instead of simply
# expanding dimensions for better performance.
means = tf.broadcast_to(means, tf.shape(image))
return image - means
def _smallest_size_at_least(height, width, resize_min):
"""Computes new shape with the smallest side equal to `smallest_side`.
Computes new shape with the smallest side equal to `smallest_side` while
preserving the original aspect ratio.
Args:
height: an int32 scalar tensor indicating the current height.
width: an int32 scalar tensor indicating the current width.
resize_min: A python integer or scalar `Tensor` indicating the size of
the smallest side after resize.
Returns:
new_height: an int32 scalar tensor indicating the new height.
new_width: an int32 scalar tensor indicating the new width.
"""
resize_min = tf.cast(resize_min, tf.float32)
# Convert to floats to make subsequent calculations go smoothly.
height, width = tf.cast(height, tf.float32), tf.cast(width, tf.float32)
smaller_dim = tf.minimum(height, width)
scale_ratio = resize_min / smaller_dim
# Convert back to ints to make heights and widths that TF ops will accept.
new_height = tf.cast(height * scale_ratio, tf.int32)
new_width = tf.cast(width * scale_ratio, tf.int32)
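  # For example, a 500 x 1000 image with resize_min = 256 gives
  # scale_ratio = 256 / 500 = 0.512, so the new shape is (256, 512), matching
  # the _RESIZE_MIN comment above.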
return new_height, new_width
def _aspect_preserving_resize(image, resize_min):
"""Resize images preserving the original aspect ratio.
Args:
image: A 3-D image `Tensor`.
resize_min: A python integer or scalar `Tensor` indicating the size of
the smallest side after resize.
Returns:
resized_image: A 3-D tensor containing the resized image.
"""
shape = tf.shape(input=image)
height, width = shape[0], shape[1]
new_height, new_width = _smallest_size_at_least(height, width, resize_min)
return _resize_image(image, new_height, new_width)
def _resize_image(image, height, width):
"""Simple wrapper around tf.resize_images.
This is primarily to make sure we use the same `ResizeMethod` and other
details each time.
Args:
image: A 3-D image `Tensor`.
height: The target height for the resized image.
width: The target width for the resized image.
Returns:
resized_image: A 3-D tensor containing the resized image. The first two
dimensions have the shape [height, width].
"""
return tf.compat.v1.image.resize(
image, [height, width], method=tf.image.ResizeMethod.BILINEAR,
align_corners=False)
def preprocess_image(image_buffer, bbox, output_height, output_width,
num_channels, is_training=False):
"""Preprocesses the given image.
Preprocessing includes decoding, cropping, and resizing for both training
and eval images. Training preprocessing, however, introduces some random
distortion of the image to improve accuracy.
Args:
image_buffer: scalar string Tensor representing the raw JPEG image buffer.
bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
where each coordinate is [0, 1) and the coordinates are arranged as
[ymin, xmin, ymax, xmax].
output_height: The height of the image after preprocessing.
output_width: The width of the image after preprocessing.
num_channels: Integer depth of the image buffer for decoding.
is_training: `True` if we're preprocessing the image for training and
`False` otherwise.
Returns:
A preprocessed image.
"""
if is_training:
# For training, we want to randomize some of the distortions.
image = _decode_crop_and_flip(image_buffer, bbox, num_channels)
image = _resize_image(image, output_height, output_width)
else:
# For validation, we want to decode, resize, then just crop the middle.
image = tf.image.decode_jpeg(image_buffer, channels=num_channels)
image = _aspect_preserving_resize(image, _RESIZE_MIN)
image = _central_crop(image, output_height, output_width)
image.set_shape([output_height, output_width, num_channels])
return _mean_image_subtraction(image, _CHANNEL_MEANS, num_channels)
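# Illustrative sketch (a hypothetical helper, not used elsewhere): preprocess
# a single JPEG file from disk with the pipeline above, using a whole-image
# bounding box when no annotation is available. The 224x224 output size here
# is an assumption matching the ResNet defaults.
def _example_preprocess_from_file(jpeg_path, is_training=False):
  image_buffer = tf.io.read_file(jpeg_path)
  whole_image_bbox = tf.constant([0.0, 0.0, 1.0, 1.0], shape=[1, 1, 4])
  return preprocess_image(
      image_buffer=image_buffer,
      bbox=whole_image_bbox,
      output_height=224,
      output_width=224,
      num_channels=3,
      is_training=is_training)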
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import unittest
import tensorflow as tf # pylint: disable=g-bad-import-order
from absl import logging
from official.r1.resnet import imagenet_main
from official.utils.testing import integration
logging.set_verbosity(logging.ERROR)
_BATCH_SIZE = 32
_LABEL_CLASSES = 1001
class BaseTest(tf.test.TestCase):
_num_validation_images = None
@classmethod
def setUpClass(cls): # pylint: disable=invalid-name
super(BaseTest, cls).setUpClass()
imagenet_main.define_imagenet_flags()
def setUp(self):
super(BaseTest, self).setUp()
tf.compat.v1.disable_eager_execution()
self._num_validation_images = imagenet_main.NUM_IMAGES['validation']
imagenet_main.NUM_IMAGES['validation'] = 4
def tearDown(self):
super(BaseTest, self).tearDown()
tf.io.gfile.rmtree(self.get_temp_dir())
imagenet_main.NUM_IMAGES['validation'] = self._num_validation_images
def _tensor_shapes_helper(self, resnet_size, resnet_version, dtype, with_gpu):
"""Checks the tensor shapes after each phase of the ResNet model."""
def reshape(shape):
"""Returns the expected dimensions depending on if a GPU is being used."""
# If a GPU is used for the test, the shape is returned (already in NCHW
# form). When GPU is not used, the shape is converted to NHWC.
if with_gpu:
return shape
return shape[0], shape[2], shape[3], shape[1]
graph = tf.Graph()
with graph.as_default(), self.test_session(
graph=graph, use_gpu=with_gpu, force_gpu=with_gpu):
model = imagenet_main.ImagenetModel(
resnet_size=resnet_size,
data_format='channels_first' if with_gpu else 'channels_last',
resnet_version=resnet_version,
dtype=dtype
)
inputs = tf.random.uniform([1, 224, 224, 3])
output = model(inputs, training=True)
initial_conv = graph.get_tensor_by_name('resnet_model/initial_conv:0')
max_pool = graph.get_tensor_by_name('resnet_model/initial_max_pool:0')
block_layer1 = graph.get_tensor_by_name('resnet_model/block_layer1:0')
block_layer2 = graph.get_tensor_by_name('resnet_model/block_layer2:0')
block_layer3 = graph.get_tensor_by_name('resnet_model/block_layer3:0')
block_layer4 = graph.get_tensor_by_name('resnet_model/block_layer4:0')
reduce_mean = graph.get_tensor_by_name('resnet_model/final_reduce_mean:0')
dense = graph.get_tensor_by_name('resnet_model/final_dense:0')
self.assertAllEqual(initial_conv.shape, reshape((1, 64, 112, 112)))
self.assertAllEqual(max_pool.shape, reshape((1, 64, 56, 56)))
# The number of channels after each block depends on whether we're
# using the building_block or the bottleneck_block.
if resnet_size < 50:
self.assertAllEqual(block_layer1.shape, reshape((1, 64, 56, 56)))
self.assertAllEqual(block_layer2.shape, reshape((1, 128, 28, 28)))
self.assertAllEqual(block_layer3.shape, reshape((1, 256, 14, 14)))
self.assertAllEqual(block_layer4.shape, reshape((1, 512, 7, 7)))
self.assertAllEqual(reduce_mean.shape, reshape((1, 512, 1, 1)))
else:
self.assertAllEqual(block_layer1.shape, reshape((1, 256, 56, 56)))
self.assertAllEqual(block_layer2.shape, reshape((1, 512, 28, 28)))
self.assertAllEqual(block_layer3.shape, reshape((1, 1024, 14, 14)))
self.assertAllEqual(block_layer4.shape, reshape((1, 2048, 7, 7)))
self.assertAllEqual(reduce_mean.shape, reshape((1, 2048, 1, 1)))
self.assertAllEqual(dense.shape, (1, _LABEL_CLASSES))
self.assertAllEqual(output.shape, (1, _LABEL_CLASSES))
def tensor_shapes_helper(self, resnet_size, resnet_version, with_gpu=False):
self._tensor_shapes_helper(resnet_size=resnet_size,
resnet_version=resnet_version,
dtype=tf.float32, with_gpu=with_gpu)
self._tensor_shapes_helper(resnet_size=resnet_size,
resnet_version=resnet_version,
dtype=tf.float16, with_gpu=with_gpu)
def test_tensor_shapes_resnet_18_v1(self):
self.tensor_shapes_helper(18, resnet_version=1)
def test_tensor_shapes_resnet_18_v2(self):
self.tensor_shapes_helper(18, resnet_version=2)
def test_tensor_shapes_resnet_34_v1(self):
self.tensor_shapes_helper(34, resnet_version=1)
def test_tensor_shapes_resnet_34_v2(self):
self.tensor_shapes_helper(34, resnet_version=2)
def test_tensor_shapes_resnet_50_v1(self):
self.tensor_shapes_helper(50, resnet_version=1)
def test_tensor_shapes_resnet_50_v2(self):
self.tensor_shapes_helper(50, resnet_version=2)
def test_tensor_shapes_resnet_101_v1(self):
self.tensor_shapes_helper(101, resnet_version=1)
def test_tensor_shapes_resnet_101_v2(self):
self.tensor_shapes_helper(101, resnet_version=2)
def test_tensor_shapes_resnet_152_v1(self):
self.tensor_shapes_helper(152, resnet_version=1)
def test_tensor_shapes_resnet_152_v2(self):
self.tensor_shapes_helper(152, resnet_version=2)
def test_tensor_shapes_resnet_200_v1(self):
self.tensor_shapes_helper(200, resnet_version=1)
def test_tensor_shapes_resnet_200_v2(self):
self.tensor_shapes_helper(200, resnet_version=2)
@unittest.skipUnless(tf.test.is_built_with_cuda(), 'requires GPU')
def test_tensor_shapes_resnet_18_with_gpu_v1(self):
self.tensor_shapes_helper(18, resnet_version=1, with_gpu=True)
@unittest.skipUnless(tf.test.is_built_with_cuda(), 'requires GPU')
def test_tensor_shapes_resnet_18_with_gpu_v2(self):
self.tensor_shapes_helper(18, resnet_version=2, with_gpu=True)
@unittest.skipUnless(tf.test.is_built_with_cuda(), 'requires GPU')
def test_tensor_shapes_resnet_34_with_gpu_v1(self):
self.tensor_shapes_helper(34, resnet_version=1, with_gpu=True)
@unittest.skipUnless(tf.test.is_built_with_cuda(), 'requires GPU')
def test_tensor_shapes_resnet_34_with_gpu_v2(self):
self.tensor_shapes_helper(34, resnet_version=2, with_gpu=True)
@unittest.skipUnless(tf.test.is_built_with_cuda(), 'requires GPU')
def test_tensor_shapes_resnet_50_with_gpu_v1(self):
self.tensor_shapes_helper(50, resnet_version=1, with_gpu=True)
@unittest.skipUnless(tf.test.is_built_with_cuda(), 'requires GPU')
def test_tensor_shapes_resnet_50_with_gpu_v2(self):
self.tensor_shapes_helper(50, resnet_version=2, with_gpu=True)
@unittest.skipUnless(tf.test.is_built_with_cuda(), 'requires GPU')
def test_tensor_shapes_resnet_101_with_gpu_v1(self):
self.tensor_shapes_helper(101, resnet_version=1, with_gpu=True)
@unittest.skipUnless(tf.test.is_built_with_cuda(), 'requires GPU')
def test_tensor_shapes_resnet_101_with_gpu_v2(self):
self.tensor_shapes_helper(101, resnet_version=2, with_gpu=True)
@unittest.skipUnless(tf.test.is_built_with_cuda(), 'requires GPU')
def test_tensor_shapes_resnet_152_with_gpu_v1(self):
self.tensor_shapes_helper(152, resnet_version=1, with_gpu=True)
@unittest.skipUnless(tf.test.is_built_with_cuda(), 'requires GPU')
def test_tensor_shapes_resnet_152_with_gpu_v2(self):
self.tensor_shapes_helper(152, resnet_version=2, with_gpu=True)
@unittest.skipUnless(tf.test.is_built_with_cuda(), 'requires GPU')
def test_tensor_shapes_resnet_200_with_gpu_v1(self):
self.tensor_shapes_helper(200, resnet_version=1, with_gpu=True)
@unittest.skipUnless(tf.test.is_built_with_cuda(), 'requires GPU')
def test_tensor_shapes_resnet_200_with_gpu_v2(self):
self.tensor_shapes_helper(200, resnet_version=2, with_gpu=True)
def resnet_model_fn_helper(self, mode, resnet_version, dtype):
"""Tests that the EstimatorSpec is given the appropriate arguments."""
tf.compat.v1.train.create_global_step()
input_fn = imagenet_main.get_synth_input_fn(dtype)
dataset = input_fn(True, '', _BATCH_SIZE)
iterator = tf.compat.v1.data.make_initializable_iterator(dataset)
features, labels = iterator.get_next()
spec = imagenet_main.imagenet_model_fn(
features, labels, mode, {
'dtype': dtype,
'resnet_size': 50,
'data_format': 'channels_last',
'batch_size': _BATCH_SIZE,
'resnet_version': resnet_version,
'loss_scale': 128 if dtype == tf.float16 else 1,
'fine_tune': False,
})
predictions = spec.predictions
self.assertAllEqual(predictions['probabilities'].shape,
(_BATCH_SIZE, _LABEL_CLASSES))
self.assertEqual(predictions['probabilities'].dtype, tf.float32)
self.assertAllEqual(predictions['classes'].shape, (_BATCH_SIZE,))
self.assertEqual(predictions['classes'].dtype, tf.int64)
if mode != tf.estimator.ModeKeys.PREDICT:
loss = spec.loss
self.assertAllEqual(loss.shape, ())
self.assertEqual(loss.dtype, tf.float32)
if mode == tf.estimator.ModeKeys.EVAL:
eval_metric_ops = spec.eval_metric_ops
self.assertAllEqual(eval_metric_ops['accuracy'][0].shape, ())
self.assertAllEqual(eval_metric_ops['accuracy'][1].shape, ())
self.assertEqual(eval_metric_ops['accuracy'][0].dtype, tf.float32)
self.assertEqual(eval_metric_ops['accuracy'][1].dtype, tf.float32)
def test_resnet_model_fn_train_mode_v1(self):
self.resnet_model_fn_helper(tf.estimator.ModeKeys.TRAIN, resnet_version=1,
dtype=tf.float32)
def test_resnet_model_fn_train_mode_v2(self):
self.resnet_model_fn_helper(tf.estimator.ModeKeys.TRAIN, resnet_version=2,
dtype=tf.float32)
def test_resnet_model_fn_eval_mode_v1(self):
self.resnet_model_fn_helper(tf.estimator.ModeKeys.EVAL, resnet_version=1,
dtype=tf.float32)
def test_resnet_model_fn_eval_mode_v2(self):
self.resnet_model_fn_helper(tf.estimator.ModeKeys.EVAL, resnet_version=2,
dtype=tf.float32)
def test_resnet_model_fn_predict_mode_v1(self):
self.resnet_model_fn_helper(tf.estimator.ModeKeys.PREDICT, resnet_version=1,
dtype=tf.float32)
def test_resnet_model_fn_predict_mode_v2(self):
self.resnet_model_fn_helper(tf.estimator.ModeKeys.PREDICT, resnet_version=2,
dtype=tf.float32)
def _test_imagenetmodel_shape(self, resnet_version):
batch_size = 135
num_classes = 246
model = imagenet_main.ImagenetModel(
50, data_format='channels_last', num_classes=num_classes,
resnet_version=resnet_version)
fake_input = tf.random.uniform([batch_size, 224, 224, 3])
output = model(fake_input, training=True)
self.assertAllEqual(output.shape, (batch_size, num_classes))
def test_imagenetmodel_shape_v1(self):
self._test_imagenetmodel_shape(resnet_version=1)
def test_imagenetmodel_shape_v2(self):
self._test_imagenetmodel_shape(resnet_version=2)
def test_imagenet_end_to_end_synthetic_v1(self):
integration.run_synthetic(
main=imagenet_main.run_imagenet, tmp_root=self.get_temp_dir(),
extra_flags=['-resnet_version', '1', '-batch_size', '4',
'--max_train_steps', '1']
)
def test_imagenet_end_to_end_synthetic_v2(self):
integration.run_synthetic(
main=imagenet_main.run_imagenet, tmp_root=self.get_temp_dir(),
extra_flags=['-resnet_version', '2', '-batch_size', '4',
'--max_train_steps', '1']
)
def test_imagenet_end_to_end_synthetic_v1_tiny(self):
integration.run_synthetic(
main=imagenet_main.run_imagenet, tmp_root=self.get_temp_dir(),
extra_flags=['-resnet_version', '1', '-batch_size', '4',
'-resnet_size', '18', '--max_train_steps', '1']
)
def test_imagenet_end_to_end_synthetic_v2_tiny(self):
integration.run_synthetic(
main=imagenet_main.run_imagenet, tmp_root=self.get_temp_dir(),
extra_flags=['-resnet_version', '2', '-batch_size', '4',
'-resnet_size', '18', '--max_train_steps', '1']
)
def test_imagenet_end_to_end_synthetic_v1_huge(self):
integration.run_synthetic(
main=imagenet_main.run_imagenet, tmp_root=self.get_temp_dir(),
extra_flags=['-resnet_version', '1', '-batch_size', '4',
'-resnet_size', '200', '--max_train_steps', '1']
)
def test_imagenet_end_to_end_synthetic_v2_huge(self):
integration.run_synthetic(
main=imagenet_main.run_imagenet, tmp_root=self.get_temp_dir(),
extra_flags=['-resnet_version', '2', '-batch_size', '4',
'-resnet_size', '200', '--max_train_steps', '1']
)
if __name__ == '__main__':
tf.test.main()
![No Maintenance Intended](https://img.shields.io/badge/No%20Maintenance%20Intended-%E2%9C%95-red.svg)
![TensorFlow Requirement: 1.x](https://img.shields.io/badge/TensorFlow%20Requirement-1.x-brightgreen)
![TensorFlow 2 Not Supported](https://img.shields.io/badge/TensorFlow%202%20Not%20Supported-%E2%9C%95-red.svg)
# Transformer Translation Model
This is an implementation of the Transformer translation model as described in the [Attention is All You Need](https://arxiv.org/abs/1706.03762) paper. Based on the code provided by the authors: [Transformer code](https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/models/transformer.py) from [Tensor2Tensor](https://github.com/tensorflow/tensor2tensor). Also, check out the [tutorial](https://www.tensorflow.org/beta/tutorials/text/transformer) on Transformer in TF 2.0.
**Please follow the [README](https://github.com/tensorflow/models/blob/master/official/transformer/README.md) of the new Keras-based TF 2 implementation to walk through the new Transformer.**
Transformer is a neural network architecture that solves sequence-to-sequence problems using attention mechanisms. Unlike traditional neural seq2seq models, Transformer does not involve recurrent connections. The attention mechanism learns dependencies between tokens in two sequences. Since attention weights apply to all tokens in the sequences, the Transformer model can easily capture long-distance dependencies.
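To make the attention computation concrete, below is a minimal NumPy sketch of single-head, unmasked scaled dot-product attention, the building block that [attention_layer.py](model/attention_layer.py) implements with multiple heads in TensorFlow; the function and variable names are illustrative only, not taken from this repository.
```
import numpy as np

def scaled_dot_product_attention(q, k, v):
  """q: [length_q, depth]; k, v: [length_kv, depth]. Returns [length_q, depth]."""
  depth = q.shape[-1]
  scores = q @ k.T / np.sqrt(depth)               # similarity of each query to each key
  scores -= scores.max(axis=-1, keepdims=True)    # numerical stability for the softmax
  weights = np.exp(scores)
  weights /= weights.sum(axis=-1, keepdims=True)  # softmax over the keys
  return weights @ v                              # weighted sum of the values

q = np.random.randn(3, 4)   # 3 query tokens, depth 4
kv = np.random.randn(5, 4)  # 5 key/value tokens, depth 4
print(scaled_dot_product_attention(q, kv, kv).shape)  # (3, 4)
```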
Transformer's overall structure follows the standard encoder-decoder pattern. The encoder uses self-attention to compute a representation of the input sequence. The decoder generates the output sequence one token at a time, taking the encoder output and the previously generated tokens as inputs.
The model also applies embeddings on the input and output tokens, and adds a constant positional encoding. The positional encoding adds information about the position of each token.
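As a rough illustration of that constant positional encoding, the NumPy sketch below computes the sinusoidal encoding described in the paper; the function name and toy dimensions are assumptions for illustration, not part of this repository.
```
import numpy as np

def sinusoidal_position_encoding(length, hidden_size):
  """Returns a [length, hidden_size] matrix of sinusoidal position encodings.

  Even channels use sine and odd channels use cosine, with wavelengths forming
  a geometric progression, as described in the Transformer paper.
  """
  positions = np.arange(length)[:, np.newaxis]            # [length, 1]
  channels = np.arange(hidden_size // 2)[np.newaxis, :]   # [1, hidden_size/2]
  angle_rates = 1.0 / np.power(10000.0, 2.0 * channels / hidden_size)
  angles = positions * angle_rates                         # [length, hidden_size/2]
  encoding = np.zeros((length, hidden_size))
  encoding[:, 0::2] = np.sin(angles)
  encoding[:, 1::2] = np.cos(angles)
  return encoding

# Example: encode 4 positions with a hidden size of 8.
print(sinusoidal_position_encoding(4, 8).shape)  # (4, 8)
```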
## Contents
* [Contents](#contents)
* [Walkthrough](#walkthrough)
* [Benchmarks](#benchmarks)
* [Training times](#training-times)
* [Evaluation results](#evaluation-results)
* [Detailed instructions](#detailed-instructions)
* [Environment preparation](#environment-preparation)
* [Download and preprocess datasets](#download-and-preprocess-datasets)
* [Model training and evaluation](#model-training-and-evaluation)
* [Translate using the model](#translate-using-the-model)
* [Compute official BLEU score](#compute-official-bleu-score)
* [TPU](#tpu)
* [Export trained model](#export-trained-model)
* [Example translation](#example-translation)
* [Implementation overview](#implementation-overview)
* [Model Definition](#model-definition)
* [Model Estimator](#model-estimator)
* [Other scripts](#other-scripts)
* [Test dataset](#test-dataset)
* [Term definitions](#term-definitions)
## Walkthrough
Below are the commands for running the Transformer model. See the
[Detailed instructions](#detailed-instructions) for more details on running the
model.
```
cd /path/to/models/official/transformer
# Ensure that PYTHONPATH is correctly defined as described in
# https://github.com/tensorflow/models/tree/master/official#requirements
# export PYTHONPATH="$PYTHONPATH:/path/to/models"
# Export variables
PARAM_SET=big
DATA_DIR=$HOME/transformer/data
MODEL_DIR=$HOME/transformer/model_$PARAM_SET
VOCAB_FILE=$DATA_DIR/vocab.ende.32768
# Download training/evaluation/test datasets
python data_download.py --data_dir=$DATA_DIR
# Train the model for 10 epochs, and evaluate after every epoch.
python transformer_main.py --data_dir=$DATA_DIR --model_dir=$MODEL_DIR \
--vocab_file=$VOCAB_FILE --param_set=$PARAM_SET \
--bleu_source=$DATA_DIR/newstest2014.en --bleu_ref=$DATA_DIR/newstest2014.de
# Run during training in a separate process to get continuous updates,
# or after training is complete.
tensorboard --logdir=$MODEL_DIR
# Translate some text using the trained model
python translate.py --model_dir=$MODEL_DIR --vocab_file=$VOCAB_FILE \
--param_set=$PARAM_SET --text="hello world"
# Compute model's BLEU score using the newstest2014 dataset.
python translate.py --model_dir=$MODEL_DIR --vocab_file=$VOCAB_FILE \
--param_set=$PARAM_SET --file=$DATA_DIR/newstest2014.en --file_out=translation.en
python compute_bleu.py --translation=translation.en --reference=$DATA_DIR/newstest2014.de
```
## Benchmarks
### Training times
Currently, both the big and base parameter sets run on a single GPU. The measurements below
were obtained by running the model on a P100 GPU.
Param Set | batches/sec | batches per epoch | time per epoch
--- | --- | --- | ---
base | 4.8 | 83244 | 4 hr
big | 1.1 | 41365 | 10 hr
### Evaluation results
Below are the case-insensitive BLEU scores after 10 epochs.
Param Set | Score
--- | ---
base | 27.7
big | 28.9
## Detailed instructions
0. ### Environment preparation
#### Add models repo to PYTHONPATH
Follow the instructions described in the [Requirements](https://github.com/tensorflow/models/tree/master/official#requirements) section to add the models folder to the python path.
#### Export variables (optional)
Export the following variables, or modify the values in each of the snippets below:
```
PARAM_SET=big
DATA_DIR=$HOME/transformer/data
MODEL_DIR=$HOME/transformer/model_$PARAM_SET
VOCAB_FILE=$DATA_DIR/vocab.ende.32768
```
1. ### Download and preprocess datasets
[data_download.py](data_download.py) downloads and preprocesses the training and evaluation WMT datasets. After the data is downloaded and extracted, the training data is used to generate a vocabulary of subtokens. The evaluation and training strings are tokenized, and the resulting data is sharded, shuffled, and saved as TFRecords.
1.75GB of compressed data will be downloaded. In total, the raw files (compressed, extracted, and combined files) take up 8.4GB of disk space. The resulting TFRecord and vocabulary files are 722MB. The script takes around 40 minutes to run, with the bulk of the time spent downloading and ~15 minutes spent on preprocessing.
Command to run:
```
python data_download.py --data_dir=$DATA_DIR
```
Arguments:
* `--data_dir`: Path where the preprocessed TFRecord data and vocab file will be saved.
* Use the `--help` or `-h` flag to get a full list of possible arguments.
2. ### Model training and evaluation
[transformer_main.py](transformer_main.py) creates a Transformer model and trains it using the TensorFlow Estimator API.
Command to run:
```
python transformer_main.py --data_dir=$DATA_DIR --model_dir=$MODEL_DIR \
--vocab_file=$VOCAB_FILE --param_set=$PARAM_SET
```
Arguments:
* `--data_dir`: This should be set to the same directory given to `data_download.py`'s `--data_dir` argument.
* `--model_dir`: Directory to save Transformer model training checkpoints.
* `--vocab_file`: Path to subtoken vocabulary file. If data_download was used, you may find the file in `data_dir`.
* `--param_set`: Parameter set to use when creating and training the model. Options are `base` and `big` (default).
* Use the `--help` or `-h` flag to get a full list of possible arguments.
#### Customizing training schedule
By default, the model will train for 10 epochs, and evaluate after every epoch. The training schedule may be defined through the flags:
* Training with epochs (default):
* `--train_epochs`: The total number of complete passes to make through the dataset
* `--epochs_between_evals`: The number of epochs to train between evaluations.
* Training with steps:
* `--train_steps`: sets the total number of training steps to run.
* `--steps_between_evals`: Number of training steps to run between evaluations.
Only one of `train_epochs` or `train_steps` may be set. Since the default option is to evaluate the model after training for an epoch, it may take 4 or more hours between model evaluations. To get more frequent evaluations, use the flags `--train_steps=250000 --steps_between_evals=1000`.
Note: At the beginning of each training session, the training dataset is reloaded and shuffled. Stopping training before an epoch completes may result in worse model quality, since some examples may then be seen more often than others. Therefore, it is recommended to use epochs when model quality is important.
#### Compute BLEU score during model evaluation
Use these flags to compute the BLEU score when the model evaluates:
* `--bleu_source`: Path to file containing text to translate.
* `--bleu_ref`: Path to file containing the reference translation.
* `--stop_threshold`: Train until the BLEU score reaches this lower bound. This setting overrides the `--train_steps` and `--train_epochs` flags.
When running `transformer_main.py`, use the flags: `--bleu_source=$DATA_DIR/newstest2014.en --bleu_ref=$DATA_DIR/newstest2014.de`
#### TensorBoard
Training and evaluation metrics (loss, accuracy, approximate BLEU score, etc.) are logged and can be displayed in the browser using TensorBoard.
```
tensorboard --logdir=$MODEL_DIR
```
The values are displayed at [localhost:6006](http://localhost:6006).
3. ### Translate using the model
[translate.py](translate.py) contains the script to use the trained model to translate input text or file. Each line in the file is translated separately.
Command to run:
```
python translate.py --model_dir=$MODEL_DIR --vocab_file=$VOCAB_FILE \
--param_set=$PARAM_SET --text="hello world"
```
Arguments for initializing the Subtokenizer and trained model:
* `--model_dir` and `--param_set`: These parameters are used to rebuild the trained model
* `--vocab_file`: Path to subtoken vocabulary file. If data_download was used, you may find the file in `data_dir`.
Arguments for specifying what to translate:
* `--text`: Text to translate
* `--file`: Path to file containing text to translate
* `--file_out`: If `--file` is set, then this file will store the input file's translations.
To translate the newstest2014 data, run:
```
python translate.py --model_dir=$MODEL_DIR --vocab_file=$VOCAB_FILE \
--param_set=$PARAM_SET --file=$DATA_DIR/newstest2014.en --file_out=translation.en
```
Translating the file takes around 15 minutes on a GTX1080, or 5 minutes on a P100.
4. ### Compute official BLEU score
Use [compute_bleu.py](compute_bleu.py) to compute the BLEU score by comparing the generated translations to the reference translation.
Command to run:
```
python compute_bleu.py --translation=translation.en --reference=$DATA_DIR/newstest2014.de
```
Arguments:
* `--translation`: Path to file containing generated translations.
* `--reference`: Path to file containing reference translations.
* Use the `--help` or `-h` flag to get a full list of possible arguments.
5. ### TPU
TPU support for this version of Transformer is experimental. Currently it is present for
demonstration purposes only, but will be optimized in the coming weeks.
## Export trained model
To export the model in the TensorFlow [SavedModel](https://www.tensorflow.org/guide/saved_model) format, use the argument `--export_dir` when running `transformer_main.py`. A folder named with the export timestamp will be created inside that directory (e.g. $EXPORT_DIR/1526427396).
```
EXPORT_DIR=$HOME/transformer/saved_model
python transformer_main.py --data_dir=$DATA_DIR --model_dir=$MODEL_DIR \
--vocab_file=$VOCAB_FILE --param_set=$PARAM_SET --export_dir=$EXPORT_DIR
```
To inspect the SavedModel, use `saved_model_cli`:
```
SAVED_MODEL_DIR=$EXPORT_DIR/{TIMESTAMP} # replace {TIMESTAMP} with the name of the folder created
saved_model_cli show --dir=$SAVED_MODEL_DIR --all
```
### Example translation
Let's translate **"hello world!"**, **"goodbye world."**, and **"Would you like some pie?"**.
The SignatureDef for "translate" is:
```
signature_def['translate']:
  The given SavedModel SignatureDef contains the following input(s):
    inputs['input'] tensor_info:
        dtype: DT_INT64
        shape: (-1, -1)
        name: Placeholder:0
  The given SavedModel SignatureDef contains the following output(s):
    outputs['outputs'] tensor_info:
        dtype: DT_INT32
        shape: (-1, -1)
        name: model/Transformer/strided_slice_19:0
    outputs['scores'] tensor_info:
        dtype: DT_FLOAT
        shape: (-1)
        name: model/Transformer/strided_slice_20:0
```
Follow the steps below to use the translate signature def:
1. #### Encode the inputs to integer arrays.
This can be done using `utils.tokenizer.Subtokenizer`, and the vocab file in the SavedModel assets (`$SAVED_MODEL_DIR/assets.extra/vocab.txt`).
```
from official.transformer.utils.tokenizer import Subtokenizer
s = Subtokenizer(PATH_TO_VOCAB_FILE)
print(s.encode("hello world!", add_eos=True))
```
The encoded inputs are:
* `"hello world!" = [6170, 3731, 178, 207, 1]`
* `"goodbye world." = [15431, 13966, 36, 178, 3, 1]`
* `"Would you like some pie?" = [9092, 72, 155, 202, 19851, 102, 1]`
2. #### Run `saved_model_cli` to obtain the predicted translations
The encoded inputs should be padded so that they are the same length. The padding token is `0`.
```
ENCODED_INPUTS="[[26228, 145, 178, 1, 0, 0, 0], \
[15431, 13966, 36, 178, 3, 1, 0], \
[9092, 72, 155, 202, 19851, 102, 1]]"
```
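If you are scripting this step, the padding can be done with a small helper like the hypothetical one below (not part of this repository), applied to the encoded inputs from step 1.
```
def pad_to_same_length(sequences, pad_id=0):
  """Pads each list of token IDs with pad_id so that all lists have equal length."""
  max_len = max(len(seq) for seq in sequences)
  return [seq + [pad_id] * (max_len - len(seq)) for seq in sequences]

encoded = [[6170, 3731, 178, 207, 1],
           [15431, 13966, 36, 178, 3, 1],
           [9092, 72, 155, 202, 19851, 102, 1]]
print(pad_to_same_length(encoded))  # every row padded with 0 to the longest length
```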
Now, use the `run` command with `saved_model_cli` to get the outputs.
```
saved_model_cli run --dir=$SAVED_MODEL_DIR --tag_set=serve --signature_def=translate \
--input_expr="input=$ENCODED_INPUTS"
```
The outputs will look similar to:
```
Result for output key outputs:
[[18744 145 297 1 0 0 0 0 0 0 0 0
0 0]
[ 5450 4642 21 11 297 3 1 0 0 0 0 0
0 0]
[25940 22 66 103 21713 31 102 1 0 0 0 0
0 0]]
Result for output key scores:
[-1.5493642 -1.4032784 -3.252089 ]
```
3. #### Decode the outputs to strings.
Use the `Subtokenizer` and vocab file as described in step 1 to decode the output integer arrays.
```
from official.transformer.utils.tokenizer import Subtokenizer
s = Subtokenizer(PATH_TO_VOCAB_FILE)
print(s.decode([18744, 145, 297, 1]))
```
The decoded outputs from above are:
* `[18744, 145, 297, 1] = "Hallo Welt<EOS>"`
* `[5450, 4642, 21, 11, 297, 3, 1] = "Abschied von der Welt.<EOS>"`
* `[25940, 22, 66, 103, 21713, 31, 102, 1] = "Möchten Sie einen Kuchen?<EOS>"`
## Implementation overview
A brief look at each component in the code:
### Model Definition
The [model](model) subdirectory contains the implementation of the Transformer model. The following files define the Transformer model and its layers:
* [transformer.py](model/transformer.py): Defines the transformer model and its encoder/decoder layer stacks.
* [embedding_layer.py](model/embedding_layer.py): Contains the layer that calculates the embeddings. The embedding weights are shared with the output projection and are also used to calculate the pre-softmax logits from the decoder output (see the sketch after this list).
* [attention_layer.py](model/attention_layer.py): Defines the multi-headed and self attention layers that are used in the encoder/decoder stacks.
* [ffn_layer.py](model/ffn_layer.py): Defines the feedforward network that is used in the encoder/decoder stacks. The network is composed of 2 fully connected layers.
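As a rough sketch of the embedding weight sharing mentioned above, the snippet below shows how one matrix can act both as the embedding lookup table and as the projection that produces the pre-softmax logits; the shapes and names are made up for illustration and do not mirror the layer's actual API.
```
import numpy as np

vocab_size, hidden_size = 6, 4
embedding_matrix = np.random.randn(vocab_size, hidden_size)

# Input side: token IDs -> hidden vectors (embedding lookup).
token_ids = np.array([2, 5, 0])
embedded = embedding_matrix[token_ids]              # [length, hidden_size]

# Output side: decoder states -> pre-softmax logits, reusing the same weights.
decoder_output = np.random.randn(3, hidden_size)    # stand-in for decoder states
logits = decoder_output @ embedding_matrix.T        # [length, vocab_size]
print(embedded.shape, logits.shape)                 # (3, 4) (3, 6)
```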
Other files:
* [beam_search.py](model/beam_search.py) contains the beam search implementation, which is used during model inference to find high-scoring translations (a simplified sketch follows this list).
* [model_params.py](model/model_params.py) contains the parameters used for the big and base models.
* [model_utils.py](model/model_utils.py) defines some helper functions used in the model (calculating padding, bias, etc.).
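For intuition about the beam search mentioned above, here is a deliberately simplified, framework-free sketch that keeps the `beam_size` highest-scoring partial sequences at each step; the real [beam_search.py](model/beam_search.py) operates on batched tensors and additionally handles EOS termination, length normalization, and decoder caching, none of which are shown here.
```
import math

def beam_search(next_log_probs, start_id, eos_id, beam_size, max_len):
  """Tiny beam search: keeps the beam_size highest-scoring partial sequences.

  next_log_probs(seq) must return a dict mapping token ID -> log probability
  of that token following the sequence `seq`.
  """
  beams = [([start_id], 0.0)]  # (sequence, cumulative log probability)
  for _ in range(max_len):
    candidates = []
    for seq, score in beams:
      if seq[-1] == eos_id:          # finished sequences are carried over as-is
        candidates.append((seq, score))
        continue
      for token, logp in next_log_probs(seq).items():
        candidates.append((seq + [token], score + logp))
    # Keep only the beam_size best candidates.
    beams = sorted(candidates, key=lambda c: c[1], reverse=True)[:beam_size]
  return beams

# Toy model: after any prefix, token 1 is likely and token 2 (EOS) less so.
toy = lambda seq: {1: math.log(0.6), 2: math.log(0.4)}
print(beam_search(toy, start_id=0, eos_id=2, beam_size=2, max_len=3))
```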
### Model Estimator
[transformer_main.py](transformer_main.py) creates an `Estimator` to train and evaluate the model.
Helper functions:
* [utils/dataset.py](utils/dataset.py): contains functions for creating a `dataset` that is passed to the `Estimator`.
* [utils/metrics.py](utils/metrics.py): defines the metric functions used by the `Estimator` to evaluate the model.
### Other scripts
Aside from the main file to train the Transformer model, we provide other scripts for using the model or downloading the data:
#### Data download and preprocessing
[data_download.py](data_download.py) downloads and extracts data, then uses `Subtokenizer` to tokenize strings into arrays of integer IDs. The integer arrays are converted to `tf.Example` protos and saved in the TFRecord format.
The data is downloaded from the Workshop on Machine Translation (WMT) [news translation task](http://www.statmt.org/wmt17/translation-task.html). The following datasets are used:
* Europarl v7
* Common Crawl corpus
* News Commentary v12
See the [download section](http://www.statmt.org/wmt17/translation-task.html#download) to explore the raw datasets. The parameters in this model are tuned to fit the English-German translation data, so the EN-DE texts are extracted from the downloaded compressed files.
The text is transformed into arrays of integer IDs using the `Subtokenizer` defined in [`utils/tokenizer.py`](utils/tokenizer.py). During initialization of the `Subtokenizer`, the raw training data is used to generate a vocabulary list containing common subtokens.
The target vocabulary size of the WMT dataset is 32,768. The set of subtokens is found through binary search on the minimum number of times a subtoken appears in the data. The actual vocabulary size is 33,708, and is stored in a 324kB file.
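A heavily simplified sketch of that binary search is shown below: it searches over the minimum occurrence count until the number of surviving subtoken candidates is as close as possible to the target size. The real `Subtokenizer` also regenerates the candidate subtokens at each step, which is not modeled here; all names and counts are illustrative.
```
def search_min_count(token_counts, target_vocab_size, low=1, high=1000):
  """Binary-searches the minimum occurrence count whose vocabulary size is
  closest to target_vocab_size (a higher threshold keeps fewer candidates)."""
  best_threshold, best_vocab = None, None
  while low <= high:
    mid = (low + high) // 2
    vocab = [tok for tok, n in token_counts.items() if n >= mid]
    if best_vocab is None or (abs(len(vocab) - target_vocab_size) <
                              abs(len(best_vocab) - target_vocab_size)):
      best_threshold, best_vocab = mid, vocab
    if len(vocab) > target_vocab_size:
      low = mid + 1    # too many candidates survive; raise the threshold
    else:
      high = mid - 1   # at or below the target; try a lower threshold
  return best_threshold, best_vocab

counts = {'the_': 900, 'and_': 500, 'trans': 40, 'former_': 35, 'xylo': 2}
print(search_min_count(counts, target_vocab_size=4))
```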
#### Translation
Translation is defined in [translate.py](translate.py). First, `Subtokenizer` tokenizes the input; the vocabulary file is the same one used to tokenize the training/eval files. Next, beam search is used to find the combination of tokens that maximizes the probability output by the model decoder. The tokens are then converted back to strings with `Subtokenizer`.
#### BLEU computation
[compute_bleu.py](compute_bleu.py): Implementation from [https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/utils/bleu_hook.py](https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/utils/bleu_hook.py).
### Test dataset
The [newstest2014 files](https://storage.googleapis.com/tf-perf-public/official_transformer/test_data/newstest2014.tgz)
are extracted from the [NMT Seq2Seq tutorial](https://google.github.io/seq2seq/nmt/#download-data).
The raw text files are converted from the SGM format of the
[WMT 2016](http://www.statmt.org/wmt16/translation-task.html) test sets. The
newstest2014 files are placed in `$DATA_DIR` when `data_download.py` is executed.
## Term definitions
**Steps / Epochs**:
* Step: unit for processing a single batch of data
* Epoch: a complete run through the dataset
Example: Consider a training dataset with 100 examples that is divided into 20 batches with 5 examples per batch. A single training step trains the model on one batch. After 20 training steps, the model will have trained on every batch in the dataset, or one epoch.
**Subtoken**: Words are referred to as tokens, and parts of words are referred to as 'subtokens'. For example, the word 'inclined' may be split into `['incline', 'd_']`. The '\_' indicates the end of the token. The subtoken vocabulary list is guaranteed to contain the alphabet (including numbers and special characters), so all words can be tokenized.
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Implementation of multiheaded attention and self-attention layers."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow.compat.v1 as tf
class Attention(tf.layers.Layer):
"""Multi-headed attention layer."""
def __init__(self, hidden_size, num_heads, attention_dropout, train):
if hidden_size % num_heads != 0:
raise ValueError("Hidden size must be evenly divisible by the number of "
"heads.")
super(Attention, self).__init__()
self.hidden_size = hidden_size
self.num_heads = num_heads
self.attention_dropout = attention_dropout
self.train = train
# Layers for linearly projecting the queries, keys, and values.
self.q_dense_layer = tf.layers.Dense(hidden_size, use_bias=False, name="q")
self.k_dense_layer = tf.layers.Dense(hidden_size, use_bias=False, name="k")
self.v_dense_layer = tf.layers.Dense(hidden_size, use_bias=False, name="v")
self.output_dense_layer = tf.layers.Dense(hidden_size, use_bias=False,
name="output_transform")
def split_heads(self, x):
"""Split x into different heads, and transpose the resulting value.
The tensor is transposed to ensure the inner dimensions hold the correct
values during the matrix multiplication.
Args:
x: A tensor with shape [batch_size, length, hidden_size]
Returns:
A tensor with shape [batch_size, num_heads, length, hidden_size/num_heads]
"""
with tf.name_scope("split_heads"):
batch_size = tf.shape(x)[0]
length = tf.shape(x)[1]
# Calculate depth of last dimension after it has been split.
depth = (self.hidden_size // self.num_heads)
# Split the last dimension
x = tf.reshape(x, [batch_size, length, self.num_heads, depth])
# Transpose the result
return tf.transpose(x, [0, 2, 1, 3])
def combine_heads(self, x):
"""Combine tensor that has been split.
Args:
x: A tensor [batch_size, num_heads, length, hidden_size/num_heads]
Returns:
A tensor with shape [batch_size, length, hidden_size]
"""
with tf.name_scope("combine_heads"):
batch_size = tf.shape(x)[0]
length = tf.shape(x)[2]
x = tf.transpose(x, [0, 2, 1, 3]) # --> [batch, length, num_heads, depth]
return tf.reshape(x, [batch_size, length, self.hidden_size])
def call(self, x, y, bias, cache=None):
"""Apply attention mechanism to x and y.
Args:
x: a tensor with shape [batch_size, length_x, hidden_size]
y: a tensor with shape [batch_size, length_y, hidden_size]
bias: attention bias that will be added to the result of the dot product.
cache: (Used during prediction) dictionary with tensors containing results
of previous attentions. The dictionary must have the items:
{"k": tensor with shape [batch_size, i, key_channels],
"v": tensor with shape [batch_size, i, value_channels]}
where i is the current decoded length.
Returns:
Attention layer output with shape [batch_size, length_x, hidden_size]
"""
# Linearly project the query (q), key (k) and value (v) using different
# learned projections. This is in preparation for splitting them into
# multiple heads. Multi-head attention uses multiple queries, keys, and
# values rather than regular attention (which uses a single q, k, v).
q = self.q_dense_layer(x)
k = self.k_dense_layer(y)
v = self.v_dense_layer(y)
if cache is not None:
# Combine cached keys and values with new keys and values.
k = tf.concat([cache["k"], k], axis=1)
v = tf.concat([cache["v"], v], axis=1)
# Update cache
cache["k"] = k
cache["v"] = v
# Split q, k, v into heads.
q = self.split_heads(q)
k = self.split_heads(k)
v = self.split_heads(v)
# Scale q to prevent the dot product between q and k from growing too large.
depth = (self.hidden_size // self.num_heads)
q *= depth ** -0.5
# Calculate dot product attention
logits = tf.matmul(q, k, transpose_b=True)
logits += bias
weights = tf.nn.softmax(logits, name="attention_weights")
if self.train:
weights = tf.nn.dropout(weights, 1.0 - self.attention_dropout)
attention_output = tf.matmul(weights, v)
# Recombine heads --> [batch_size, length, hidden_size]
attention_output = self.combine_heads(attention_output)
# Run the combined outputs through another linear projection layer.
attention_output = self.output_dense_layer(attention_output)
return attention_output
class SelfAttention(Attention):
"""Multiheaded self-attention layer."""
def call(self, x, bias, cache=None):
return super(SelfAttention, self).call(x, x, bias, cache)