update TF code

441c8f40 · qianyj · ec90ad8e · ec90ad8e · ec90ad8e · ec90ad8e
Commit 441c8f40 authored Aug 01, 2022 by qianyj
20 changed files
--- a/TensorFlow/ComputeVision/Accuracy_Validation/ResNet50_Official/official/resnet/keras/keras_cifar_benchmark.py
+++ b/TensorFlow/ComputeVision/Accuracy_Validation/ResNet50_Official/official/resnet/keras/keras_cifar_benchmark.py
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Executes Keras benchmarks and accuracy tests."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import time
-from absl import flags
-
-from official.resnet import cifar10_main as cifar_main
-from official.resnet.keras import keras_benchmark
-from official.resnet.keras import keras_cifar_main
-from official.resnet.keras import keras_common
-
-DATA_DIR = '/data/cifar10_data/cifar-10-batches-bin'
-MIN_TOP_1_ACCURACY = 0.925
-MAX_TOP_1_ACCURACY = 0.938
-
-FLAGS = flags.FLAGS
-
-
-class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark):
-  """Accuracy tests for ResNet56 Keras CIFAR-10."""
-
-  def __init__(self, output_dir=None):
-    flag_methods = [
-        keras_common.define_keras_flags, cifar_main.define_cifar_flags
-    ]
-
-    super(Resnet56KerasAccuracy, self).__init__(
-        output_dir=output_dir, flag_methods=flag_methods)
-
-  def benchmark_graph_1_gpu(self):
-    """Test keras based model with Keras fit and distribution strategies."""
-    self._setup()
-    FLAGS.num_gpus = 1
-    FLAGS.data_dir = DATA_DIR
-    FLAGS.batch_size = 128
-    FLAGS.train_epochs = 182
-    FLAGS.model_dir = self._get_model_dir('keras_resnet56_1_gpu')
-    FLAGS.dtype = 'fp32'
-    self._run_and_report_benchmark()
-
-  def benchmark_1_gpu(self):
-    """Test keras based model with eager and distribution strategies."""
-    self._setup()
-    FLAGS.num_gpus = 1
-    FLAGS.data_dir = DATA_DIR
-    FLAGS.batch_size = 128
-    FLAGS.train_epochs = 182
-    FLAGS.model_dir = self._get_model_dir('keras_resnet56_eager_1_gpu')
-    FLAGS.dtype = 'fp32'
-    FLAGS.enable_eager = True
-    self._run_and_report_benchmark()
-
-  def benchmark_2_gpu(self):
-    """Test keras based model with eager and distribution strategies."""
-    self._setup()
-    FLAGS.num_gpus = 2
-    FLAGS.data_dir = DATA_DIR
-    FLAGS.batch_size = 128
-    FLAGS.train_epochs = 182
-    FLAGS.model_dir = self._get_model_dir('keras_resnet56_eager_2_gpu')
-    FLAGS.dtype = 'fp32'
-    FLAGS.enable_eager = True
-    self._run_and_report_benchmark()
-
-  def benchmark_graph_2_gpu(self):
-    """Test keras based model with Keras fit and distribution strategies."""
-    self._setup()
-    FLAGS.num_gpus = 2
-    FLAGS.data_dir = DATA_DIR
-    FLAGS.batch_size = 128
-    FLAGS.train_epochs = 182
-    FLAGS.model_dir = self._get_model_dir('keras_resnet56_2_gpu')
-    FLAGS.dtype = 'fp32'
-    self._run_and_report_benchmark()
-
-  def benchmark_graph_1_gpu_no_dist_strat(self):
-    """Test keras based model with Keras fit but not distribution strategies."""
-    self._setup()
-    FLAGS.turn_off_distribution_strategy = True
-    FLAGS.num_gpus = 1
-    FLAGS.data_dir = DATA_DIR
-    FLAGS.batch_size = 128
-    FLAGS.train_epochs = 182
-    FLAGS.model_dir = self._get_model_dir('keras_resnet56_no_dist_strat_1_gpu')
-    FLAGS.dtype = 'fp32'
-    self._run_and_report_benchmark()
-
-  def _run_and_report_benchmark(self):
-    start_time_sec = time.time()
-    stats = keras_cifar_main.run(FLAGS)
-    wall_time_sec = time.time() - start_time_sec
-
-    super(Resnet56KerasAccuracy, self)._report_benchmark(
-        stats,
-        wall_time_sec,
-        top_1_min=MIN_TOP_1_ACCURACY,
-        top_1_max=MAX_TOP_1_ACCURACY,
-        total_batch_size=FLAGS.batch_size,
-        log_steps=100)
-
-
-class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
-  """Short performance tests for ResNet56 via Keras and CIFAR-10."""
-
-  def __init__(self, output_dir=None, default_flags=None):
-    flag_methods = [
-        keras_common.define_keras_flags, cifar_main.define_cifar_flags
-    ]
-
-    super(Resnet56KerasBenchmarkBase, self).__init__(
-        output_dir=output_dir,
-        flag_methods=flag_methods,
-        default_flags=default_flags)
-
-  def _run_and_report_benchmark(self):
-    start_time_sec = time.time()
-    stats = keras_cifar_main.run(FLAGS)
-    wall_time_sec = time.time() - start_time_sec
-
-    super(Resnet56KerasBenchmarkBase, self)._report_benchmark(
-        stats,
-        wall_time_sec,
-        total_batch_size=FLAGS.batch_size,
-        log_steps=FLAGS.log_steps)
-
-  def benchmark_1_gpu_no_dist_strat(self):
-    self._setup()
-    FLAGS.num_gpus = 1
-    FLAGS.enable_eager = True
-    FLAGS.turn_off_distribution_strategy = True
-    FLAGS.batch_size = 128
-    self._run_and_report_benchmark()
-
-  def benchmark_graph_1_gpu_no_dist_strat(self):
-    self._setup()
-    FLAGS.num_gpus = 1
-    FLAGS.enable_eager = False
-    FLAGS.turn_off_distribution_strategy = True
-    FLAGS.batch_size = 128
-    self._run_and_report_benchmark()
-
-  def benchmark_1_gpu(self):
-    self._setup()
-    FLAGS.num_gpus = 1
-    FLAGS.enable_eager = True
-    FLAGS.turn_off_distribution_strategy = False
-    FLAGS.batch_size = 128
-    self._run_and_report_benchmark()
-
-  def benchmark_graph_1_gpu(self):
-    self._setup()
-    FLAGS.num_gpus = 1
-    FLAGS.enable_eager = False
-    FLAGS.turn_off_distribution_strategy = False
-    FLAGS.batch_size = 128
-    self._run_and_report_benchmark()
-
-  def benchmark_2_gpu(self):
-    self._setup()
-    FLAGS.num_gpus = 2
-    FLAGS.enable_eager = True
-    FLAGS.turn_off_distribution_strategy = False
-    FLAGS.batch_size = 128 * 2  # 2 GPUs
-    self._run_and_report_benchmark()
-
-  def benchmark_graph_2_gpu(self):
-    self._setup()
-    FLAGS.num_gpus = 2
-    FLAGS.enable_eager = False
-    FLAGS.turn_off_distribution_strategy = False
-    FLAGS.batch_size = 128 * 2  # 2 GPUs
-    self._run_and_report_benchmark()
-
-
-class Resnet56KerasBenchmarkSynth(Resnet56KerasBenchmarkBase):
-  """Synthetic benchmarks for ResNet56 and Keras."""
-
-  def __init__(self, output_dir=None):
-    def_flags = {}
-    def_flags['skip_eval'] = True
-    def_flags['use_synthetic_data'] = True
-    def_flags['train_steps'] = 110
-    def_flags['log_steps'] = 10
-
-    super(Resnet56KerasBenchmarkSynth, self).__init__(
-        output_dir=output_dir, default_flags=def_flags)
-
-
-class Resnet56KerasBenchmarkReal(Resnet56KerasBenchmarkBase):
-  """Real data benchmarks for ResNet56 and Keras."""
-
-  def __init__(self, output_dir=None):
-    def_flags = {}
-    def_flags['skip_eval'] = True
-    def_flags['data_dir'] = DATA_DIR
-    def_flags['train_steps'] = 110
-    def_flags['log_steps'] = 10
-
-    super(Resnet56KerasBenchmarkReal, self).__init__(
-        output_dir=output_dir, default_flags=def_flags)
--- a/TensorFlow/ComputeVision/Accuracy_Validation/ResNet50_Official/official/resnet/keras/keras_cifar_main.py
+++ b/TensorFlow/ComputeVision/Accuracy_Validation/ResNet50_Official/official/resnet/keras/keras_cifar_main.py
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Runs a ResNet model on the Cifar-10 dataset."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from absl import app as absl_app
-from absl import flags
-import tensorflow as tf  # pylint: disable=g-bad-import-order
-
-from official.resnet import cifar10_main as cifar_main
-from official.resnet.keras import keras_common
-from official.resnet.keras import resnet_cifar_model
-from official.utils.flags import core as flags_core
-from official.utils.logs import logger
-from official.utils.misc import distribution_utils
-
-
-LR_SCHEDULE = [  # (multiplier, epoch to start) tuples
-    (0.1, 91), (0.01, 136), (0.001, 182)
-]
-
-
-def learning_rate_schedule(current_epoch,
-                           current_batch,
-                           batches_per_epoch,
-                           batch_size):
-  """Handles linear scaling rule and LR decay.
-
-  Scale learning rate at epoch boundaries provided in LR_SCHEDULE by the
-  provided scaling factor.
-
-  Args:
-    current_epoch: integer, current epoch indexed from 0.
-    current_batch: integer, current batch in the current epoch, indexed from 0.
-    batches_per_epoch: integer, number of steps in an epoch.
-    batch_size: integer, total batch sized.
-
-  Returns:
-    Adjusted learning rate.
-  """
-  initial_learning_rate = keras_common.BASE_LEARNING_RATE * batch_size / 128
-  learning_rate = initial_learning_rate
-  for mult, start_epoch in LR_SCHEDULE:
-    if current_epoch >= start_epoch:
-      learning_rate = initial_learning_rate * mult
-    else:
-      break
-  return learning_rate
-
-
-def parse_record_keras(raw_record, is_training, dtype):
-  """Parses a record containing a training example of an image.
-
-  The input record is parsed into a label and image, and the image is passed
-  through preprocessing steps (cropping, flipping, and so on).
-
-  This method converts the label to one hot to fit the loss function.
-
-  Args:
-    raw_record: scalar Tensor tf.string containing a serialized
-      Example protocol buffer.
-    is_training: A boolean denoting whether the input is for training.
-    dtype: Data type to use for input images.
-
-  Returns:
-    Tuple with processed image tensor and one-hot-encoded label tensor.
-  """
-  image, label = cifar_main.parse_record(raw_record, is_training, dtype)
-  label = tf.sparse_to_dense(label, (cifar_main.NUM_CLASSES,), 1)
-  return image, label
-
-
-def run(flags_obj):
-  """Run ResNet Cifar-10 training and eval loop using native Keras APIs.
-
-  Args:
-    flags_obj: An object containing parsed flag values.
-
-  Raises:
-    ValueError: If fp16 is passed as it is not currently supported.
-
-  Returns:
-    Dictionary of training and eval stats.
-  """
-  if flags_obj.enable_eager:
-    tf.enable_eager_execution()
-
-  dtype = flags_core.get_tf_dtype(flags_obj)
-  if dtype == 'fp16':
-    raise ValueError('dtype fp16 is not supported in Keras. Use the default '
-                     'value(fp32).')
-
-  data_format = flags_obj.data_format
-  if data_format is None:
-    data_format = ('channels_first'
-                   if tf.test.is_built_with_cuda() else 'channels_last')
-  tf.keras.backend.set_image_data_format(data_format)
-
-  if flags_obj.use_synthetic_data:
-    input_fn = keras_common.get_synth_input_fn(
-        height=cifar_main.HEIGHT,
-        width=cifar_main.WIDTH,
-        num_channels=cifar_main.NUM_CHANNELS,
-        num_classes=cifar_main.NUM_CLASSES,
-        dtype=flags_core.get_tf_dtype(flags_obj))
-  else:
-    input_fn = cifar_main.input_fn
-
-  train_input_dataset = input_fn(
-      is_training=True,
-      data_dir=flags_obj.data_dir,
-      batch_size=flags_obj.batch_size,
-      num_epochs=flags_obj.train_epochs,
-      parse_record_fn=parse_record_keras)
-
-  eval_input_dataset = input_fn(
-      is_training=False,
-      data_dir=flags_obj.data_dir,
-      batch_size=flags_obj.batch_size,
-      num_epochs=flags_obj.train_epochs,
-      parse_record_fn=parse_record_keras)
-
-  strategy = distribution_utils.get_distribution_strategy(
-      num_gpus=flags_obj.num_gpus,
-      turn_off_distribution_strategy=flags_obj.turn_off_distribution_strategy)
-
-  strategy_scope = keras_common.get_strategy_scope(strategy)
-
-  with strategy_scope:
-    optimizer = keras_common.get_optimizer()
-    model = resnet_cifar_model.resnet56(classes=cifar_main.NUM_CLASSES)
-
-    model.compile(loss='categorical_crossentropy',
-                  optimizer=optimizer,
-                  metrics=['categorical_accuracy'])
-
-  time_callback, tensorboard_callback, lr_callback = keras_common.get_callbacks(
-      learning_rate_schedule, cifar_main.NUM_IMAGES['train'])
-
-  train_steps = cifar_main.NUM_IMAGES['train'] // flags_obj.batch_size
-  train_epochs = flags_obj.train_epochs
-
-  if flags_obj.train_steps:
-    train_steps = min(flags_obj.train_steps, train_steps)
-    train_epochs = 1
-
-  num_eval_steps = (cifar_main.NUM_IMAGES['validation'] //
-                    flags_obj.batch_size)
-
-  validation_data = eval_input_dataset
-  if flags_obj.skip_eval:
-    tf.keras.backend.set_learning_phase(1)
-    num_eval_steps = None
-    validation_data = None
-
-  history = model.fit(train_input_dataset,
-                      epochs=train_epochs,
-                      steps_per_epoch=train_steps,
-                      callbacks=[
-                          time_callback,
-                          lr_callback,
-                          tensorboard_callback
-                      ],
-                      validation_steps=num_eval_steps,
-                      validation_data=validation_data,
-                      verbose=2)
-  eval_output = None
-  if not flags_obj.skip_eval:
-    eval_output = model.evaluate(eval_input_dataset,
-                                 steps=num_eval_steps,
-                                 verbose=1)
-  stats = keras_common.build_stats(history, eval_output, time_callback)
-  return stats
-
-
-def main(_):
-  with logger.benchmark_context(flags.FLAGS):
-    return run(flags.FLAGS)
-
-
-if __name__ == '__main__':
-  tf.logging.set_verbosity(tf.logging.INFO)
-  cifar_main.define_cifar_flags()
-  keras_common.define_keras_flags()
-  absl_app.run(main)
--- a/TensorFlow/ComputeVision/Accuracy_Validation/ResNet50_Official/official/resnet/keras/keras_common.py
+++ b/TensorFlow/ComputeVision/Accuracy_Validation/ResNet50_Official/official/resnet/keras/keras_common.py
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Common util functions and classes used by both keras cifar and imagenet."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import time
-
-import numpy as np
-
-# pylint: disable=g-bad-import-order
-from absl import flags
-import tensorflow as tf
-from tensorflow.python.keras.optimizer_v2 import (gradient_descent as
-                                                  gradient_descent_v2)
-
-FLAGS = flags.FLAGS
-BASE_LEARNING_RATE = 0.1  # This matches Jing's version.
-TRAIN_TOP_1 = 'training_accuracy_top_1'
-
-
-class BatchTimestamp(object):
-  """A structure to store batch time stamp."""
-
-  def __init__(self, batch_index, timestamp):
-    self.batch_index = batch_index
-    self.timestamp = timestamp
-
-
-class TimeHistory(tf.keras.callbacks.Callback):
-  """Callback for Keras models."""
-
-  def __init__(self, batch_size, log_steps):
-    """Callback for logging performance (# image/second).
-
-    Args:
-      batch_size: Total batch size.
-
-    """
-    self.batch_size = batch_size
-    super(TimeHistory, self).__init__()
-    self.log_steps = log_steps
-
-    # Logs start of step 0 then end of each step based on log_steps interval.
-    self.timestamp_log = []
-
-  def on_train_begin(self, logs=None):
-    self.record_batch = True
-
-  def on_train_end(self, logs=None):
-    self.train_finish_time = time.time()
-
-  def on_batch_begin(self, batch, logs=None):
-    if self.record_batch:
-      timestamp = time.time()
-      self.start_time = timestamp
-      self.record_batch = False
-      if batch == 0:
-        self.timestamp_log.append(BatchTimestamp(batch, timestamp))
-
-  def on_batch_end(self, batch, logs=None):
-    if batch % self.log_steps == 0:
-      timestamp = time.time()
-      elapsed_time = timestamp - self.start_time
-      examples_per_second = (self.batch_size * self.log_steps) / elapsed_time
-      if batch != 0:
-        self.record_batch = True
-        self.timestamp_log.append(BatchTimestamp(batch, timestamp))
-        tf.logging.info("BenchmarkMetric: {'num_batches':%d, 'time_taken': %f,"
-                        "'images_per_second': %f}" %
-                        (batch, elapsed_time, examples_per_second))
-
-
-class LearningRateBatchScheduler(tf.keras.callbacks.Callback):
-  """Callback to update learning rate on every batch (not epoch boundaries).
-
-  N.B. Only support Keras optimizers, not TF optimizers.
-
-  Args:
-      schedule: a function that takes an epoch index and a batch index as input
-          (both integer, indexed from 0) and returns a new learning rate as
-          output (float).
-  """
-
-  def __init__(self, schedule, batch_size, num_images):
-    super(LearningRateBatchScheduler, self).__init__()
-    self.schedule = schedule
-    self.batches_per_epoch = num_images / batch_size
-    self.batch_size = batch_size
-    self.epochs = -1
-    self.prev_lr = -1
-
-  def on_epoch_begin(self, epoch, logs=None):
-    if not hasattr(self.model.optimizer, 'learning_rate'):
-      raise ValueError('Optimizer must have a "learning_rate" attribute.')
-    self.epochs += 1
-
-  def on_batch_begin(self, batch, logs=None):
-    """Executes before step begins."""
-    lr = self.schedule(self.epochs,
-                       batch,
-                       self.batches_per_epoch,
-                       self.batch_size)
-    if not isinstance(lr, (float, np.float32, np.float64)):
-      raise ValueError('The output of the "schedule" function should be float.')
-    if lr != self.prev_lr:
-      self.model.optimizer.learning_rate = lr  # lr should be a float here
-      self.prev_lr = lr
-      tf.logging.debug('Epoch %05d Batch %05d: LearningRateBatchScheduler '
-                       'change learning rate to %s.', self.epochs, batch, lr)
-
-
-def get_optimizer():
-  """Returns optimizer to use."""
-  # The learning_rate is overwritten at the beginning of each step by callback.
-  return gradient_descent_v2.SGD(learning_rate=0.1, momentum=0.9)
-
-
-def get_callbacks(learning_rate_schedule_fn, num_images):
-  """Returns common callbacks."""
-  time_callback = TimeHistory(FLAGS.batch_size, FLAGS.log_steps)
-
-  tensorboard_callback = tf.keras.callbacks.TensorBoard(
-      log_dir=FLAGS.model_dir)
-
-  lr_callback = LearningRateBatchScheduler(
-      learning_rate_schedule_fn,
-      batch_size=FLAGS.batch_size,
-      num_images=num_images)
-
-  return time_callback, tensorboard_callback, lr_callback
-
-
-def build_stats(history, eval_output, time_callback):
-  """Normalizes and returns dictionary of stats.
-
-  Args:
-    history: Results of the training step. Supports both categorical_accuracy
-      and sparse_categorical_accuracy.
-    eval_output: Output of the eval step. Assumes first value is eval_loss and
-      second value is accuracy_top_1.
-    time_callback: Time tracking callback likely used during keras.fit.
-
-  Returns:
-    Dictionary of normalized results.
-  """
-  stats = {}
-  if eval_output:
-    stats['accuracy_top_1'] = eval_output[1].item()
-    stats['eval_loss'] = eval_output[0].item()
-
-  if history and history.history:
-    train_hist = history.history
-    # Gets final loss from training.
-    stats['loss'] = train_hist['loss'][-1].item()
-    # Gets top_1 training accuracy.
-    if 'categorical_accuracy' in train_hist:
-      stats[TRAIN_TOP_1] = train_hist['categorical_accuracy'][-1].item()
-    elif 'sparse_categorical_accuracy' in train_hist:
-      stats[TRAIN_TOP_1] = train_hist['sparse_categorical_accuracy'][-1].item()
-
-  if time_callback:
-    timestamp_log = time_callback.timestamp_log
-    stats['step_timestamp_log'] = timestamp_log
-    stats['train_finish_time'] = time_callback.train_finish_time
-    if len(timestamp_log) > 1:
-      stats['avg_exp_per_second'] = (
-          time_callback.batch_size * time_callback.log_steps *
-          (len(time_callback.timestamp_log)-1) /
-          (timestamp_log[-1].timestamp - timestamp_log[0].timestamp))
-
-  return stats
-
-
-def define_keras_flags():
-  flags.DEFINE_boolean(name='enable_eager', default=False, help='Enable eager?')
-  flags.DEFINE_boolean(name='skip_eval', default=False, help='Skip evaluation?')
-  flags.DEFINE_integer(
-      name='train_steps', default=None,
-      help='The number of steps to run for training. If it is larger than '
-      '# batches per epoch, then use # batches per epoch. When this flag is '
-      'set, only one epoch is going to run for training.')
-  flags.DEFINE_integer(
-      name='log_steps', default=100,
-      help='For every log_steps, we log the timing information such as '
-      'examples per second. Besides, for every log_steps, we store the '
-      'timestamp of a batch end.')
-
-
-def get_synth_input_fn(height, width, num_channels, num_classes,
-                       dtype=tf.float32):
-  """Returns an input function that returns a dataset with random data.
-
-  This input_fn returns a data set that iterates over a set of random data and
-  bypasses all preprocessing, e.g. jpeg decode and copy. The host to device
-  copy is still included. This used to find the upper throughput bound when
-  tuning the full input pipeline.
-
-  Args:
-    height: Integer height that will be used to create a fake image tensor.
-    width: Integer width that will be used to create a fake image tensor.
-    num_channels: Integer depth that will be used to create a fake image tensor.
-    num_classes: Number of classes that should be represented in the fake labels
-      tensor
-    dtype: Data type for features/images.
-
-  Returns:
-    An input_fn that can be used in place of a real one to return a dataset
-    that can be used for iteration.
-  """
-  # pylint: disable=unused-argument
-  def input_fn(is_training, data_dir, batch_size, *args, **kwargs):
-    """Returns dataset filled with random data."""
-    # Synthetic input should be within [0, 255].
-    inputs = tf.truncated_normal(
-        [height, width, num_channels],
-        dtype=dtype,
-        mean=127,
-        stddev=60,
-        name='synthetic_inputs')
-
-    labels = tf.random_uniform(
-        [1],
-        minval=0,
-        maxval=num_classes - 1,
-        dtype=tf.int32,
-        name='synthetic_labels')
-    data = tf.data.Dataset.from_tensors((inputs, labels)).repeat()
-    data = data.batch(batch_size)
-    data = data.prefetch(buffer_size=tf.contrib.data.AUTOTUNE)
-    return data
-
-  return input_fn
-
-
-def get_strategy_scope(strategy):
-  if strategy:
-    strategy_scope = strategy.scope()
-  else:
-    strategy_scope = DummyContextManager()
-
-  return strategy_scope
-
-
-class DummyContextManager(object):
-
-  def __enter__(self):
-    pass
-
-  def __exit__(self, *args):
-    pass
--- a/TensorFlow/ComputeVision/Accuracy_Validation/ResNet50_Official/official/resnet/keras/keras_common_test.py
+++ b/TensorFlow/ComputeVision/Accuracy_Validation/ResNet50_Official/official/resnet/keras/keras_common_test.py
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Tests for the keras_common module."""
-from __future__ import absolute_import
-from __future__ import print_function
-
-from mock import Mock
-import numpy as np
-import tensorflow as tf  # pylint: disable=g-bad-import-order
-
-from official.resnet.keras import keras_common
-
-tf.logging.set_verbosity(tf.logging.ERROR)
-
-
-class KerasCommonTests(tf.test.TestCase):
-  """Tests for keras_common."""
-
-  @classmethod
-  def setUpClass(cls):  # pylint: disable=invalid-name
-    super(KerasCommonTests, cls).setUpClass()
-
-  def test_build_stats(self):
-
-    history = self._build_history(1.145, cat_accuracy=.99988)
-    eval_output = self._build_eval_output(.56432111, 5.990)
-    th = keras_common.TimeHistory(128, 100)
-
-    th.batch_start_timestamps = [1, 2, 3]
-    th.batch_end_timestamps = [4, 5, 6]
-    th.train_finish_time = 12345
-    stats = keras_common.build_stats(history, eval_output, th)
-
-    self.assertEqual(1.145, stats['loss'])
-    self.assertEqual(.99988, stats['training_accuracy_top_1'])
-
-    self.assertEqual(.56432111, stats['accuracy_top_1'])
-    self.assertEqual(5.990, stats['eval_loss'])
-
-    self.assertItemsEqual([1, 2, 3], stats['batch_start_timestamps'])
-    self.assertItemsEqual([4, 5, 6], stats['batch_end_timestamps'])
-    self.assertEqual(12345, stats['train_finish_time'])
-
-  def test_build_stats_sparse(self):
-
-    history = self._build_history(1.145, cat_accuracy_sparse=.99988)
-    eval_output = self._build_eval_output(.928, 1.9844)
-    stats = keras_common.build_stats(history, eval_output, None)
-
-    self.assertEqual(1.145, stats['loss'])
-    self.assertEqual(.99988, stats['training_accuracy_top_1'])
-
-    self.assertEqual(.928, stats['accuracy_top_1'])
-    self.assertEqual(1.9844, stats['eval_loss'])
-
-  def test_time_history(self):
-    th = keras_common.TimeHistory(batch_size=128, log_steps=3)
-
-    th.on_train_begin()
-    th.on_batch_begin(0)
-    th.on_batch_end(0)
-    th.on_batch_begin(1)
-    th.on_batch_end(1)
-    th.on_batch_begin(2)
-    th.on_batch_end(2)
-    th.on_batch_begin(3)
-    th.on_batch_end(3)
-    th.on_batch_begin(4)
-    th.on_batch_end(4)
-    th.on_batch_begin(5)
-    th.on_batch_end(5)
-    th.on_batch_begin(6)
-    th.on_batch_end(6)
-    th.on_train_end()
-
-    self.assertEqual(3, len(th.batch_start_timestamps))
-    self.assertEqual(2, len(th.batch_end_timestamps))
-
-    self.assertEqual(0, th.batch_start_timestamps[0].batch_index)
-    self.assertEqual(1, th.batch_start_timestamps[1].batch_index)
-    self.assertEqual(4, th.batch_start_timestamps[2].batch_index)
-
-    self.assertEqual(3, th.batch_end_timestamps[0].batch_index)
-    self.assertEqual(6, th.batch_end_timestamps[1].batch_index)
-
-  def _build_history(self, loss, cat_accuracy=None,
-                     cat_accuracy_sparse=None):
-    history_p = Mock()
-    history = {}
-    history_p.history = history
-    history['loss'] = [np.float64(loss)]
-    if cat_accuracy:
-      history['categorical_accuracy'] = [np.float64(cat_accuracy)]
-    if cat_accuracy_sparse:
-      history['sparse_categorical_accuracy'] = [np.float64(cat_accuracy_sparse)]
-
-    return history_p
-
-  def _build_eval_output(self, top_1, eval_loss):
-    eval_output = [np.float64(eval_loss), np.float64(top_1)]
-    return eval_output
--- a/TensorFlow/ComputeVision/Accuracy_Validation/ResNet50_Official/official/resnet/keras/keras_imagenet_benchmark.py
+++ b/TensorFlow/ComputeVision/Accuracy_Validation/ResNet50_Official/official/resnet/keras/keras_imagenet_benchmark.py
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Executes Keras benchmarks and accuracy tests."""
-from __future__ import print_function
-
-import os
-import time
-
-from absl import flags
-
-from official.resnet import imagenet_main
-from official.resnet.keras import keras_benchmark
-from official.resnet.keras import keras_common
-from official.resnet.keras import keras_imagenet_main
-
-MIN_TOP_1_ACCURACY = 0.76
-MAX_TOP_1_ACCURACY = 0.77
-DATA_DIR = '/data/imagenet/'
-
-FLAGS = flags.FLAGS
-
-
-class Resnet50KerasAccuracy(keras_benchmark.KerasBenchmark):
-  """Benchmark accuracy tests for ResNet50 in Keras."""
-
-  def __init__(self, output_dir=None):
-    flag_methods = [
-        keras_common.define_keras_flags, imagenet_main.define_imagenet_flags
-    ]
-
-    super(Resnet50KerasAccuracy, self).__init__(
-        output_dir=output_dir, flag_methods=flag_methods)
-
-  def benchmark_graph_8_gpu(self):
-    """Test Keras model with Keras fit/dist_strat and 8 GPUs."""
-    self._setup()
-    FLAGS.num_gpus = 8
-    FLAGS.data_dir = DATA_DIR
-    FLAGS.batch_size = 128 * 8
-    FLAGS.train_epochs = 90
-    FLAGS.model_dir = self._get_model_dir('keras_resnet50_8_gpu')
-    FLAGS.dtype = 'fp32'
-    self._run_and_report_benchmark()
-
-  def benchmark_8_gpu(self):
-    """Test Keras model with eager, dist_strat and 8 GPUs."""
-    self._setup()
-    FLAGS.num_gpus = 8
-    FLAGS.data_dir = DATA_DIR
-    FLAGS.batch_size = 128 * 8
-    FLAGS.train_epochs = 90
-    FLAGS.model_dir = self._get_model_dir('keras_resnet50_eager_8_gpu')
-    FLAGS.dtype = 'fp32'
-    FLAGS.enable_eager = True
-    self._run_and_report_benchmark()
-
-  def _run_and_report_benchmark(self):
-    start_time_sec = time.time()
-    stats = keras_imagenet_main.run(flags.FLAGS)
-    wall_time_sec = time.time() - start_time_sec
-
-    super(Resnet50KerasAccuracy, self)._report_benchmark(
-        stats,
-        wall_time_sec,
-        top_1_min=MIN_TOP_1_ACCURACY,
-        top_1_max=MAX_TOP_1_ACCURACY,
-        total_batch_size=FLAGS.batch_size,
-        log_steps=100)
-
-  def _get_model_dir(self, folder_name):
-    return os.path.join(self.output_dir, folder_name)
-
-
-class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
-  """Resnet50 benchmarks."""
-
-  def __init__(self, output_dir=None, default_flags=None):
-    flag_methods = [
-        keras_common.define_keras_flags, imagenet_main.define_imagenet_flags
-    ]
-
-    super(Resnet50KerasBenchmarkBase, self).__init__(
-        output_dir=output_dir,
-        flag_methods=flag_methods,
-        default_flags=default_flags)
-
-  def _run_and_report_benchmark(self):
-    start_time_sec = time.time()
-    stats = keras_imagenet_main.run(FLAGS)
-    wall_time_sec = time.time() - start_time_sec
-
-    super(Resnet50KerasBenchmarkBase, self)._report_benchmark(
-        stats,
-        wall_time_sec,
-        total_batch_size=FLAGS.batch_size,
-        log_steps=FLAGS.log_steps)
-
-  def benchmark_1_gpu_no_dist_strat(self):
-    self._setup()
-
-    FLAGS.num_gpus = 1
-    FLAGS.enable_eager = True
-    FLAGS.turn_off_distribution_strategy = True
-    FLAGS.batch_size = 128
-    self._run_and_report_benchmark()
-
-  def benchmark_graph_1_gpu_no_dist_strat(self):
-    self._setup()
-
-    FLAGS.num_gpus = 1
-    FLAGS.enable_eager = False
-    FLAGS.turn_off_distribution_strategy = True
-    FLAGS.batch_size = 128
-    self._run_and_report_benchmark()
-
-  def benchmark_1_gpu(self):
-    self._setup()
-
-    FLAGS.num_gpus = 1
-    FLAGS.enable_eager = True
-    FLAGS.turn_off_distribution_strategy = False
-    FLAGS.batch_size = 128
-    self._run_and_report_benchmark()
-
-  def benchmark_graph_1_gpu(self):
-    self._setup()
-
-    FLAGS.num_gpus = 1
-    FLAGS.enable_eager = False
-    FLAGS.turn_off_distribution_strategy = False
-    FLAGS.batch_size = 128
-    self._run_and_report_benchmark()
-
-  def benchmark_8_gpu(self):
-    self._setup()
-
-    FLAGS.num_gpus = 8
-    FLAGS.enable_eager = True
-    FLAGS.turn_off_distribution_strategy = False
-    FLAGS.batch_size = 128 * 8  # 8 GPUs
-    self._run_and_report_benchmark()
-
-  def benchmark_graph_8_gpu(self):
-    self._setup()
-
-    FLAGS.num_gpus = 8
-    FLAGS.enable_eager = False
-    FLAGS.turn_off_distribution_strategy = False
-    FLAGS.batch_size = 128 * 8  # 8 GPUs
-    self._run_and_report_benchmark()
-
-  def fill_report_object(self, stats):
-    super(Resnet50KerasBenchmarkBase, self).fill_report_object(
-        stats,
-        total_batch_size=FLAGS.batch_size,
-        log_steps=FLAGS.log_steps)
-
-
-class Resnet50KerasBenchmarkSynth(Resnet50KerasBenchmarkBase):
-  """Resnet50 synthetic benchmark tests."""
-
-  def __init__(self, output_dir=None):
-    def_flags = {}
-    def_flags['skip_eval'] = True
-    def_flags['use_synthetic_data'] = True
-    def_flags['train_steps'] = 110
-    def_flags['log_steps'] = 10
-
-    super(Resnet50KerasBenchmarkSynth, self).__init__(
-        output_dir=output_dir, default_flags=def_flags)
-
-
-class Resnet50KerasBenchmarkReal(Resnet50KerasBenchmarkBase):
-  """Resnet50 real data benchmark tests."""
-
-  def __init__(self, output_dir=None):
-    def_flags = {}
-    def_flags['skip_eval'] = True
-    def_flags['data_dir'] = DATA_DIR
-    def_flags['train_steps'] = 110
-    def_flags['log_steps'] = 10
-
-    super(Resnet50KerasBenchmarkReal, self).__init__(
-        output_dir=output_dir, default_flags=def_flags)
--- a/TensorFlow/ComputeVision/Accuracy_Validation/ResNet50_Official/official/resnet/keras/keras_imagenet_main.py
+++ b/TensorFlow/ComputeVision/Accuracy_Validation/ResNet50_Official/official/resnet/keras/keras_imagenet_main.py
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Runs a ResNet model on the ImageNet dataset."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from absl import app as absl_app
-from absl import flags
-import tensorflow as tf  # pylint: disable=g-bad-import-order
-
-from official.resnet import imagenet_main
-from official.resnet.keras import keras_common
-from official.resnet.keras import resnet_model
-from official.utils.flags import core as flags_core
-from official.utils.logs import logger
-from official.utils.misc import distribution_utils
-
-
-LR_SCHEDULE = [    # (multiplier, epoch to start) tuples
-    (1.0, 5), (0.1, 30), (0.01, 60), (0.001, 80)
-]
-
-
-def learning_rate_schedule(current_epoch,
-                           current_batch,
-                           batches_per_epoch,
-                           batch_size):
-  """Handles linear scaling rule, gradual warmup, and LR decay.
-
-  Scale learning rate at epoch boundaries provided in LR_SCHEDULE by the
-  provided scaling factor.
-
-  Args:
-    current_epoch: integer, current epoch indexed from 0.
-    current_batch: integer, current batch in the current epoch, indexed from 0.
-    batches_per_epoch: integer, number of steps in an epoch.
-    batch_size: integer, total batch sized.
-
-  Returns:
-    Adjusted learning rate.
-  """
-  initial_lr = keras_common.BASE_LEARNING_RATE * batch_size / 256
-  epoch = current_epoch + float(current_batch) / batches_per_epoch
-  warmup_lr_multiplier, warmup_end_epoch = LR_SCHEDULE[0]
-  if epoch < warmup_end_epoch:
-    # Learning rate increases linearly per step.
-    return initial_lr * warmup_lr_multiplier * epoch / warmup_end_epoch
-  for mult, start_epoch in LR_SCHEDULE:
-    if epoch >= start_epoch:
-      learning_rate = initial_lr * mult
-    else:
-      break
-  return learning_rate
-
-
-def parse_record_keras(raw_record, is_training, dtype):
-  """Adjust the shape of label."""
-  image, label = imagenet_main.parse_record(raw_record, is_training, dtype)
-
-  # Subtract one so that labels are in [0, 1000), and cast to float32 for
-  # Keras model.
-  label = tf.cast(tf.cast(tf.reshape(label, shape=[1]), dtype=tf.int32) - 1,
-                  dtype=tf.float32)
-  return image, label
-
-
-def run(flags_obj):
-  """Run ResNet ImageNet training and eval loop using native Keras APIs.
-
-  Args:
-    flags_obj: An object containing parsed flag values.
-
-  Raises:
-    ValueError: If fp16 is passed as it is not currently supported.
-  """
-  if flags_obj.enable_eager:
-    tf.enable_eager_execution()
-
-  dtype = flags_core.get_tf_dtype(flags_obj)
-  if dtype == 'fp16':
-    raise ValueError('dtype fp16 is not supported in Keras. Use the default '
-                     'value(fp32).')
-
-  data_format = flags_obj.data_format
-  if data_format is None:
-    data_format = ('channels_first'
-                   if tf.test.is_built_with_cuda() else 'channels_last')
-  tf.keras.backend.set_image_data_format(data_format)
-
-  # pylint: disable=protected-access
-  if flags_obj.use_synthetic_data:
-    input_fn = keras_common.get_synth_input_fn(
-        height=imagenet_main.DEFAULT_IMAGE_SIZE,
-        width=imagenet_main.DEFAULT_IMAGE_SIZE,
-        num_channels=imagenet_main.NUM_CHANNELS,
-        num_classes=imagenet_main.NUM_CLASSES,
-        dtype=flags_core.get_tf_dtype(flags_obj))
-  else:
-    input_fn = imagenet_main.input_fn
-
-  train_input_dataset = input_fn(is_training=True,
-                                 data_dir=flags_obj.data_dir,
-                                 batch_size=flags_obj.batch_size,
-                                 num_epochs=flags_obj.train_epochs,
-                                 parse_record_fn=parse_record_keras)
-
-  eval_input_dataset = input_fn(is_training=False,
-                                data_dir=flags_obj.data_dir,
-                                batch_size=flags_obj.batch_size,
-                                num_epochs=flags_obj.train_epochs,
-                                parse_record_fn=parse_record_keras)
-
-  strategy = distribution_utils.get_distribution_strategy(
-      num_gpus=flags_obj.num_gpus,
-      turn_off_distribution_strategy=flags_obj.turn_off_distribution_strategy)
-
-  strategy_scope = keras_common.get_strategy_scope(strategy)
-
-  with strategy_scope:
-    optimizer = keras_common.get_optimizer()
-    model = resnet_model.resnet50(num_classes=imagenet_main.NUM_CLASSES)
-
-    model.compile(loss='sparse_categorical_crossentropy',
-                  optimizer=optimizer,
-                  metrics=['sparse_categorical_accuracy'])
-
-  time_callback, tensorboard_callback, lr_callback = keras_common.get_callbacks(
-      learning_rate_schedule, imagenet_main.NUM_IMAGES['train'])
-
-  train_steps = imagenet_main.NUM_IMAGES['train'] // flags_obj.batch_size
-  train_epochs = flags_obj.train_epochs
-
-  if flags_obj.train_steps:
-    train_steps = min(flags_obj.train_steps, train_steps)
-    train_epochs = 1
-
-  num_eval_steps = (imagenet_main.NUM_IMAGES['validation'] //
-                    flags_obj.batch_size)
-
-  validation_data = eval_input_dataset
-  if flags_obj.skip_eval:
-    # Only build the training graph. This reduces memory usage introduced by
-    # control flow ops in layers that have different implementations for
-    # training and inference (e.g., batch norm).
-    tf.keras.backend.set_learning_phase(1)
-    num_eval_steps = None
-    validation_data = None
-
-  history = model.fit(train_input_dataset,
-                      epochs=train_epochs,
-                      steps_per_epoch=train_steps,
-                      callbacks=[
-                          time_callback,
-                          lr_callback,
-                          tensorboard_callback
-                      ],
-                      validation_steps=num_eval_steps,
-                      validation_data=validation_data,
-                      verbose=2)
-
-  eval_output = None
-  if not flags_obj.skip_eval:
-    eval_output = model.evaluate(eval_input_dataset,
-                                 steps=num_eval_steps,
-                                 verbose=1)
-  stats = keras_common.build_stats(history, eval_output, time_callback)
-  return stats
-
-
-def main(_):
-  with logger.benchmark_context(flags.FLAGS):
-    return run(flags.FLAGS)
-
-
-if __name__ == '__main__':
-  tf.logging.set_verbosity(tf.logging.INFO)
-  imagenet_main.define_imagenet_flags()
-  keras_common.define_keras_flags()
-  absl_app.run(main)
--- a/TensorFlow/ComputeVision/Accuracy_Validation/ResNet50_Official/official/resnet/keras/resnet_cifar_model.py
+++ b/TensorFlow/ComputeVision/Accuracy_Validation/ResNet50_Official/official/resnet/keras/resnet_cifar_model.py
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""ResNet56 model for Keras adapted from tf.keras.applications.ResNet50.
-
-# Reference:
- [Deep Residual Learning for Image Recognition](
-    https://arxiv.org/abs/1512.03385)
-Adapted from code contributed by BigMoyan.
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import tensorflow as tf
-from tensorflow.python.keras import backend
-from tensorflow.python.keras import layers
-
-
-BATCH_NORM_DECAY = 0.997
-BATCH_NORM_EPSILON = 1e-5
-L2_WEIGHT_DECAY = 2e-4
-
-
-def identity_building_block(input_tensor,
-                            kernel_size,
-                            filters,
-                            stage,
-                            block,
-                            training=None):
-  """The identity block is the block that has no conv layer at shortcut.
-
-  Arguments:
-    input_tensor: input tensor
-    kernel_size: default 3, the kernel size of
-        middle conv layer at main path
-    filters: list of integers, the filters of 3 conv layer at main path
-    stage: integer, current stage label, used for generating layer names
-    block: 'a','b'..., current block label, used for generating layer names
-    training: Only used if training keras model with Estimator.  In other
-      scenarios it is handled automatically.
-
-  Returns:
-    Output tensor for the block.
-  """
-  filters1, filters2 = filters
-  if tf.keras.backend.image_data_format() == 'channels_last':
-    bn_axis = 3
-  else:
-    bn_axis = 1
-  conv_name_base = 'res' + str(stage) + block + '_branch'
-  bn_name_base = 'bn' + str(stage) + block + '_branch'
-
-  x = tf.keras.layers.Conv2D(filters1, kernel_size,
-                             padding='same',
-                             kernel_initializer='he_normal',
-                             kernel_regularizer=
-                             tf.keras.regularizers.l2(L2_WEIGHT_DECAY),
-                             bias_regularizer=
-                             tf.keras.regularizers.l2(L2_WEIGHT_DECAY),
-                             name=conv_name_base + '2a')(input_tensor)
-  x = tf.keras.layers.BatchNormalization(axis=bn_axis,
-                                         name=bn_name_base + '2a',
-                                         momentum=BATCH_NORM_DECAY,
-                                         epsilon=BATCH_NORM_EPSILON)(
-                                             x, training=training)
-  x = tf.keras.layers.Activation('relu')(x)
-
-  x = tf.keras.layers.Conv2D(filters2, kernel_size,
-                             padding='same',
-                             kernel_initializer='he_normal',
-                             kernel_regularizer=
-                             tf.keras.regularizers.l2(L2_WEIGHT_DECAY),
-                             bias_regularizer=
-                             tf.keras.regularizers.l2(L2_WEIGHT_DECAY),
-                             name=conv_name_base + '2b')(x)
-  x = tf.keras.layers.BatchNormalization(axis=bn_axis,
-                                         name=bn_name_base + '2b',
-                                         momentum=BATCH_NORM_DECAY,
-                                         epsilon=BATCH_NORM_EPSILON)(
-                                             x, training=training)
-
-  x = tf.keras.layers.add([x, input_tensor])
-  x = tf.keras.layers.Activation('relu')(x)
-  return x
-
-
-def conv_building_block(input_tensor,
-                        kernel_size,
-                        filters,
-                        stage,
-                        block,
-                        strides=(2, 2),
-                        training=None):
-  """A block that has a conv layer at shortcut.
-
-  Arguments:
-    input_tensor: input tensor
-    kernel_size: default 3, the kernel size of
-        middle conv layer at main path
-    filters: list of integers, the filters of 3 conv layer at main path
-    stage: integer, current stage label, used for generating layer names
-    block: 'a','b'..., current block label, used for generating layer names
-    strides: Strides for the first conv layer in the block.
-    training: Only used if training keras model with Estimator.  In other
-      scenarios it is handled automatically.
-
-  Returns:
-    Output tensor for the block.
-
-  Note that from stage 3,
-  the first conv layer at main path is with strides=(2, 2)
-  And the shortcut should have strides=(2, 2) as well
-  """
-  filters1, filters2 = filters
-  if tf.keras.backend.image_data_format() == 'channels_last':
-    bn_axis = 3
-  else:
-    bn_axis = 1
-  conv_name_base = 'res' + str(stage) + block + '_branch'
-  bn_name_base = 'bn' + str(stage) + block + '_branch'
-
-  x = tf.keras.layers.Conv2D(filters1, kernel_size, strides=strides,
-                             padding='same',
-                             kernel_initializer='he_normal',
-                             kernel_regularizer=
-                             tf.keras.regularizers.l2(L2_WEIGHT_DECAY),
-                             bias_regularizer=
-                             tf.keras.regularizers.l2(L2_WEIGHT_DECAY),
-                             name=conv_name_base + '2a')(input_tensor)
-  x = tf.keras.layers.BatchNormalization(axis=bn_axis,
-                                         name=bn_name_base + '2a',
-                                         momentum=BATCH_NORM_DECAY,
-                                         epsilon=BATCH_NORM_EPSILON)(
-                                             x, training=training)
-  x = tf.keras.layers.Activation('relu')(x)
-
-  x = tf.keras.layers.Conv2D(filters2, kernel_size, padding='same',
-                             kernel_initializer='he_normal',
-                             kernel_regularizer=
-                             tf.keras.regularizers.l2(L2_WEIGHT_DECAY),
-                             bias_regularizer=
-                             tf.keras.regularizers.l2(L2_WEIGHT_DECAY),
-                             name=conv_name_base + '2b')(x)
-  x = tf.keras.layers.BatchNormalization(axis=bn_axis,
-                                         name=bn_name_base + '2b',
-                                         momentum=BATCH_NORM_DECAY,
-                                         epsilon=BATCH_NORM_EPSILON)(
-                                             x, training=training)
-
-  shortcut = tf.keras.layers.Conv2D(filters2, (1, 1), strides=strides,
-                                    kernel_initializer='he_normal',
-                                    kernel_regularizer=
-                                    tf.keras.regularizers.l2(L2_WEIGHT_DECAY),
-                                    bias_regularizer=
-                                    tf.keras.regularizers.l2(L2_WEIGHT_DECAY),
-                                    name=conv_name_base + '1')(input_tensor)
-  shortcut = tf.keras.layers.BatchNormalization(
-      axis=bn_axis, name=bn_name_base + '1',
-      momentum=BATCH_NORM_DECAY, epsilon=BATCH_NORM_EPSILON)(
-          shortcut, training=training)
-
-  x = tf.keras.layers.add([x, shortcut])
-  x = tf.keras.layers.Activation('relu')(x)
-  return x
-
-
-def resnet56(classes=100, training=None):
-  """Instantiates the ResNet56 architecture.
-
-  Arguments:
-    classes: optional number of classes to classify images into
-    training: Only used if training keras model with Estimator.  In other
-    scenarios it is handled automatically.
-
-  Returns:
-    A Keras model instance.
-  """
-  input_shape = (32, 32, 3)
-  img_input = layers.Input(shape=input_shape)
-
-  if backend.image_data_format() == 'channels_first':
-    x = layers.Lambda(lambda x: backend.permute_dimensions(x, (0, 3, 1, 2)),
-                      name='transpose')(img_input)
-    bn_axis = 1
-  else:  # channel_last
-    x = img_input
-    bn_axis = 3
-
-  x = tf.keras.layers.ZeroPadding2D(padding=(1, 1), name='conv1_pad')(x)
-  x = tf.keras.layers.Conv2D(16, (3, 3),
-                             strides=(1, 1),
-                             padding='valid',
-                             kernel_initializer='he_normal',
-                             kernel_regularizer=
-                             tf.keras.regularizers.l2(L2_WEIGHT_DECAY),
-                             bias_regularizer=
-                             tf.keras.regularizers.l2(L2_WEIGHT_DECAY),
-                             name='conv1')(x)
-  x = tf.keras.layers.BatchNormalization(axis=bn_axis, name='bn_conv1',
-                                         momentum=BATCH_NORM_DECAY,
-                                         epsilon=BATCH_NORM_EPSILON)(
-                                             x, training=training)
-  x = tf.keras.layers.Activation('relu')(x)
-
-  x = conv_building_block(x, 3, [16, 16], stage=2, block='a', strides=(1, 1),
-                          training=training)
-  x = identity_building_block(x, 3, [16, 16], stage=2, block='b',
-                              training=training)
-  x = identity_building_block(x, 3, [16, 16], stage=2, block='c',
-                              training=training)
-  x = identity_building_block(x, 3, [16, 16], stage=2, block='d',
-                              training=training)
-  x = identity_building_block(x, 3, [16, 16], stage=2, block='e',
-                              training=training)
-  x = identity_building_block(x, 3, [16, 16], stage=2, block='f',
-                              training=training)
-  x = identity_building_block(x, 3, [16, 16], stage=2, block='g',
-                              training=training)
-  x = identity_building_block(x, 3, [16, 16], stage=2, block='h',
-                              training=training)
-  x = identity_building_block(x, 3, [16, 16], stage=2, block='i',
-                              training=training)
-
-  x = conv_building_block(x, 3, [32, 32], stage=3, block='a',
-                          training=training)
-  x = identity_building_block(x, 3, [32, 32], stage=3, block='b',
-                              training=training)
-  x = identity_building_block(x, 3, [32, 32], stage=3, block='c',
-                              training=training)
-  x = identity_building_block(x, 3, [32, 32], stage=3, block='d',
-                              training=training)
-  x = identity_building_block(x, 3, [32, 32], stage=3, block='e',
-                              training=training)
-  x = identity_building_block(x, 3, [32, 32], stage=3, block='f',
-                              training=training)
-  x = identity_building_block(x, 3, [32, 32], stage=3, block='g',
-                              training=training)
-  x = identity_building_block(x, 3, [32, 32], stage=3, block='h',
-                              training=training)
-  x = identity_building_block(x, 3, [32, 32], stage=3, block='i',
-                              training=training)
-
-  x = conv_building_block(x, 3, [64, 64], stage=4, block='a',
-                          training=training)
-  x = identity_building_block(x, 3, [64, 64], stage=4, block='b',
-                              training=training)
-  x = identity_building_block(x, 3, [64, 64], stage=4, block='c',
-                              training=training)
-  x = identity_building_block(x, 3, [64, 64], stage=4, block='d',
-                              training=training)
-  x = identity_building_block(x, 3, [64, 64], stage=4, block='e',
-                              training=training)
-  x = identity_building_block(x, 3, [64, 64], stage=4, block='f',
-                              training=training)
-  x = identity_building_block(x, 3, [64, 64], stage=4, block='g',
-                              training=training)
-  x = identity_building_block(x, 3, [64, 64], stage=4, block='h',
-                              training=training)
-  x = identity_building_block(x, 3, [64, 64], stage=4, block='i',
-                              training=training)
-
-  x = tf.keras.layers.GlobalAveragePooling2D(name='avg_pool')(x)
-  x = tf.keras.layers.Dense(classes, activation='softmax',
-                            kernel_initializer='he_normal',
-                            kernel_regularizer=
-                            tf.keras.regularizers.l2(L2_WEIGHT_DECAY),
-                            bias_regularizer=
-                            tf.keras.regularizers.l2(L2_WEIGHT_DECAY),
-                            name='fc10')(x)
-
-  inputs = img_input
-  # Create model.
-  model = tf.keras.models.Model(inputs, x, name='resnet56')
-
-  return model
--- a/TensorFlow/ComputeVision/Accuracy_Validation/ResNet50_Official/official/resnet/keras/resnet_model.py
+++ b/TensorFlow/ComputeVision/Accuracy_Validation/ResNet50_Official/official/resnet/keras/resnet_model.py
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""ResNet50 model for Keras.
-
-Adapted from tf.keras.applications.resnet50.ResNet50().
-This is ResNet model version 1.5.
-
-Related papers/blogs:
- https://arxiv.org/abs/1512.03385
- https://arxiv.org/pdf/1603.05027v2.pdf
- http://torch.ch/blog/2016/02/04/resnets.html
-
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-import warnings
-
-from tensorflow.python.keras import backend
-from tensorflow.python.keras import layers
-from tensorflow.python.keras import models
-from tensorflow.python.keras import regularizers
-from tensorflow.python.keras import utils
-
-
-L2_WEIGHT_DECAY = 1e-4
-BATCH_NORM_DECAY = 0.9
-BATCH_NORM_EPSILON = 1e-5
-
-
-def identity_block(input_tensor, kernel_size, filters, stage, block):
-  """The identity block is the block that has no conv layer at shortcut.
-
-  # Arguments
-      input_tensor: input tensor
-      kernel_size: default 3, the kernel size of
-          middle conv layer at main path
-      filters: list of integers, the filters of 3 conv layer at main path
-      stage: integer, current stage label, used for generating layer names
-      block: 'a','b'..., current block label, used for generating layer names
-
-  # Returns
-      Output tensor for the block.
-  """
-  filters1, filters2, filters3 = filters
-  if backend.image_data_format() == 'channels_last':
-    bn_axis = 3
-  else:
-    bn_axis = 1
-  conv_name_base = 'res' + str(stage) + block + '_branch'
-  bn_name_base = 'bn' + str(stage) + block + '_branch'
-
-  x = layers.Conv2D(filters1, (1, 1), use_bias=False,
-                    kernel_initializer='he_normal',
-                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
-                    name=conv_name_base + '2a')(input_tensor)
-  x = layers.BatchNormalization(axis=bn_axis,
-                                momentum=BATCH_NORM_DECAY,
-                                epsilon=BATCH_NORM_EPSILON,
-                                name=bn_name_base + '2a')(x)
-  x = layers.Activation('relu')(x)
-
-  x = layers.Conv2D(filters2, kernel_size,
-                    padding='same', use_bias=False,
-                    kernel_initializer='he_normal',
-                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
-                    name=conv_name_base + '2b')(x)
-  x = layers.BatchNormalization(axis=bn_axis,
-                                momentum=BATCH_NORM_DECAY,
-                                epsilon=BATCH_NORM_EPSILON,
-                                name=bn_name_base + '2b')(x)
-  x = layers.Activation('relu')(x)
-
-  x = layers.Conv2D(filters3, (1, 1), use_bias=False,
-                    kernel_initializer='he_normal',
-                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
-                    name=conv_name_base + '2c')(x)
-  x = layers.BatchNormalization(axis=bn_axis,
-                                momentum=BATCH_NORM_DECAY,
-                                epsilon=BATCH_NORM_EPSILON,
-                                name=bn_name_base + '2c')(x)
-
-  x = layers.add([x, input_tensor])
-  x = layers.Activation('relu')(x)
-  return x
-
-
-def conv_block(input_tensor,
-               kernel_size,
-               filters,
-               stage,
-               block,
-               strides=(2, 2)):
-  """A block that has a conv layer at shortcut.
-
-  # Arguments
-      input_tensor: input tensor
-      kernel_size: default 3, the kernel size of
-          middle conv layer at main path
-      filters: list of integers, the filters of 3 conv layer at main path
-      stage: integer, current stage label, used for generating layer names
-      block: 'a','b'..., current block label, used for generating layer names
-      strides: Strides for the second conv layer in the block.
-
-  # Returns
-      Output tensor for the block.
-
-  Note that from stage 3,
-  the second conv layer at main path is with strides=(2, 2)
-  And the shortcut should have strides=(2, 2) as well
-  """
-  filters1, filters2, filters3 = filters
-  if backend.image_data_format() == 'channels_last':
-    bn_axis = 3
-  else:
-    bn_axis = 1
-  conv_name_base = 'res' + str(stage) + block + '_branch'
-  bn_name_base = 'bn' + str(stage) + block + '_branch'
-
-  x = layers.Conv2D(filters1, (1, 1), use_bias=False,
-                    kernel_initializer='he_normal',
-                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
-                    name=conv_name_base + '2a')(input_tensor)
-  x = layers.BatchNormalization(axis=bn_axis,
-                                momentum=BATCH_NORM_DECAY,
-                                epsilon=BATCH_NORM_EPSILON,
-                                name=bn_name_base + '2a')(x)
-  x = layers.Activation('relu')(x)
-
-  x = layers.Conv2D(filters2, kernel_size, strides=strides, padding='same',
-                    use_bias=False, kernel_initializer='he_normal',
-                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
-                    name=conv_name_base + '2b')(x)
-  x = layers.BatchNormalization(axis=bn_axis,
-                                momentum=BATCH_NORM_DECAY,
-                                epsilon=BATCH_NORM_EPSILON,
-                                name=bn_name_base + '2b')(x)
-  x = layers.Activation('relu')(x)
-
-  x = layers.Conv2D(filters3, (1, 1), use_bias=False,
-                    kernel_initializer='he_normal',
-                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
-                    name=conv_name_base + '2c')(x)
-  x = layers.BatchNormalization(axis=bn_axis,
-                                momentum=BATCH_NORM_DECAY,
-                                epsilon=BATCH_NORM_EPSILON,
-                                name=bn_name_base + '2c')(x)
-
-  shortcut = layers.Conv2D(filters3, (1, 1), strides=strides, use_bias=False,
-                           kernel_initializer='he_normal',
-                           kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
-                           name=conv_name_base + '1')(input_tensor)
-  shortcut = layers.BatchNormalization(axis=bn_axis,
-                                       momentum=BATCH_NORM_DECAY,
-                                       epsilon=BATCH_NORM_EPSILON,
-                                       name=bn_name_base + '1')(shortcut)
-
-  x = layers.add([x, shortcut])
-  x = layers.Activation('relu')(x)
-  return x
-
-
-def resnet50(num_classes):
-  # TODO(tfboyd): add training argument, just lik resnet56.
-  """Instantiates the ResNet50 architecture.
-
-  Args:
-    num_classes: `int` number of classes for image classification.
-
-  Returns:
-      A Keras model instance.
-  """
-  input_shape = (224, 224, 3)
-  img_input = layers.Input(shape=input_shape)
-
-  if backend.image_data_format() == 'channels_first':
-    x = layers.Lambda(lambda x: backend.permute_dimensions(x, (0, 3, 1, 2)),
-                      name='transpose')(img_input)
-    bn_axis = 1
-  else:  # channels_last
-    x = img_input
-    bn_axis = 3
-
-  x = layers.ZeroPadding2D(padding=(3, 3), name='conv1_pad')(x)
-  x = layers.Conv2D(64, (7, 7),
-                    strides=(2, 2),
-                    padding='valid', use_bias=False,
-                    kernel_initializer='he_normal',
-                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
-                    name='conv1')(x)
-  x = layers.BatchNormalization(axis=bn_axis,
-                                momentum=BATCH_NORM_DECAY,
-                                epsilon=BATCH_NORM_EPSILON,
-                                name='bn_conv1')(x)
-  x = layers.Activation('relu')(x)
-  x = layers.ZeroPadding2D(padding=(1, 1), name='pool1_pad')(x)
-  x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x)
-
-  x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1))
-  x = identity_block(x, 3, [64, 64, 256], stage=2, block='b')
-  x = identity_block(x, 3, [64, 64, 256], stage=2, block='c')
-
-  x = conv_block(x, 3, [128, 128, 512], stage=3, block='a')
-  x = identity_block(x, 3, [128, 128, 512], stage=3, block='b')
-  x = identity_block(x, 3, [128, 128, 512], stage=3, block='c')
-  x = identity_block(x, 3, [128, 128, 512], stage=3, block='d')
-
-  x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a')
-  x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b')
-  x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c')
-  x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d')
-  x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e')
-  x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f')
-
-  x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a')
-  x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b')
-  x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c')
-
-  x = layers.GlobalAveragePooling2D(name='avg_pool')(x)
-  x = layers.Dense(
-      num_classes, activation='softmax',
-      kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
-      bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
-      name='fc1000')(x)
-
-  # Create model.
-  return models.Model(img_input, x, name='resnet50')
--- a/TensorFlow/ComputeVision/Accuracy_Validation/ResNet50_Official/official/resnet/layer_test.py
+++ b/TensorFlow/ComputeVision/Accuracy_Validation/ResNet50_Official/official/resnet/layer_test.py
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Test that the definitions of ResNet layers haven't changed.
-
-These tests will fail if either:
-  a)  The graph of a resnet layer changes and the change is significant enough
-      that it can no longer load existing checkpoints.
-  b)  The numerical results produced by the layer change.
-
-A warning will be issued if the graph changes, but the checkpoint still loads.
-
-In the event that a layer change is intended, or the TensorFlow implementation
-of a layer changes (and thus changes the graph), regenerate using the command:
-
-  $ python3 layer_test.py -regen
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import sys
-
-import tensorflow as tf   # pylint: disable=g-bad-import-order
-from official.resnet import resnet_model
-from official.utils.testing import reference_data
-
-
-DATA_FORMAT = "channels_last"  # CPU instructions often preclude channels_first
-BATCH_SIZE = 32
-BLOCK_TESTS = [
-    dict(bottleneck=True, projection=True, resnet_version=1, width=8,
-         channels=4),
-    dict(bottleneck=True, projection=True, resnet_version=2, width=8,
-         channels=4),
-    dict(bottleneck=True, projection=False, resnet_version=1, width=8,
-         channels=4),
-    dict(bottleneck=True, projection=False, resnet_version=2, width=8,
-         channels=4),
-    dict(bottleneck=False, projection=True, resnet_version=1, width=8,
-         channels=4),
-    dict(bottleneck=False, projection=True, resnet_version=2, width=8,
-         channels=4),
-    dict(bottleneck=False, projection=False, resnet_version=1, width=8,
-         channels=4),
-    dict(bottleneck=False, projection=False, resnet_version=2, width=8,
-         channels=4),
-]
-
-
-class BaseTest(reference_data.BaseTest):
-  """Tests for core ResNet layers."""
-
-  @property
-  def test_name(self):
-    return "resnet"
-
-  def _batch_norm_ops(self, test=False):
-    name = "batch_norm"
-
-    g = tf.Graph()
-    with g.as_default():
-      tf.set_random_seed(self.name_to_seed(name))
-      input_tensor = tf.get_variable(
-          "input_tensor", dtype=tf.float32,
-          initializer=tf.random_uniform((32, 16, 16, 3), maxval=1)
-      )
-      layer = resnet_model.batch_norm(
-          inputs=input_tensor, data_format=DATA_FORMAT, training=True)
-
-    self._save_or_test_ops(
-        name=name, graph=g, ops_to_eval=[input_tensor, layer], test=test,
-        correctness_function=self.default_correctness_function
-    )
-
-  def make_projection(self, filters_out, strides, data_format):
-    """1D convolution with stride projector.
-
-    Args:
-      filters_out: Number of filters in the projection.
-      strides: Stride length for convolution.
-      data_format: channels_first or channels_last
-
-    Returns:
-      A CNN projector function with kernel_size 1.
-    """
-    def projection_shortcut(inputs):
-      return resnet_model.conv2d_fixed_padding(
-          inputs=inputs, filters=filters_out, kernel_size=1, strides=strides,
-          data_format=data_format)
-    return projection_shortcut
-
-  def _resnet_block_ops(self, test, batch_size, bottleneck, projection,
-                        resnet_version, width, channels):
-    """Test whether resnet block construction has changed.
-
-    Args:
-      test: Whether or not to run as a test case.
-      batch_size: Number of points in the fake image. This is needed due to
-        batch normalization.
-      bottleneck: Whether or not to use bottleneck layers.
-      projection: Whether or not to project the input.
-      resnet_version: Which version of ResNet to test.
-      width: The width of the fake image.
-      channels: The number of channels in the fake image.
-    """
-
-    name = "batch-size-{}_{}{}_version-{}_width-{}_channels-{}".format(
-        batch_size,
-        "bottleneck" if bottleneck else "building",
-        "_projection" if projection else "",
-        resnet_version,
-        width,
-        channels
-    )
-
-    if resnet_version == 1:
-      block_fn = resnet_model._building_block_v1
-      if bottleneck:
-        block_fn = resnet_model._bottleneck_block_v1
-    else:
-      block_fn = resnet_model._building_block_v2
-      if bottleneck:
-        block_fn = resnet_model._bottleneck_block_v2
-
-    g = tf.Graph()
-    with g.as_default():
-      tf.set_random_seed(self.name_to_seed(name))
-      strides = 1
-      channels_out = channels
-      projection_shortcut = None
-      if projection:
-        strides = 2
-        channels_out *= strides
-        projection_shortcut = self.make_projection(
-            filters_out=channels_out, strides=strides, data_format=DATA_FORMAT)
-
-      filters = channels_out
-      if bottleneck:
-        filters = channels_out // 4
-
-      input_tensor = tf.get_variable(
-          "input_tensor", dtype=tf.float32,
-          initializer=tf.random_uniform((batch_size, width, width, channels),
-                                        maxval=1)
-      )
-
-      layer = block_fn(inputs=input_tensor, filters=filters, training=True,
-                       projection_shortcut=projection_shortcut, strides=strides,
-                       data_format=DATA_FORMAT)
-
-    self._save_or_test_ops(
-        name=name, graph=g, ops_to_eval=[input_tensor, layer], test=test,
-        correctness_function=self.default_correctness_function
-    )
-
-  def test_batch_norm(self):
-    self._batch_norm_ops(test=True)
-
-  def test_block_0(self):
-    self._resnet_block_ops(test=True, batch_size=BATCH_SIZE, **BLOCK_TESTS[0])
-
-  def test_block_1(self):
-    self._resnet_block_ops(test=True, batch_size=BATCH_SIZE, **BLOCK_TESTS[1])
-
-  def test_block_2(self):
-    self._resnet_block_ops(test=True, batch_size=BATCH_SIZE, **BLOCK_TESTS[2])
-
-  def test_block_3(self):
-    self._resnet_block_ops(test=True, batch_size=BATCH_SIZE, **BLOCK_TESTS[3])
-
-  def test_block_4(self):
-    self._resnet_block_ops(test=True, batch_size=BATCH_SIZE, **BLOCK_TESTS[4])
-
-  def test_block_5(self):
-    self._resnet_block_ops(test=True, batch_size=BATCH_SIZE, **BLOCK_TESTS[5])
-
-  def test_block_6(self):
-    self._resnet_block_ops(test=True, batch_size=BATCH_SIZE, **BLOCK_TESTS[6])
-
-  def test_block_7(self):
-    self._resnet_block_ops(test=True, batch_size=BATCH_SIZE, **BLOCK_TESTS[7])
-
-  def regenerate(self):
-    """Create reference data files for ResNet layer tests."""
-    self._batch_norm_ops(test=False)
-    for block_params in BLOCK_TESTS:
-      self._resnet_block_ops(test=False, batch_size=BATCH_SIZE, **block_params)
-
-
-if __name__ == "__main__":
-  reference_data.main(argv=sys.argv, test_class=BaseTest)
--- a/TensorFlow/ComputeVision/Accuracy_Validation/ResNet50_Official/official/resnet/resnet_model.py
+++ b/TensorFlow/ComputeVision/Accuracy_Validation/ResNet50_Official/official/resnet/resnet_model.py
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Contains definitions for Residual Networks.
-
-Residual networks ('v1' ResNets) were originally proposed in:
-[1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
-    Deep Residual Learning for Image Recognition. arXiv:1512.03385
-
-The full preactivation 'v2' ResNet variant was introduced by:
-[2] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
-    Identity Mappings in Deep Residual Networks. arXiv: 1603.05027
-
-The key difference of the full preactivation 'v2' variant compared to the
-'v1' variant in [1] is the use of batch normalization before every weight layer
-rather than after.
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import tensorflow as tf
-
-_BATCH_NORM_DECAY = 0.997
-_BATCH_NORM_EPSILON = 1e-5
-DEFAULT_VERSION = 2
-DEFAULT_DTYPE = tf.float32
-CASTABLE_TYPES = (tf.float16,)
-ALLOWED_TYPES = (DEFAULT_DTYPE,) + CASTABLE_TYPES
-
-
-################################################################################
-# Convenience functions for building the ResNet model.
-################################################################################
-def batch_norm(inputs, training, data_format):
-  """Performs a batch normalization using a standard set of parameters."""
-  # We set fused=True for a significant performance boost. See
-  # https://www.tensorflow.org/performance/performance_guide#common_fused_ops
-  return tf.layers.batch_normalization(
-      inputs=inputs, axis=1 if data_format == 'channels_first' else 3,
-      momentum=_BATCH_NORM_DECAY, epsilon=_BATCH_NORM_EPSILON, center=True,
-      scale=True, training=training, fused=True)
-
-
-def fixed_padding(inputs, kernel_size, data_format):
-  """Pads the input along the spatial dimensions independently of input size.
-
-  Args:
-    inputs: A tensor of size [batch, channels, height_in, width_in] or
-      [batch, height_in, width_in, channels] depending on data_format.
-    kernel_size: The kernel to be used in the conv2d or max_pool2d operation.
-                 Should be a positive integer.
-    data_format: The input format ('channels_last' or 'channels_first').
-
-  Returns:
-    A tensor with the same format as the input with the data either intact
-    (if kernel_size == 1) or padded (if kernel_size > 1).
-  """
-  pad_total = kernel_size - 1
-  pad_beg = pad_total // 2
-  pad_end = pad_total - pad_beg
-
-  if data_format == 'channels_first':
-    padded_inputs = tf.pad(inputs, [[0, 0], [0, 0],
-                                    [pad_beg, pad_end], [pad_beg, pad_end]])
-  else:
-    padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg, pad_end],
-                                    [pad_beg, pad_end], [0, 0]])
-  return padded_inputs
-
-
-def conv2d_fixed_padding(inputs, filters, kernel_size, strides, data_format):
-  """Strided 2-D convolution with explicit padding."""
-  # The padding is consistent and is based only on `kernel_size`, not on the
-  # dimensions of `inputs` (as opposed to using `tf.layers.conv2d` alone).
-  if strides > 1:
-    inputs = fixed_padding(inputs, kernel_size, data_format)
-
-  return tf.layers.conv2d(
-      inputs=inputs, filters=filters, kernel_size=kernel_size, strides=strides,
-      padding=('SAME' if strides == 1 else 'VALID'), use_bias=False,
-      kernel_initializer=tf.variance_scaling_initializer(),
-      data_format=data_format)
-
-
-################################################################################
-# ResNet block definitions.
-################################################################################
-def _building_block_v1(inputs, filters, training, projection_shortcut, strides,
-                       data_format):
-  """A single block for ResNet v1, without a bottleneck.
-
-  Convolution then batch normalization then ReLU as described by:
-    Deep Residual Learning for Image Recognition
-    https://arxiv.org/pdf/1512.03385.pdf
-    by Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun, Dec 2015.
-
-  Args:
-    inputs: A tensor of size [batch, channels, height_in, width_in] or
-      [batch, height_in, width_in, channels] depending on data_format.
-    filters: The number of filters for the convolutions.
-    training: A Boolean for whether the model is in training or inference
-      mode. Needed for batch normalization.
-    projection_shortcut: The function to use for projection shortcuts
-      (typically a 1x1 convolution when downsampling the input).
-    strides: The block's stride. If greater than 1, this block will ultimately
-      downsample the input.
-    data_format: The input format ('channels_last' or 'channels_first').
-
-  Returns:
-    The output tensor of the block; shape should match inputs.
-  """
-  shortcut = inputs
-
-  if projection_shortcut is not None:
-    shortcut = projection_shortcut(inputs)
-    shortcut = batch_norm(inputs=shortcut, training=training,
-                          data_format=data_format)
-
-  inputs = conv2d_fixed_padding(
-      inputs=inputs, filters=filters, kernel_size=3, strides=strides,
-      data_format=data_format)
-  inputs = batch_norm(inputs, training, data_format)
-  inputs = tf.nn.relu(inputs)
-
-  inputs = conv2d_fixed_padding(
-      inputs=inputs, filters=filters, kernel_size=3, strides=1,
-      data_format=data_format)
-  inputs = batch_norm(inputs, training, data_format)
-  inputs += shortcut
-  inputs = tf.nn.relu(inputs)
-
-  return inputs
-
-
-def _building_block_v2(inputs, filters, training, projection_shortcut, strides,
-                       data_format):
-  """A single block for ResNet v2, without a bottleneck.
-
-  Batch normalization then ReLu then convolution as described by:
-    Identity Mappings in Deep Residual Networks
-    https://arxiv.org/pdf/1603.05027.pdf
-    by Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun, Jul 2016.
-
-  Args:
-    inputs: A tensor of size [batch, channels, height_in, width_in] or
-      [batch, height_in, width_in, channels] depending on data_format.
-    filters: The number of filters for the convolutions.
-    training: A Boolean for whether the model is in training or inference
-      mode. Needed for batch normalization.
-    projection_shortcut: The function to use for projection shortcuts
-      (typically a 1x1 convolution when downsampling the input).
-    strides: The block's stride. If greater than 1, this block will ultimately
-      downsample the input.
-    data_format: The input format ('channels_last' or 'channels_first').
-
-  Returns:
-    The output tensor of the block; shape should match inputs.
-  """
-  shortcut = inputs
-  inputs = batch_norm(inputs, training, data_format)
-  inputs = tf.nn.relu(inputs)
-
-  # The projection shortcut should come after the first batch norm and ReLU
-  # since it performs a 1x1 convolution.
-  if projection_shortcut is not None:
-    shortcut = projection_shortcut(inputs)
-
-  inputs = conv2d_fixed_padding(
-      inputs=inputs, filters=filters, kernel_size=3, strides=strides,
-      data_format=data_format)
-
-  inputs = batch_norm(inputs, training, data_format)
-  inputs = tf.nn.relu(inputs)
-  inputs = conv2d_fixed_padding(
-      inputs=inputs, filters=filters, kernel_size=3, strides=1,
-      data_format=data_format)
-
-  return inputs + shortcut
-
-
-def _bottleneck_block_v1(inputs, filters, training, projection_shortcut,
-                         strides, data_format):
-  """A single block for ResNet v1, with a bottleneck.
-
-  Similar to _building_block_v1(), except using the "bottleneck" blocks
-  described in:
-    Convolution then batch normalization then ReLU as described by:
-      Deep Residual Learning for Image Recognition
-      https://arxiv.org/pdf/1512.03385.pdf
-      by Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun, Dec 2015.
-
-  Args:
-    inputs: A tensor of size [batch, channels, height_in, width_in] or
-      [batch, height_in, width_in, channels] depending on data_format.
-    filters: The number of filters for the convolutions.
-    training: A Boolean for whether the model is in training or inference
-      mode. Needed for batch normalization.
-    projection_shortcut: The function to use for projection shortcuts
-      (typically a 1x1 convolution when downsampling the input).
-    strides: The block's stride. If greater than 1, this block will ultimately
-      downsample the input.
-    data_format: The input format ('channels_last' or 'channels_first').
-
-  Returns:
-    The output tensor of the block; shape should match inputs.
-  """
-  shortcut = inputs
-
-  if projection_shortcut is not None:
-    shortcut = projection_shortcut(inputs)
-    shortcut = batch_norm(inputs=shortcut, training=training,
-                          data_format=data_format)
-
-  inputs = conv2d_fixed_padding(
-      inputs=inputs, filters=filters, kernel_size=1, strides=1,
-      data_format=data_format)
-  inputs = batch_norm(inputs, training, data_format)
-  inputs = tf.nn.relu(inputs)
-
-  inputs = conv2d_fixed_padding(
-      inputs=inputs, filters=filters, kernel_size=3, strides=strides,
-      data_format=data_format)
-  inputs = batch_norm(inputs, training, data_format)
-  inputs = tf.nn.relu(inputs)
-
-  inputs = conv2d_fixed_padding(
-      inputs=inputs, filters=4 * filters, kernel_size=1, strides=1,
-      data_format=data_format)
-  inputs = batch_norm(inputs, training, data_format)
-  inputs += shortcut
-  inputs = tf.nn.relu(inputs)
-
-  return inputs
-
-
-def _bottleneck_block_v2(inputs, filters, training, projection_shortcut,
-                         strides, data_format):
-  """A single block for ResNet v2, with a bottleneck.
-
-  Similar to _building_block_v2(), except using the "bottleneck" blocks
-  described in:
-    Convolution then batch normalization then ReLU as described by:
-      Deep Residual Learning for Image Recognition
-      https://arxiv.org/pdf/1512.03385.pdf
-      by Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun, Dec 2015.
-
-  Adapted to the ordering conventions of:
-    Batch normalization then ReLu then convolution as described by:
-      Identity Mappings in Deep Residual Networks
-      https://arxiv.org/pdf/1603.05027.pdf
-      by Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun, Jul 2016.
-
-  Args:
-    inputs: A tensor of size [batch, channels, height_in, width_in] or
-      [batch, height_in, width_in, channels] depending on data_format.
-    filters: The number of filters for the convolutions.
-    training: A Boolean for whether the model is in training or inference
-      mode. Needed for batch normalization.
-    projection_shortcut: The function to use for projection shortcuts
-      (typically a 1x1 convolution when downsampling the input).
-    strides: The block's stride. If greater than 1, this block will ultimately
-      downsample the input.
-    data_format: The input format ('channels_last' or 'channels_first').
-
-  Returns:
-    The output tensor of the block; shape should match inputs.
-  """
-  shortcut = inputs
-  inputs = batch_norm(inputs, training, data_format)
-  inputs = tf.nn.relu(inputs)
-
-  # The projection shortcut should come after the first batch norm and ReLU
-  # since it performs a 1x1 convolution.
-  if projection_shortcut is not None:
-    shortcut = projection_shortcut(inputs)
-
-  inputs = conv2d_fixed_padding(
-      inputs=inputs, filters=filters, kernel_size=1, strides=1,
-      data_format=data_format)
-
-  inputs = batch_norm(inputs, training, data_format)
-  inputs = tf.nn.relu(inputs)
-  inputs = conv2d_fixed_padding(
-      inputs=inputs, filters=filters, kernel_size=3, strides=strides,
-      data_format=data_format)
-
-  inputs = batch_norm(inputs, training, data_format)
-  inputs = tf.nn.relu(inputs)
-  inputs = conv2d_fixed_padding(
-      inputs=inputs, filters=4 * filters, kernel_size=1, strides=1,
-      data_format=data_format)
-
-  return inputs + shortcut
-
-
-def block_layer(inputs, filters, bottleneck, block_fn, blocks, strides,
-                training, name, data_format):
-  """Creates one layer of blocks for the ResNet model.
-
-  Args:
-    inputs: A tensor of size [batch, channels, height_in, width_in] or
-      [batch, height_in, width_in, channels] depending on data_format.
-    filters: The number of filters for the first convolution of the layer.
-    bottleneck: Is the block created a bottleneck block.
-    block_fn: The block to use within the model, either `building_block` or
-      `bottleneck_block`.
-    blocks: The number of blocks contained in the layer.
-    strides: The stride to use for the first convolution of the layer. If
-      greater than 1, this layer will ultimately downsample the input.
-    training: Either True or False, whether we are currently training the
-      model. Needed for batch norm.
-    name: A string name for the tensor output of the block layer.
-    data_format: The input format ('channels_last' or 'channels_first').
-
-  Returns:
-    The output tensor of the block layer.
-  """
-
-  # Bottleneck blocks end with 4x the number of filters as they start with
-  filters_out = filters * 4 if bottleneck else filters
-
-  def projection_shortcut(inputs):
-    return conv2d_fixed_padding(
-        inputs=inputs, filters=filters_out, kernel_size=1, strides=strides,
-        data_format=data_format)
-
-  # Only the first block per block_layer uses projection_shortcut and strides
-  inputs = block_fn(inputs, filters, training, projection_shortcut, strides,
-                    data_format)
-
-  for _ in range(1, blocks):
-    inputs = block_fn(inputs, filters, training, None, 1, data_format)
-
-  return tf.identity(inputs, name)
-
-
-class Model(object):
-  """Base class for building the Resnet Model."""
-
-  def __init__(self, resnet_size, bottleneck, num_classes, num_filters,
-               kernel_size,
-               conv_stride, first_pool_size, first_pool_stride,
-               block_sizes, block_strides,
-               resnet_version=DEFAULT_VERSION, data_format=None,
-               dtype=DEFAULT_DTYPE):
-    """Creates a model for classifying an image.
-
-    Args:
-      resnet_size: A single integer for the size of the ResNet model.
-      bottleneck: Use regular blocks or bottleneck blocks.
-      num_classes: The number of classes used as labels.
-      num_filters: The number of filters to use for the first block layer
-        of the model. This number is then doubled for each subsequent block
-        layer.
-      kernel_size: The kernel size to use for convolution.
-      conv_stride: stride size for the initial convolutional layer
-      first_pool_size: Pool size to be used for the first pooling layer.
-        If none, the first pooling layer is skipped.
-      first_pool_stride: stride size for the first pooling layer. Not used
-        if first_pool_size is None.
-      block_sizes: A list containing n values, where n is the number of sets of
-        block layers desired. Each value should be the number of blocks in the
-        i-th set.
-      block_strides: List of integers representing the desired stride size for
-        each of the sets of block layers. Should be same length as block_sizes.
-      resnet_version: Integer representing which version of the ResNet network
-        to use. See README for details. Valid values: [1, 2]
-      data_format: Input format ('channels_last', 'channels_first', or None).
-        If set to None, the format is dependent on whether a GPU is available.
-      dtype: The TensorFlow dtype to use for calculations. If not specified
-        tf.float32 is used.
-
-    Raises:
-      ValueError: if invalid version is selected.
-    """
-    self.resnet_size = resnet_size
-
-    if not data_format:
-      data_format = (
-          'channels_first' if tf.test.is_built_with_cuda() else 'channels_last')
-
-    self.resnet_version = resnet_version
-    if resnet_version not in (1, 2):
-      raise ValueError(
-          'Resnet version should be 1 or 2. See README for citations.')
-
-    self.bottleneck = bottleneck
-    if bottleneck:
-      if resnet_version == 1:
-        self.block_fn = _bottleneck_block_v1
-      else:
-        self.block_fn = _bottleneck_block_v2
-    else:
-      if resnet_version == 1:
-        self.block_fn = _building_block_v1
-      else:
-        self.block_fn = _building_block_v2
-
-    if dtype not in ALLOWED_TYPES:
-      raise ValueError('dtype must be one of: {}'.format(ALLOWED_TYPES))
-
-    self.data_format = data_format
-    self.num_classes = num_classes
-    self.num_filters = num_filters
-    self.kernel_size = kernel_size
-    self.conv_stride = conv_stride
-    self.first_pool_size = first_pool_size
-    self.first_pool_stride = first_pool_stride
-    self.block_sizes = block_sizes
-    self.block_strides = block_strides
-    self.dtype = dtype
-    self.pre_activation = resnet_version == 2
-
-  def _custom_dtype_getter(self, getter, name, shape=None, dtype=DEFAULT_DTYPE,
-                           *args, **kwargs):
-    """Creates variables in fp32, then casts to fp16 if necessary.
-
-    This function is a custom getter. A custom getter is a function with the
-    same signature as tf.get_variable, except it has an additional getter
-    parameter. Custom getters can be passed as the `custom_getter` parameter of
-    tf.variable_scope. Then, tf.get_variable will call the custom getter,
-    instead of directly getting a variable itself. This can be used to change
-    the types of variables that are retrieved with tf.get_variable.
-    The `getter` parameter is the underlying variable getter, that would have
-    been called if no custom getter was used. Custom getters typically get a
-    variable with `getter`, then modify it in some way.
-
-    This custom getter will create an fp32 variable. If a low precision
-    (e.g. float16) variable was requested it will then cast the variable to the
-    requested dtype. The reason we do not directly create variables in low
-    precision dtypes is that applying small gradients to such variables may
-    cause the variable not to change.
-
-    Args:
-      getter: The underlying variable getter, that has the same signature as
-        tf.get_variable and returns a variable.
-      name: The name of the variable to get.
-      shape: The shape of the variable to get.
-      dtype: The dtype of the variable to get. Note that if this is a low
-        precision dtype, the variable will be created as a tf.float32 variable,
-        then cast to the appropriate dtype
-      *args: Additional arguments to pass unmodified to getter.
-      **kwargs: Additional keyword arguments to pass unmodified to getter.
-
-    Returns:
-      A variable which is cast to fp16 if necessary.
-    """
-
-    if dtype in CASTABLE_TYPES:
-      var = getter(name, shape, tf.float32, *args, **kwargs)
-      return tf.cast(var, dtype=dtype, name=name + '_cast')
-    else:
-      return getter(name, shape, dtype, *args, **kwargs)
-
-  def _model_variable_scope(self):
-    """Returns a variable scope that the model should be created under.
-
-    If self.dtype is a castable type, model variable will be created in fp32
-    then cast to self.dtype before being used.
-
-    Returns:
-      A variable scope for the model.
-    """
-
-    return tf.variable_scope('resnet_model',
-                             custom_getter=self._custom_dtype_getter)
-
-  def __call__(self, inputs, training):
-    """Add operations to classify a batch of input images.
-
-    Args:
-      inputs: A Tensor representing a batch of input images.
-      training: A boolean. Set to True to add operations required only when
-        training the classifier.
-
-    Returns:
-      A logits Tensor with shape [<batch_size>, self.num_classes].
-    """
-
-    with self._model_variable_scope():
-      if self.data_format == 'channels_first':
-        # Convert the inputs from channels_last (NHWC) to channels_first (NCHW).
-        # This provides a large performance boost on GPU. See
-        # https://www.tensorflow.org/performance/performance_guide#data_formats
-        inputs = tf.transpose(inputs, [0, 3, 1, 2])
-
-      inputs = conv2d_fixed_padding(
-          inputs=inputs, filters=self.num_filters, kernel_size=self.kernel_size,
-          strides=self.conv_stride, data_format=self.data_format)
-      inputs = tf.identity(inputs, 'initial_conv')
-
-      # We do not include batch normalization or activation functions in V2
-      # for the initial conv1 because the first ResNet unit will perform these
-      # for both the shortcut and non-shortcut paths as part of the first
-      # block's projection. Cf. Appendix of [2].
-      if self.resnet_version == 1:
-        inputs = batch_norm(inputs, training, self.data_format)
-        inputs = tf.nn.relu(inputs)
-
-      if self.first_pool_size:
-        inputs = tf.layers.max_pooling2d(
-            inputs=inputs, pool_size=self.first_pool_size,
-            strides=self.first_pool_stride, padding='SAME',
-            data_format=self.data_format)
-        inputs = tf.identity(inputs, 'initial_max_pool')
-
-      for i, num_blocks in enumerate(self.block_sizes):
-        num_filters = self.num_filters * (2**i)
-        inputs = block_layer(
-            inputs=inputs, filters=num_filters, bottleneck=self.bottleneck,
-            block_fn=self.block_fn, blocks=num_blocks,
-            strides=self.block_strides[i], training=training,
-            name='block_layer{}'.format(i + 1), data_format=self.data_format)
-
-      # Only apply the BN and ReLU for models that do pre_activation in each
-      # building/bottleneck block, e.g. ResNet V2.
-      if self.pre_activation:
-        inputs = batch_norm(inputs, training, self.data_format)
-        inputs = tf.nn.relu(inputs)
-
-      # The current top layer has shape
-      # `batch_size x pool_size x pool_size x final_size`.
-      # ResNet does an Average Pooling layer over pool_size,
-      # but that is the same as doing a reduce_mean. We do a reduce_mean
-      # here because it performs better than AveragePooling2D.
-      axes = [2, 3] if self.data_format == 'channels_first' else [1, 2]
-      inputs = tf.reduce_mean(inputs, axes, keepdims=True)
-      inputs = tf.identity(inputs, 'final_reduce_mean')
-
-      inputs = tf.squeeze(inputs, axes)
-      inputs = tf.layers.dense(inputs=inputs, units=self.num_classes)
-      inputs = tf.identity(inputs, 'final_dense')
-      return inputs
--- a/TensorFlow/ComputeVision/Accuracy_Validation/ResNet50_Official/official/resnet/resnet_run_loop.py
+++ b/TensorFlow/ComputeVision/Accuracy_Validation/ResNet50_Official/official/resnet/resnet_run_loop.py
-# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Contains utility and supporting functions for ResNet.
-
-  This module contains ResNet code which does not directly build layers. This
-includes dataset management, hyperparameter and optimizer code, and argument
-parsing. Code for defining the ResNet layers can be found in resnet_model.py.
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import functools
-import math
-import multiprocessing
-import os
-
-# pylint: disable=g-bad-import-order
-from absl import flags
-import tensorflow as tf
-from tensorflow.contrib.data.python.ops import threadpool
-
-from official.resnet import resnet_model
-from official.utils.flags import core as flags_core
-from official.utils.export import export
-from official.utils.logs import hooks_helper
-from official.utils.logs import logger
-from official.resnet import imagenet_preprocessing
-from official.utils.misc import distribution_utils
-from official.utils.misc import model_helpers
-
-
-################################################################################
-# Functions for input processing.
-################################################################################
-def process_record_dataset(dataset,
-                           is_training,
-                           batch_size,
-                           shuffle_buffer,
-                           parse_record_fn,
-                           num_epochs=1,
-                           dtype=tf.float32,
-                           datasets_num_private_threads=None,
-                           num_parallel_batches=1):
-  """Given a Dataset with raw records, return an iterator over the records.
-
-  Args:
-    dataset: A Dataset representing raw records
-    is_training: A boolean denoting whether the input is for training.
-    batch_size: The number of samples per batch.
-    shuffle_buffer: The buffer size to use when shuffling records. A larger
-      value results in better randomness, but smaller values reduce startup
-      time and use less memory.
-    parse_record_fn: A function that takes a raw record and returns the
-      corresponding (image, label) pair.
-    num_epochs: The number of epochs to repeat the dataset.
-    dtype: Data type to use for images/features.
-    datasets_num_private_threads: Number of threads for a private
-      threadpool created for all datasets computation.
-    num_parallel_batches: Number of parallel batches for tf.data.
-
-  Returns:
-    Dataset of (image, label) pairs ready for iteration.
-  """
-
-  # Prefetches a batch at a time to smooth out the time taken to load input
-  # files for shuffling and processing.
-  dataset = dataset.prefetch(buffer_size=batch_size)
-  if is_training:
-    # Shuffles records before repeating to respect epoch boundaries.
-    dataset = dataset.shuffle(buffer_size=shuffle_buffer)
-
-  # Repeats the dataset for the number of epochs to train.
-  dataset = dataset.repeat(num_epochs)
-
-  # Parses the raw records into images and labels.
-  dataset = dataset.apply(
-      tf.contrib.data.map_and_batch(
-          lambda value: parse_record_fn(value, is_training, dtype),
-          batch_size=batch_size,
-          num_parallel_batches=num_parallel_batches,
-          drop_remainder=False))
-
-  # Operations between the final prefetch and the get_next call to the iterator
-  # will happen synchronously during run time. We prefetch here again to
-  # background all of the above processing work and keep it out of the
-  # critical training path. Setting buffer_size to tf.contrib.data.AUTOTUNE
-  # allows DistributionStrategies to adjust how many batches to fetch based
-  # on how many devices are present.
-  dataset = dataset.prefetch(buffer_size=tf.contrib.data.AUTOTUNE)
-
-  # Defines a specific size thread pool for tf.data operations.
-  if datasets_num_private_threads:
-    tf.logging.info('datasets_num_private_threads: %s',
-                    datasets_num_private_threads)
-    dataset = threadpool.override_threadpool(
-        dataset,
-        threadpool.PrivateThreadPool(
-            datasets_num_private_threads,
-            display_name='input_pipeline_thread_pool'))
-
-  return dataset
-
-
-def get_synth_input_fn(height, width, num_channels, num_classes,
-                       dtype=tf.float32):
-  """Returns an input function that returns a dataset with random data.
-
-  This input_fn returns a data set that iterates over a set of random data and
-  bypasses all preprocessing, e.g. jpeg decode and copy. The host to device
-  copy is still included. This is used to find the upper throughput bound
-  when tuning the full input pipeline.
-
-  Args:
-    height: Integer height that will be used to create a fake image tensor.
-    width: Integer width that will be used to create a fake image tensor.
-    num_channels: Integer depth that will be used to create a fake image tensor.
-    num_classes: Number of classes that should be represented in the fake labels
-      tensor
-    dtype: Data type for features/images.
-
-  Returns:
-    An input_fn that can be used in place of a real one to return a dataset
-    that can be used for iteration.
-  """
-  # pylint: disable=unused-argument
-  def input_fn(is_training, data_dir, batch_size, *args, **kwargs):
-    """Returns dataset filled with random data."""
-    # Synthetic input should be within [0, 255].
-    inputs = tf.truncated_normal(
-        [batch_size] + [height, width, num_channels],
-        dtype=dtype,
-        mean=127,
-        stddev=60,
-        name='synthetic_inputs')
-
-    labels = tf.random_uniform(
-        [batch_size],
-        minval=0,
-        maxval=num_classes - 1,
-        dtype=tf.int32,
-        name='synthetic_labels')
-    data = tf.data.Dataset.from_tensors((inputs, labels)).repeat()
-    data = data.prefetch(buffer_size=tf.contrib.data.AUTOTUNE)
-    return data
-
-  return input_fn
-
-
-def image_bytes_serving_input_fn(image_shape, dtype=tf.float32):
-  """Serving input fn for raw jpeg images."""
-
-  def _preprocess_image(image_bytes):
-    """Preprocess a single raw image."""
-    # Bounding box around the whole image.
-    bbox = tf.constant([0.0, 0.0, 1.0, 1.0], dtype=dtype, shape=[1, 1, 4])
-    height, width, num_channels = image_shape
-    image = imagenet_preprocessing.preprocess_image(
-        image_bytes, bbox, height, width, num_channels, is_training=False)
-    return image
-
-  image_bytes_list = tf.placeholder(
-      shape=[None], dtype=tf.string, name='input_tensor')
-  images = tf.map_fn(
-      _preprocess_image, image_bytes_list, back_prop=False, dtype=dtype)
-  return tf.estimator.export.TensorServingInputReceiver(
-      images, {'image_bytes': image_bytes_list})
-
-
-def override_flags_and_set_envars_for_gpu_thread_pool(flags_obj):
-  """Override flags and set env_vars for performance.
-
-  These settings exist to test the difference between using stock settings
-  and manual tuning. It also shows some of the ENV_VARS that can be tweaked to
-  squeeze a few extra examples per second.  These settings are defaulted to the
-  current platform of interest, which changes over time.
-
-  On systems with small numbers of cpu cores, e.g. under 8 logical cores,
-  setting up a gpu thread pool with `tf_gpu_thread_mode=gpu_private` may perform
-  poorly.
-
-  Args:
-    flags_obj: Current flags, which will be adjusted, possibly overriding
-      what has been set by the user on the command line.
-  """
-  cpu_count = multiprocessing.cpu_count()
-  tf.logging.info('Logical CPU cores: %s', cpu_count)
-
-  # Sets up thread pool for each GPU for op scheduling.
-  per_gpu_thread_count = 1
-  total_gpu_thread_count = per_gpu_thread_count * flags_obj.num_gpus
-  os.environ['TF_GPU_THREAD_MODE'] = flags_obj.tf_gpu_thread_mode
-  os.environ['TF_GPU_THREAD_COUNT'] = str(per_gpu_thread_count)
-  tf.logging.info('TF_GPU_THREAD_COUNT: %s', os.environ['TF_GPU_THREAD_COUNT'])
-  tf.logging.info('TF_GPU_THREAD_MODE: %s', os.environ['TF_GPU_THREAD_MODE'])
-
-  # Reduces general thread pool by number of threads used for GPU pool.
-  main_thread_count = cpu_count - total_gpu_thread_count
-  flags_obj.inter_op_parallelism_threads = main_thread_count
-
-  # Sets thread count for tf.data: logical cores minus the threads assigned to
-  # the private GPU pool and minus 2 threads per GPU for event monitoring and
-  # sending / receiving tensors.
-  num_monitoring_threads = 2 * flags_obj.num_gpus
-  flags_obj.datasets_num_private_threads = (cpu_count - total_gpu_thread_count
-                                            - num_monitoring_threads)
-
-
-################################################################################
-# Functions for running training/eval/validation loops for the model.
-################################################################################
-def learning_rate_with_decay(
-    batch_size, batch_denom, num_images, boundary_epochs, decay_rates,
-    base_lr=0.1, warmup=False):
-  """Get a learning rate that decays step-wise as training progresses.
-
-  Args:
-    batch_size: the number of examples processed in each training batch.
-    batch_denom: this value will be used to scale the base learning rate.
-      `0.1 * batch size` is divided by this number, such that when
-      batch_denom == batch_size, the initial learning rate will be 0.1.
-    num_images: total number of images that will be used for training.
-    boundary_epochs: list of ints representing the epochs at which we
-      decay the learning rate.
-    decay_rates: list of floats representing the decay rates to be used
-      for scaling the learning rate. It should have one more element
-      than `boundary_epochs`, and all elements should have the same type.
-    base_lr: Initial learning rate scaled based on batch_denom.
-    warmup: Run a 5 epoch warmup to the initial lr.
-  Returns:
-    Returns a function that takes a single argument - the number of batches
-    trained so far (global_step) - and returns the learning rate to be used
-    for training the next batch.
-  """
-  initial_learning_rate = base_lr * batch_size / batch_denom
-  batches_per_epoch = num_images / batch_size
-
-  # Reduce the learning rate at certain epochs.
-  # CIFAR-10: divide by 10 at epoch 100, 150, and 200
-  # ImageNet: divide by 10 at epoch 30, 60, 80, and 90
-  boundaries = [int(batches_per_epoch * epoch) for epoch in boundary_epochs]
-  vals = [initial_learning_rate * decay for decay in decay_rates]
-
-  def learning_rate_fn(global_step):
-    """Builds scaled learning rate function with 5 epoch warm up."""
-    lr = tf.train.piecewise_constant(global_step, boundaries, vals)
-    if warmup:
-      warmup_steps = int(batches_per_epoch * 5)
-      warmup_lr = (
-          initial_learning_rate * tf.cast(global_step, tf.float32) / tf.cast(
-              warmup_steps, tf.float32))
-      return tf.cond(global_step < warmup_steps, lambda: warmup_lr, lambda: lr)
-    return lr
-
-  return learning_rate_fn
-
-
-def resnet_model_fn(features, labels, mode, model_class,
-                    resnet_size, weight_decay, learning_rate_fn, momentum,
-                    data_format, resnet_version, loss_scale,
-                    loss_filter_fn=None, dtype=resnet_model.DEFAULT_DTYPE,
-                    fine_tune=False):
-  """Shared functionality for different resnet model_fns.
-
-  Initializes the ResnetModel representing the model layers
-  and uses that model to build the necessary EstimatorSpecs for
-  the `mode` in question. For training, this means building losses,
-  the optimizer, and the train op that get passed into the EstimatorSpec.
-  For evaluation and prediction, the EstimatorSpec is returned without
-  a train op, but with the necessary parameters for the given mode.
-
-  Args:
-    features: tensor representing input images
-    labels: tensor representing class labels for all input images
-    mode: current estimator mode; should be one of
-      `tf.estimator.ModeKeys.TRAIN`, `EVAL`, `PREDICT`
-    model_class: a class representing a TensorFlow model that has a __call__
-      function. We assume here that this is a subclass of ResnetModel.
-    resnet_size: A single integer for the size of the ResNet model.
-    weight_decay: weight decay loss rate used to regularize learned variables.
-    learning_rate_fn: function that returns the current learning rate given
-      the current global_step
-    momentum: momentum term used for optimization
-    data_format: Input format ('channels_last', 'channels_first', or None).
-      If set to None, the format is dependent on whether a GPU is available.
-    resnet_version: Integer representing which version of the ResNet network to
-      use. See README for details. Valid values: [1, 2]
-    loss_scale: The factor to scale the loss for numerical stability. A detailed
-      summary is present in the arg parser help text.
-    loss_filter_fn: function that takes a string variable name and returns
-      True if the var should be included in loss calculation, and False
-      otherwise. If None, batch_normalization variables will be excluded
-      from the loss.
-    dtype: the TensorFlow dtype to use for calculations.
-    fine_tune: If True, only train the dense (final) layers.
-
-  Returns:
-    EstimatorSpec parameterized according to the input params and the
-    current mode.
-  """
-
-  # Generate a summary node for the images
-  tf.summary.image('images', features, max_outputs=6)
-  # Checks that features/images have same data type being used for calculations.
-  assert features.dtype == dtype
-
-  model = model_class(resnet_size, data_format, resnet_version=resnet_version,
-                      dtype=dtype)
-
-  logits = model(features, mode == tf.estimator.ModeKeys.TRAIN)
-
-  # This acts as a no-op if the logits are already in fp32 (provided logits are
-  # not a SparseTensor). If dtype is low precision, logits must be cast to
-  # fp32 for numerical stability.
-  logits = tf.cast(logits, tf.float32)
-
-  predictions = {
-      'classes': tf.argmax(logits, axis=1),
-      'probabilities': tf.nn.softmax(logits, name='softmax_tensor')
-  }
-
-  if mode == tf.estimator.ModeKeys.PREDICT:
-    # Return the predictions and the specification for serving a SavedModel
-    return tf.estimator.EstimatorSpec(
-        mode=mode,
-        predictions=predictions,
-        export_outputs={
-            'predict': tf.estimator.export.PredictOutput(predictions)
-        })
-
-  # Calculate loss, which includes softmax cross entropy and L2 regularization.
-  cross_entropy = tf.losses.sparse_softmax_cross_entropy(
-      logits=logits, labels=labels)
-
-  # Create a tensor named cross_entropy for logging purposes.
-  tf.identity(cross_entropy, name='cross_entropy')
-  tf.summary.scalar('cross_entropy', cross_entropy)
-
-  # If no loss_filter_fn is passed, assume we want the default behavior,
-  # which is that batch_normalization variables are excluded from loss.
-  def exclude_batch_norm(name):
-    return 'batch_normalization' not in name
-  loss_filter_fn = loss_filter_fn or exclude_batch_norm
-
-  # Add weight decay to the loss.
-  l2_loss = weight_decay * tf.add_n(
-      # loss is computed using fp32 for numerical stability.
-      [tf.nn.l2_loss(tf.cast(v, tf.float32)) for v in tf.trainable_variables()
-       if loss_filter_fn(v.name)])
-  tf.summary.scalar('l2_loss', l2_loss)
-  loss = cross_entropy + l2_loss
-
-  if mode == tf.estimator.ModeKeys.TRAIN:
-    global_step = tf.train.get_or_create_global_step()
-
-    learning_rate = learning_rate_fn(global_step)
-
-    # Create a tensor named learning_rate for logging purposes
-    tf.identity(learning_rate, name='learning_rate')
-    tf.summary.scalar('learning_rate', learning_rate)
-
-    optimizer = tf.train.MomentumOptimizer(
-        learning_rate=learning_rate,
-        momentum=momentum
-    )
-
-    def _dense_grad_filter(gvs):
-      """Only apply gradient updates to the final layer.
-
-      This function is used for fine tuning.
-
-      Args:
-        gvs: list of tuples with gradients and variable info
-      Returns:
-        filtered gradients so that only the dense layer remains
-      """
-      return [(g, v) for g, v in gvs if 'dense' in v.name]
-
-    if loss_scale != 1:
-      # When computing fp16 gradients, often intermediate tensor values are
-      # so small, they underflow to 0. To avoid this, we multiply the loss by
-      # loss_scale to make these tensor values loss_scale times bigger.
-      scaled_grad_vars = optimizer.compute_gradients(loss * loss_scale)
-
-      if fine_tune:
-        scaled_grad_vars = _dense_grad_filter(scaled_grad_vars)
-
-      # Once the gradient computation is complete we can scale the gradients
-      # back to the correct scale before passing them to the optimizer.
-      unscaled_grad_vars = [(grad / loss_scale, var)
-                            for grad, var in scaled_grad_vars]
-      minimize_op = optimizer.apply_gradients(unscaled_grad_vars, global_step)
-    else:
-      grad_vars = optimizer.compute_gradients(loss)
-      if fine_tune:
-        grad_vars = _dense_grad_filter(grad_vars)
-      minimize_op = optimizer.apply_gradients(grad_vars, global_step)
-
-    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
-    train_op = tf.group(minimize_op, update_ops)
-  else:
-    train_op = None
-
-  accuracy = tf.metrics.accuracy(labels, predictions['classes'])
-  accuracy_top_5 = tf.metrics.mean(tf.nn.in_top_k(predictions=logits,
-                                                  targets=labels,
-                                                  k=5,
-                                                  name='top_5_op'))
-  metrics = {'accuracy': accuracy,
-             'accuracy_top_5': accuracy_top_5}
-
-  # Create a tensor named train_accuracy for logging purposes
-  tf.identity(accuracy[1], name='train_accuracy')
-  tf.identity(accuracy_top_5[1], name='train_accuracy_top_5')
-  tf.summary.scalar('train_accuracy', accuracy[1])
-  tf.summary.scalar('train_accuracy_top_5', accuracy_top_5[1])
-
-  return tf.estimator.EstimatorSpec(
-      mode=mode,
-      predictions=predictions,
-      loss=loss,
-      train_op=train_op,
-      eval_metric_ops=metrics)
-
-
-def resnet_main(
-    flags_obj, model_function, input_function, dataset_name, shape=None):
-  """Shared main loop for ResNet Models.
-
-  Args:
-    flags_obj: An object containing parsed flags. See define_resnet_flags()
-      for details.
-    model_function: the function that instantiates the Model and builds the
-      ops for train/eval. This will be passed directly into the estimator.
-    input_function: the function that processes the dataset and returns a
-      dataset that the estimator can train on. This will be wrapped with
-      all the relevant flags for running and passed to estimator.
-    dataset_name: the name of the dataset for training and evaluation. This is
-      used for logging purposes.
-    shape: list of ints representing the shape of the images used for training.
-      This is only used if flags_obj.export_dir is passed.
-
-  Returns:
-    Dict of results of the run.
-  """
-
-  model_helpers.apply_clean(flags.FLAGS)
-
-  # Ensures flag override logic is only executed if explicitly triggered.
-  if flags_obj.tf_gpu_thread_mode:
-    override_flags_and_set_envars_for_gpu_thread_pool(flags_obj)
-
-  # Creates session config. allow_soft_placement=True is required for
-  # multi-GPU and is not harmful for other modes.
-  session_config = tf.ConfigProto(
-      inter_op_parallelism_threads=flags_obj.inter_op_parallelism_threads,
-      intra_op_parallelism_threads=flags_obj.intra_op_parallelism_threads,
-      allow_soft_placement=True)
-
-  distribution_strategy = distribution_utils.get_distribution_strategy(
-      flags_core.get_num_gpus(flags_obj), flags_obj.all_reduce_alg)
-
-  # Creates a `RunConfig` that checkpoints every 24 hours which essentially
-  # results in checkpoints determined only by `epochs_between_evals`.
-  run_config = tf.estimator.RunConfig(
-      train_distribute=distribution_strategy,
-      session_config=session_config,
-      save_checkpoints_secs=60*60*24)
-
-  # Initializes model with all but the dense layer from pretrained ResNet.
-  if flags_obj.pretrained_model_checkpoint_path is not None:
-    warm_start_settings = tf.estimator.WarmStartSettings(
-        flags_obj.pretrained_model_checkpoint_path,
-        vars_to_warm_start='^(?!.*dense)')
-  else:
-    warm_start_settings = None
-
-  classifier = tf.estimator.Estimator(
-      model_fn=model_function, model_dir=flags_obj.model_dir, config=run_config,
-      warm_start_from=warm_start_settings, params={
-          'resnet_size': int(flags_obj.resnet_size),
-          'data_format': flags_obj.data_format,
-          'batch_size': flags_obj.batch_size,
-          'resnet_version': int(flags_obj.resnet_version),
-          'loss_scale': flags_core.get_loss_scale(flags_obj),
-          'dtype': flags_core.get_tf_dtype(flags_obj),
-          'fine_tune': flags_obj.fine_tune
-      })
-
-  run_params = {
-      'batch_size': flags_obj.batch_size,
-      'dtype': flags_core.get_tf_dtype(flags_obj),
-      'resnet_size': flags_obj.resnet_size,
-      'resnet_version': flags_obj.resnet_version,
-      'synthetic_data': flags_obj.use_synthetic_data,
-      'train_epochs': flags_obj.train_epochs,
-  }
-  if flags_obj.use_synthetic_data:
-    dataset_name = dataset_name + '-synthetic'
-
-  benchmark_logger = logger.get_benchmark_logger()
-  benchmark_logger.log_run_info('resnet', dataset_name, run_params,
-                                test_id=flags_obj.benchmark_test_id)
-
-  train_hooks = hooks_helper.get_train_hooks(
-      flags_obj.hooks,
-      model_dir=flags_obj.model_dir,
-      batch_size=flags_obj.batch_size)
-
-  def input_fn_train(num_epochs):
-    return input_function(
-        is_training=True,
-        data_dir=flags_obj.data_dir,
-        batch_size=distribution_utils.per_device_batch_size(
-            flags_obj.batch_size, flags_core.get_num_gpus(flags_obj)),
-        num_epochs=num_epochs,
-        dtype=flags_core.get_tf_dtype(flags_obj),
-        datasets_num_private_threads=flags_obj.datasets_num_private_threads,
-        num_parallel_batches=flags_obj.datasets_num_parallel_batches)
-
-  def input_fn_eval():
-    return input_function(
-        is_training=False,
-        data_dir=flags_obj.data_dir,
-        batch_size=distribution_utils.per_device_batch_size(
-            flags_obj.batch_size, flags_core.get_num_gpus(flags_obj)),
-        num_epochs=1,
-        dtype=flags_core.get_tf_dtype(flags_obj))
-
-  if flags_obj.eval_only or not flags_obj.train_epochs:
-    # If --eval_only is set, perform a single loop with zero train epochs.
-    schedule, n_loops = [0], 1
-  else:
-    # Compute the number of times to loop while training. All but the last
-    # pass will train for `epochs_between_evals` epochs, while the last will
-    # train for the number needed to reach `train_epochs`. For instance, if
-    #   train_epochs = 25 and epochs_between_evals = 10
-    # schedule will be set to [10, 10, 5]. That is to say, the loop will:
-    #   Train for 10 epochs and then evaluate.
-    #   Train for another 10 epochs and then evaluate.
-    #   Train for a final 5 epochs (to reach 25 epochs) and then evaluate.
-    n_loops = math.ceil(flags_obj.train_epochs / flags_obj.epochs_between_evals)
-    schedule = [flags_obj.epochs_between_evals for _ in range(int(n_loops))]
-    schedule[-1] = flags_obj.train_epochs - sum(schedule[:-1])  # over counting.
-
-  for cycle_index, num_train_epochs in enumerate(schedule):
-    tf.logging.info('Starting cycle: %d/%d', cycle_index, int(n_loops))
-
-    if num_train_epochs:
-      classifier.train(input_fn=lambda: input_fn_train(num_train_epochs),
-                       hooks=train_hooks, max_steps=flags_obj.max_train_steps)
-
-    tf.logging.info('Starting to evaluate.')
-
-    # flags_obj.max_train_steps is generally associated with testing and
-    # profiling. As a result it is frequently called with synthetic data, which
-    # will iterate forever. Passing steps=flags_obj.max_train_steps allows the
-    # eval (which is generally unimportant in those circumstances) to terminate.
-    # Note that eval will run for max_train_steps each loop, regardless of the
-    # global_step count.
-    eval_results = classifier.evaluate(input_fn=input_fn_eval,
-                                       steps=flags_obj.max_train_steps)
-
-    benchmark_logger.log_evaluation_result(eval_results)
-
-    if model_helpers.past_stop_threshold(
-        flags_obj.stop_threshold, eval_results['accuracy']):
-      break
-
-  if flags_obj.export_dir is not None:
-    # Exports a saved model for the given classifier.
-    export_dtype = flags_core.get_tf_dtype(flags_obj)
-    if flags_obj.image_bytes_as_serving_input:
-      input_receiver_fn = functools.partial(
-          image_bytes_serving_input_fn, shape, dtype=export_dtype)
-    else:
-      input_receiver_fn = export.build_tensor_serving_input_receiver_fn(
-          shape, batch_size=flags_obj.batch_size, dtype=export_dtype)
-    classifier.export_savedmodel(flags_obj.export_dir, input_receiver_fn,
-                                 strip_default_attrs=True)
-  return eval_results
-
-def define_resnet_flags(resnet_size_choices=None):
-  """Add flags and validators for ResNet."""
-  flags_core.define_base()
-  flags_core.define_performance(num_parallel_calls=False,
-                                tf_gpu_thread_mode=True,
-                                datasets_num_private_threads=True,
-                                datasets_num_parallel_batches=True)
-  flags_core.define_image()
-  flags_core.define_benchmark()
-  flags.adopt_module_key_flags(flags_core)
-
-  flags.DEFINE_enum(
-      name='resnet_version', short_name='rv', default='1',
-      enum_values=['1', '2'],
-      help=flags_core.help_wrap(
-          'Version of ResNet. (1 or 2) See README.md for details.'))
-  flags.DEFINE_bool(
-      name='fine_tune', short_name='ft', default=False,
-      help=flags_core.help_wrap(
-          'If True do not train any parameters except for the final layer.'))
-  flags.DEFINE_string(
-      name='pretrained_model_checkpoint_path', short_name='pmcp', default=None,
-      help=flags_core.help_wrap(
-          'If not None initialize all the network except the final layer with '
-          'these values'))
-  flags.DEFINE_boolean(
-      name='eval_only', default=False,
-      help=flags_core.help_wrap('Skip training and only perform evaluation on '
-                                'the latest checkpoint.'))
-  flags.DEFINE_boolean(
-      name='image_bytes_as_serving_input', default=False,
-      help=flags_core.help_wrap(
-          'If True exports savedmodel with serving signature that accepts '
-          'JPEG image bytes instead of a fixed size [HxWxC] tensor that '
-          'represents the image. The former is easier to use for serving at '
-          'the expense of image resize/cropping being done as part of model '
-          'inference. Note, this flag only applies to ImageNet and cannot '
-          'be used for CIFAR.'))
-  flags.DEFINE_boolean(
-      name='turn_off_distribution_strategy', default=False,
-      help=flags_core.help_wrap('Set to True to not use distribution '
-                                'strategies.'))
-  choice_kwargs = dict(
-      name='resnet_size', short_name='rs', default='50',
-      help=flags_core.help_wrap('The size of the ResNet model to use.'))
-
-  if resnet_size_choices is None:
-    flags.DEFINE_string(**choice_kwargs)
-  else:
-    flags.DEFINE_enum(enum_values=resnet_size_choices, **choice_kwargs)
--- a/TensorFlow/ComputeVision/Accuracy_Validation/ResNet50_Official/official/transformer/README.md
+++ b/TensorFlow/ComputeVision/Accuracy_Validation/ResNet50_Official/official/transformer/README.md
-# Transformer Translation Model
-This is an implementation of the Transformer translation model as described in the [Attention is All You Need](https://arxiv.org/abs/1706.03762) paper. Based on the code provided by the authors: [Transformer code](https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/models/transformer.py) from [Tensor2Tensor](https://github.com/tensorflow/tensor2tensor).
-
-Transformer is a neural network architecture that solves sequence to sequence problems using attention mechanisms. Unlike traditional neural seq2seq models, Transformer does not involve recurrent connections. The attention mechanism learns dependencies between tokens in two sequences. Since attention weights apply to all tokens in the sequences, the Transformer model is able to easily capture long-distance dependencies.
-
-Transformer's overall structure follows the standard encoder-decoder pattern. The encoder uses self-attention to compute a representation of the input sequence. The decoder generates the output sequence one token at a time, taking the encoder output and previous decoder-outputted tokens as inputs.
-
-The model also applies embeddings on the input and output tokens, and adds a constant positional encoding. The positional encoding adds information about the position of each token.
-
-## Contents
-  * [Contents](#contents)
-  * [Walkthrough](#walkthrough)
-  * [Benchmarks](#benchmarks)
-    * [Training times](#training-times)
-    * [Evaluation results](#evaluation-results)
-  * [Detailed instructions](#detailed-instructions)
-    * [Environment preparation](#environment-preparation)
-    * [Download and preprocess datasets](#download-and-preprocess-datasets)
-    * [Model training and evaluation](#model-training-and-evaluation)
-    * [Translate using the model](#translate-using-the-model)
-    * [Compute official BLEU score](#compute-official-bleu-score)
-    * [TPU](#tpu)
-  * [Export trained model](#export-trained-model)
-    * [Example translation](#example-translation)
-  * [Implementation overview](#implementation-overview)
-    * [Model Definition](#model-definition)
-    * [Model Estimator](#model-estimator)
-    * [Other scripts](#other-scripts)
-    * [Test dataset](#test-dataset)
-  * [Term definitions](#term-definitions)
-
-## Walkthrough
-
-Below are the commands for running the Transformer model. See the [Detailed instructions](#detailed-instructions) section for more details on running the model.
-
-```
-cd /path/to/models/official/transformer
-
-# Ensure that PYTHONPATH is correctly defined as described in
-# https://github.com/tensorflow/models/tree/master/official#requirements
-# export PYTHONPATH="$PYTHONPATH:/path/to/models"
-
-# Export variables
-PARAM_SET=big
-DATA_DIR=$HOME/transformer/data
-MODEL_DIR=$HOME/transformer/model_$PARAM_SET
-VOCAB_FILE=$DATA_DIR/vocab.ende.32768
-
-# Download training/evaluation datasets
-python data_download.py --data_dir=$DATA_DIR
-
-# Train the model for 10 epochs, and evaluate after every epoch.
-python transformer_main.py --data_dir=$DATA_DIR --model_dir=$MODEL_DIR \
-    --vocab_file=$VOCAB_FILE --param_set=$PARAM_SET \
-    --bleu_source=test_data/newstest2014.en --bleu_ref=test_data/newstest2014.de
-
-# Run during training in a separate process to get continuous updates,
-# or after training is complete.
-tensorboard --logdir=$MODEL_DIR
-
-# Translate some text using the trained model
-python translate.py --model_dir=$MODEL_DIR --vocab_file=$VOCAB_FILE \
-    --param_set=$PARAM_SET --text="hello world"
-
-# Compute model's BLEU score using the newstest2014 dataset.
-python translate.py --model_dir=$MODEL_DIR --vocab_file=$VOCAB_FILE \
-    --param_set=$PARAM_SET --file=test_data/newstest2014.en --file_out=translation.en
-python compute_bleu.py --translation=translation.en --reference=test_data/newstest2014.de
-```
-
-## Benchmarks
-### Training times
-
-Currently, both big and base parameter sets run on a single GPU. The measurements below
-are reported from running the model on a P100 GPU.
-
-Param Set | batches/sec | batches per epoch | time per epoch
--- | --- | --- | ---
-base | 4.8 | 83244 | 4 hr
-big | 1.1 | 41365 | 10 hr
-
-### Evaluation results
-Below are the case-insensitive BLEU scores after 10 epochs.
-
-Param Set | Score
--- | --- |
-base | 27.7
-big | 28.9
-
-
-## Detailed instructions
-
-
-0. ### Environment preparation
-
-   #### Add models repo to PYTHONPATH
-   Follow the instructions described in the [Requirements](https://github.com/tensorflow/models/tree/master/official#requirements) section to add the models folder to the python path.
-
-   #### Export variables (optional)
-
-   Export the following variables, or modify the values in each of the snippets below:
-   ```
-   PARAM_SET=big
-   DATA_DIR=$HOME/transformer/data
-   MODEL_DIR=$HOME/transformer/model_$PARAM_SET
-   VOCAB_FILE=$DATA_DIR/vocab.ende.32768
-   ```
-
-1. ### Download and preprocess datasets
-
-   [data_download.py](data_download.py) downloads and preprocesses the training and evaluation WMT datasets. After the data is downloaded and extracted, the training data is used to generate a vocabulary of subtokens. The evaluation and training strings are tokenized, and the resulting data is sharded, shuffled, and saved as TFRecords.
-
-   1.75GB of compressed data will be downloaded. In total, the raw files (compressed, extracted, and combined files) take up 8.4GB of disk space. The resulting TFRecord and vocabulary files are 722MB. The script takes around 40 minutes to run, with the bulk of the time spent downloading and ~15 minutes spent on preprocessing.
-
-   Command to run:
-   ```
-   python data_download.py --data_dir=$DATA_DIR
-   ```
-
-   Arguments:
-   * `--data_dir`: Path where the preprocessed TFRecord data, and vocab file will be saved.
-   * Use the `--help` or `-h` flag to get a full list of possible arguments.
-
-2. ### Model training and evaluation
-
-   [transformer_main.py](transformer_main.py) creates a Transformer model, and trains it using Tensorflow Estimator.
-
-   Command to run:
-   ```
-   python transformer_main.py --data_dir=$DATA_DIR --model_dir=$MODEL_DIR \
-       --vocab_file=$VOCAB_FILE --param_set=$PARAM_SET
-   ```
-
-   Arguments:
-   * `--data_dir`: This should be set to the same directory given to the `data_download`'s `data_dir` argument.
-   * `--model_dir`: Directory to save Transformer model training checkpoints.
-   * `--vocab_file`: Path to subtoken vocabulary file. If data_download was used, you may find the file in `data_dir`.
-   * `--param_set`: Parameter set to use when creating and training the model. Options are `base` and `big` (default).
-   * Use the `--help` or `-h` flag to get a full list of possible arguments.
-
-   #### Customizing training schedule
-
-   By default, the model will train for 10 epochs, and evaluate after every epoch. The training schedule may be defined through the flags:
-   * Training with epochs (default):
-     * `--train_epochs`: The total number of complete passes to make through the dataset
-     * `--epochs_between_evals`: The number of epochs to train between evaluations.
-   * Training with steps:
-     * `--train_steps`: sets the total number of training steps to run.
-     * `--steps_between_evals`: Number of training steps to run between evaluations.
-
-   Only one of `train_epochs` or `train_steps` may be set. Since the default option is to evaluate the model after training for an epoch, it may take 4 or more hours between model evaluations. To get more frequent evaluations, use the flags `--train_steps=250000 --steps_between_evals=1000`.
-
-   Note: At the beginning of each training session, the training dataset is reloaded and shuffled. Stopping the training before completing an epoch may result in worse model quality, due to the chance that some examples may be seen more than others. Therefore, it is recommended to use epochs when the model quality is important.
-
-   #### Compute BLEU score during model evaluation
-
-   Use these flags to compute the BLEU when the model evaluates:
-   * `--bleu_source`: Path to file containing text to translate.
-   * `--bleu_ref`: Path to file containing the reference translation.
-   * `--stop_threshold`: Train until the BLEU score reaches this lower bound. This setting overrides the `--train_steps` and `--train_epochs` flags.
-
-   The test source and reference files located in the `test_data` directory are extracted from the preprocessed dataset from the [NMT Seq2Seq tutorial](https://google.github.io/seq2seq/nmt/#download-data).
-
-   When running `transformer_main.py`, use the flags: `--bleu_source=test_data/newstest2014.en --bleu_ref=test_data/newstest2014.de`
-
-   #### Tensorboard
-   Training and evaluation metrics (loss, accuracy, approximate BLEU score, etc.) are logged, and can be displayed in the browser using Tensorboard.
-   ```
-   tensorboard --logdir=$MODEL_DIR
-   ```
-   The values are displayed at [localhost:6006](http://localhost:6006).
-
-3. ### Translate using the model
-   [translate.py](translate.py) contains the script to use the trained model to translate input text or file. Each line in the file is translated separately.
-
-   Command to run:
-   ```
-   python translate.py --model_dir=$MODEL_DIR --vocab_file=$VOCAB_FILE \
-       --param_set=$PARAM_SET --text="hello world"
-   ```
-
-   Arguments for initializing the Subtokenizer and trained model:
-   * `--model_dir` and `--param_set`: These parameters are used to rebuild the trained model
-   * `--vocab_file`: Path to subtoken vocabulary file. If data_download was used, you may find the file in `data_dir`.
-
-   Arguments for specifying what to translate:
-   * `--text`: Text to translate
-   * `--file`: Path to file containing text to translate
-   * `--file_out`: If `--file` is set, then this file will store the input file's translations.
-
-   To translate the newstest2014 data, run:
-   ```
-   python translate.py --model_dir=$MODEL_DIR --vocab_file=$VOCAB_FILE \
-       --param_set=$PARAM_SET --file=test_data/newstest2014.en --file_out=translation.en
-   ```
-
-   Translating the file takes around 15 minutes on a GTX1080, or 5 minutes on a P100.
-
-4. ### Compute official BLEU score
-   Use [compute_bleu.py](compute_bleu.py) to compute the BLEU by comparing generated translations to the reference translation.
-
-   Command to run:
-   ```
-   python compute_bleu.py --translation=translation.en --reference=test_data/newstest2014.de
-   ```
-
-   Arguments:
-   * `--translation`: Path to file containing generated translations.
-   * `--reference`: Path to file containing reference translations.
-   * Use the `--help` or `-h` flag to get a full list of possible arguments.
-   
-5. ### TPU
-   TPU support for this version of Transformer is experimental. Currently it is present for
-   demonstration purposes only, but will be optimized in the coming weeks.
-
-## Export trained model
-To export the model as a Tensorflow [SavedModel](https://www.tensorflow.org/guide/saved_model) format, use the argument `--export_dir` when running `transformer_main.py`. A folder will be created in the directory with the name as the timestamp (e.g. $EXPORT_DIR/1526427396).
-
-```
-EXPORT_DIR=$HOME/transformer/saved_model
-python transformer_main.py --data_dir=$DATA_DIR --model_dir=$MODEL_DIR \
-  --vocab_file=$VOCAB_FILE --param_set=$PARAM_SET --export_dir=$EXPORT_DIR
-```
-
-To inspect the SavedModel, use saved_model_cli:
-```
-SAVED_MODEL_DIR=$EXPORT_DIR/{TIMESTAMP}  # replace {TIMESTAMP} with the name of the folder created
-saved_model_cli show --dir=$SAVED_MODEL_DIR  --all
-```
-
-### Example translation
-Let's translate **"hello world!"**, **"goodbye world."**, and **"Would you like some pie?"**.
-
-The SignatureDef for "translate" is:
-
-    signature_def['translate']:
-        The given SavedModel SignatureDef contains the following input(s):
-          inputs['input'] tensor_info:
-              dtype: DT_INT64
-              shape: (-1, -1)
-              name: Placeholder:0
-        The given SavedModel SignatureDef contains the following output(s):
-          outputs['outputs'] tensor_info:
-              dtype: DT_INT32
-              shape: (-1, -1)
-              name: model/Transformer/strided_slice_19:0
-          outputs['scores'] tensor_info:
-              dtype: DT_FLOAT
-              shape: (-1)
-              name: model/Transformer/strided_slice_20:0
-
-Follow the steps below to use the translate signature def:
-
-1. #### Encode the inputs to integer arrays.
-   This can be done using `utils.tokenizer.Subtokenizer`, and the vocab file in the SavedModel assets (`$SAVED_MODEL_DIR/assets.extra/vocab.txt`).
-
-   ```
-   from official.transformer.utils.tokenizer import Subtokenizer
-   s = Subtokenizer(PATH_TO_VOCAB_FILE)
-   print(s.encode("hello world!", add_eos=True))
-   ```
-
-   The encoded inputs are:
-   * `"hello world!" = [6170, 3731, 178, 207, 1]`
-   * `"goodbye world." = [15431, 13966, 36, 178, 3, 1]`
-   * `"Would you like some pie?" = [9092, 72, 155, 202, 19851, 102, 1]`
-
-2. #### Run `saved_model_cli` to obtain the predicted translations
-   The encoded inputs should be padded so that they are the same length. The padding token is `0`.
-   ```
-   ENCODED_INPUTS="[[26228, 145, 178, 1, 0, 0, 0], \
-                   [15431, 13966, 36, 178, 3, 1, 0], \
-                   [9092, 72, 155, 202, 19851, 102, 1]]"
-   ```
-
-   Now, use the `run` command with `saved_model_cli` to get the outputs.
-
-   ```
-   saved_model_cli run --dir=$SAVED_MODEL_DIR --tag_set=serve --signature_def=translate \
-     --input_expr="input=$ENCODED_INPUTS"
-   ```
-
-   The outputs will look similar to:
-   ```
-   Result for output key outputs:
-   [[18744   145   297     1     0     0     0     0     0     0     0     0
-         0     0]
-    [ 5450  4642    21    11   297     3     1     0     0     0     0     0
-         0     0]
-    [25940    22    66   103 21713    31   102     1     0     0     0     0
-         0     0]]
-   Result for output key scores:
-   [-1.5493642 -1.4032784 -3.252089 ]
-   ```
-
-3. #### Decode the outputs to strings.
-   Use the `Subtokenizer` and vocab file as described in step 1 to decode the output integer arrays.
-   ```
-   from official.transformer.utils.tokenizer import Subtokenizer
-   s = Subtokenizer(PATH_TO_VOCAB_FILE)
-   print(s.decode([18744, 145, 297, 1]))
-   ```
-   The decoded outputs from above are:
-   * `[18744, 145, 297, 1] = "Hallo Welt<EOS>"`
-   * `[5450, 4642, 21, 11, 297, 3, 1] = "Abschied von der Welt.<EOS>"`
-   * `[25940, 22, 66, 103, 21713, 31, 102, 1] = "Möchten Sie einen Kuchen?<EOS>"`
-
-## Implementation overview
-
-A brief look at each component in the code:
-
-### Model Definition
-The [model](model) subdirectory contains the implementation of the Transformer model. The following files define the Transformer model and its layers:
-* [transformer.py](model/transformer.py): Defines the transformer model and its encoder/decoder layer stacks.
-* [embedding_layer.py](model/embedding_layer.py): Contains the layer that calculates the embeddings. The embedding weights are also used to calculate the pre-softmax probabilities from the decoder output.
-* [attention_layer.py](model/attention_layer.py): Defines the multi-headed and self attention layers that are used in the encoder/decoder stacks.
-* [ffn_layer.py](model/ffn_layer.py): Defines the feedforward network that is used in the encoder/decoder stacks. The network is composed of 2 fully connected layers.
-
-Other files:
-* [beam_search.py](model/beam_search.py) contains the beam search implementation, which is used during model inference to find high scoring translations.
-* [model_params.py](model/model_params.py) contains the parameters used for the big and base models.
-* [model_utils.py](model/model_utils.py) defines some helper functions used in the model (calculating padding, bias, etc.).
-
-
-### Model Estimator
-[transformer_main.py](transformer_main.py) creates an `Estimator` to train and evaluate the model.
-
-Helper functions:
-* [utils/dataset.py](utils/dataset.py): contains functions for creating a `dataset` that is passed to the `Estimator`.
-* [utils/metrics.py](utils/metrics.py): defines metrics functions used by the `Estimator` to evaluate the model.
-
-### Other scripts
-
-Aside from the main file to train the Transformer model, we provide other scripts for using the model or downloading the data:
-
-#### Data download and preprocessing
-
-[data_download.py](data_download.py) downloads and extracts data, then uses `Subtokenizer` to tokenize strings into arrays of int IDs. The int arrays are converted to `tf.Examples` and saved in the TFRecord format.
-
- The data is downloaded from the Workshop on Machine Translation (WMT) [news translation task](http://www.statmt.org/wmt17/translation-task.html). The following datasets are used:
-
- * Europarl v7
- * Common Crawl corpus
- * News Commentary v12
-
- See the [download section](http://www.statmt.org/wmt17/translation-task.html#download) to explore the raw datasets. The parameters in this model are tuned to fit the English-German translation data, so the EN-DE texts are extracted from the downloaded compressed files.
-
-The text is transformed into arrays of integer IDs using the `Subtokenizer` defined in [`utils/tokenizer.py`](utils/tokenizer.py). During initialization of the `Subtokenizer`, the raw training data is used to generate a vocabulary list containing common subtokens.
-
-The target vocabulary size of the WMT dataset is 32,768. The set of subtokens is found through binary search on the minimum number of times a subtoken appears in the data. The actual vocabulary size is 33,708, and is stored in a 324kB file.
-
-#### Translation
-Translation is defined in [translate.py](translate.py). First, `Subtokenizer` tokenizes the input. The vocabulary file is the same used to tokenize the training/eval files. Next, beam search is used to find the combination of tokens that maximizes the probability outputted by the model decoder. The tokens are then converted back to strings with `Subtokenizer`.
-
-#### BLEU computation
-[compute_bleu.py](compute_bleu.py): Implementation from [https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/utils/bleu_hook.py](https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/utils/bleu_hook.py).
-
-### Test dataset
-The [newstest2014 files](test_data) are extracted from the [NMT Seq2Seq tutorial](https://google.github.io/seq2seq/nmt/#download-data). The raw text files are converted from the SGM format of the [WMT 2016](http://www.statmt.org/wmt16/translation-task.html) test sets.
-
-## Term definitions
-
-**Steps / Epochs**:
-* Step: unit for processing a single batch of data
-* Epoch: a complete run through the dataset
-
-Example: Consider training on a dataset with 100 examples that is divided into 20 batches with 5 examples per batch. A single training step trains the model on one batch. After 20 training steps, the model will have trained on every batch in the dataset, which is one epoch.
-
-**Subtoken**: Words are referred to as tokens, and parts of words are referred to as 'subtokens'. For example, the word 'inclined' may be split into `['incline', 'd_']`. The '\_' indicates the end of the token. The subtoken vocabulary list is guaranteed to contain the alphabet (including numbers and special characters), so all words can be tokenized.
--- a/TensorFlow/ComputeVision/Accuracy_Validation/ResNet50_Official/official/transformer/__init__.py
+++ b/TensorFlow/ComputeVision/Accuracy_Validation/ResNet50_Official/official/transformer/__init__.py
--- a/TensorFlow/ComputeVision/Accuracy_Validation/ResNet50_Official/official/transformer/compute_bleu.py
+++ b/TensorFlow/ComputeVision/Accuracy_Validation/ResNet50_Official/official/transformer/compute_bleu.py
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Script to compute official BLEU score.
-
-Source:
-https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/utils/bleu_hook.py
-"""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import re
-import sys
-import unicodedata
-
-# pylint: disable=g-bad-import-order
-import six
-from absl import app as absl_app
-from absl import flags
-import tensorflow as tf
-# pylint: enable=g-bad-import-order
-
-from official.transformer.utils import metrics
-from official.utils.flags import core as flags_core
-
-
-class UnicodeRegex(object):
-  """Ad-hoc hack to recognize all punctuation and symbols."""
-
-  def __init__(self):
-    punctuation = self.property_chars("P")
-    self.nondigit_punct_re = re.compile(r"([^\d])([" + punctuation + r"])")
-    self.punct_nondigit_re = re.compile(r"([" + punctuation + r"])([^\d])")
-    self.symbol_re = re.compile("([" + self.property_chars("S") + "])")
-
-  def property_chars(self, prefix):
-    return "".join(six.unichr(x) for x in range(sys.maxunicode)
-                   if unicodedata.category(six.unichr(x)).startswith(prefix))
-
-
-uregex = UnicodeRegex()
-
-
-def bleu_tokenize(string):
-  r"""Tokenize a string following the official BLEU implementation.
-
-  See https://github.com/moses-smt/mosesdecoder/blob/master/scripts/generic/mteval-v14.pl#L954-L983
-  In our case, the input string is expected to be just one line
-  and no HTML entities de-escaping is needed.
-  So we just tokenize on punctuation and symbols,
-  except when a punctuation is preceded and followed by a digit
-  (e.g. a comma/dot as a thousand/decimal separator).
-
-  Note that a number (e.g. a year) followed by a dot at the end of sentence
-  is NOT tokenized,
-  i.e. the dot stays with the number because `s/(\p{P})(\P{N})/ $1 $2/g`
-  does not match this case (unless we add a space after each sentence).
-  However, this error is already in the original mteval-v14.pl
-  and we want to be consistent with it.
-
-  Args:
-    string: the input string
-
-  Returns:
-    a list of tokens
-  """
-  string = uregex.nondigit_punct_re.sub(r"\1 \2 ", string)
-  string = uregex.punct_nondigit_re.sub(r" \1 \2", string)
-  string = uregex.symbol_re.sub(r" \1 ", string)
-  return string.split()
-
-
-def bleu_wrapper(ref_filename, hyp_filename, case_sensitive=False):
-  """Compute BLEU for two files (reference and hypothesis translation)."""
-  ref_lines = tf.gfile.Open(ref_filename).read().strip().splitlines()
-  hyp_lines = tf.gfile.Open(hyp_filename).read().strip().splitlines()
-
-  if len(ref_lines) != len(hyp_lines):
-    raise ValueError("Reference and translation files have different number of "
-                     "lines.")
-  if not case_sensitive:
-    ref_lines = [x.lower() for x in ref_lines]
-    hyp_lines = [x.lower() for x in hyp_lines]
-  ref_tokens = [bleu_tokenize(x) for x in ref_lines]
-  hyp_tokens = [bleu_tokenize(x) for x in hyp_lines]
-  return metrics.compute_bleu(ref_tokens, hyp_tokens) * 100
-
-
-def main(unused_argv):
-  if FLAGS.bleu_variant in ("both", "uncased"):
-    score = bleu_wrapper(FLAGS.reference, FLAGS.translation, False)
-    tf.logging.info("Case-insensitive results: %f" % score)
-
-  if FLAGS.bleu_variant in ("both", "cased"):
-    score = bleu_wrapper(FLAGS.reference, FLAGS.translation, True)
-    tf.logging.info("Case-sensitive results: %f" % score)
-
-
-def define_compute_bleu_flags():
-  """Add flags for computing BLEU score."""
-  flags.DEFINE_string(
-      name="translation", default=None,
-      help=flags_core.help_wrap("File containing translated text."))
-  flags.mark_flag_as_required("translation")
-
-  flags.DEFINE_string(
-      name="reference", default=None,
-      help=flags_core.help_wrap("File containing reference translation."))
-  flags.mark_flag_as_required("reference")
-
-  flags.DEFINE_enum(
-      name="bleu_variant", short_name="bv", default="both",
-      enum_values=["both", "uncased", "cased"], case_sensitive=False,
-      help=flags_core.help_wrap(
-          "Specify one or more BLEU variants to calculate. Variants: \"cased\""
-          ", \"uncased\", or \"both\"."))
-
-
-if __name__ == "__main__":
-  tf.logging.set_verbosity(tf.logging.INFO)
-  define_compute_bleu_flags()
-  FLAGS = flags.FLAGS
-  absl_app.run(main)
--- a/TensorFlow/ComputeVision/Accuracy_Validation/ResNet50_Official/official/transformer/compute_bleu_test.py
+++ b/TensorFlow/ComputeVision/Accuracy_Validation/ResNet50_Official/official/transformer/compute_bleu_test.py
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Test functions in compute_bleu.py."""
-
-import tempfile
-
-import tensorflow as tf  # pylint: disable=g-bad-import-order
-
-from official.transformer import compute_bleu
-
-
-class ComputeBleuTest(tf.test.TestCase):
-
-  def _create_temp_file(self, text):
-    temp_file = tempfile.NamedTemporaryFile(delete=False)
-    with tf.gfile.Open(temp_file.name, 'w') as w:
-      w.write(text)
-    return temp_file.name
-
-  def test_bleu_same(self):
-    ref = self._create_temp_file("test 1 two 3\nmore tests!")
-    hyp = self._create_temp_file("test 1 two 3\nmore tests!")
-
-    uncased_score = compute_bleu.bleu_wrapper(ref, hyp, False)
-    cased_score = compute_bleu.bleu_wrapper(ref, hyp, True)
-    self.assertEqual(100, uncased_score)
-    self.assertEqual(100, cased_score)
-
-  def test_bleu_same_different_case(self):
-    ref = self._create_temp_file("Test 1 two 3\nmore tests!")
-    hyp = self._create_temp_file("test 1 two 3\nMore tests!")
-    uncased_score = compute_bleu.bleu_wrapper(ref, hyp, False)
-    cased_score = compute_bleu.bleu_wrapper(ref, hyp, True)
-    self.assertEqual(100, uncased_score)
-    self.assertLess(cased_score, 100)
-
-  def test_bleu_different(self):
-    ref = self._create_temp_file("Testing\nmore tests!")
-    hyp = self._create_temp_file("Dog\nCat")
-    uncased_score = compute_bleu.bleu_wrapper(ref, hyp, False)
-    cased_score = compute_bleu.bleu_wrapper(ref, hyp, True)
-    self.assertLess(uncased_score, 100)
-    self.assertLess(cased_score, 100)
-
-  def test_bleu_tokenize(self):
-    s = "Test0, 1 two, 3"
-    tokenized = compute_bleu.bleu_tokenize(s)
-    self.assertEqual(["Test0", ",", "1", "two", ",", "3"], tokenized)
-
-
-if __name__ == "__main__":
-  tf.test.main()
--- a/TensorFlow/ComputeVision/Accuracy_Validation/ResNet50_Official/official/transformer/data_download.py
+++ b/TensorFlow/ComputeVision/Accuracy_Validation/ResNet50_Official/official/transformer/data_download.py
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Download and preprocess WMT17 ende training and evaluation datasets."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-import random
-import tarfile
-
-# pylint: disable=g-bad-import-order
-import six
-from six.moves import urllib
-from absl import app as absl_app
-from absl import flags
-import tensorflow as tf
-# pylint: enable=g-bad-import-order
-
-from official.transformer.utils import tokenizer
-from official.utils.flags import core as flags_core
-
-# Data sources for training/evaluating the transformer translation model.
-# If any of the training sources are changed, then either:
-#   1) use the flag `--search` to find the best min count or
-#   2) update the _TRAIN_DATA_MIN_COUNT constant.
-# min_count is the minimum number of times a token must appear in the data
-# before it is added to the vocabulary. "Best min count" refers to the value
-# that generates a vocabulary set that is closest in size to _TARGET_VOCAB_SIZE.
-_TRAIN_DATA_SOURCES = [
-    {
-        "url": "http://data.statmt.org/wmt17/translation-task/"
-               "training-parallel-nc-v12.tgz",
-        "input": "news-commentary-v12.de-en.en",
-        "target": "news-commentary-v12.de-en.de",
-    },
-    {
-        "url": "http://www.statmt.org/wmt13/training-parallel-commoncrawl.tgz",
-        "input": "commoncrawl.de-en.en",
-        "target": "commoncrawl.de-en.de",
-    },
-    {
-        "url": "http://www.statmt.org/wmt13/training-parallel-europarl-v7.tgz",
-        "input": "europarl-v7.de-en.en",
-        "target": "europarl-v7.de-en.de",
-    },
-]
-# Use pre-defined minimum count to generate subtoken vocabulary.
-_TRAIN_DATA_MIN_COUNT = 6
-
-_EVAL_DATA_SOURCES = [
-    {
-        "url": "http://data.statmt.org/wmt17/translation-task/dev.tgz",
-        "input": "newstest2013.en",
-        "target": "newstest2013.de",
-    }
-]
-
-# Vocabulary constants
-_TARGET_VOCAB_SIZE = 32768  # Number of subtokens in the vocabulary list.
-_TARGET_THRESHOLD = 327  # Accept vocabulary if size is within this threshold
-VOCAB_FILE = "vocab.ende.%d" % _TARGET_VOCAB_SIZE
-
-# Strings to include in the generated files.
-_PREFIX = "wmt32k"
-_TRAIN_TAG = "train"
-_EVAL_TAG = "dev"  # Following WMT and Tensor2Tensor conventions, in which the
-                   # evaluation datasets are tagged as "dev" for development.
-
-# Number of files to split train and evaluation data
-_TRAIN_SHARDS = 100
-_EVAL_SHARDS = 1
-
-
-def find_file(path, filename, max_depth=5):
-  """Returns full filepath if the file is in path or a subdirectory."""
-  for root, dirs, files in os.walk(path):
-    if filename in files:
-      return os.path.join(root, filename)
-
-    # Don't search past max_depth
-    depth = root[len(path) + 1:].count(os.sep)
-    if depth > max_depth:
-      del dirs[:]  # Clear dirs
-  return None
-
-
-###############################################################################
-# Download and extraction functions
-###############################################################################
-def get_raw_files(raw_dir, data_source):
-  """Return raw files from source. Downloads/extracts if needed.
-
-  Args:
-    raw_dir: string directory to store raw files
-    data_source: dictionary with
-      {"url": url of compressed dataset containing input and target files
-       "input": file with data in input language
-       "target": file with data in target language}
-
-  Returns:
-    dictionary with
-      {"inputs": list of files containing data in input language
-       "targets": list of files containing corresponding data in target language
-      }
-  """
-  raw_files = {
-      "inputs": [],
-      "targets": [],
-  }  # keys
-  for d in data_source:
-    input_file, target_file = download_and_extract(
-        raw_dir, d["url"], d["input"], d["target"])
-    raw_files["inputs"].append(input_file)
-    raw_files["targets"].append(target_file)
-  return raw_files
-
-
-def download_report_hook(count, block_size, total_size):
-  """Report hook for download progress.
-
-  Args:
-    count: current block number
-    block_size: block size
-    total_size: total size
-  """
-  percent = int(count * block_size * 100 / total_size)
-  print("\r%d%%" % percent + " completed", end="\r")
-
-
-def download_from_url(path, url):
-  """Download content from a url.
-
-  Args:
-    path: string directory where file will be downloaded
-    url: string url
-
-  Returns:
-    Full path to downloaded file
-  """
-  filename = url.split("/")[-1]
-  found_file = find_file(path, filename, max_depth=0)
-  if found_file is None:
-    filename = os.path.join(path, filename)
-    tf.logging.info("Downloading from %s to %s." % (url, filename))
-    inprogress_filepath = filename + ".incomplete"
-    inprogress_filepath, _ = urllib.request.urlretrieve(
-        url, inprogress_filepath, reporthook=download_report_hook)
-    # Print newline to clear the carriage return from the download progress.
-    print()
-    tf.gfile.Rename(inprogress_filepath, filename)
-    return filename
-  else:
-    tf.logging.info("Already downloaded: %s (at %s)." % (url, found_file))
-    return found_file
-
-
-def download_and_extract(path, url, input_filename, target_filename):
-  """Download a compressed archive and extract the two requested files.
-
-  Both the download and the extraction are skipped when the two files can
-  already be found under path.
-
-  Args:
-    path: string directory where the files will be downloaded
-    url: url containing the compressed input and target files
-    input_filename: name of file containing data in source language
-    target_filename: name of file containing data in target language
-
-  Returns:
-    Full paths to extracted input and target files.
-
-  Raises:
-    OSError: if the download/extraction fails.
-  """
-  def _locate_pair():
-    # Look up both requested files under path (input first, then target).
-    return find_file(path, input_filename), find_file(path, target_filename)
-
-  # Reuse files left behind by an earlier run.
-  found_input, found_target = _locate_pair()
-  if found_input and found_target:
-    tf.logging.info("Already downloaded and extracted %s." % url)
-    return found_input, found_target
-
-  # Download archive file if it doesn't already exist.
-  archive = download_from_url(path, url)
-
-  # Extract compressed files
-  tf.logging.info("Extracting %s." % archive)
-  with tarfile.open(archive, "r:gz") as corpus_tar:
-    corpus_tar.extractall(path)
-
-  # Both requested files must be present now; anything else is a failure.
-  found_input, found_target = _locate_pair()
-  if not (found_input and found_target):
-    raise OSError("Download/extraction failed for url %s to path %s" %
-                  (url, path))
-  return found_input, found_target
-
-
-def txt_line_iterator(path):
-  """Yield each line of the file at path, stripped of outer whitespace."""
-  with tf.gfile.Open(path) as text_file:
-    for raw_line in text_file:
-      yield raw_line.strip()
-
-
-def compile_files(raw_dir, raw_files, tag):
-  """Concatenate the raw files into one compiled file per language.
-
-  Args:
-    raw_dir: Directory containing downloaded raw files; the compiled files
-      are written here as well.
-    raw_files: Dict containing filenames of input and target data.
-      {"inputs": list of files containing data in input language
-       "targets": list of files containing corresponding data in target language
-      }
-    tag: String to append to the compiled filename.
-
-  Returns:
-    Full path of compiled input and target files.
-  """
-  tf.logging.info("Compiling files with tag %s." % tag)
-  basename = "%s-%s" % (_PREFIX, tag)
-  compiled_inputs = os.path.join(raw_dir, basename + ".lang1")
-  compiled_targets = os.path.join(raw_dir, basename + ".lang2")
-
-  with tf.gfile.Open(compiled_inputs, mode="w") as input_writer:
-    with tf.gfile.Open(compiled_targets, mode="w") as target_writer:
-      # Entries at the same position of both lists belong together, so they
-      # are appended in lockstep.
-      for position, input_file in enumerate(raw_files["inputs"]):
-        target_file = raw_files["targets"][position]
-
-        tf.logging.info("Reading files %s and %s." % (input_file, target_file))
-        write_file(input_writer, input_file)
-        write_file(target_writer, target_file)
-  return compiled_inputs, compiled_targets
-
-
-def write_file(writer, filename):
-  """Copy every stripped line of filename to writer, newline-terminated."""
-  newline = "\n"
-  for stripped_line in txt_line_iterator(filename):
-    writer.write(stripped_line)
-    writer.write(newline)
-
-
-###############################################################################
-# Data preprocessing
-###############################################################################
-def encode_and_save_files(
-    subtokenizer, data_dir, raw_files, tag, total_shards):
-  """Save data from files as encoded Examples in TFrecord format.
-
-  Nothing is written when every shard file already exists. Otherwise the
-  shards are written under temporary ".incomplete" names and renamed to their
-  final names once all examples have been written.
-
-  Args:
-    subtokenizer: Subtokenizer object that will be used to encode the strings.
-    data_dir: The directory in which to write the examples
-    raw_files: A tuple of (input, target) data files. Each line in the input and
-      the corresponding line in target file will be saved in a tf.Example.
-    tag: String that will be added onto the file names.
-    total_shards: Number of files to divide the data into.
-
-  Returns:
-    List of all files produced.
-  """
-  # Create a file for each shard.
-  filepaths = [shard_filename(data_dir, tag, n + 1, total_shards)
-               for n in range(total_shards)]
-
-  if all_exist(filepaths):
-    tf.logging.info("Files with tag %s already exist." % tag)
-    return filepaths
-
-  tf.logging.info("Saving files with tag %s." % tag)
-  input_file = raw_files[0]
-  target_file = raw_files[1]
-
-  # Write examples to each shard in round robin order.
-  tmp_filepaths = [fname + ".incomplete" for fname in filepaths]
-  writers = [tf.python_io.TFRecordWriter(fname) for fname in tmp_filepaths]
-  # Count the examples explicitly: deriving the total from the last loop index
-  # would report one example for empty input files.
-  num_examples, shard = 0, 0
-  try:
-    for input_line, target_line in zip(
-        txt_line_iterator(input_file), txt_line_iterator(target_file)):
-      if num_examples > 0 and num_examples % 100000 == 0:
-        tf.logging.info("\tSaving case %d." % num_examples)
-      example = dict_to_example(
-          {"inputs": subtokenizer.encode(input_line, add_eos=True),
-           "targets": subtokenizer.encode(target_line, add_eos=True)})
-      writers[shard].write(example.SerializeToString())
-      shard = (shard + 1) % total_shards
-      num_examples += 1
-  finally:
-    # Close every writer even when reading or encoding fails, so no file
-    # handle is leaked. The shards keep their ".incomplete" names in that
-    # case because the rename below is never reached.
-    for writer in writers:
-      writer.close()
-
-  for tmp_name, final_name in zip(tmp_filepaths, filepaths):
-    tf.gfile.Rename(tmp_name, final_name)
-
-  tf.logging.info("Saved %d Examples", num_examples)
-  return filepaths
-
-
-def shard_filename(path, tag, shard_num, total_shards):
-  """Return the path of one data shard inside path.
-
-  The shard number and the shard total are zero-padded to five digits.
-  """
-  basename = "%s-%s-%.5d-of-%.5d" % (_PREFIX, tag, shard_num, total_shards)
-  return os.path.join(path, basename)
-
-
-def shuffle_records(fname):
-  """Rewrite a TFRecord file with its records in random order.
-
-  All records are held in memory while shuffling.
-
-  Args:
-    fname: path of the record file to shuffle in place.
-  """
-  tf.logging.info("Shuffling records in file %s" % fname)
-
-  # Move the original aside so the shuffled output can take over its name.
-  unshuffled_path = fname + ".unshuffled"
-  tf.gfile.Rename(fname, unshuffled_path)
-
-  # Read every record, logging progress every 100000 records.
-  buffered = []
-  for serialized in tf.compat.v1.io.tf_record_iterator(unshuffled_path):
-    buffered.append(serialized)
-    if len(buffered) % 100000 == 0:
-      tf.logging.info("\tRead: %d", len(buffered))
-
-  random.shuffle(buffered)
-
-  # Write shuffled records to original file name
-  with tf.python_io.TFRecordWriter(fname) as record_writer:
-    for index, serialized in enumerate(buffered):
-      record_writer.write(serialized)
-      if index > 0 and index % 100000 == 0:
-        tf.logging.info("\tWriting record: %d" % index)
-
-  # The renamed original is no longer needed once the rewrite has finished.
-  tf.gfile.Remove(unshuffled_path)
-
-
-def dict_to_example(dictionary):
-  """Build a tf.Example holding one int64-list feature per dictionary item."""
-  int64_features = {
-      key: tf.train.Feature(int64_list=tf.train.Int64List(value=ids))
-      for key, ids in six.iteritems(dictionary)
-  }
-  return tf.train.Example(features=tf.train.Features(feature=int64_features))
-
-
-def all_exist(filepaths):
-  """Return True only when every path in filepaths exists.
-
-  Checking stops at the first missing path; an empty list yields True.
-  """
-  return all(tf.gfile.Exists(candidate) for candidate in filepaths)
-
-
-def make_dir(path):
-  """Create the directory path (logging the creation) if it does not exist."""
-  if tf.gfile.Exists(path):
-    return
-  tf.logging.info("Creating directory %s" % path)
-  tf.gfile.MakeDirs(path)
-
-
-def main(unused_argv):
-  """Obtain training and evaluation data for the Transformer model.
-
-  Runs four steps: download the raw data, build the subtokenizer vocabulary,
-  compile the raw files, and encode them into TFRecord shards. The training
-  shards are shuffled at the end.
-
-  Args:
-    unused_argv: ignored.
-  """
-  for directory in (FLAGS.raw_dir, FLAGS.data_dir):
-    make_dir(directory)
-
-  # Get paths of download/extracted training and evaluation files.
-  tf.logging.info("Step 1/4: Downloading data from source")
-  raw_train = get_raw_files(FLAGS.raw_dir, _TRAIN_DATA_SOURCES)
-  raw_eval = get_raw_files(FLAGS.raw_dir, _EVAL_DATA_SOURCES)
-
-  # Create subtokenizer based on the training files.
-  tf.logging.info("Step 2/4: Creating subtokenizer and building vocabulary")
-  vocab_sources = raw_train["inputs"] + raw_train["targets"]
-  vocab_path = os.path.join(FLAGS.data_dir, VOCAB_FILE)
-  # With --search set, no fixed minimum count is passed to the subtokenizer.
-  min_count = None if FLAGS.search else _TRAIN_DATA_MIN_COUNT
-  subtokenizer = tokenizer.Subtokenizer.init_from_files(
-      vocab_path, vocab_sources, _TARGET_VOCAB_SIZE, _TARGET_THRESHOLD,
-      min_count=min_count)
-
-  tf.logging.info("Step 3/4: Compiling training and evaluation data")
-  compiled_train = compile_files(FLAGS.raw_dir, raw_train, _TRAIN_TAG)
-  compiled_eval = compile_files(FLAGS.raw_dir, raw_eval, _EVAL_TAG)
-
-  # Tokenize and save data as Examples in the TFRecord format.
-  tf.logging.info("Step 4/4: Preprocessing and saving data")
-  train_shards = encode_and_save_files(
-      subtokenizer, FLAGS.data_dir, compiled_train, _TRAIN_TAG, _TRAIN_SHARDS)
-  encode_and_save_files(
-      subtokenizer, FLAGS.data_dir, compiled_eval, _EVAL_TAG, _EVAL_SHARDS)
-
-  # Only the training shards are shuffled.
-  for shard_path in train_shards:
-    shuffle_records(shard_path)
-
-
-def define_data_download_flags():
-  """Add flags specifying data download arguments.
-
-  Defines the string flags data_dir (short name dd) and raw_dir (short name
-  rd) and the boolean flag search.
-  """
-  flags.DEFINE_string(
-      name="data_dir", short_name="dd", default="/tmp/translate_ende",
-      help=flags_core.help_wrap(
-          "Directory for where the translate_ende_wmt32k dataset is saved."))
-  flags.DEFINE_string(
-      name="raw_dir", short_name="rd", default="/tmp/translate_ende_raw",
-      help=flags_core.help_wrap(
-          "Path where the raw data will be downloaded and extracted."))
-  flags.DEFINE_bool(
-      name="search", default=False,
-      help=flags_core.help_wrap(
-          # Adjacent literals are joined as-is, so the separating space has to
-          # be part of the first one (it used to read "sizeclosest").
-          "If set, use binary search to find the vocabulary set with size "
-          "closest to the target size (%d)." % _TARGET_VOCAB_SIZE))
-
-
-if __name__ == "__main__":
-  # Show the INFO-level progress messages that main() logs.
-  tf.logging.set_verbosity(tf.logging.INFO)
-  # Register this script's flags before control is handed to absl.
-  define_data_download_flags()
-  FLAGS = flags.FLAGS
-  absl_app.run(main)
--- a/TensorFlow/ComputeVision/Accuracy_Validation/ResNet50_Official/official/transformer/model/__init__.py
+++ b/TensorFlow/ComputeVision/Accuracy_Validation/ResNet50_Official/official/transformer/model/__init__.py
--- a/TensorFlow/ComputeVision/Accuracy_Validation/ResNet50_Official/official/transformer/model/attention_layer.py
+++ b/TensorFlow/ComputeVision/Accuracy_Validation/ResNet50_Official/official/transformer/model/attention_layer.py
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Implementation of multiheaded attention and self-attention layers."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import tensorflow as tf
-
-
-class Attention(tf.layers.Layer):
-  """Multi-headed dot-product attention layer."""
-
-  def __init__(self, hidden_size, num_heads, attention_dropout, train):
-    """Create the projection layers.
-
-    Args:
-      hidden_size: size of the projections; must be a multiple of num_heads.
-      num_heads: number of attention heads.
-      attention_dropout: dropout rate applied to the attention weights.
-      train: when truthy, dropout is applied to the attention weights.
-
-    Raises:
-      ValueError: if hidden_size is not evenly divisible by num_heads.
-    """
-    if hidden_size % num_heads:
-      raise ValueError(
-          "Hidden size must be evenly divisible by the number of heads.")
-
-    super(Attention, self).__init__()
-    self.hidden_size = hidden_size
-    self.num_heads = num_heads
-    self.attention_dropout = attention_dropout
-    self.train = train
-
-    def _projection(layer_name):
-      # Every projection is a bias-free dense layer of width hidden_size.
-      return tf.layers.Dense(hidden_size, use_bias=False, name=layer_name)
-
-    # Layers for linearly projecting the queries, keys, and values.
-    self.q_dense_layer = _projection("q")
-    self.k_dense_layer = _projection("k")
-    self.v_dense_layer = _projection("v")
-
-    self.output_dense_layer = _projection("output_transform")
-
-  def split_heads(self, x):
-    """Split x into different heads, and transpose the resulting value.
-
-    The tensor is transposed to ensure the inner dimensions hold the correct
-    values during the matrix multiplication.
-
-    Args:
-      x: A tensor with shape [batch_size, length, hidden_size]
-
-    Returns:
-      A tensor with shape [batch_size, num_heads, length, hidden_size/num_heads]
-    """
-    with tf.name_scope("split_heads"):
-      num_items = tf.shape(x)[0]
-      num_positions = tf.shape(x)[1]
-
-      # Size of the last dimension once it is divided among the heads.
-      head_depth = self.hidden_size // self.num_heads
-
-      # [batch, length, hidden] --> [batch, length, num_heads, depth]
-      per_head = tf.reshape(
-          x, [num_items, num_positions, self.num_heads, head_depth])
-
-      # --> [batch, num_heads, length, depth]
-      return tf.transpose(per_head, [0, 2, 1, 3])
-
-  def combine_heads(self, x):
-    """Merge the heads of a tensor produced by split_heads.
-
-    Args:
-      x: A tensor [batch_size, num_heads, length, hidden_size/num_heads]
-
-    Returns:
-      A tensor with shape [batch_size, length, hidden_size]
-    """
-    with tf.name_scope("combine_heads"):
-      num_items = tf.shape(x)[0]
-      num_positions = tf.shape(x)[2]
-      # --> [batch, length, num_heads, depth]
-      heads_last = tf.transpose(x, [0, 2, 1, 3])
-      return tf.reshape(
-          heads_last, [num_items, num_positions, self.hidden_size])
-
-  def call(self, x, y, bias, cache=None):
-    """Apply attention mechanism to x and y.
-
-    Args:
-      x: a tensor with shape [batch_size, length_x, hidden_size]
-      y: a tensor with shape [batch_size, length_y, hidden_size]
-      bias: attention bias that will be added to the result of the dot product.
-      cache: (Used during prediction) dictionary with tensors containing results
-        of previous attentions. The dictionary must have the items:
-            {"k": tensor with shape [batch_size, i, key_channels],
-             "v": tensor with shape [batch_size, i, value_channels]}
-        where i is the current decoded length. The dictionary is updated in
-        place with the extended keys and values.
-
-    Returns:
-      Attention layer output with shape [batch_size, length_x, hidden_size]
-    """
-    # Queries are projected from x, keys and values from y, each through its
-    # own learned projection, in preparation for splitting them into heads.
-    query = self.q_dense_layer(x)
-    key = self.k_dense_layer(y)
-    value = self.v_dense_layer(y)
-
-    if cache is not None:
-      # Prepend the cached keys and values along the length axis.
-      key = tf.concat([cache["k"], key], axis=1)
-      value = tf.concat([cache["v"], value], axis=1)
-
-      # Store the extended tensors back for the next call.
-      cache["k"] = key
-      cache["v"] = value
-
-    # Split q, k, v into heads.
-    query = self.split_heads(query)
-    key = self.split_heads(key)
-    value = self.split_heads(value)
-
-    # Scale q to prevent the dot product between q and k from growing too large.
-    head_depth = self.hidden_size // self.num_heads
-    query = query * head_depth ** -0.5
-
-    # Calculate dot product attention
-    logits = tf.matmul(query, key, transpose_b=True)
-    logits = logits + bias
-    weights = tf.nn.softmax(logits, name="attention_weights")
-    if self.train:
-      weights = tf.nn.dropout(weights, 1.0 - self.attention_dropout)
-    context = tf.matmul(weights, value)
-
-    # Recombine heads --> [batch_size, length, hidden_size]
-    context = self.combine_heads(context)
-
-    # Run the combined outputs through another linear projection layer.
-    return self.output_dense_layer(context)
-
-
-class SelfAttention(Attention):
-  """Multiheaded self-attention layer."""
-
-  def call(self, x, bias, cache=None):
-    """Attend from x to itself: x supplies queries, keys and values."""
-    attend = super(SelfAttention, self).call
-    return attend(x, x, bias, cache)
--- a/TensorFlow/ComputeVision/Accuracy_Validation/ResNet50_Official/official/transformer/model/beam_search.py
+++ b/TensorFlow/ComputeVision/Accuracy_Validation/ResNet50_Official/official/transformer/model/beam_search.py
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Beam search to find the translated sequence with the highest probability.
-
-Source implementation from Tensor2Tensor:
-https://github.com/tensorflow/tensor2tensor/blob/master/tensor2tensor/utils/beam_search.py
-"""
-
-import tensorflow as tf
-from tensorflow.python.util import nest
-
-# Large finite stand-in for infinity. Scores are offset by -INF to keep the
-# corresponding sequences out of the top-k selections.
-INF = 1. * 1e7
-
-
-class _StateKeys(object):
-  """Keys to dictionary storing the state of the beam search loop.
-
-  Every attribute is a string constant whose value equals its own name.
-  """
-
-  # Variable storing the loop index.
-  CUR_INDEX = "CUR_INDEX"
-
-  # Top sequences that are alive for each batch item. Alive sequences are ones
-  # that have not generated an EOS token. Sequences that reach EOS are marked as
-  # finished and moved to the FINISHED_SEQ tensor.
-  # Has shape [batch_size, beam_size, CUR_INDEX + 1]
-  ALIVE_SEQ = "ALIVE_SEQ"
-  # Log probabilities of each alive sequence. Shape [batch_size, beam_size]
-  ALIVE_LOG_PROBS = "ALIVE_LOG_PROBS"
-  # Dictionary of cached values for each alive sequence. The cache stores
-  # the encoder output, attention bias, and the decoder attention output from
-  # the previous iteration.
-  ALIVE_CACHE = "ALIVE_CACHE"
-
-  # Top finished sequences for each batch item.
-  # Has shape [batch_size, beam_size, CUR_INDEX + 1]. Sequences that are
-  # shorter than CUR_INDEX + 1 are padded with 0s.
-  FINISHED_SEQ = "FINISHED_SEQ"
-  # Scores for each finished sequence. Score = log probability / length norm
-  # Shape [batch_size, beam_size]
-  FINISHED_SCORES = "FINISHED_SCORES"
-  # Flags indicating which sequences in the finished sequences are finished.
-  # At the beginning, all of the sequences in FINISHED_SEQ are filler values.
-  # True -> finished sequence, False -> filler. Shape [batch_size, beam_size]
-  FINISHED_FLAGS = "FINISHED_FLAGS"
-
-
-class SequenceBeamSearch(object):
-  """Implementation of beam search loop."""
-
-  def __init__(self, symbols_to_logits_fn, vocab_size, batch_size,
-               beam_size, alpha, max_decode_length, eos_id):
-    """Store the search configuration.
-
-    Args:
-      symbols_to_logits_fn: callable invoked in _grow_alive_seq as
-        fn(flat_ids, index, flat_cache); returns (flat_logits, flat_cache).
-      vocab_size: number of candidate ids scored for each beam.
-      batch_size: number of batch items.
-      beam_size: number of beams kept per batch item.
-      alpha: exponent handed to _length_normalization.
-      max_decode_length: loop index at which the search stops.
-      eos_id: id whose appearance marks a sequence as finished.
-    """
-    self.symbols_to_logits_fn = symbols_to_logits_fn
-    self.vocab_size = vocab_size
-    self.batch_size = batch_size
-    self.beam_size = beam_size
-    self.alpha = alpha
-    self.max_decode_length = max_decode_length
-    self.eos_id = eos_id
-
-  def search(self, initial_ids, initial_cache):
-    """Beam search for sequences with highest scores.
-
-    Args:
-      initial_ids: initial ids to pass into the symbols_to_logits_fn.
-      initial_cache: dictionary storing values to be passed into the
-        symbols_to_logits_fn.
-
-    Returns:
-      Tuple of (sequences, scores). For batch items without any finished
-      sequence, the alive sequences and their log probabilities are returned
-      instead of the finished ones.
-    """
-    state, state_shapes = self._create_initial_state(initial_ids, initial_cache)
-
-    finished_state = tf.while_loop(
-        self._continue_search, self._search_step, loop_vars=[state],
-        shape_invariants=[state_shapes], parallel_iterations=1, back_prop=False)
-    # The loop variables come back as a one-element list holding the state.
-    finished_state = finished_state[0]
-
-    alive_seq = finished_state[_StateKeys.ALIVE_SEQ]
-    alive_log_probs = finished_state[_StateKeys.ALIVE_LOG_PROBS]
-    finished_seq = finished_state[_StateKeys.FINISHED_SEQ]
-    finished_scores = finished_state[_StateKeys.FINISHED_SCORES]
-    finished_flags = finished_state[_StateKeys.FINISHED_FLAGS]
-
-    # Account for corner case where there are no finished sequences for a
-    # particular batch item. In that case, return alive sequences for that batch
-    # item.
-    finished_seq = tf.where(
-        tf.reduce_any(finished_flags, 1), finished_seq, alive_seq)
-    finished_scores = tf.where(
-        tf.reduce_any(finished_flags, 1), finished_scores, alive_log_probs)
-    return finished_seq, finished_scores
-
-  def _create_initial_state(self, initial_ids, initial_cache):
-    """Return initial state dictionary and its shape invariants.
-
-    Args:
-      initial_ids: initial ids to pass into the symbols_to_logits_fn.
-        int tensor with shape [batch_size]
-      initial_cache: dictionary storing values to be passed into the
-        symbols_to_logits_fn.
-
-    Returns:
-        state and shape invariant dictionaries with keys from _StateKeys
-    """
-    # Current loop index (starts at 0)
-    cur_index = tf.constant(0)
-
-    # Create alive sequence with shape [batch_size, beam_size, 1]
-    alive_seq = _expand_to_beam_size(initial_ids, self.beam_size)
-    alive_seq = tf.expand_dims(alive_seq, axis=2)
-
-    # Create tensor for storing initial log probabilities.
-    # Assume initial_ids are prob 1.0
-    # Only the first beam starts at log prob 0; the others start at -inf.
-    initial_log_probs = tf.constant(
-        [[0.] + [-float("inf")] * (self.beam_size - 1)])
-    alive_log_probs = tf.tile(initial_log_probs, [self.batch_size, 1])
-
-    # Expand all values stored in the dictionary to the beam size, so that each
-    # beam has a separate cache.
-    alive_cache = nest.map_structure(
-        lambda t: _expand_to_beam_size(t, self.beam_size), initial_cache)
-
-    # Initialize tensor storing finished sequences with filler values.
-    finished_seq = tf.zeros(tf.shape(alive_seq), tf.int32)
-
-    # Set scores of the initial finished seqs to -INF (a large negative value).
-    finished_scores = tf.ones([self.batch_size, self.beam_size]) * -INF
-
-    # Initialize finished flags with all False values.
-    finished_flags = tf.zeros([self.batch_size, self.beam_size], tf.bool)
-
-    # Create state dictionary
-    state = {
-        _StateKeys.CUR_INDEX: cur_index,
-        _StateKeys.ALIVE_SEQ: alive_seq,
-        _StateKeys.ALIVE_LOG_PROBS: alive_log_probs,
-        _StateKeys.ALIVE_CACHE: alive_cache,
-        _StateKeys.FINISHED_SEQ: finished_seq,
-        _StateKeys.FINISHED_SCORES: finished_scores,
-        _StateKeys.FINISHED_FLAGS: finished_flags
-    }
-
-    # Create state invariants for each value in the state dictionary. Each
-    # dimension must be a constant or None. A None dimension means either:
-    #   1) the dimension's value is a tensor that remains the same but may
-    #      depend on the input sequence to the model (e.g. batch size).
-    #   2) the dimension may have different values on different iterations.
-    state_shape_invariants = {
-        _StateKeys.CUR_INDEX: tf.TensorShape([]),
-        _StateKeys.ALIVE_SEQ: tf.TensorShape([None, self.beam_size, None]),
-        _StateKeys.ALIVE_LOG_PROBS: tf.TensorShape([None, self.beam_size]),
-        _StateKeys.ALIVE_CACHE: nest.map_structure(
-            _get_shape_keep_last_dim, alive_cache),
-        _StateKeys.FINISHED_SEQ: tf.TensorShape([None, self.beam_size, None]),
-        _StateKeys.FINISHED_SCORES: tf.TensorShape([None, self.beam_size]),
-        _StateKeys.FINISHED_FLAGS: tf.TensorShape([None, self.beam_size])
-    }
-
-    return state, state_shape_invariants
-
-  def _continue_search(self, state):
-    """Return whether to continue the search loop.
-
-    The loop should terminate when
-      1) the decode length has been reached, or
-      2) the worst score in the finished sequences is better than the best
-         score in the alive sequences (i.e. the finished sequences are provably
-         unchanging)
-
-    Args:
-      state: A dictionary with the current loop state.
-
-    Returns:
-      Bool tensor with value True if loop should continue, False if loop should
-      terminate.
-    """
-    i = state[_StateKeys.CUR_INDEX]
-    alive_log_probs = state[_StateKeys.ALIVE_LOG_PROBS]
-    finished_scores = state[_StateKeys.FINISHED_SCORES]
-    finished_flags = state[_StateKeys.FINISHED_FLAGS]
-
-    not_at_max_decode_length = tf.less(i, self.max_decode_length)
-
-    # Calculate largest length penalty (the larger penalty, the better score).
-    max_length_norm = _length_normalization(self.alpha, self.max_decode_length)
-    # Get the best possible scores from alive sequences.
-    best_alive_scores = alive_log_probs[:, 0] / max_length_norm
-
-    # Compute worst score in finished sequences for each batch element
-    finished_scores *= tf.to_float(finished_flags)  # set filler scores to zero
-    lowest_finished_scores = tf.reduce_min(finished_scores, axis=1)
-
-    # If there are no finished sequences in a batch element, then set the lowest
-    # finished score to -INF for that element.
-    finished_batches = tf.reduce_any(finished_flags, 1)
-    lowest_finished_scores += (1. - tf.to_float(finished_batches)) * -INF
-
-    # True only when the condition holds for every batch element.
-    worst_finished_score_better_than_best_alive_score = tf.reduce_all(
-        tf.greater(lowest_finished_scores, best_alive_scores)
-    )
-
-    return tf.logical_and(
-        not_at_max_decode_length,
-        tf.logical_not(worst_finished_score_better_than_best_alive_score)
-    )
-
-  def _search_step(self, state):
-    """Beam search loop body.
-
-    Grow alive sequences by a single ID. Sequences that have reached the EOS
-    token are marked as finished. The alive and finished sequences with the
-    highest log probabilities and scores are returned.
-
-    A sequence's finished score is calculated by dividing the log probability
-    by the length normalization factor. Without length normalization, the
-    search is more likely to return shorter sequences.
-
-    Args:
-      state: A dictionary with the current loop state.
-
-    Returns:
-      One-element list holding the new state dictionary.
-    """
-    # Grow alive sequences by one token.
-    new_seq, new_log_probs, new_cache = self._grow_alive_seq(state)
-    # Collect top beam_size alive sequences
-    alive_state = self._get_new_alive_state(new_seq, new_log_probs, new_cache)
-
-    # Combine newly finished sequences with existing finished sequences, and
-    # collect the top k scoring sequences.
-    finished_state = self._get_new_finished_state(state, new_seq, new_log_probs)
-
-    # Increment loop index and create new state dictionary
-    new_state = {_StateKeys.CUR_INDEX: state[_StateKeys.CUR_INDEX] + 1}
-    new_state.update(alive_state)
-    new_state.update(finished_state)
-    return [new_state]
-
-  def _grow_alive_seq(self, state):
-    """Grow alive sequences by one token, and collect top 2*beam_size sequences.
-
-    2*beam_size sequences are collected because some sequences may have reached
-    the EOS token. 2*beam_size ensures that at least beam_size sequences are
-    still alive.
-
-    Args:
-      state: A dictionary with the current loop state.
-    Returns:
-      Tuple of
-      (Top 2*beam_size sequences [batch_size, 2 * beam_size, cur_index + 1],
-       Scores of returned sequences [batch_size, 2 * beam_size],
-       New alive cache, for each of the 2 * beam_size sequences)
-    """
-    i = state[_StateKeys.CUR_INDEX]
-    alive_seq = state[_StateKeys.ALIVE_SEQ]
-    alive_log_probs = state[_StateKeys.ALIVE_LOG_PROBS]
-    alive_cache = state[_StateKeys.ALIVE_CACHE]
-
-    beams_to_keep = 2 * self.beam_size
-
-    # Get logits for the next candidate IDs for the alive sequences. Get the new
-    # cache values at the same time.
-    flat_ids = _flatten_beam_dim(alive_seq)  # [batch_size * beam_size]
-    flat_cache = nest.map_structure(_flatten_beam_dim, alive_cache)
-
-    flat_logits, flat_cache = self.symbols_to_logits_fn(flat_ids, i, flat_cache)
-
-    # Unflatten logits to shape [batch_size, beam_size, vocab_size]
-    logits = _unflatten_beam_dim(flat_logits, self.batch_size, self.beam_size)
-    new_cache = nest.map_structure(
-        lambda t: _unflatten_beam_dim(t, self.batch_size, self.beam_size),
-        flat_cache)
-
-    # Convert logits to normalized log probs
-    candidate_log_probs = _log_prob_from_logits(logits)
-
-    # Calculate new log probabilities if each of the alive sequences were
-    # extended by the candidate IDs.
-    # Shape [batch_size, beam_size, vocab_size]
-    log_probs = candidate_log_probs + tf.expand_dims(alive_log_probs, axis=2)
-
-    # Each batch item has beam_size * vocab_size candidate sequences. For each
-    # batch item, get the k candidates with the highest log probabilities.
-    flat_log_probs = tf.reshape(log_probs,
-                                [-1, self.beam_size * self.vocab_size])
-    topk_log_probs, topk_indices = tf.nn.top_k(flat_log_probs, k=beams_to_keep)
-
-    # Extract the alive sequences that generate the highest log probabilities
-    # after being extended.
-    # A flat index equals beam * vocab_size + id, so integer division recovers
-    # the beam it came from.
-    topk_beam_indices = topk_indices // self.vocab_size
-    topk_seq, new_cache = _gather_beams(
-        [alive_seq, new_cache], topk_beam_indices, self.batch_size,
-        beams_to_keep)
-
-    # Append the most probable IDs to the topk sequences
-    # (the remainder of the same flat index is the candidate id).
-    topk_ids = topk_indices % self.vocab_size
-    topk_ids = tf.expand_dims(topk_ids, axis=2)
-    topk_seq = tf.concat([topk_seq, topk_ids], axis=2)
-    return topk_seq, topk_log_probs, new_cache
-
-  def _get_new_alive_state(self, new_seq, new_log_probs, new_cache):
-    """Gather the top k sequences that are still alive.
-
-    Args:
-      new_seq: New sequences generated by growing the current alive sequences
-        int32 tensor with shape [batch_size, 2 * beam_size, cur_index + 1]
-      new_log_probs: Log probabilities of new sequences
-        float32 tensor with shape [batch_size, 2 * beam_size]
-      new_cache: Dict of cached values for each sequence.
-
-    Returns:
-      Dictionary with alive keys from _StateKeys:
-        {Top beam_size sequences that are still alive (don't end with eos_id)
-         Log probabilities of top alive sequences
-         Dict cache storing decoder states for top alive sequences}
-    """
-    # To prevent finished sequences from being considered, set log probs to -INF
-    new_finished_flags = tf.equal(new_seq[:, :, -1], self.eos_id)
-    new_log_probs += tf.to_float(new_finished_flags) * -INF
-
-    top_alive_seq, top_alive_log_probs, top_alive_cache = _gather_topk_beams(
-        [new_seq, new_log_probs, new_cache], new_log_probs, self.batch_size,
-        self.beam_size)
-
-    return {
-        _StateKeys.ALIVE_SEQ: top_alive_seq,
-        _StateKeys.ALIVE_LOG_PROBS: top_alive_log_probs,
-        _StateKeys.ALIVE_CACHE: top_alive_cache
-    }
-
-  def _get_new_finished_state(self, state, new_seq, new_log_probs):
-    """Combine new and old finished sequences, and gather the top k sequences.
-
-    Args:
-      state: A dictionary with the current loop state.
-      new_seq: New sequences generated by growing the current alive sequences
-        int32 tensor with shape [batch_size, 2 * beam_size, i + 1]
-      new_log_probs: Log probabilities of new sequences
-        float32 tensor with shape [batch_size, 2 * beam_size]
-
-    Returns:
-      Dictionary with finished keys from _StateKeys:
-        {Top beam_size finished sequences based on score,
-         Scores of finished sequences,
-         Finished flags of finished sequences}
-    """
-    i = state[_StateKeys.CUR_INDEX]
-    finished_seq = state[_StateKeys.FINISHED_SEQ]
-    finished_scores = state[_StateKeys.FINISHED_SCORES]
-    finished_flags = state[_StateKeys.FINISHED_FLAGS]
-
-    # First append a column of 0-ids to finished_seq to increment the length.
-    # New shape of finished_seq: [batch_size, beam_size, i + 1]
-    finished_seq = tf.concat(
-        [finished_seq,
-         tf.zeros([self.batch_size, self.beam_size, 1], tf.int32)], axis=2)
-
-    # Calculate new seq scores from log probabilities.
-    length_norm = _length_normalization(self.alpha, i + 1)
-    new_scores = new_log_probs / length_norm
-
-    # Set the scores of the still-alive seq in new_seq to large negative values.
-    new_finished_flags = tf.equal(new_seq[:, :, -1], self.eos_id)
-    new_scores += (1. - tf.to_float(new_finished_flags)) * -INF
-
-    # Combine sequences, scores, and flags.
-    # The old and the new candidates are stacked along the beam axis.
-    finished_seq = tf.concat([finished_seq, new_seq], axis=1)
-    finished_scores = tf.concat([finished_scores, new_scores], axis=1)
-    finished_flags = tf.concat([finished_flags, new_finished_flags], axis=1)
-
-    # Return the finished sequences with the best scores.
-    top_finished_seq, top_finished_scores, top_finished_flags = (
-        _gather_topk_beams([finished_seq, finished_scores, finished_flags],
-                           finished_scores, self.batch_size, self.beam_size))
-
-    return {
-        _StateKeys.FINISHED_SEQ: top_finished_seq,
-        _StateKeys.FINISHED_SCORES: top_finished_scores,
-        _StateKeys.FINISHED_FLAGS: top_finished_flags
-    }
-
-
-def sequence_beam_search(
-    symbols_to_logits_fn, initial_ids, initial_cache, vocab_size, beam_size,
-    alpha, max_decode_length, eos_id):
-  """Run beam search for the subtoken id sequences with the best scores.
-
-  Convenience wrapper that builds a SequenceBeamSearch, taking the batch size
-  from the first dimension of initial_ids, and runs its search.
-
-  Args:
-    symbols_to_logits_fn: A function that takes in ids, index, and cache as
-      arguments. The passed in arguments will have shape:
-        ids -> [batch_size * beam_size, index]
-        index -> [] (scalar)
-        cache -> nested dictionary of tensors [batch_size * beam_size, ...]
-      The function must return logits and new cache.
-        logits -> [batch * beam_size, vocab_size]
-        new cache -> same shape/structure as inputted cache
-    initial_ids: Starting ids for each batch item.
-      int32 tensor with shape [batch_size]
-    initial_cache: dict containing starting decoder variables information
-    vocab_size: int size of tokens
-    beam_size: int number of beams
-    alpha: float defining the strength of length normalization
-    max_decode_length: maximum length to decoded sequence
-    eos_id: int id of eos token, used to determine when a sequence has finished
-
-  Returns:
-    Top decoded sequences [batch_size, beam_size, max_decode_length]
-    sequence scores [batch_size, beam_size]
-  """
-  # The batch size is read from the tensor at run time.
-  dynamic_batch_size = tf.shape(initial_ids)[0]
-  searcher = SequenceBeamSearch(
-      symbols_to_logits_fn, vocab_size, dynamic_batch_size, beam_size, alpha,
-      max_decode_length, eos_id)
-  return searcher.search(initial_ids, initial_cache)
-
-
-def _log_prob_from_logits(logits):
-  """Convert logits to log probabilities by normalizing over axis 2.
-
-  Args:
-    logits: float tensor with at least three axes; axis 2 is the one that is
-      normalized (presumably the vocabulary axis -- confirm against callers).
-
-  Returns:
-    Tensor with the same shape as `logits` holding
-    `logits - logsumexp(logits, axis=2)`.
-  """
-  # `keepdims` replaces the deprecated `keep_dims` spelling of this argument;
-  # the reduced axis is kept so the subtraction broadcasts over it.
-  return logits - tf.reduce_logsumexp(logits, axis=2, keepdims=True)
-
-
-def _length_normalization(alpha, length):
-  """Return the length normalization factor ((5 + length) / 6) ** alpha.
-
-  Args:
-    alpha: exponent controlling the strength of the normalization.
-    length: sequence length (Python number or tensor); converted to float32.
-
-  Returns:
-    float32 tensor holding the normalization factor.
-  """
-  # tf.cast(..., tf.float32) is the non-deprecated equivalent of tf.to_float.
-  return tf.pow(((5. + tf.cast(length, tf.float32)) / 6.), alpha)
-
-
-def _expand_to_beam_size(tensor, beam_size):
-  """Insert a beam axis at position 1 and repeat the tensor along it.
-
-  Args:
-    tensor: tensor to tile [batch_size, ...]
-    beam_size: number of copies to make along the new axis.
-
-  Returns:
-    Tiled tensor [batch_size, beam_size, ...]
-  """
-  with_beam_axis = tf.expand_dims(tensor, axis=1)
-  # Repeat only along the freshly inserted axis; every other axis is kept
-  # as is (multiple of 1).
-  multiples = [1, beam_size] + [1] * (with_beam_axis.shape.ndims - 2)
-  return tf.tile(with_beam_axis, multiples)
-
-
-def _shape_list(tensor):
-  """Return the tensor's shape as a list that contains no None entries.
-
-  Statically known dimensions are returned as given by the static shape;
-  dimensions that are unknown at graph-construction time are replaced by the
-  matching element of the dynamic `tf.shape(tensor)`.
-  """
-  static_dims = tensor.get_shape().as_list()
-  dynamic_dims = tf.shape(tensor)
-  return [
-      dynamic_dims[axis] if dim is None else dim
-      for axis, dim in enumerate(static_dims)
-  ]
-
-
-def _get_shape_keep_last_dim(tensor):
-  """Return a TensorShape that is unknown in every dimension but the last.
-
-  The last dimension keeps its static value; if it is only known dynamically
-  (i.e. `_shape_list` yields a tensor for it) it is reported as None as well.
-  """
-  dims = _shape_list(tensor)
-
-  last_dim = dims[-1]
-  if isinstance(last_dim, tf.Tensor):
-    # A dynamic size cannot be stored in a static TensorShape.
-    last_dim = None
-
-  # Every leading dimension is deliberately left unspecified.
-  return tf.TensorShape([None] * (len(dims) - 1) + [last_dim])
-
-
-def _flatten_beam_dim(tensor):
-  """Merge the first two dimensions of a tensor into a single dimension.
-
-  Args:
-    tensor: Tensor to reshape of shape [A, B, ...]
-
-  Returns:
-    Reshaped tensor of shape [A*B, ...]
-  """
-  dims = _shape_list(tensor)
-  # Fold the beam dimension (index 1) into the leading dimension.
-  merged_dims = [dims[0] * dims[1]] + dims[2:]
-  return tf.reshape(tensor, merged_dims)
-
-
-def _unflatten_beam_dim(tensor, batch_size, beam_size):
-  """Split the first dimension back into [batch_size, beam_size].
-
-  Args:
-    tensor: Tensor to reshape of shape [batch_size*beam_size, ...]
-    batch_size: Tensor, original batch size.
-    beam_size: int, original beam size.
-
-  Returns:
-    Reshaped tensor of shape [batch_size, beam_size, ...]
-  """
-  # Everything after the flattened leading dimension is left untouched.
-  trailing_dims = _shape_list(tensor)[1:]
-  return tf.reshape(tensor, [batch_size, beam_size] + trailing_dims)
-
-
-def _gather_beams(nested, beam_indices, batch_size, new_beam_size):
-  """Select beams from every tensor in a nested structure.
-
-  Each tensor in `nested` holds a batch of beams, a beam being one of the
-  search states that beam search explores in parallel. For every batch entry
-  this function picks the beams listed in `beam_indices` and applies the same
-  selection to each tensor of the structure.
-
-  Args:
-    nested: Nested structure (tensor, list, tuple or dict) containing tensors
-      with shape [batch_size, beam_size, ...].
-    beam_indices: int32 tensor with shape [batch_size, new_beam_size]. Each
-      value must lie in [0, beam_size); values need not be unique.
-    batch_size: int size of batch
-    new_beam_size: int number of beams to be pulled from the nested tensors.
-
-  Returns:
-    Nested structure containing tensors with shape
-      [batch_size, new_beam_size, ...]
-  """
-  # Batch coordinate for every gathered beam: integer-dividing a running
-  # counter by new_beam_size yields [[0, 0, ...], [1, 1, ...], ...].
-  flat_positions = tf.range(batch_size * new_beam_size)
-  batch_pos = tf.reshape(flat_positions // new_beam_size,
-                         [batch_size, new_beam_size])
-
-  # Pair each batch coordinate with its beam index. The stacked tensor has
-  # shape [batch_size, new_beam_size, 2]; its last axis holds the (i, j)
-  # coordinates consumed by tf.gather_nd.
-  coordinates = tf.stack([batch_pos, beam_indices], axis=2)
-
-  def _select(state):
-    return tf.gather_nd(state, coordinates)
-
-  return nest.map_structure(_select, nested)
-
-
-def _gather_topk_beams(nested, score_or_log_prob, batch_size, beam_size):
-  """Gather the `beam_size` best beams from a nested structure of tensors.
-
-  The beams are ranked with `tf.nn.top_k` on `score_or_log_prob`, and the
-  resulting indices are used to select from every tensor in `nested`.
-  """
-  # Only the indices (second result of top_k) are needed; the values are not.
-  best_indexes = tf.nn.top_k(score_or_log_prob, k=beam_size)[1]
-  return _gather_beams(nested, best_indexes, batch_size, beam_size)
--- a/TensorFlow/ComputeVision/Accuracy_Validation/ResNet50_Official/official/transformer/model/beam_search_test.py
+++ b/TensorFlow/ComputeVision/Accuracy_Validation/ResNet50_Official/official/transformer/model/beam_search_test.py
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-"""Test beam search helper methods."""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import tensorflow as tf  # pylint: disable=g-bad-import-order
-
-from official.transformer.model import beam_search
-
-
-class BeamSearchHelperTests(tf.test.TestCase):
-  """Unit tests for the private tensor helpers of the beam_search module."""
-
-  def _evaluated_shape(self, tensor):
-    """Evaluates `tf.shape(tensor)` in a test session and returns the result."""
-    with self.test_session() as sess:
-      return sess.run(tf.shape(tensor))
-
-  def test_expand_to_beam_size(self):
-    """A beam axis of the requested size is inserted at position 1."""
-    tiled = beam_search._expand_to_beam_size(tf.ones([7, 4, 2, 5]), 3)
-    self.assertAllEqual([7, 3, 4, 2, 5], self._evaluated_shape(tiled))
-
-  def test_shape_list(self):
-    """Static dims come back as ints, the placeholder-sized dim as a Tensor."""
-    unknown_dim = tf.placeholder(dtype=tf.int32, shape=[])
-    dims = beam_search._shape_list(tf.ones([7, unknown_dim, 2, 5]))
-    expected_types = [int, tf.Tensor, int, int]
-    for position, expected_type in enumerate(expected_types):
-      self.assertIsInstance(dims[position], expected_type)
-
-  def test_get_shape_keep_last_dim(self):
-    """Only the last dimension survives; all others are reported as None."""
-    computed_dim = tf.to_int32(tf.sqrt(tf.constant(4.0)))
-    tensor = tf.ones([7, computed_dim, 2, 5])
-    kept_shape = beam_search._get_shape_keep_last_dim(tensor)
-    self.assertAllEqual([None, None, None, 5], kept_shape.as_list())
-
-  def test_flatten_beam_dim(self):
-    """The first two dimensions (7 and 4) are merged into one of size 28."""
-    flattened = beam_search._flatten_beam_dim(tf.ones([7, 4, 2, 5]))
-    self.assertAllEqual([28, 2, 5], self._evaluated_shape(flattened))
-
-  def test_unflatten_beam_dim(self):
-    """A leading dimension of 28 is split back into batch 7 and beam 4."""
-    restored = beam_search._unflatten_beam_dim(tf.ones([28, 2, 5]), 7, 4)
-    self.assertAllEqual([7, 4, 2, 5], self._evaluated_shape(restored))
-
-  def test_gather_beams(self):
-    """Beams are picked per batch entry according to the given indices."""
-    beams = tf.reshape(tf.range(24), [2, 3, 4])
-    # beams looks like:  [[[ 0  1  2  3]
-    #                      [ 4  5  6  7]
-    #                      [ 8  9 10 11]]
-    #
-    #                     [[12 13 14 15]
-    #                      [16 17 18 19]
-    #                      [20 21 22 23]]]
-
-    # Batch entry 0 keeps beams 1 and 2; batch entry 1 keeps beams 0 and 2.
-    gathered = beam_search._gather_beams(beams, [[1, 2], [0, 2]], 2, 2)
-    with self.test_session() as sess:
-      gathered = sess.run(gathered)
-
-    expected = [[[4, 5, 6, 7],
-                 [8, 9, 10, 11]],
-                [[12, 13, 14, 15],
-                 [20, 21, 22, 23]]]
-    self.assertAllEqual(expected, gathered)
-
-  def test_gather_topk_beams(self):
-    """The two highest-scoring beams of each batch entry are gathered."""
-    beams = tf.reshape(tf.range(24), [2, 3, 4])
-    beam_scores = [[0, 1, 1], [1, 0, 1]]
-
-    gathered = beam_search._gather_topk_beams(beams, beam_scores, 2, 2)
-    with self.test_session() as sess:
-      gathered = sess.run(gathered)
-
-    expected = [[[4, 5, 6, 7],
-                 [8, 9, 10, 11]],
-                [[12, 13, 14, 15],
-                 [20, 21, 22, 23]]]
-    self.assertAllEqual(expected, gathered)
-
-
-# Hand control to the TensorFlow test runner when this file is executed
-# directly as a script.
-if __name__ == "__main__":
-  tf.test.main()