update some TF file

cf66c525 · qianyj · 6b6f8b0c · cf66c525 · cf66c525 · cf66c525
Commit cf66c525 authored Apr 15, 2022 by qianyj
20 changed files
--- a/TensorFlow/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/test_data/tfrecord_image_generator.py
+++ b/TensorFlow/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/test_data/tfrecord_image_generator.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Generate black and white test TFRecords with Example protos.
+Each record within the TFRecord file is a
+serialized Example proto. The Example proto contains the following fields:
+  image/encoded: string containing JPEG encoded image in RGB colorspace
+  image/height: integer, image height in pixels
+  image/width: integer, image width in pixels
+  image/colorspace: string, specifying the colorspace, always 'RGB'
+  image/channels: integer, specifying the number of channels, always 3
+  image/format: string, specifying the format, always 'JPEG'
+  image/filename: string containing the basename of the image file
+            e.g. 'n01440764_10026.JPEG' or 'ILSVRC2012_val_00000293.JPEG'
+  image/class/label: integer specifying the index in a classification layer.
+    Records written by this generator use labels in [0, num_classes), since
+    the label is computed as index % num_classes.
+  image/class/synset: string specifying the unique ID of the label,
+    e.g. 'n01440764'
+  image/class/text: string specifying the human-readable version of the label
+    e.g. 'red fox, Vulpes vulpes'
+  image/object/bbox/xmin: list of floats specifying the 0+ human annotated
+    bounding boxes
+  image/object/bbox/xmax: list of floats specifying the 0+ human annotated
+    bounding boxes
+  image/object/bbox/ymin: list of floats specifying the 0+ human annotated
+    bounding boxes
+  image/object/bbox/ymax: list of floats specifying the 0+ human annotated
+    bounding boxes
+  image/object/bbox/label: list of integers, one per bounding box, each
+    specifying the index in a classification layer. It has the same range as
+    image/class/label. Note this is always identical to the image label.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import os
+import random
+import numpy as np
+import six
+import tensorflow.compat.v1 as tf
+def _int64_feature(value):
+  """Builds an int64 `tf.train.Feature` from a single value or a list."""
+  # Anything that is not already a list is wrapped as a one-element list.
+  values = value if isinstance(value, list) else [value]
+  int64_list = tf.train.Int64List(value=values)
+  return tf.train.Feature(int64_list=int64_list)
+def _float_feature(value):
+  """Builds a float `tf.train.Feature` from a single value or a list."""
+  # Anything that is not already a list is wrapped as a one-element list.
+  values = value if isinstance(value, list) else [value]
+  float_list = tf.train.FloatList(value=values)
+  return tf.train.Feature(float_list=float_list)
+def _bytes_feature(value):
+  """Builds a bytes `tf.train.Feature` holding `value` as its only element."""
+  bytes_list = tf.train.BytesList(value=[value])
+  return tf.train.Feature(bytes_list=bytes_list)
+def _convert_to_example(filename, image_buffer, label, synset, human, bbox,
+                        height, width):
+  """Build an Example proto for an example.
+  Args:
+    filename: string, path to an image file, e.g., '/path/to/example.JPG'.
+      Only its basename is stored.
+    image_buffer: bytes, JPEG encoding of RGB image
+    label: integer, identifier for the ground truth for the network
+    synset: string, unique WordNet ID specifying the label, e.g., 'n02323233'
+    human: string, human-readable label, e.g., 'red fox, Vulpes vulpes'
+    bbox: list of bounding boxes; each box is a sequence of four numbers
+      [xmin, ymin, xmax, ymax], which are stored as float features. All boxes
+      are assumed to belong to the same label as the image label.
+    height: integer, image height in pixels
+    width: integer, image width in pixels
+  Returns:
+    Example proto
+  Raises:
+    AssertionError: if a bounding box does not have exactly four entries.
+  """
+  xmin = []
+  ymin = []
+  xmax = []
+  ymax = []
+  # Split the per-box coordinates into one list per coordinate with a plain
+  # loop; a comprehension used only for its side effects hides the intent.
+  for box in bbox:
+    assert len(box) == 4
+    box_xmin, box_ymin, box_xmax, box_ymax = box
+    xmin.append(box_xmin)
+    ymin.append(box_ymin)
+    xmax.append(box_xmax)
+    ymax.append(box_ymax)
+  colorspace = b'RGB'
+  channels = 3
+  image_format = b'JPEG'
+  example = tf.train.Example(features=tf.train.Features(feature={
+      'image/height': _int64_feature(height),
+      'image/width': _int64_feature(width),
+      'image/colorspace': _bytes_feature(colorspace),
+      'image/channels': _int64_feature(channels),
+      'image/class/label': _int64_feature(label),
+      'image/class/synset': _bytes_feature(six.ensure_binary(synset)),
+      'image/class/text': _bytes_feature(six.ensure_binary(human)),
+      'image/object/bbox/xmin': _float_feature(xmin),
+      'image/object/bbox/xmax': _float_feature(xmax),
+      'image/object/bbox/ymin': _float_feature(ymin),
+      'image/object/bbox/ymax': _float_feature(ymax),
+      # Every box carries the image-level label.
+      'image/object/bbox/label': _int64_feature([label] * len(xmin)),
+      'image/format': _bytes_feature(image_format),
+      'image/filename': _bytes_feature(os.path.basename(six.ensure_binary(
+          filename))),
+      'image/encoded': _bytes_feature(image_buffer)}))
+  return example
+class ImageCoder(object):
+  """Helper class that provides TensorFlow image coding utilities."""
+  def __init__(self):
+    # Graph pieces: a uint8 placeholder that is fed with the raw image, and
+    # the op that encodes it as an RGB JPEG at quality 100.
+    self._image = tf.placeholder(dtype=tf.uint8)
+    self._encode_jpeg = tf.image.encode_jpeg(
+        self._image, format='rgb', quality=100)
+    # A single Session is shared by all encode_jpeg() calls.
+    self._sess = tf.Session()
+  def encode_jpeg(self, image):
+    """Feeds `image` to the encode op and returns the fetched result."""
+    return self._sess.run(self._encode_jpeg, feed_dict={self._image: image})
+def _process_image(coder, name):
+  """Generate one solid-color image of random size and encode it.
+  The image is black when name is "train" and white for any other name.
+  Args:
+    coder: instance of ImageCoder to provide TensorFlow image coding utils.
+    name: string, unique identifier specifying the data set.
+  Returns:
+    image_buffer: the value returned by coder.encode_jpeg for the image.
+    height: integer, image height in pixels.
+    width: integer, image width in pixels.
+  """
+  if name == 'train':
+    pixel_value = 0
+  else:
+    pixel_value = 255
+  # Height is drawn before width so random numbers are consumed in a fixed
+  # order. Both bounds of randint are inclusive.
+  height = random.randint(30, 299)
+  width = random.randint(30, 299)
+  shape = (height, width, 3)
+  image = np.full(shape, pixel_value, np.uint8)
+  return coder.encode_jpeg(image), height, width
+def _process_dataset(output_directory, num_classes, coder, name, num_images,
+                     num_shards):
+  """Process a complete data set and save it as a TFRecord.
+  Exactly num_images records are written. When num_images is not a multiple
+  of num_shards, the first (num_images % num_shards) shards receive one extra
+  record instead of the remainder being dropped.
+  Args:
+    output_directory: Where to put outputs.
+    num_classes: number of classes.
+    coder: Instance of an ImageCoder.
+    name: string, unique identifier specifying the data set.
+    num_images: number of images to generate.
+    num_shards: integer number of shards to create.
+  """
+  files_per_shard, extra_files = divmod(num_images, num_shards)
+  # Running record index across all shards. For evenly divisible inputs it
+  # equals shard * files_per_shard + i.
+  index = 0
+  for shard in range(num_shards):
+    shard_size = files_per_shard + (1 if shard < extra_files else 0)
+    output_filename = '%s-%.5d-of-%.5d' % (name, shard, num_shards)
+    output_file = os.path.join(output_directory, output_filename)
+    with tf.python_io.TFRecordWriter(output_file) as writer:
+      for i in range(shard_size):
+        image_buffer, height, width = _process_image(coder, name)
+        filename = '{}_{}_{}'.format(name, shard, i)
+        label = index % num_classes
+        synset = str(index)
+        human = name
+        # Every generated image gets the same single bounding box.
+        bbox = [[0.1, 0.1, 0.9, 0.9]]
+        example = _convert_to_example(filename, image_buffer, label,
+                                      synset, human, bbox,
+                                      height, width)
+        writer.write(example.SerializeToString())
+        index += 1
+def write_black_and_white_tfrecord_data(
+    output_directory, num_classes, num_train_images=512,
+    num_validation_images=128, train_shards=8, validation_shards=2):
+  """Writes black and white images in tfrecord format.
+  Training images are black and validation images are white.
+  Args:
+    output_directory: Where to put outputs.
+    num_classes: number of classes.
+    num_train_images: number of training images to generate.
+    num_validation_images: number of validation images to generate.
+    train_shards: integer number of training shards to create.
+    validation_shards: integer number of validation shards to create.
+  """
+  coder = ImageCoder()
+  # The validation set is written before the training set.
+  splits = (
+      ('validation', num_validation_images, validation_shards),
+      ('train', num_train_images, train_shards),
+  )
+  for split_name, image_count, shard_count in splits:
+    _process_dataset(output_directory, num_classes, coder, split_name,
+                     image_count, shard_count)
--- a/TensorFlow/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/test_util.py
+++ b/TensorFlow/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/test_util.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Shared functionality across multiple test files."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from collections import namedtuple
+from contextlib import contextmanager
+import os
+import numpy as np
+import tensorflow.compat.v1 as tf
+import benchmark_cnn
+import cnn_util
+import datasets
+import preprocessing
+from models import model
+from platforms import util as platforms_util
+from test_data import tfrecord_image_generator
+from tensorflow.core.protobuf import rewriter_config_pb2  # pylint: disable=g-direct-tensorflow-import
+from tensorflow.python.platform import test
+@contextmanager
+def monkey_patch(obj, **kwargs):
+  """Context mgr to monkey patch attributes on an object (such as a module).
+  The attributes are patched back to their original value when the context
+  manager exits, including when the body raises.
+  For example, to replace benchmark_cnn.get_data_type with an identity function,
+  do:
+  ```
+  with monkey_patch(benchmark_cnn, get_data_type=lambda x: x):
+    dtype1 = benchmark_cnn.get_data_type(1)  # dtype1 will be 1
+  dtype2 = benchmark_cnn.get_data_type(params)  # Call the original function
+  ```
+  Args:
+    obj: The object (which can be a module) to monkey patch attributes on.
+    **kwargs: Dictionary mapping from attribute name to value that the attribute
+      will be patched with.
+  Yields:
+    Nothing.
+  """
+  # Snapshot the current values first. getattr raises AttributeError here if
+  # an attribute to patch does not exist on obj.
+  originals = {}
+  for attr_name in kwargs:
+    originals[attr_name] = getattr(obj, attr_name)
+  try:
+    for attr_name in kwargs:
+      setattr(obj, attr_name, kwargs[attr_name])
+    yield
+  finally:
+    for attr_name in originals:
+      setattr(obj, attr_name, originals[attr_name])
+def monkey_patch_base_cluster_manager():
+  """Monkey patches get_cluster_manager to return a BaseClusterManager.
+  This function replaces platforms_util.get_cluster_manager with a function that
+  always returns a BaseClusterManager. The replacement is permanent; nothing
+  here restores the original function.
+  This is useful for testing creating a graph in distributed mode, with only a
+  single process. GrpcClusterManager's constructor blocks until a cluster is set
+  up, which requires multiple processes to be created.
+  """
+  def get_test_cluster_manager(params, config_proto):
+    # config_proto is accepted but deliberately unused.
+    del config_proto
+    manager = cnn_util.BaseClusterManager(params)
+    return manager
+  setattr(platforms_util, 'get_cluster_manager', get_test_cluster_manager)
+def print_and_add_to_list(print_list):
+  """Returns a function which prints its input, then appends it to print_list.
+  Args:
+    print_list: The list that every printed string is appended to, in order.
+  Returns:
+    A one-argument function that returns None.
+  """
+  record = print_list.append
+  def f(string):
+    print(string)
+    record(string)
+  return f
+# One parsed training log line: the loss plus the top-1 and top-5 accuracies.
+TrainingOutput = namedtuple('TrainingOutput',
+                            'loss top_1_accuracy top_5_accuracy')
+# One parsed evaluation log line: the top-1 and top-5 accuracies.
+EvalOutput = namedtuple('EvalOutput', 'top_1_accuracy top_5_accuracy')
+def get_training_outputs_from_logs(logs, print_training_accuracy):
+  """Returns a list of TrainingOutputs by parsing the logs of a training run.
+  Args:
+    logs: A list of strings, each which is a line from the standard output of
+      tf_cnn_benchmarks from training. Only lines in the form:
+        10 images/sec: 14.2 +/- 0.0 (jitter = 0.0) 7.020
+      are parsed (the line may also contain the training accuracies).
+    print_training_accuracy: The value of the param print_training_accuracy.
+  Returns:
+    A list of TrainingOutputs. The list has one element per element of logs
+    that is in the format above. top_1_accuracy and top_5_accuracy are set to -1
+    if print_training_accuracy is false.
+  """
+  outputs = []
+  for line in logs:
+    # Skip everything that is not a per-step throughput line.
+    if 'images/sec' not in line or '+/-' not in line:
+      continue
+    fields = line.split()
+    if print_training_accuracy:
+      # With accuracies the line splits into 11 fields, e.g.:
+      #   10 images/sec: 0.2 +/- 0.0 (jitter = 0.0) 6.908 0.500 1.000
+      assert len(fields) == 11
+      top_1_acc = float(fields[9])
+      top_5_acc = float(fields[10])
+    else:
+      # Without accuracies the line splits into 9 fields, e.g.:
+      #   10 images/sec: 0.2 +/- 0.0 (jitter = 0.0) 6.908
+      assert len(fields) == 9
+      top_1_acc = top_5_acc = -1
+    # The loss is the ninth field in both layouts.
+    outputs.append(TrainingOutput(loss=float(fields[8]),
+                                  top_1_accuracy=top_1_acc,
+                                  top_5_accuracy=top_5_acc))
+  assert len(outputs) >= 1
+  return outputs
+def get_evaluation_outputs_from_logs(logs):
+  """Returns the top 1 and 5 accuracies by parsing the logs of an eval run.
+  Args:
+    logs: A list of strings, each which is a line from the standard output of
+      tf_cnn_benchmarks from evaluation. Only lines in the form:
+        Accuracy @ 1 = 0.5000 Accuracy @ 5 = 1.0000 [80 examples]
+      are parsed.
+  Returns:
+    A list of EvalOutputs. Normally this list only has one EvalOutput, but can
+    contain multiple if training is done and
+    --eval_during_training_every_n_steps is specified.
+  """
+  eval_outputs = []
+  for line in logs:
+    if 'Accuracy @ ' not in line:
+      continue
+    # An accuracy line splits into 12 fields, e.g.:
+    #   Accuracy @ 1 = 0.5000 Accuracy @ 5 = 1.0000 [80 examples]
+    fields = line.split()
+    assert len(fields) == 12
+    eval_outputs.append(EvalOutput(top_1_accuracy=float(fields[4]),
+                                   top_5_accuracy=float(fields[9])))
+  assert eval_outputs
+  return eval_outputs
+def check_training_outputs_are_reasonable(testcase, training_outputs,
+                                          print_training_accuracy,
+                                          max_final_loss=10.,
+                                          previous_final_loss=None):
+  """Checks the outputs from training a model are reasonable.
+  An assert is failed if the outputs are not reasonable. The final top-1 and
+  top-5 accuracies are asserted to be 1, and so the dataset used to train should
+  be trivial to learn. For example, the dataset could consist of a black image
+  with label 0 and a white image with label 1.
+  Args:
+    testcase: A tf.test.TestCase used for assertions.
+    training_outputs: A list of TrainingOutputs, as returned from
+      get_training_outputs_from_logs().
+    print_training_accuracy: Whether training accuracies were printed and stored
+      in training_outputs.
+    max_final_loss: The loss of the final training output is asserted to be at
+      most this value. The check is skipped when this is None.
+    previous_final_loss: If training was resumed from a checkpoint, the loss of
+      the final step from the previous training run that saved the checkpoint.
+  """
+  if previous_final_loss is not None:
+    # The first loss after resuming may exceed the previous final loss by at
+    # most 1%.
+    loss_ceiling = previous_final_loss * 1.01
+    testcase.assertLessEqual(training_outputs[0].loss, loss_ceiling)
+  # No step may report a loss above 100.
+  for step_output in training_outputs:
+    testcase.assertLessEqual(step_output.loss, 100.)
+  final_output = training_outputs[-1]
+  if print_training_accuracy:
+    for field in ('top_1_accuracy', 'top_5_accuracy'):
+      testcase.assertEqual(getattr(final_output, field), 1.0)
+  if max_final_loss is not None:
+    testcase.assertLessEqual(final_output.loss, max_final_loss)
+def train_and_eval(testcase,
+                   run_fn,
+                   params,
+                   check_output_values,
+                   max_final_loss=10.,
+                   skip=None):
+  """Trains a model then evaluates it.
+  This function should be used to verify training and evaluating
+  BenchmarkCNN works without crashing and that it outputs reasonable
+  values. BenchmarkCNN will be run three times. First, it will train a
+  model from scratch, saving a checkpoint. Second, it will load the checkpoint
+  to continue training. Finally, it evaluates based on the loaded checkpoint.
+  Args:
+    testcase: A tf.test.TestCase used for assertions.
+    run_fn: Must run `BenchmarkCNN` exactly once. BenchmarkCNN is
+      never used directly, but instead is only run through `run_fn`. `run_fn`
+      has the signature (run_type, inner_params) -> output_list, where:
+        * run_type is a string indicating how BenchmarkCNN will be run.
+          Either 'InitialTraining', 'TrainingFromCheckpoint' or 'Evaluation'.
+        * inner_params is the params BenchmarkCNN should be run with.
+        * output_list[i] is a list of lines from the ith worker's stdout.
+    params: The params BenchmarkCNN will be run with.
+      Will be passed to `run_fn` slightly modified in order to run with both
+      training and evaluation.
+    check_output_values: Whether the outputs of the workers, such as training
+      accuracy, should be checked to make sure their values are reasonable.
+      Fails an assert on `testcase` if a check fails.
+    max_final_loss: The loss of the final training output is asserted to be at
+      most this value for both training runs.
+    skip: If 'eval', evaluation is not done. If
+      'eval_and_train_from_checkpoint', evaluation and training from a
+      checkpoint are both not done.
+  Returns:
+    None. Problems are reported through assertions on `testcase` and through
+    plain `assert` statements.
+  """
+  assert not skip or skip in {'eval', 'eval_and_train_from_checkpoint'}
+  # Part 1: Train from scratch.
+  tf.logging.info('Training model from scratch')
+  # Accuracies are expected in the training log lines when either of these
+  # params is set; the log parser is told which layout to expect.
+  print_training_accuracy = (params.print_training_accuracy or
+                             params.forward_only)
+  initial_train_logs = run_fn('InitialTraining', params)
+  testcase.assertGreaterEqual(len(initial_train_logs), 1)
+  for lines in initial_train_logs:
+    initial_train_outputs = get_training_outputs_from_logs(
+        lines, print_training_accuracy)
+    # The number of parsed steps is only checked in this configuration.
+    if params.cross_replica_sync and params.batch_group_size == 1:
+      testcase.assertEqual(len(initial_train_outputs), params.num_batches)
+    if check_output_values:
+      check_training_outputs_are_reasonable(testcase, initial_train_outputs,
+                                            print_training_accuracy,
+                                            max_final_loss=max_final_loss)
+  # Snapshot the contents of train_dir so that Part 2 can verify that the
+  # second training run wrote something new.
+  if params.train_dir is not None:
+    train_dir_entries = set(os.listdir(params.train_dir))
+    testcase.assertGreater(len(train_dir_entries), 0)
+  else:
+    train_dir_entries = None
+  if skip == 'eval_and_train_from_checkpoint':
+    return
+  # Part 2: Train from the loaded checkpoint.
+  # Continuing requires that a train_dir was given in Part 1.
+  testcase.assertIsNotNone(train_dir_entries)
+  tf.logging.info('Training model from loaded checkpoint')
+  # num_batches is doubled here. NOTE(review): presumably num_batches counts
+  # from the step restored from the checkpoint, so this runs about as many
+  # additional batches as Part 1 did -- confirm against BenchmarkCNN.
+  params = params._replace(num_batches=params.num_batches * 2)
+  train_logs_from_ckpt = run_fn('TrainingFromCheckpoint', params)
+  testcase.assertGreaterEqual(len(train_logs_from_ckpt), 1)
+  for lines in train_logs_from_ckpt:
+    train_outputs_from_ckpt = get_training_outputs_from_logs(
+        lines, print_training_accuracy)
+    if params.cross_replica_sync and params.batch_group_size == 1:
+      # params.num_batches // 2 is the original (Part 1) number of batches.
+      testcase.assertEqual(len(train_outputs_from_ckpt),
+                           params.num_batches // 2 - params.num_warmup_batches)
+    if check_output_values:
+      # initial_train_outputs still holds the outputs parsed from the last
+      # entry of initial_train_logs in Part 1.
+      check_training_outputs_are_reasonable(
+          testcase, train_outputs_from_ckpt, print_training_accuracy,
+          max_final_loss=max_final_loss,
+          previous_final_loss=initial_train_outputs[-1].loss)
+  # Ensure a new checkpoint was written out.
+  testcase.assertNotEqual(train_dir_entries, set(os.listdir(params.train_dir)))
+  if skip == 'eval':
+    return
+  # Part 3: Evaluate from the loaded checkpoint.
+  tf.logging.info('Evaluating model from checkpoint')
+  # Restore the original num_batches and switch the run to evaluation.
+  params = params._replace(num_batches=params.num_batches // 2, eval=True)
+  eval_logs = run_fn('Evaluation', params)
+  testcase.assertGreaterEqual(len(eval_logs), 1)
+  for lines in eval_logs:
+    eval_outputs = get_evaluation_outputs_from_logs(lines)
+    # Exactly one accuracy line is expected per worker in a pure eval run.
+    assert len(eval_outputs) == 1
+    top_1_accuracy, top_5_accuracy = eval_outputs[0]
+    if check_output_values:
+      testcase.assertEqual(top_1_accuracy, 1.0)
+      testcase.assertEqual(top_5_accuracy, 1.0)
+def get_temp_dir(dir_name):
+  """Creates `dir_name` under the test temp directory and returns its path."""
+  parent = test.get_temp_dir()
+  dir_path = os.path.join(parent, dir_name)
+  # os.mkdir raises if the directory already exists, so every call needs a
+  # dir_name that has not been used yet.
+  os.mkdir(dir_path)
+  return dir_path
+def create_black_and_white_images():
+  """Writes black and white TFRecord test data; returns its directory."""
+  output_dir = get_temp_dir('black_and_white_images')
+  # A single class is used, with the generator's default image/shard counts.
+  tfrecord_image_generator.write_black_and_white_tfrecord_data(
+      output_dir, num_classes=1)
+  return output_dir
+def get_params(train_dir_name):
+  """Returns params that can be used to train.
+  Args:
+    train_dir_name: Name of the directory, created under the test temp
+      directory, that is passed as train_dir.
+  """
+  # get_temp_dir creates the directory as a side effect.
+  train_dir = get_temp_dir(train_dir_name)
+  overrides = dict(
+      batch_size=2,
+      display_every=1,
+      init_learning_rate=0.005,
+      model='trivial',
+      num_batches=20,
+      num_gpus=2,
+      num_warmup_batches=5,
+      optimizer='sgd',
+      print_training_accuracy=True,
+      train_dir=train_dir,
+      variable_update='parameter_server',
+      weight_decay=0,
+      distortions=True,
+      distort_color_in_yiq=False)
+  params = benchmark_cnn.make_params(**overrides)
+  return benchmark_cnn.set_default_param_values_and_env_vars(params)
+def get_var_update_params():
+  """Returns params that are used when testing variable updates."""
+  overrides = dict(
+      batch_size=2,
+      model='test_model',
+      num_gpus=2,
+      display_every=1,
+      num_warmup_batches=0,
+      num_batches=4,
+      # Both values are 2 ** -4, i.e. 1/16.
+      weight_decay=2 ** -4,
+      init_learning_rate=2 ** -4,
+      optimizer='sgd')
+  params = benchmark_cnn.make_params(**overrides)
+  return benchmark_cnn.set_default_param_values_and_env_vars(params)
+def get_fake_var_update_inputs():
+  """Returns fake input 1x1 images to use in variable update tests."""
+  # The sixteen values are 0, 127.5, 255, ..., 15 * 127.5. BenchmarkCNN divides
+  # by 127.5 then subtracts 1.0 from the images, so after that, the images
+  # will be -1., 0., 1., ..., 14.
+  pixel_values = 127.5 * np.arange(16)
+  return np.resize(pixel_values, (16, 1, 1, 1))
+def _worker_batches_in_numpy_array(numpy_inputs, batch_size, shift_ratio):
+  """Yields batches from a numpy array, for a single worker.
+  The generator never terminates: after the last batch it wraps around to the
+  first one again.
+  """
+  rolled = cnn_util.roll_numpy_batches(numpy_inputs, batch_size, shift_ratio)
+  num_examples = rolled.shape[0]
+  # The inputs must split into whole batches.
+  assert num_examples % batch_size == 0
+  start = 0
+  while True:
+    stop = start + batch_size
+    yield rolled[start:stop, ...]
+    start = stop % num_examples
+def manually_compute_losses(numpy_inputs, inputs_placeholder, loss, num_workers,
+                            params):
+  """Manually compute the losses each worker should report in tf_cnn_benchmarks.
+  This function essentially simulates tf_cnn_benchmarks, computing what the loss
+  of each worker should be. The caller should create a model, that takes in
+  images from `inputs_placeholder`, a tf.placeholder, and computes `loss`.
+  This function, and all ops passed to this function, must be run under a
+  tf.device('cpu:0') context manager.
+  Non-SGD optimizers are not supported with multiple workers.
+  Args:
+    numpy_inputs: A Numpy array to use as the input images.
+    inputs_placeholder: A tf.placeholder tensor, where input images can be fed
+      into.
+    loss: A scalar tensor representing the loss of the model, which is obtained
+      from the input images in inputs_placeholder.
+    num_workers: How many workers should be simulated.
+    params: Params tuple. This doesn't have to have information about the
+      distributed cluster, such as --num_workers, as num_workers is passed in
+      separately.
+  Returns:
+    A list of list of losses. return_value[i][j] is the loss of the ith worker
+    after the jth step.
+  """
+  # Each simulated worker consumes one batch per GPU on every step.
+  batch_size = params.batch_size * params.num_gpus
+  assert numpy_inputs.shape[0] % (num_workers * batch_size) == 0
+  # Weight decay is applied to every trainable variable in the current graph.
+  l2_loss = tf.add_n([tf.nn.l2_loss(x) for x in tf.trainable_variables()])
+  total_loss = loss + params.weight_decay * l2_loss
+  # Gradients always come from total_loss; only the reported value depends on
+  # loss_type_to_report.
+  reported_loss = (loss if params.loss_type_to_report == 'base_loss'
+                   else total_loss)
+  gradient_multiplier = 1
+  if params.variable_update in ('replicated', 'distributed_all_reduce'):
+    # In certain variable updates, tf_cnn_benchmarks add the gradients of the
+    # GPUs instead of taking their mean, making the gradients effectively
+    # params.num_gpu times higher.
+    # TODO(b/62722498): Make all variable updates consistent.
+    gradient_multiplier = params.num_gpus
+  opt = benchmark_cnn.get_optimizer(params, params.init_learning_rate)
+  # grad_loss scales the computed gradients by gradient_multiplier.
+  grad_vars = opt.compute_gradients(
+      total_loss, grad_loss=tf.constant(gradient_multiplier, dtype=tf.float32))
+  grads = [g for g, _ in grad_vars]
+  # We apply gradients from a placeholder. That way, we can first compute the
+  # gradients from each worker, then afterwards apply them one by one by feeding
+  # them into the placeholder.
+  placeholder_grad_vars = [(tf.placeholder(g.dtype, g.shape), v)
+                           for g, v in grad_vars]
+  placeholder_grads = [g for g, _ in placeholder_grad_vars]
+  apply_grads_op = opt.apply_gradients(placeholder_grad_vars)
+  # One endless batch iterator per worker; worker i uses shift ratio
+  # i / num_workers.
+  batch_iterators = [_worker_batches_in_numpy_array(numpy_inputs, batch_size,
+                                                    shift_ratio=i / num_workers)
+                     for i in range(num_workers)]
+  # Set the GPU count to 0, to avoid taking all the GPU memory. Unfortunately,
+  # doing so still takes up about ~1GB for some reason.
+  config = tf.ConfigProto(device_count={'GPU': 0})
+  config.graph_options.rewrite_options.pin_to_host_optimization = (
+      rewriter_config_pb2.RewriterConfig.OFF)
+  with tf.Session(config=config) as sess:
+    sess.run(tf.global_variables_initializer())
+    losses = [[] for _ in range(num_workers)]
+    for i in range(params.num_batches):
+      # First compute every worker's loss and gradients against the same
+      # variable values...
+      computed_grads = []
+      for j in range(num_workers):
+        batch_feed = next(batch_iterators[j])
+        # Apply the same scaling the comment in get_fake_var_update_inputs
+        # attributes to BenchmarkCNN: divide by 127.5, then subtract 1.
+        batch_feed = batch_feed / 127.5 - 1
+        worker_loss, worker_grads = sess.run((reported_loss, grads),
+                                             {inputs_placeholder: batch_feed})
+        losses[j].append(worker_loss)
+        computed_grads.append(worker_grads)
+      # ...then apply the gradients, one worker at a time.
+      for worker_grads in computed_grads:
+        # TODO(reedwm): With multiple workers, applying the gradients
+        # sequentially per worker is not equivalent to what tf_cnn_benchmarks
+        # does when the optimizer is not SGD. Therefore, this currently does
+        # not work when num_workers > 1 and params.optimizer != 'sgd'.
+        feed_dict = dict(zip(placeholder_grads, worker_grads))
+        sess.run(apply_grads_op, feed_dict)
+  return losses
+class TestCNNModel(model.CNNModel):
+  """A simple model used for testing.
+  The input is a 1-channel 1x1 image, consisting of a single number. The model
+  has two scalar variables: A and B, initialized to 1 and 2 respectively. Given
+  an image x, the loss is defined as:
+      loss = x * A * B
+  """
+  def __init__(self):
+    """Initializes the base model with unit image size, batch size and LR."""
+    super(TestCNNModel, self).__init__(
+        'test_cnn_model', image_size=1, batch_size=1, learning_rate=1)
+    self.depth = 1
+  # Initial values of the two scalar variables A and B.
+  VAR_A_INITIAL_VALUE = 1.
+  VAR_B_INITIAL_VALUE = 2.
+  def add_inference(self, cnn):
+    """Adds the two multiplications by A and B, then a reshape, to `cnn`."""
+    # This model only supports 1x1 images with 1 channel
+    assert cnn.top_layer.shape[1:] == (1, 1, 1)
+    # Multiply by variable A.
+    # NOTE(review): presumably the positional arguments describe a 1x1 conv
+    # with a single output channel and unit strides -- confirm against the
+    # signature of cnn.conv.
+    with tf.name_scope('mult_by_var_A'):
+      cnn.conv(1, 1, 1, 1, 1, use_batch_norm=None, activation=None, bias=None,
+               kernel_initializer=tf.constant_initializer(
+                   self.VAR_A_INITIAL_VALUE))
+    # Multiply by variable B.
+    with tf.name_scope('mult_by_var_B'):
+      cnn.conv(1, 1, 1, 1, 1, use_batch_norm=None, activation=None, bias=None,
+               kernel_initializer=tf.constant_initializer(
+                   self.VAR_B_INITIAL_VALUE))
+    with tf.name_scope('reshape_to_scalar'):
+      cnn.reshape([-1, 1])
+  def skip_final_affine_layer(self):
+    """Always returns True."""
+    return True
+  def loss_function(self, inputs, build_network_result):
+    """Returns the mean of the logits; `inputs` is ignored."""
+    del inputs
+    return tf.reduce_mean(build_network_result.logits)
+  def manually_compute_losses(self, inputs, num_workers, params):
+    """Computes the expected per-worker losses for this model.
+    Rebuilds the model in a fresh graph pinned to the CPU, using two scalar
+    variables A and B, then delegates to the module-level
+    manually_compute_losses().
+    Args:
+      inputs: The input images, passed through as numpy_inputs.
+      num_workers: How many workers should be simulated.
+      params: Params tuple, passed through unchanged.
+    Returns:
+      The value returned by the module-level manually_compute_losses().
+    """
+    with tf.Graph().as_default(), tf.device('/cpu:0'):
+      a = tf.Variable(self.VAR_A_INITIAL_VALUE, name='A')
+      b = tf.Variable(self.VAR_B_INITIAL_VALUE, name='B')
+      inputs_placeholder = tf.placeholder(tf.float32,
+                                          (None, 1, 1, 1),
+                                          name='inputs_placeholder')
+      inputs_reshaped = tf.reshape(inputs_placeholder, (-1, 1))
+      # The logits are x * A * B, matching the class docstring.
+      loss = self.loss_function(
+          None,
+          model.BuildNetworkResult(logits=inputs_reshaped * a * b,
+                                   extra_info=None))
+      return manually_compute_losses(inputs, inputs_placeholder, loss,
+                                     num_workers, params)
+  def accuracy_function(self, inputs, logits):
+    """Returns the logits as both accuracies; `inputs` is ignored."""
+    del inputs
+    # Let the accuracy be the same as the loss function.
+    return {'top_1_accuracy': logits, 'top_5_accuracy': logits}
+class TestDataSet(datasets.ImageDataset):
+  """A Dataset consisting of 1x1 images with a depth of 1."""
+  def __init__(self, height=1, width=1, depth=1):
+    """Initializes the dataset as 'test_dataset' with no data_dir, 1 class."""
+    super(TestDataSet, self).__init__('test_dataset', height=height,
+                                      width=width, depth=depth, data_dir=None,
+                                      queue_runner_required=True, num_classes=1)
+  def num_examples_per_epoch(self, subset='train'):
+    """Returns 1 regardless of `subset`."""
+    del subset
+    return 1
+  def get_input_preprocessor(self, input_preprocessor='default'):
+    """Returns the TestImagePreprocessor class; the argument is ignored."""
+    return preprocessing.TestImagePreprocessor
+  def use_synthetic_gpu_inputs(self):
+    """Always returns False."""
+    return False
--- a/TensorFlow/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py
+++ b/TensorFlow/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Benchmark script for TensorFlow.
+See the README for more information.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from absl import app
+from absl import flags as absl_flags
+import tensorflow.compat.v1 as tf
+import benchmark_cnn
+import cnn_util
+import flags
+import mlperf
+from cnn_util import log_fn
+# Define the benchmark flags through the project-local `flags` module.
+flags.define_flags()
+# Declare every entry of flags.param_specs as a key flag of this module.
+# NOTE(review): presumably this is what makes them appear in this script's
+# short --help output -- confirm against the absl flags documentation.
+for name in flags.param_specs.keys():
+  absl_flags.declare_key_flag(name)
+# Extra boolean flag (default False); main() passes its value to
+# mlperf.mlperf_logger.
+absl_flags.DEFINE_boolean(
+    'ml_perf_compliance_logging', False,
+    'Print logs required to be compliant with MLPerf. If set, must clone the '
+    'MLPerf training repo https://github.com/mlperf/training and add '
+    'https://github.com/mlperf/training/tree/master/compliance to the '
+    'PYTHONPATH')
+def main(positional_arguments):
+  # Command-line arguments like '--distortions False' are equivalent to
+  # '--distortions=True False', where False is a positional argument. To prevent
+  # this from silently running with distortions, we do not allow positional
+  # arguments.
+  assert len(positional_arguments) >= 1
+  if len(positional_arguments) > 1:
+    raise ValueError('Received unknown positional arguments: %s'
+                     % positional_arguments[1:])
+  params = benchmark_cnn.make_params_from_flags()
+  with mlperf.mlperf_logger(absl_flags.FLAGS.ml_perf_compliance_logging,
+                            params.model):
+    params = benchmark_cnn.setup(params)
+    bench = benchmark_cnn.BenchmarkCNN(params)
+    tfversion = cnn_util.tensorflow_version_tuple()
+    log_fn('TensorFlow:  %i.%i' % (tfversion[0], tfversion[1]))
+    bench.print_info()
+    bench.run()
+if __name__ == '__main__':
+  # Turn off TF2 behavior in the tensorflow.compat.v1 API before main() runs.
+  tf.disable_v2_behavior()
+  app.run(main)  # Raises error on invalid flags, unlike tf.app.run()
--- a/TensorFlow/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/variable_mgr.py
+++ b/TensorFlow/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/variable_mgr.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Defines VariableMgr and subclasses used to manage variables.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import contextlib
+import re
+import tensorflow.compat.v1 as tf
+import allreduce
+import batch_allreduce
+import variable_mgr_util
+class VariableMgr(object):
+  """Abstract superclass for class used by BenchmarkCNN to control variables.
+    Functions on this class are used to control how variables are created and
+    managed, and how gradients are computed and applied.
+  """
+  def __init__(self, benchmark_cnn):
+    self.benchmark_cnn = benchmark_cnn
+    self.staging_delta_ops = []
+    self.use_resource_vars = benchmark_cnn.params.use_resource_vars
+    # A variable for automatic loss scaling.
+    self.grad_has_inf_nan = None
+    self._reuse_vars = False
+  def each_tower_has_variables(self):
+    """Returns True if each GPU tower of the model has separate variables."""
+    assert False, 'Must be implemented in subclass'
+  def supports_staged_vars(self):
+    """Whether staged variable management is supported."""
+    return False
+  def create_outer_variable_scope(self, device_num):
+    """Create the tf.variable_scope around all model graph operations."""
+    del device_num  # unused by this implementation
+    assert False, 'Must be implemented in subclass'
+  def preprocess_device_grads(self, device_grads):
+    """Preprocess the device gradients prior to applying them.
+    Args:
+      device_grads: List of lists of (gradient, variable) tuples.
+        device_grads[t][g] = (gradient, variable), where t is the index of the
+        tower and g is the index of the gradient-variable pair.
+    Returns: a tuple of (apply_gradients_devices, gradient_state).
+      gradient_state is an opaque structure that should be passed to
+      get_gradients_to_apply() and append_apply_gradients_ops() (in that order).
+      apply_gradients_devices is a list of devices where the gradients will be
+      applied with get_gradients_to_apply() and append_apply_gradients_ops().
+    """
+    del device_grads  # unused by this implementation
+    assert False, 'Must be implemented in subclass'
+  def get_gradients_to_apply(self, device_num, gradient_state):
+    """Returns the [(gradient, variable)] list to apply for device_num.
+    Args:
+      device_num: indexes into apply_gradients_devices, which was returned by an
+        earlier call to preprocess_device_grads.
+      gradient_state: from previous call to apply_gradients_devices.
+    """
+    del device_num, gradient_state  # unused by this implementation
+    assert False, 'Must be implemented in subclass'
+  def append_apply_gradients_ops(self, gradient_state, opt, grads, training_ops,
+                                 loss_scale_params):
+    """Adds training ops for grads to 'training_ops'.
+    Args:
+      gradient_state: from previous call to apply_gradients_devices.
+      opt: the underlying optimizer
+      grads: [(grad, var)] to apply
+      training_ops: list to which to add ops
+      loss_scale_params: parameters for loss scaling.
+    """
+    del gradient_state  # unused by this implementation
+    def get_apply_gradients_ops_func():
+      """Returns the apply_gradients op."""
+      return [opt.apply_gradients(grads)]
+    variable_mgr_util.append_gradients_with_loss_scale(
+        training_ops, get_apply_gradients_ops_func, loss_scale_params,
+        self.grad_has_inf_nan)
+  def get_post_init_ops(self):
+    """Returns ops that should run post-initialization."""
+    return []
+  def get_devices(self):
+    """Returns devices to use for computation; includes replica selection."""
+    assert False, 'Must be implemented in subclass'
+  def savable_variables(self):
+    """Returns a list/dict of savable variables to pass to tf.train.Saver."""
+    return tf.global_variables()
+  def trainable_variables_on_device(self,
+                                    rel_device_num,
+                                    abs_device_num,
+                                    writable=False):
+    """Return the set of trainable variables on device.
+    Args:
+      rel_device_num: local worker device index.
+      abs_device_num: global graph device index.
+      writable: whether to get a reference to the underlying variable.
+    Returns:
+      The set of trainable variables on the specified device.
+    """
+    del rel_device_num, writable
+    if self.each_tower_has_variables():
+      params = [
+          v for v in tf.trainable_variables()
+          if v.name.startswith('v%s/' % abs_device_num)
+      ]
+    else:
+      params = tf.trainable_variables()
+    return params
+  @contextlib.contextmanager
+  def reuse_variables(self):
+    """Context manager that causes variables requested to be reused.
+    Variables requested under this context manager must already exist, and will
+    be reused instead of being created again. This should be used if the
+    evaluation model is being built after the training model has already been
+    built. This is because the evaluation model should reuse variables from the
+    training model.
+    Yields:
+      Nothing.
+    """
+    old_reuse_vars = self._reuse_vars
+    try:
+      self._reuse_vars = True
+      yield
+    finally:
+      self._reuse_vars = old_reuse_vars
+class VariableMgrIndependent(VariableMgr):
+  """VariableMgr that implements the --independent mode for local jobs.
+     Each GPU has its own copy of the variables, and gradients are
+     not shared between towers. This can be used to check
+     performance when no data is moved between GPUs.
+  """
+  def each_tower_has_variables(self):
+    return True
+  def create_outer_variable_scope(self, device_num):
+    return tf.variable_scope('v%s' % device_num, reuse=self._reuse_vars,
+                             use_resource=self.use_resource_vars)
+  def preprocess_device_grads(self, device_grads):
+    return (self.benchmark_cnn.devices, device_grads)
+  def get_gradients_to_apply(self, device_num, gradient_state):
+    device_grads = gradient_state
+    tower_grad = device_grads[device_num]
+    if self.benchmark_cnn.enable_auto_loss_scale and device_num == 0:
+      # Since we don't aggregate variables in --independent mode, we cannot tell
+      # if there are NaNs on all GPUs. So we arbitrarily choose to only check
+      # NaNs on the first GPU.
+      has_inf_nan_list = []
+      for grad, _ in tower_grad:
+        has_inf_nan_list.append(tf.reduce_all(tf.is_finite(grad)))
+      self.grad_has_inf_nan = tf.logical_not(tf.reduce_all(has_inf_nan_list))
+    return tower_grad
+  def get_devices(self):
+    return self.benchmark_cnn.raw_devices
+class VariableMgrLocalFetchFromPS(VariableMgr):
+  """VariableMgr that implements the --parameter_server mode for local jobs.
+     Variables are stored on a parameter server.  For each step, each tower gets
+     a copy of the variables from the parameter server, and sends its gradients
+     to the param server.
+  """
+  def each_tower_has_variables(self):
+    return False
+  def create_outer_variable_scope(self, device_num):
+    return tf.variable_scope('v', reuse=bool(device_num) or self._reuse_vars,
+                             use_resource=self.use_resource_vars)
+  def preprocess_device_grads(self, device_grads):
+    return ([self.benchmark_cnn.param_server_device], device_grads)
+  def get_gradients_to_apply(self, device_num, gradient_state):
+    assert device_num == 0
+    device_grads = gradient_state
+    agg_grads, self.grad_has_inf_nan = (
+        variable_mgr_util.
+        aggregate_gradients_using_copy_with_variable_colocation(
+            device_grads,
+            use_mean=True,
+            check_inf_nan=self.benchmark_cnn.enable_auto_loss_scale))
+    return agg_grads
+  def get_devices(self):
+    raw_devices = self.benchmark_cnn.raw_devices
+    if self.benchmark_cnn.local_parameter_device_flag == 'gpu':
+      return [
+          variable_mgr_util.ParamServerDeviceSetter(d, raw_devices)
+          for d in raw_devices
+      ]
+    else:
+      return [
+          tf.train.replica_device_setter(
+              worker_device=d,
+              ps_device=self.benchmark_cnn.param_server_device,
+              ps_tasks=1) for d in raw_devices
+      ]
+class VariableMgrLocalFetchFromStagedPS(VariableMgrLocalFetchFromPS):
+  """Implements fetching a local variable through staging buffers.
+  """
+  def __init__(self, benchmark_cnn):
+    super(VariableMgrLocalFetchFromStagedPS, self).__init__(benchmark_cnn)
+    # A data structure to track where the variables are used on each device.
+    # Indexed by device_num and var_name, each entry stores the "put" and "get"
+    # ops used for that variable on that device:
+    #   staging_vars_on_devices[device_num][var_name] == (put_op, get_op)
+    self.staging_vars_on_devices = [
+        dict() for _ in self.benchmark_cnn.raw_devices
+    ]
+  def supports_staged_vars(self):
+    return True
+  def create_outer_variable_scope(self, device_num):
+    self._custom_getter = variable_mgr_util.StagedVariableGetter(
+        device_num, self.benchmark_cnn.raw_devices, None, self)
+    return tf.variable_scope(
+        'v', reuse=bool(device_num) or self._reuse_vars,
+        custom_getter=self._custom_getter, use_resource=self.use_resource_vars)
+  def trainable_variables_on_device(self,
+                                    rel_device_num,
+                                    abs_device_num,
+                                    writable=False):
+    return self._custom_getter.trainable_variables_on_device(
+        rel_device_num, abs_device_num, writable=writable)
+class VariableMgrLocalReplicated(VariableMgr):
+  """VariableMgr that implements the --replicated mode for local jobs.
+     Each GPU has its own copy of the variables. To apply gradients,
+     either a local all-reduce algorithm is applied or a regular
+     cross-device aggregation is used to replicate the combined
+     gradients to all towers.
+  """
+  def __init__(self, benchmark_cnn, all_reduce_spec,
+               agg_small_grads_max_bytes, agg_small_grads_max_group,
+               allreduce_merge_scope):
+    """Parses the optional all-reduce spec and stores the settings.
+    Args:
+      benchmark_cnn: the benchmark object, passed to the base class.
+      all_reduce_spec: spec handed to allreduce.parse_all_reduce_spec; a
+        falsy value leaves self._all_reduce_spec as None.
+      agg_small_grads_max_bytes: stored as-is.
+      agg_small_grads_max_group: stored as-is.
+      allreduce_merge_scope: stored as-is.
+    Raises:
+      ValueError: if the parsed spec does not contain exactly one entry.
+    """
+    super(VariableMgrLocalReplicated, self).__init__(benchmark_cnn)
+    if all_reduce_spec:
+      spec = allreduce.parse_all_reduce_spec(all_reduce_spec)
+      if len(spec) != 1:
+        raise ValueError(
+            'replicated mode does not support hybrid all-reduce strategies')
+      self._all_reduce_spec = spec[0]
+    else:
+      self._all_reduce_spec = None
+    self._agg_small_grads_max_bytes = agg_small_grads_max_bytes
+    self._agg_small_grads_max_group = agg_small_grads_max_group
+    # Filled in by preprocess_device_grads(); returned from get_post_init_ops().
+    self._warmup_ops = []
+    self._allreduce_merge_scope = allreduce_merge_scope
+    self._gradient_put_ops = None
+  def each_tower_has_variables(self):
+    """Returns True: each tower owns its own variables."""
+    return True
+  def create_outer_variable_scope(self, device_num):
+    """Returns the variable scope 'v<device_num>' for the given tower."""
+    return tf.variable_scope('v%s' % device_num, reuse=self._reuse_vars,
+                             use_resource=self.use_resource_vars)
+  def preprocess_device_grads(self, device_grads):
+    """All-reduces the tower gradients and pairs them with their variables.
+    Args:
+      device_grads: List of lists of (gradient, variable) tuples, one inner
+        list per tower.
+    Returns:
+      A tuple (self.benchmark_cnn.devices, reduced_device_grads), where
+      reduced_device_grads has the layout of device_grads but holds the
+      reduced gradients. When auto loss scaling is enabled,
+      self.grad_has_inf_nan is set as a side effect.
+    """
+    compact_grads = (self.benchmark_cnn.params.use_fp16 and
+                     self.benchmark_cnn.params.compact_gradient_transfer)
+    defer_grads = (self.benchmark_cnn.params.variable_consistency == 'relaxed')
+    # Strip the variables; only the gradient tensors are reduced.
+    grads_to_reduce = [[g for g, _ in grad_vars] for grad_vars in device_grads]
+    algorithm = batch_allreduce.algorithm_from_params(self.benchmark_cnn.params)
+    reduced_grads, self._warmup_ops = algorithm.batch_all_reduce(
+        grads_to_reduce, self.benchmark_cnn.params.gradient_repacking,
+        compact_grads, defer_grads, self.benchmark_cnn.params.xla_compile)
+    if self.benchmark_cnn.enable_auto_loss_scale:
+      # Check for infs or nans
+      is_finite_list = []
+      with tf.name_scope('check_for_inf_and_nan'):
+        for tower_grads in reduced_grads:
+          with tf.colocate_with(tower_grads[0]):
+            # TODO(tanmingxing): Create fused op that takes in a list of tensors
+            # as input and returns scalar boolean True if there are any
+            # infs/nans.
+            is_finite_list.append(tf.reduce_all(
+                [tf.reduce_all(tf.is_finite(g)) for g in tower_grads]))
+        self.grad_has_inf_nan = tf.logical_not(tf.reduce_all(is_finite_list))
+    # Re-attach each reduced gradient to the variable it was computed for.
+    reduced_device_grads = [[
+        (g, v) for g, (_, v) in zip(grads, grad_vars)
+    ] for grads, grad_vars in zip(reduced_grads, device_grads)]
+    return self.benchmark_cnn.devices, reduced_device_grads
+  def get_gradients_to_apply(self, device_num, gradient_state):
+    """Returns the reduced (gradient, variable) list of tower `device_num`."""
+    device_grads = gradient_state
+    return device_grads[device_num]
+  def get_post_init_ops(self):
+    """Returns ops that copy the 'v0' variables to the other towers.
+    The warmup ops recorded by preprocess_device_grads() are appended.
+    """
+    # Copy initialized values for variables on GPU 0 to other GPUs.
+    global_vars = tf.global_variables()
+    var_by_name = dict([(v.name, v) for v in global_vars])
+    post_init_ops = []
+    for v in global_vars:
+      split_name = v.name.split('/')
+      # TODO(b/62630508): use more specific prefix than v or v0.
+      if split_name[0] == 'v0' or not v.name.startswith('v'):
+        continue
+      # Look up the same-named variable under the 'v0' scope.
+      split_name[0] = 'v0'
+      copy_from = var_by_name['/'.join(split_name)]
+      post_init_ops.append(v.assign(copy_from.read_value()))
+    post_init_ops += self._warmup_ops
+    return post_init_ops
+  def savable_variables(self):
+    """Return the set of variables used for saving/loading the model."""
+    params = []
+    for v in tf.global_variables():
+      split_name = v.name.split('/')
+      # Keep the 'v0' copies and every variable whose name does not start
+      # with 'v'.
+      if split_name[0] == 'v0' or not v.name.startswith('v'):
+        params.append(v)
+    return params
+  def get_devices(self):
+    """Returns the benchmark's raw device list."""
+    return self.benchmark_cnn.raw_devices
+class VariableMgrDistributedAllReduce(VariableMgr):
+  """VariableMgr that implements the --distributed_all_reduce mode.
+     Each GPU has its own copy of the variables. To apply gradients,
+     the specified all-reduce algorithm is used to reduce the gradients
+     and replicate the final value to all GPUs.
+  """
+  def __init__(self, benchmark_cnn, all_reduce_spec, job_name,
+               num_workers, agg_small_grads_max_bytes,
+               agg_small_grads_max_group, allreduce_merge_scope):
+    super(VariableMgrDistributedAllReduce, self).__init__(benchmark_cnn)
+    if not all_reduce_spec:
+      raise ValueError(
+          'distributed_all_reduce requires a non-empty all_reduce_spec')
+    self._all_reduce_spec = allreduce.parse_all_reduce_spec(all_reduce_spec)
+    self._all_reduce_device_prefixes = (
+        allreduce.build_all_reduce_device_prefixes(job_name, num_workers))
+    self._num_workers = num_workers
+    self._agg_small_grads_max_bytes = agg_small_grads_max_bytes
+    self._agg_small_grads_max_group = agg_small_grads_max_group
+    self._allreduce_merge_scope = allreduce_merge_scope
+    if not self._all_reduce_spec:
+      raise ValueError('all_reduce_spec must be specified')
+    self._single_session = True
+  def each_tower_has_variables(self):
+    return True
+  def create_outer_variable_scope(self, device_num):
+    """Create a scope for the named device.
+    Args:
+      device_num: index of device for variable scope. (Note that
+        device_num spans all processes in cluster since a single global
+        graph is used.)
+    Returns:
+      the requested variable_scope
+    """
+    return tf.variable_scope('v%s' % device_num, reuse=self._reuse_vars,
+                             use_resource=self.use_resource_vars)
+  def preprocess_device_grads(self, device_grads):
+    remaining_grads = device_grads
+    aggregated_grads = []
+    for spec_tuple in self._all_reduce_spec:
+      if spec_tuple.limit < 0:
+        this_grads = remaining_grads
+        remaining_grads = []
+      else:
+        (this_grads, remaining_grads) = allreduce.split_grads_by_size(
+            spec_tuple.limit, remaining_grads)
+      if this_grads:
+        range_agg_grads = allreduce.sum_gradients_all_reduce(
+            self._single_session,
+            self._all_reduce_device_prefixes,
+            this_grads,
+            self._num_workers,
+            spec_tuple.alg,
+            spec_tuple.shards,
+            self.benchmark_cnn.gpu_indices,
+            agg_small_grads_max_bytes=self._agg_small_grads_max_bytes,
+            agg_small_grads_max_group=self._agg_small_grads_max_group,
+            allreduce_merge_scope=self._allreduce_merge_scope)
+        if not aggregated_grads:
+          aggregated_grads = range_agg_grads
+        else:
+          assert len(aggregated_grads) == len(range_agg_grads)
+          for i in range(len(aggregated_grads)):
+            aggregated_grads[i] += range_agg_grads[i]
+    assert not remaining_grads
+    full_device_set = []
+    for grads in device_grads:
+      g, v = grads[0]
+      del v
+      full_device_set.append(g.device)
+    return (full_device_set, aggregated_grads)
+  def get_gradients_to_apply(self, device_num, gradient_state):
+    device_grads = gradient_state
+    if device_num >= len(device_grads):
+      raise ValueError('device_num %d exceeds length of device_grads (%d)' %
+                       (device_num, len(device_grads)))
+    return device_grads[device_num]
+  def get_post_init_ops(self):
+    """Copy initialized values for variables to other devices."""
+    global_vars = tf.global_variables()
+    var_by_name = dict([(v.name, v) for v in global_vars])
+    post_init_ops = []
+    for v in global_vars:
+      split_name = v.name.split('/')
+      # TODO(b/62630508): use more specific prefix than v or v0.
+      if split_name[0] == 'v0' or not v.name.startswith('v'):
+        continue
+      split_name[0] = 'v0'
+      copy_from = var_by_name['/'.join(split_name)]
+      post_init_ops.append(v.assign(copy_from.read_value()))
+    return post_init_ops
+  def savable_variables(self):
+    """Return the set of variables used for saving/loading the model."""
+    params = []
+    for v in tf.global_variables():
+      split_name = v.name.split('/')
+      if split_name[0] == 'v0' or not v.name.startswith('v'):
+        params.append(v)
+    return params
+  def get_devices(self):
+    return self.benchmark_cnn.raw_devices
+# TODO(tucker): Merge this mode with DistributedAllReduce.
+class VariableMgrCollectiveAllReduce(VariableMgr):
+  """VariableMgr that implements the --collective_all_reduce mode.
+     Each GPU has its own copy of the variables. To apply gradients
+     the TF native collective all-reduce op is used to reduce the gradients
+     and replicate the final value to all GPUs.
+  """
+  def __init__(self, benchmark_cnn, all_reduce_spec,
+               num_workers, num_gpus, task_id, allreduce_merge_scope):
+    super(VariableMgrCollectiveAllReduce, self).__init__(benchmark_cnn)
+    if not all_reduce_spec:
+      raise ValueError(
+          'collective_all_reduce requires a non-empty all_reduce_spec: %s'
+          % all_reduce_spec)
+    parsed_spec = allreduce.parse_all_reduce_spec(all_reduce_spec)
+    # So far we only support a length-1 all_reduce_spec
+    if len(parsed_spec) > 1 or parsed_spec[0].limit > 0:
+      raise ValueError(
+          'collective_all_reduce requires one single-range all_reduce_spec %s'
+          % parsed_spec)
+    self._all_reduce_spec = parsed_spec[0]
+    if self._all_reduce_spec.alg != 'collective':
+      raise ValueError(
+          'VariableMgrCollectiveAllReduce initialized with non-collective '
+          'all_reduce_spec %s' % self.all_reduce_spec)
+    self._num_workers = num_workers
+    self._num_gpus = num_gpus
+    self._task_id = task_id
+    self._allreduce_merge_scope = allreduce_merge_scope
+    self._instance_key_counter = 10000
+    self._instance_key_table = dict()
+    self._single_session = False
+    # List of prefixes for generating PS devices, unused here.
+    self._all_reduce_device_prefixes = None
+  def each_tower_has_variables(self):
+    return True
+  def create_outer_variable_scope(self, device_num):
+    """Create a scope for the named device.
+    Args:
+      device_num: index of device for variable scope.
+    Returns:
+      the requested variable_scope
+    """
+    return tf.variable_scope('v%s' % device_num, reuse=self._reuse_vars)
+  def preprocess_device_grads(self, device_grads):
+    reduced_grads = allreduce.sum_gradients_all_reduce(
+        self._single_session,
+        self._all_reduce_device_prefixes,
+        device_grads,
+        self._num_workers,
+        'collective',
+        self._all_reduce_spec.shards,
+        self.benchmark_cnn.gpu_indices,
+        allreduce_merge_scope=self._allreduce_merge_scope)
+    assert len(reduced_grads) == len(device_grads)
+    full_device_set = []
+    for grads in device_grads:
+      g, _ = grads[0]
+      full_device_set.append(g.device)
+    return (full_device_set, reduced_grads)
+  def get_gradients_to_apply(self, device_num, gradient_state):
+    device_grads = gradient_state
+    if device_num >= len(device_grads):
+      raise ValueError('device_num %d exceeds length of device_grads (%d)' %
+                       (device_num, len(device_grads)))
+    return device_grads[device_num]
+  def _get_instance_key(self, name):
+    if name not in self._instance_key_table.keys():
+      self._instance_key_counter += 1
+      self._instance_key_table[name] = self._instance_key_counter
+    return self._instance_key_table[name]
+  def get_post_init_ops(self):
+    """Broadcast initialized values of variables to other devices.
+    Returns:
+      At task 0 device 0, broadcast_send.
+      At all other devices and tasks, broadcast_recv.
+    """
+    global_vars = tf.global_variables()
+    group_size = self._num_workers * self._num_gpus
+    post_init_ops = []
+    # Gather variables into same-var-different-device groups.
+    vars_by_suffix = dict()
+    for v in global_vars:
+      split_name = v.name.split('/')
+      mo = re.match(r'v(\d+)$', split_name[0])
+      if mo:
+        device_id = int(mo.group(1))
+        suffix = '/'.join(split_name[1:])
+        if suffix in vars_by_suffix.keys():
+          vars_by_suffix[suffix].append(v)
+        else:
+          vars_by_suffix[suffix] = [v]
+    # Generate broadcast ops for each such group.
+    for suffix in sorted(vars_by_suffix):
+      vlist = vars_by_suffix[suffix]
+      assert self._num_gpus == len(vlist)
+      devices = [v.device for v in vlist]
+      # NOTE: this key should generate the same value for all tasks
+      group_key = allreduce.collective_group_key(devices)
+      group_size = self._num_workers * len(devices)
+      instance_key = self._get_instance_key(suffix)
+      for v in vlist:
+        split_name = v.name.split('/')
+        mo = re.match(r'v(\d+)$', split_name[0])
+        if mo:
+          device_id = int(mo.group(1))
+          if (self._task_id == 0 and device_id == 0):
+            with tf.device(v.device):
+              bcast_send = allreduce.broadcast_send(
+                  v, v.shape, v.dtype, group_size, group_key, instance_key)
+              post_init_ops.append(v.assign(bcast_send))
+          else:
+            with tf.device(v.device):
+              bcast_recv = allreduce.broadcast_recv(
+                  v.shape, v.dtype, group_size, group_key, instance_key)
+              post_init_ops.append(v.assign(bcast_recv))
+    return post_init_ops
+  def savable_variables(self):
+    """Return the set of variables used for saving/loading the model."""
+    params = []
+    if self._task_id == 0:
+      for v in tf.global_variables():
+        split_name = v.name.split('/')
+        if split_name[0] == 'v0' or not v.name.startswith('v'):
+          params.append(v)
+    return params
+  def get_devices(self):
+    return self.benchmark_cnn.raw_devices
+class VariableMgrDistributedFetchFromPS(VariableMgr):
+  """Implements --variable_update=parameter_server mode for distributed jobs.
+     Variables are stored on a parameter server.  For each step, each tower gets
+     a copy of the variables from the parameter server, and sends its gradients
+     to the param server.
+  """
+  def each_tower_has_variables(self):
+    return False
+  def create_outer_variable_scope(self, device_num):
+    if self.benchmark_cnn.local_parameter_device_flag == 'gpu':
+      caching_devices = self.benchmark_cnn.raw_devices
+    else:
+      caching_devices = [self.benchmark_cnn.cpu_device]
+    custom_getter = variable_mgr_util.OverrideCachingDevice(
+        caching_devices, self.benchmark_cnn.cpu_device, 1024 * 64)
+    return tf.variable_scope(
+        'v', reuse=bool(device_num) or self._reuse_vars,
+        custom_getter=custom_getter, use_resource=self.use_resource_vars)
+  def preprocess_device_grads(self, device_grads):
+    # Returns (gradient_devices, gradient_state)
+    return ([self.benchmark_cnn.param_server_device], device_grads)
+  def get_gradients_to_apply(self, device_num, gradient_state):
+    assert device_num == 0
+    agg_grads, self.grad_has_inf_nan = (
+        variable_mgr_util.aggregate_gradients_using_copy(
+            gradient_state,
+            use_mean=True,
+            check_inf_nan=self.benchmark_cnn.enable_auto_loss_scale))
+    return agg_grads
+  def get_devices(self):
+    ps_strategy = variable_mgr_util.GreedyLoadBalancingStrategy(
+        self.benchmark_cnn.num_ps, variable_mgr_util.byte_size_load_fn)
+    return [
+        tf.train.replica_device_setter(
+            worker_device=d,
+            cluster=self.benchmark_cnn.cluster_manager.get_cluster_spec(),
+            ps_strategy=ps_strategy) for d in self.benchmark_cnn.raw_devices
+    ]
+class VariableMgrDistributedFetchFromStagedPS(
+    VariableMgrDistributedFetchFromPS):
+  """Extends VariableMgrDistributedFetchFromPS for --staged_vars."""
+  def __init__(self, benchmark_cnn):
+    super(VariableMgrDistributedFetchFromStagedPS, self).__init__(benchmark_cnn)
+    self.staging_vars_on_devices = [
+        dict() for _ in self.benchmark_cnn.raw_devices
+    ]
+    self.staged_vars_on_cpu = {}
+  def create_outer_variable_scope(self, device_num):
+    self._custom_getter = variable_mgr_util.StagedVariableGetter(
+        device_num, self.benchmark_cnn.raw_devices,
+        self.benchmark_cnn.cpu_device, self)
+    return tf.variable_scope(
+        'v', reuse=bool(device_num) or self._reuse_vars,
+        custom_getter=self._custom_getter, use_resource=self.use_resource_vars)
+  def supports_staged_vars(self):
+    return True
+  def trainable_variables_on_device(self,
+                                    rel_device_num,
+                                    abs_device_num,
+                                    writable=False):
+    return self._custom_getter.trainable_variables_on_device(
+        rel_device_num, abs_device_num, writable=writable)
+class VariableMgrDistributedReplicated(VariableMgr):
+  """VariableMgr that implements the --distributed_replicated mode.
+     Each GPU has a copy of the variables, and updates its copy after the
+     parameter servers are all updated with the gradients from all servers. Only
+     works with cross_replica_sync=true. Unlike 'replicated', does not use nccl
+     all-reduce for replicating within a server.
+  """
+  def each_tower_has_variables(self):
+    return True
+  def create_outer_variable_scope(self, device_num):
+    return tf.variable_scope(
+        'v%s' % device_num, reuse=self._reuse_vars,
+        custom_getter=variable_mgr_util.OverrideToLocalVariableIfNotPsVar(),
+        use_resource=self.use_resource_vars)
+  def preprocess_device_grads(self, device_grads):
+    return ([self.benchmark_cnn.param_server_device], device_grads)
+  def get_gradients_to_apply(self, device_num, gradient_state):
+    device_grads = gradient_state  # From 2nd result of preprocess_device_grads.
+    avg_grads, self.grad_has_inf_nan = (
+        variable_mgr_util.aggregate_gradients_using_copy_with_device_selection(
+            self.benchmark_cnn,
+            device_grads,
+            use_mean=True,
+            check_inf_nan=self.benchmark_cnn.enable_auto_loss_scale))
+    # Make shadow variable on a parameter server for each original trainable
+    # variable.
+    for i, (g, v) in enumerate(avg_grads):
+      my_name = variable_mgr_util.PS_SHADOW_VAR_PREFIX + '/' + v.name
+      if my_name.endswith(':0'):
+        my_name = my_name[:-2]
+      new_v = tf.get_variable(
+          my_name,
+          dtype=v.dtype.base_dtype,
+          initializer=v.initial_value,
+          trainable=True)
+      avg_grads[i] = (g, new_v)
+    return avg_grads
+  def append_apply_gradients_ops(self, gradient_state, opt, grads, training_ops,
+                                 loss_scale_params):
+    """Adds ops that apply `grads` and then refresh the per-device copies.
+    Args:
+      gradient_state: the second result of preprocess_device_grads; indexed
+        here as gradient_state[device][i][1] to reach the per-device variable.
+      opt: the underlying optimizer.
+      grads: [(grad, var)] to apply.
+      training_ops: list to which to add ops.
+      loss_scale_params: parameters for loss scaling.
+    """
+    device_grads = gradient_state  # From 2nd result of preprocess_device_grads.
+    def get_apply_gradients_ops_func():
+      """Returns a list of ops for updating gradients."""
+      apply_gradients_ops = []
+      # For each variable, apply the combined gradients for this server on
+      # the parameter server, and then wait for all other servers to do this.
+      for i, (g, v) in enumerate(grads):
+        apply_gradient_op = opt.apply_gradients([(g, v)])
+        barrier = self.benchmark_cnn.add_sync_queues_and_barrier(
+            'replicate_variable_%s' % i, [apply_gradient_op])
+        # The read below is ordered after the barrier, and the value read is
+        # then assigned to the i-th variable of every device.
+        with tf.control_dependencies([barrier]):
+          with tf.device(self.benchmark_cnn.cpu_device):
+            updated_value = v.read_value()
+            for my_d in range(len(self.benchmark_cnn.devices)):
+              apply_gradients_ops.append(
+                  device_grads[my_d][i][1].assign(updated_value))
+      return apply_gradients_ops
+    variable_mgr_util.append_gradients_with_loss_scale(
+        training_ops, get_apply_gradients_ops_func, loss_scale_params,
+        self.grad_has_inf_nan)
+  def _strip_port(self, s):
+    if s.endswith(':0'):
+      return s[:-2]
+    return s
+  def get_post_init_ops(self):
+    # Copy initialized variables for variables on the parameter server
+    # to the local copy of the variable.
+    local_vars = tf.local_variables()
+    local_var_by_name = dict(
+        [(self._strip_port(v.name), v) for v in local_vars])
+    post_init_ops = []
+    for v in tf.global_variables():
+      if v.name.startswith(variable_mgr_util.PS_SHADOW_VAR_PREFIX + '/v0/'):
+        prefix = self._strip_port(
+            v.name[len(variable_mgr_util.PS_SHADOW_VAR_PREFIX + '/v0'):])
+        for i in range(self.benchmark_cnn.num_gpus):
+          name = 'v%s%s' % (i, prefix)
+          if name in local_var_by_name:
+            copy_to = local_var_by_name[name]
+            post_init_ops.append(copy_to.assign(v.read_value()))
+    return post_init_ops
+  def _remove_shadow_var_prefix_if_present(self, var_name):
+    if var_name.startswith(variable_mgr_util.PS_SHADOW_VAR_PREFIX + '/'):
+      return var_name[len(variable_mgr_util.PS_SHADOW_VAR_PREFIX + '/'):]
+    else:
+      return var_name
+  def var_dict_name(self, v):
+    return self._strip_port(self._remove_shadow_var_prefix_if_present(v.name))
+  def savable_variables(self):
+    """Returns a list/dict of savable variables to pass to tf.train.Saver."""
+    params = {}
+    for v in tf.global_variables():
+      assert (v.name.startswith(variable_mgr_util.PS_SHADOW_VAR_PREFIX + '/v0/')
+              or v.name in ('global_step:0', 'loss_scale:0',
+                            'loss_scale_normal_steps:0')), (
+                                'Invalid global variable: %s' % v)
+      # We store variables in the checkpoint with the shadow variable prefix
+      # removed so we can evaluate checkpoints in non-distributed replicated
+      # mode. The checkpoints can also be loaded for training in
+      # distributed_replicated mode.
+      name = self._strip_port(self._remove_shadow_var_prefix_if_present(v.name))
+      params[name] = v
+    for v in tf.local_variables():
+      # Non-trainable variables, such as batch norm moving averages, do not have
+      # corresponding global shadow variables, so we add them here. Trainable
+      # local variables have corresponding global shadow variables, which were
+      # added in the global variable loop above.
+      if v.name.startswith('v0/') and v not in tf.trainable_variables():
+        params[self._strip_port(v.name)] = v
+    return params
+  def get_devices(self):
+    """Returns `self.benchmark_cnn.raw_devices`."""
+    return self.benchmark_cnn.raw_devices
--- a/TensorFlow/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/variable_mgr_util.py
+++ b/TensorFlow/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/variable_mgr_util.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Utilities for VariableMgr."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import collections as pycoll
+import operator
+import numpy as np
+import tensorflow.compat.v1 as tf
+# pylint: disable=g-direct-tensorflow-import
+from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import data_flow_ops
+from tensorflow.python.ops import math_ops
+# Name prefix identifying parameter-server shadow variables.
+# OverrideToLocalVariableIfNotPsVar leaves variables whose name starts with
+# this prefix out of its local-variable override.
+PS_SHADOW_VAR_PREFIX = 'ps_var'
+# Bundle of the settings and variables that drive automatic loss scaling; it
+# is consumed by append_gradients_with_loss_scale.
+AutoLossScaleParams = pycoll.namedtuple(
+    'AutoLossScaleParams',
+    [
+        # If true, enable automatic loss scaling.
+        'enable_auto_loss_scale',
+        # The value to scale the loss before computing gradients.
+        'loss_scale',
+        # Number of normal steps with the current `loss_scale`.
+        'loss_scale_normal_steps',
+        # Increase loss scale every n steps.
+        'inc_loss_scale_every_n',
+        # If true, the current worker is chief. The current implementation
+        # relies on the chief to update loss_scale value, but in future, we
+        # might change this to ask the parameter server to update loss_scales
+        # for better performance.
+        # TODO(tanmingxing): remove this if loss_scale is updated in ps.
+        'is_chief',
+    ])
+def get_loss_scale_update_op(loss_scale, loss_scale_normal_steps,
+                             inc_loss_scale_every_n):
+  """Returns the update op for loss scaling variables.
+  We maintain the counter `loss_scale_normal_steps` to count the number of steps
+  we have been using the current `loss_scale`. In most cases, this function
+  increments `loss_scale_normal_steps`. However, if `loss_scale_normal_steps` is
+  greater than the threshold `inc_loss_scale_every_n`, we double `loss_scale`
+  and reset `loss_scale_normal_steps` to zero.
+  This op is only called if the gradients don't have any infs or nans. Instead,
+  if infs or nans occur in the gradients, we immeditately halve `loss_scale` and
+  reset `loss_scale_normal_steps` to zero.
+  Args:
+    loss_scale: a tf.Variable represneting the loss_scale value.
+    loss_scale_normal_steps: a tf.Variable representing the number of training
+      steps that have run since the loss_scale last changed.
+    inc_loss_scale_every_n: a Python integer threshold. `loss_scale` is
+      increased every `inc_loss_scale_every_n` steps, unless the gradients have
+      infs or nans.
+  Returns:
+    An op for updating `loss_scale` and `loss_scale_normal_steps`.
+  """
+  def increment_loss_scale_normal_steps_func():
+    return tf.group(loss_scale_normal_steps.assign_add(1))
+  def increase_loss_scale_func():
+    return tf.group(
+        tf.assign(loss_scale_normal_steps, 0),
+        tf.assign(loss_scale, loss_scale * 2))
+  # true_fn and false_fn must have the same type.
+  return tf.cond(loss_scale_normal_steps < inc_loss_scale_every_n,
+                 increment_loss_scale_normal_steps_func,
+                 increase_loss_scale_func)
+def append_gradients_with_loss_scale(training_ops, get_apply_gradients_ops_func,
+                                     loss_scale_params, grad_has_inf_nan):
+  """Selectively appends gradients update ops with loss scaling.
+  Args:
+    training_ops: a list of training ops to be executed.
+    get_apply_gradients_ops_func: a function that returns a list of ops for
+      applying gradients. Here, we must pass a function instead of the actual
+      list of ops; otherwise, those ops would be executed unconditionally due to
+      the semantics of tf.cond.
+    loss_scale_params: An AutoLossScaleParams tuple.
+    grad_has_inf_nan: Boolean tensor indicating whether the gradients have infs
+      or nans.
+  """
+  is_chief = loss_scale_params.is_chief
+  loss_scale = loss_scale_params.loss_scale
+  loss_scale_normal_steps = loss_scale_params.loss_scale_normal_steps
+  inc_loss_scale_every_n = loss_scale_params.inc_loss_scale_every_n
+  enable_auto_loss_scale = loss_scale_params.enable_auto_loss_scale
+  if loss_scale is None or not enable_auto_loss_scale or not is_chief:
+    training_ops.extend(get_apply_gradients_ops_func())
+  else:
+    # If nans/infs occurred, skip applying gradients and instead update
+    # loss_scale (halve loss_scale and reset loss_scale_normal_steps to zero).
+    def update_op_if_nan_or_inf():
+      """Update loss_scale and discard gradients if nans/infs occurred."""
+      return tf.group(
+          tf.assign(loss_scale, loss_scale / 2.),
+          tf.assign(loss_scale_normal_steps, 0))
+    # Otherwise, apply gradients, and update loss_scale and
+    # loss_scale_normal_steps.
+    def update_op_if_no_nan_or_inf():
+      """Apply gradients, and update loss scaling."""
+      return tf.group(
+          get_loss_scale_update_op(loss_scale, loss_scale_normal_steps,
+                                   inc_loss_scale_every_n),
+          *get_apply_gradients_ops_func())
+    # TODO(tanmingxing): Add support for independent and distributed all_reduce.
+    assert grad_has_inf_nan is not None
+    update_op = tf.cond(
+        grad_has_inf_nan,
+        update_op_if_nan_or_inf,
+        update_op_if_no_nan_or_inf,
+        name='cond_if_grad_has_inf_nan'
+    )
+    training_ops.append(update_op)
+# To be used with custom_getter on tf.get_variable.
+class OverrideCachingDevice(object):
+  """Variable getter which caches variables on the least loaded device.
+  Variables smaller than a certain threshold are cached on a single specific
+  device, as specified in the constructor. All other variables are load balanced
+  across a pool of devices, by caching each variable on the least loaded device.
+  Note that variable creation only happen when building the model graph on the
+  first device (see how it sets the 'reuse' parameter in
+  VariableMgr.*.create_outer_variable_scope()). That means, for all other
+  devices, the variable scope will reuse the variables created before, which
+  requires that we set the caching_device correctly as otherwise it may not be
+  able to find the previously created variable and will create a new one. This
+  requires when building the model graph on different devices, variables with
+  the same name should have same size.
+  TODO(laigd): consider adding tests or verification logic to enforce this, or
+  refactor it.
+  """
+  def __init__(self, devices, device_for_small_variables,
+               small_variable_size_threshold):
+    self.devices = devices
+    self.sizes = [0] * len(self.devices)
+    self.device_for_small_variables = device_for_small_variables
+    self.small_variable_size_threshold = small_variable_size_threshold
+  def __call__(self, getter, *args, **kwargs):
+    size = tf.TensorShape(kwargs['shape']).num_elements()
+    if size < self.small_variable_size_threshold:
+      device_name = self.device_for_small_variables
+    else:
+      device_index, _ = min(enumerate(self.sizes), key=operator.itemgetter(1))
+      device_name = self.devices[device_index]
+      self.sizes[device_index] += size
+    kwargs['caching_device'] = device_name
+    var = getter(*args, **kwargs)
+    return var
+# To be used with custom_getter on tf.get_variable. Ensures the created variable
+# is in LOCAL_VARIABLES and not GLOBAL_VARIBLES collection.
+class OverrideToLocalVariableIfNotPsVar(object):
+  # args and kwargs come from the custom_getter interface for Tensorflow
+  # variables, and matches tf.get_variable's signature, with the addition of
+  # 'getter' at the beginning.
+  def __call__(self, getter, name, *args, **kwargs):
+    if name.startswith(PS_SHADOW_VAR_PREFIX):
+      return getter(*args, **kwargs)
+    if 'collections' in kwargs:
+      collections = kwargs['collections']
+    if not collections:
+      collections = [tf.GraphKeys.GLOBAL_VARIABLES]
+    else:
+      collections = collections[:]
+    collections.remove(tf.GraphKeys.GLOBAL_VARIABLES)
+    collections.append(tf.GraphKeys.LOCAL_VARIABLES)
+    kwargs['collections'] = list(collections)
+    return getter(name, *args, **kwargs)
+class ParamServerDeviceSetter(object):
+  """Helper class to assign variables on the least loaded ps-device."""
+  def __init__(self, worker_device, ps_devices):
+    """Initializer for ParamServerDevicSetter.
+    Args:
+      worker_device: the device to use for computer ops.
+      ps_devices: a list of device to use for Variable ops. Each variable is
+      assigned to the least loaded device.
+    """
+    self.ps_devices = ps_devices
+    self.worker_device = worker_device
+    self.ps_sizes = [0] * len(self.ps_devices)
+  def __call__(self, op):
+    if op.device:
+      return op.device
+    if op.type not in ['Variable', 'VariableV2']:
+      return self.worker_device
+    device_index, _ = min(enumerate(self.ps_sizes), key=operator.itemgetter(1))
+    device_name = self.ps_devices[device_index]
+    var_size = op.outputs[0].get_shape().num_elements()
+    self.ps_sizes[device_index] += var_size
+    return device_name
+class StagedModelVariable(object):
+  """Staging variable wrapper that decouples reads and updates.
+  This class represents a variable through a staging buffer. Reads from this
+  variable come directly from the staging buffer. Updates are pushed into
+  another staging buffer, and will be processed later.
+  """
+  def __init__(self, real_var, var_stage_get, variable_mgr):
+    """Initializer for the model variables through a staging buffer.
+    Args:
+      real_var: the underlying real variable.
+      var_stage_get: the read op from the staging buffer.
+      variable_mgr: the parent variable-manager; assign_sub appends to its
+        `staging_delta_ops` list.
+    """
+    self.real_var = real_var
+    self.var_stage_get = var_stage_get
+    self.variable_mgr = variable_mgr
+  def _value(self):
+    """The read access of this variable. The content from the staging buffer."""
+    return self.var_stage_get
+  def _ref(self):
+    """Return the underlying variable ref, required by tf.colocate_with."""
+    return self.real_var._ref()  # pylint: disable=protected-access
+  def read_value(self):
+    """Mimics tf.Variable.read_value()."""
+    return tf.identity(self.var_stage_get, name='read')
+  @property
+  def dtype(self):
+    """Return the non-reference dtype."""
+    return self.var_stage_get.dtype
+  def assign_sub(self, delta, name=None, read_value=True):
+    """Mimic the updates to the variable.
+    Args:
+      delta: is pushed into a staging buffer and will be pumped later.
+      name: currently ignored; names of ops and the StagingArea are
+            computed without using this passed name.
+      read_value: if True, will return something which evaluates to the new
+              value of the variable; if False will return the assign op.
+    Returns:
+      The actual updates. The colocation constraint will be reapplied.
+    """
+    # This parameter is ignored: the StagingArea only supports setting
+    # the shared name, not the names of individual ops it uses.
+    del name
+    # colocate_with(None, True) clears the colocation constraints.
+    # Push the delta into a staging buffer.
+    with ops.colocate_with(None, True), tf.device(self.var_stage_get.device):
+      delta_staging_area = data_flow_ops.StagingArea(
+          [self.var_stage_get.dtype], shapes=[self.var_stage_get.shape])
+      delta_put_op = delta_staging_area.put([delta])
+      # The put op is only recorded here; this method does not run it.
+      self.variable_mgr.staging_delta_ops.append(delta_put_op)
+      delta_get_op = delta_staging_area.get()[0]
+    # Return the actual updates. The colocation constraint will be reapplied.
+    return self.real_var.assign_sub(delta_get_op, read_value=read_value)
+  # Declared static with an explicit `self` parameter because it is registered
+  # below as a plain conversion function; presumably TensorFlow passes the
+  # object being converted as the first argument -- confirm against the
+  # register_tensor_conversion_function documentation.
+  @staticmethod
+  # pylint: disable=bad-staticmethod-argument,invalid-name
+  def _TensorConversionFunction(self, dtype=None, name=None, as_ref=False):
+    """Utility function for converting a StagedModelVariable to a Tensor."""
+    del dtype, name  # unused: this function returns the cached ref or value.
+    if as_ref:
+      return self._ref()
+    else:
+      return self._value()
+# Let TensorFlow convert StagedModelVariable objects to tensors through
+# _TensorConversionFunction.
+ops.register_tensor_conversion_function(
+    StagedModelVariable, StagedModelVariable._TensorConversionFunction)  # pylint: disable=protected-access
+class StagedVariableGetter(object):
+  """A variable getter through staging buffers on devices.
+  Instead of a caching device, this getter tracks where the variable is used.
+  And on each device, it goes through a staging buffer.
+  """
+  def __init__(self, device_num, devices, cpu_device, variable_mgr):
+    """Initializer for StagedVariableGetter.
+    Args:
+      device_num: the current device index.
+      devices: a list of all the devices to build towers.
+      cpu_device: a cpu_device for this replica. If None, no cpu-caching is
+          done.
+      variable_mgr: the parent variable manager.
+    """
+    self.device_num = device_num
+    self.devices = devices
+    self.cpu_device = cpu_device
+    self.variable_mgr = variable_mgr
+  def __call__(self, getter, name, *args, **kwargs):
+    """Returns a staged read of the variable `name` on the current device.
+    Args:
+      getter: the wrapped getter that creates or fetches the real variable.
+      name: name of the requested variable.
+      *args: positional arguments forwarded to `getter`.
+      **kwargs: keyword arguments forwarded to `getter`; 'shape', 'dtype' and
+        'trainable' must be present because they are read by key below.
+    Returns:
+      The staging-area get op for trainable variables and for names already
+      staged on this device; a StagedModelVariable wrapping the real variable
+      otherwise.
+    """
+    staging_ops = self.variable_mgr.staging_vars_on_devices[self.device_num]
+    if name in staging_ops:
+      # NOTE(review): a cache hit returns the bare get op even when the first
+      # call returned a StagedModelVariable (non-trainable variable), and
+      # `put_op` is unused here -- confirm this is intended.
+      put_op, get_op = staging_ops[name]
+      return get_op
+    real_var = getter(name, *args, **kwargs)
+    shape = kwargs['shape']
+    dtype = kwargs['dtype']
+    trainable = kwargs['trainable']
+    if self.cpu_device:
+      with tf.device(self.cpu_device):
+        # This helps copying the weights from the parameter to this server only
+        # once.
+        if name in self.variable_mgr.staged_vars_on_cpu:
+          cpu_var = self.variable_mgr.staged_vars_on_cpu[name]
+        else:
+          cpu_var = tf.identity(real_var)
+          self.variable_mgr.staged_vars_on_cpu[name] = cpu_var
+      var_to_stage = cpu_var
+    else:
+      var_to_stage = tf.identity(real_var)  # de-reference the variable.
+    # Build the staging buffer on the current device and remember its put/get
+    # ops under the variable name.
+    with tf.device(self.devices[self.device_num]):
+      staging_area = data_flow_ops.StagingArea([dtype], shapes=[shape])
+      put_op = staging_area.put([var_to_stage])
+      get_op = staging_area.get()[0]
+      staging_ops[name] = (put_op, get_op)
+    if trainable:
+      # For trainable variables, they are managed separately through
+      # apply_gradients.
+      return get_op
+    else:
+      # For other shadow variables, the access is decoupled through a wrapper
+      # class.
+      return StagedModelVariable(real_var, get_op, self.variable_mgr)
+  def trainable_variables_on_device(self, rel_device_num, abs_device_num,
+                                    writable):
+    """Return the set of trainable variables on the specified device.
+    Args:
+      rel_device_num: local worker device index.
+      abs_device_num: global graph device index; unused.
+      writable: whether the returned variables are writable or read-only.
+    Returns:
+      The list from tf.trainable_variables() when `writable` is true; otherwise
+      the list of staging get ops recorded for those variables on device
+      `rel_device_num`.
+    """
+    del abs_device_num
+    params_refs = tf.trainable_variables()
+    if writable:
+      return params_refs
+    params = []
+    for param in params_refs:
+      # Drop the ':<index>' suffix to get the key used in
+      # staging_vars_on_devices.
+      var_name = param.name.split(':')[0]
+      _, var_get_op = self.variable_mgr.staging_vars_on_devices[rel_device_num][
+          var_name]
+      params.append(var_get_op)
+    return params
+def aggregate_gradients_using_copy_with_device_selection(
+    benchmark_cnn, tower_grads, use_mean, check_inf_nan):
+  """Aggregate gradients, controlling device for the aggregation.
+  Args:
+    benchmark_cnn: benchmark_cnn class.
+    tower_grads: List of lists of (gradient, variable) tuples. The outer list
+      is over towers. The inner list is over individual gradients.
+    use_mean: if True, mean is taken, else sum of gradients is taken.
+    check_inf_nan: If true, check grads for nans and infs.
+  Returns:
+    The tuple ([(average_gradient, variable),], has_nan_or_inf) where the
+      gradient has been averaged across all towers. The variable is chosen from
+      the first tower. The has_nan_or_inf indicates the grads has nan or inf.
+  """
+  if benchmark_cnn.local_parameter_device_flag == 'gpu':
+    avail_devices = benchmark_cnn.raw_devices
+  else:
+    avail_devices = [benchmark_cnn.param_server_device]
+  agg_grads = []
+  has_nan_or_inf_list = []
+  for i, single_grads in enumerate(zip(*tower_grads)):
+    with tf.device(avail_devices[i % len(avail_devices)]):
+      grad_and_var, has_nan_or_inf = aggregate_single_gradient_using_copy(
+          single_grads, use_mean, check_inf_nan)
+      agg_grads.append(grad_and_var)
+      has_nan_or_inf_list.append(has_nan_or_inf)
+  if check_inf_nan:
+    return agg_grads, tf.reduce_any(has_nan_or_inf_list)
+  else:
+    return agg_grads, None
+def aggregate_gradients_using_copy_with_variable_colocation(
+    tower_grads, use_mean, check_inf_nan):
+  """Aggregate gradients, colocating computation with the gradient's variable.
+  Args:
+    tower_grads: List of lists of (gradient, variable) tuples. The outer list
+      is over towers. The inner list is over individual gradients. All variables
+      of the same gradient across towers must be the same (that is,
+      tower_grads[x][a][1] == tower_grads[y][a][1] for all indices x, y, and a)
+    use_mean: if True, mean is taken, else sum of gradients is taken.
+    check_inf_nan: If true, check grads for nans and infs.
+  Returns:
+    The tuple ([(average_gradient, variable),], has_nan_or_inf) where the
+      gradient has been averaged across all towers. The variable is chosen from
+      the first tower. The has_nan_or_inf indicates the grads has nan or inf.
+  """
+  agg_grads = []
+  has_nan_or_inf_list = []
+  for single_grads in zip(*tower_grads):
+    # Note that each single_grads looks like the following:
+    #   ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
+    var = single_grads[0][1]
+    for _, v in single_grads:
+      assert v == var
+    with tf.device(var.device):
+      grad_and_var, has_nan_or_inf = aggregate_single_gradient_using_copy(
+          single_grads, use_mean, check_inf_nan)
+      agg_grads.append(grad_and_var)
+      has_nan_or_inf_list.append(has_nan_or_inf)
+  if check_inf_nan:
+    return agg_grads, tf.reduce_any(has_nan_or_inf_list)
+  else:
+    return agg_grads, None
+def aggregate_gradients_using_copy(tower_grads, use_mean, check_inf_nan):
+  """Calculate the average gradient for each shared variable across all towers.
+  Note that this function provides a synchronization point across all towers.
+  Args:
+    tower_grads: List of lists of (gradient, variable) tuples. The outer list
+      is over towers. The inner list is over individual gradients.
+    use_mean: if True, mean is taken, else sum of gradients is taken.
+    check_inf_nan: check grads for nans and infs.
+  Returns:
+    The tuple ([(average_gradient, variable),], has_nan_or_inf) where the
+      gradient has been averaged across all towers. The variable is chosen from
+      the first tower. The has_nan_or_inf indicates the grads has nan or inf.
+  """
+  agg_grads = []
+  has_nan_or_inf_list = []
+  for single_grads in zip(*tower_grads):
+    grad_and_var, has_nan_or_inf = aggregate_single_gradient_using_copy(
+        single_grads, use_mean, check_inf_nan)
+    agg_grads.append(grad_and_var)
+    has_nan_or_inf_list.append(has_nan_or_inf)
+  if check_inf_nan:
+    return agg_grads, tf.reduce_any(has_nan_or_inf_list)
+  else:
+    return agg_grads, None
+# The following two functions are copied from
+# tensorflow/python/eager/backprop.py. We do not directly use them as they are
+# not exported and subject to change at any time.
+def flatten_nested_indexed_slices(grad):
+  assert isinstance(grad, ops.IndexedSlices)
+  if isinstance(grad.values, ops.Tensor):
+    return grad
+  else:
+    assert isinstance(grad.values, ops.IndexedSlices)
+    g = flatten_nested_indexed_slices(grad.values)
+    return ops.IndexedSlices(g.values, array_ops.gather(grad.indices,
+                                                        g.indices),
+                             g.dense_shape)
+def aggregate_indexed_slices_gradients(grads):
+  """Aggregates gradients containing `IndexedSlices`s."""
+  if len(grads) < 1:
+    return None
+  elif len(grads) == 1:
+    return grads[0]
+  else:
+    grads = [g for g in grads if g is not None]
+    # If any gradient is a `Tensor`, sum them up and return a dense tensor
+    # object.
+    if any(isinstance(g, ops.Tensor) for g in grads):
+      return math_ops.add_n(grads)
+    # The following `_as_indexed_slices_list` casts ids of IndexedSlices into
+    # int64. It is to make sure the inputs of `concat` all have same the data
+    # type.
+    grads = math_ops._as_indexed_slices_list(grads)  # pylint: disable=protected-access
+    grads = [flatten_nested_indexed_slices(x) for x in grads]
+    # Form IndexedSlices out of the concatenated values and indices.
+    concat_grad = ops.IndexedSlices(
+        array_ops.concat([x.values for x in grads], axis=0),
+        array_ops.concat([x.indices for x in grads], axis=0),
+        grads[0].dense_shape)
+    return concat_grad
+def aggregate_single_gradient_using_copy(grad_and_vars, use_mean,
+                                         check_inf_nan):
+  """Calculate the average gradient for a shared variable across all towers.
+  Note that this function provides a synchronization point across all towers.
+  Args:
+    grad_and_vars: A list or tuple of (gradient, variable) tuples. Each
+      (gradient, variable) pair within the outer list represents the gradient
+      of the variable calculated for a single tower, and the number of pairs
+      equals the number of towers.
+    use_mean: if True, mean is taken, else sum of gradients is taken.
+    check_inf_nan: check grads for nans and infs.
+  Returns:
+    The tuple ([(average_gradient, variable),], has_nan_or_inf) where the
+      gradient has been averaged across all towers. The variable is chosen from
+      the first tower. The has_nan_or_inf indicates the grads has nan or inf.
+  """
+  grads = [g for g, _ in grad_and_vars]
+  if any(isinstance(g, tf.IndexedSlices) for g in grads):
+    # TODO(reedwm): All-reduce IndexedSlices more effectively.
+    grad = aggregate_indexed_slices_gradients(grads)
+  else:
+    grad = tf.add_n(grads)
+  if use_mean and len(grads) > 1:
+    grad = tf.scalar_mul(1.0 / len(grads), grad)
+  v = grad_and_vars[0][1]
+  if check_inf_nan:
+    with tf.name_scope('check_for_inf_and_nan'):
+      has_nan_or_inf = tf.logical_not(tf.reduce_all(tf.is_finite(grads)))
+    return (grad, v), has_nan_or_inf
+  else:
+    return (grad, v), None
+# This class is copied from
+# https://github.com/tensorflow/tensorflow/blob/590d6eef7e91a6a7392c8ffffb7b58f2e0c8bc6b/tensorflow/contrib/training/python/training/device_setter.py#L56.
+# We copy it since contrib has been removed from TensorFlow.
+class GreedyLoadBalancingStrategy(object):
+  """Returns the least-loaded ps task for op placement.
+  The load is calculated by a user-specified load function passed in at
+  construction.  There are no units for load, and the load function is
+  responsible for providing an internally consistent measure.
+  Note that this strategy is very sensitive to the exact order in which
+  ps ops (typically variables) are created, as it greedily places ops
+  on the least-loaded ps at the point each op is processed.
+  One reasonable heuristic is the `byte_size_load_fn`, which
+  estimates load as the number of bytes that would be used to store and
+  transmit the entire variable.  More advanced load functions
+  could consider the difference in access patterns across ops, or trade
+  off CPU-intensive ops with RAM-intensive ops with network bandwidth.
+  This class is intended to be used as a `ps_strategy` in
+  `tf.compat.v1.train.replica_device_setter`.
+  """
+  def __init__(self, num_tasks, load_fn):
+    """Create a new `LoadBalancingStrategy`.
+    Args:
+      num_tasks: Number of ps tasks to cycle among.
+      load_fn: A callable that takes an `Operation` and returns a
+        numeric load value for that op.
+    """
+    self._num_tasks = num_tasks
+    self._load_fn = load_fn
+    self._ps_loads = np.zeros(num_tasks)
+  def __call__(self, op):
+    """Choose a ps task index for the given `Operation`.
+    Args:
+      op: A `Operation` to be placed on ps.
+    Returns:
+      The next ps task index to use for the `Operation`. Greedily
+      places the op on the least-loaded ps task so far, as determined
+      by the load function.
+    """
+    task = np.argmin(self._ps_loads)
+    self._ps_loads[task] += self._load_fn(op)
+    return task
+# This function is copied from
+# https://github.com/tensorflow/tensorflow/blob/590d6eef7e91a6a7392c8ffffb7b58f2e0c8bc6b/tensorflow/contrib/training/python/training/device_setter.py#L105.
+# We copy it since contrib has been removed from TensorFlow.
+def byte_size_load_fn(op):
+  """Load function that computes the byte size of a single-output `Operation`.
+  This is intended to be used with `"Variable"` ops, which have a single
+  `Tensor` output with the contents of the variable.  However, it can also be
+  used for calculating the size of any op that has a single output.
+  Intended to be used with `GreedyLoadBalancingStrategy`.
+  Args:
+    op: An `Operation` with a single output, typically a "Variable" op.
+  Returns:
+    The number of bytes in the output `Tensor`.
+  Raises:
+    ValueError: if `op` does not have a single output, or if the shape of the
+      single output is not fully-defined.
+  """
+  if len(op.outputs) != 1:
+    raise ValueError('Op %s must have a single output' % op)
+  output = op.outputs[0]
+  elem_size = output.dtype.size
+  shape = output.get_shape()
+  if not shape.is_fully_defined():
+    # Due to legacy behavior, scalar "Variable" ops have output Tensors that
+    # have unknown shape when the op is created (and hence passed to this
+    # load function for placement), even though the scalar shape is set
+    # explicitly immediately afterward.
+    shape = tensor_shape.TensorShape(op.get_attr('shape'))
+  shape.assert_is_fully_defined()
+  return shape.num_elements() * elem_size
--- a/TensorFlow/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/variable_mgr_util_test.py
+++ b/TensorFlow/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/variable_mgr_util_test.py
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for variable_mgr_util."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import tensorflow.compat.v1 as tf
+import variable_mgr_util
+class VariableMgrUtilTest(tf.test.TestCase):
+  def testGetLossScaleUpdateOpTruePath(self):
+    loss_scale = tf.Variable(4)
+    # loss_scale_normal_steps >= inc_loss_scale_every_n
+    loss_scale_normal_steps = tf.Variable(10)
+    inc_loss_scale_every_n = 10
+    update_op = variable_mgr_util.get_loss_scale_update_op(
+        loss_scale, loss_scale_normal_steps, inc_loss_scale_every_n)
+    with self.test_session() as sess:
+      sess.run(tf.global_variables_initializer())
+      sess.run(update_op)
+      self.assertEqual(sess.run(loss_scale), 8)
+      self.assertEqual(sess.run(loss_scale_normal_steps), 0)
+  def testGetLossScaleUpdateOpFalsePath(self):
+    loss_scale = tf.Variable(4)
+    # loss_scale_normal_steps < inc_loss_scale_every_n
+    loss_scale_normal_steps = tf.Variable(9)
+    inc_loss_scale_every_n = 10
+    update_op = variable_mgr_util.get_loss_scale_update_op(
+        loss_scale, loss_scale_normal_steps, inc_loss_scale_every_n)
+    with self.test_session() as sess:
+      sess.run(tf.global_variables_initializer())
+      sess.run(update_op)
+      self.assertEqual(sess.run(loss_scale), 4)
+      self.assertEqual(sess.run(loss_scale_normal_steps), 10)
+  def testAppendGradientsWithLossScaleWithAutoScaleDisabled(self):
+    v = tf.Variable(0)
+    training_ops = []
+    get_apply_gradients_ops_func = lambda: [tf.assign(v, v + 1)]
+    loss_scale_params = variable_mgr_util.AutoLossScaleParams(
+        enable_auto_loss_scale=False,  # no auto loss scale.
+        loss_scale=tf.Variable(4),
+        loss_scale_normal_steps=tf.Variable(10),
+        inc_loss_scale_every_n=10,
+        is_chief=True)
+    variable_mgr_util.append_gradients_with_loss_scale(
+        training_ops,
+        get_apply_gradients_ops_func,
+        loss_scale_params,
+        grad_has_inf_nan=True)
+    with self.test_session() as sess:
+      sess.run(tf.global_variables_initializer())
+      sess.run(training_ops)
+      self.assertEqual(sess.run(v), 1)
+      self.assertEqual(sess.run(loss_scale_params.loss_scale), 4)
+      self.assertEqual(sess.run(loss_scale_params.loss_scale_normal_steps), 10)
+  def testAppendGradientsWithLossScaleForNonChiefWorker(self):
+    v = tf.Variable(0)
+    training_ops = []
+    get_apply_gradients_ops_func = lambda: [tf.assign(v, v + 1)]
+    loss_scale_params = variable_mgr_util.AutoLossScaleParams(
+        enable_auto_loss_scale=True,
+        loss_scale=tf.Variable(4),
+        loss_scale_normal_steps=tf.Variable(10),
+        inc_loss_scale_every_n=10,
+        is_chief=False)  # Non-chief
+    variable_mgr_util.append_gradients_with_loss_scale(
+        training_ops,
+        get_apply_gradients_ops_func,
+        loss_scale_params,
+        grad_has_inf_nan=False)
+    with self.test_session() as sess:
+      sess.run(tf.global_variables_initializer())
+      sess.run(training_ops)
+      self.assertEqual(sess.run(v), 1)
+      self.assertEqual(sess.run(loss_scale_params.loss_scale), 4)
+      self.assertEqual(sess.run(loss_scale_params.loss_scale_normal_steps), 10)
+  def testAppendGradientsWithLossScaleWithoutNan(self):
+    v = tf.Variable(0)
+    training_ops = []
+    get_apply_gradients_ops_func = lambda: [tf.assign(v, v + 1)]
+    loss_scale_params = variable_mgr_util.AutoLossScaleParams(
+        enable_auto_loss_scale=True,
+        loss_scale=tf.Variable(4, dtype=tf.float32),
+        loss_scale_normal_steps=tf.Variable(10),
+        inc_loss_scale_every_n=10,
+        is_chief=True)
+    variable_mgr_util.append_gradients_with_loss_scale(
+        training_ops,
+        get_apply_gradients_ops_func,
+        loss_scale_params,
+        grad_has_inf_nan=tf.constant(False))
+    with self.test_session() as sess:
+      sess.run(tf.global_variables_initializer())
+      sess.run(training_ops)
+      self.assertEqual(sess.run(v), 1)
+      self.assertEqual(sess.run(loss_scale_params.loss_scale), 8)
+      self.assertEqual(sess.run(loss_scale_params.loss_scale_normal_steps), 0)
+  def testAppendGradientsWithLossScaleWithtNan(self):
+    v = tf.Variable(0)
+    training_ops = []
+    get_apply_gradients_ops_func = lambda: [tf.assign(v, v + 1)]
+    loss_scale_params = variable_mgr_util.AutoLossScaleParams(
+        enable_auto_loss_scale=True,
+        loss_scale=tf.Variable(4, dtype=tf.float32),
+        loss_scale_normal_steps=tf.Variable(10),
+        inc_loss_scale_every_n=10,
+        is_chief=True)
+    variable_mgr_util.append_gradients_with_loss_scale(
+        training_ops,
+        get_apply_gradients_ops_func,
+        loss_scale_params,
+        grad_has_inf_nan=tf.constant(True))
+    with self.test_session() as sess:
+      sess.run(tf.global_variables_initializer())
+      sess.run(training_ops)
+      self.assertEqual(sess.run(v), 0)  # Skip updating for v.
+      # halve loss_scale and reset local_scale_normal_steps.
+      self.assertEqual(sess.run(loss_scale_params.loss_scale), 2)
+      self.assertEqual(sess.run(loss_scale_params.loss_scale_normal_steps), 0)
+# Entry point when this test file is executed directly.
+if __name__ == '__main__':
+  # The tests above build graph ops and evaluate them through sessions, so
+  # TF2 behavior is switched off before the test runner takes over.
+  tf.disable_v2_behavior()
+  tf.test.main()
--- a/TensorFlow/ComputeVision/Classification/scripts-run/single_process.sh
+++ b/TensorFlow/ComputeVision/Classification/scripts-run/single_process.sh
+#!/bin/bash
+lrank=$OMPI_COMM_WORLD_LOCAL_RANK
+APP=" python3 ./benchmarks-master/scripts/tf_cnn_benchmarks/tf_cnn_benchmarks.py --data_format=NCHW --batch_size=128 --model=resnet50  --optimizer=momentum --variable_update=horovod  --print_training_accuracy=true  --eval_during_training_every_n_epochs=1  --nodistortions --num_gpus=1 --num_epochs=90 --weight_decay=1e-4 --data_dir=$data_dir_path   --use_fp16=False --data_name=imagenet --train_dir=$save_checkpoint_path
+case ${lrank} in
+[0])
+  export HIP_VISIBLE_DEVICES=0,1,2,3
+  export UCX_NET_DEVICES=mlx5_0:1
+  export UCX_IB_PCI_BW=mlx5_0:50Gbs
+  echo numactl --cpunodebind=0 --membind=0 ${APP}
+  numactl --cpunodebind=0 --membind=0 ${APP}
+  ;;
+[1])
+  export HIP_VISIBLE_DEVICES=0,1,2,3
+  export UCX_NET_DEVICES=mlx5_1:1
+  export UCX_IB_PCI_BW=mlx5_1:50Gbs
+  echo numactl --cpunodebind=1 --membind=1 ${APP}
+  numactl --cpunodebind=1 --membind=1 ${APP}
+  ;;
+[2])
+  export HIP_VISIBLE_DEVICES=0,1,2,3
+  export UCX_NET_DEVICES=mlx5_2:1
+  export UCX_IB_PCI_BW=mlx5_2:50Gbs
+  echo numactl --cpunodebind=2 --membind=2 ${APP}
+  numactl --cpunodebind=2 --membind=2 ${APP}
+  ;;
+[3])
+  export HIP_VISIBLE_DEVICES=0,1,2,3
+  export UCX_NET_DEVICES=mlx5_3:1
+  export UCX_IB_PCI_BW=mlx5_3:50Gbs
+  echo numactl --cpunodebind=3 --membind=3 ${APP}
+  numactl --cpunodebind=3 --membind=3 ${APP}
+  ;;
+esac
--- a/TensorFlow2x/Accuracy_Validation/ResNet50_Official/.gitignore
+++ b/TensorFlow2x/Accuracy_Validation/ResNet50_Official/.gitignore
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+env/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*,cover
+.hypothesis/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+target/
+# IPython Notebook
+.ipynb_checkpoints
+# pyenv
+.python-version
+# mypy
+.mypy_cache
+# celery beat schedule file
+celerybeat-schedule
+# dotenv
+.env
+# virtualenv
+venv/
+ENV/
+# Spyder project settings
+.spyderproject
+# Rope project settings
+.ropeproject
+# PyCharm
+.idea/
+# For mac
+.DS_Store
--- a/TensorFlow2x/Accuracy_Validation/ResNet50_Official/AUTHORS
+++ b/TensorFlow2x/Accuracy_Validation/ResNet50_Official/AUTHORS
+# This is the official list of authors for copyright purposes.
+# This file is distinct from the CONTRIBUTORS files.
+# See the latter for an explanation.
+# Names should be added to this file as:
+# Name or Organization <email address>
+# The email address is not required for organizations.
+Google Inc.
+David Dao <daviddao@broad.mit.edu>
--- a/TensorFlow2x/Accuracy_Validation/ResNet50_Official/CODEOWNERS
+++ b/TensorFlow2x/Accuracy_Validation/ResNet50_Official/CODEOWNERS
+* @tensorflow/tf-model-garden-team
+/official/ @rachellj218 @saberkun @jaeyounkim
+/official/nlp/ @saberkun @lehougoogle @rachellj218 @jaeyounkim
+/official/recommendation/ranking/ @gagika
+/official/vision/ @xianzhidu @yeqingli @arashwan @saberkun @rachellj218 @jaeyounkim
+/official/vision/beta/projects/assemblenet/ @mryoo @yeqingli
+/official/vision/beta/projects/deepmac_maskrcnn/ @vighneshbirodkar
+/official/vision/beta/projects/movinet/ @hyperparticle @yuanliangzhe @yeqingli
+/official/vision/beta/projects/simclr/ @luotigerlsx @chentingpc @saxenasaurabh
+/official/vision/beta/projects/video_ssl/ @richardaecn @yeqingli
+/research/adversarial_text/ @rsepassi @a-dai
+/research/attention_ocr/ @xavigibert
+/research/audioset/ @plakal @dpwe
+/research/autoaugment/ @barretzoph
+/research/cognitive_planning/ @s-gupta
+/research/cvt_text/ @clarkkev @lmthang
+/research/deep_speech/ @yhliang2018
+/research/deeplab/ @aquariusjay @yknzhu
+/research/delf/ @andrefaraujo
+/research/efficient-hrl/ @ofirnachum
+/research/lfads/ @jazcollins @sussillo
+/research/lstm_object_detection/ @yinxiaoli @yongzhe2160
+/research/marco/ @vincentvanhoucke
+/research/object_detection/ @jch1 @tombstone @pkulzc
+/research/pcl_rl/ @ofirnachum
+/research/rebar/ @gjtucker
+/research/seq_flow_lite/ @thunderfyc
+/research/slim/ @sguada @marksandler2
+/research/vid2depth/ @rezama
--- a/TensorFlow2x/Accuracy_Validation/ResNet50_Official/CONTRIBUTING.md
+++ b/TensorFlow2x/Accuracy_Validation/ResNet50_Official/CONTRIBUTING.md
+# How to contribute
+![Contributors](https://img.shields.io/github/contributors/tensorflow/models)
+We encourage you to contribute to the TensorFlow Model Garden.
+Please read our [guidelines](../../wiki/How-to-contribute) for details.
+**NOTE**: Only [code owners](./CODEOWNERS) are allowed to merge a pull request.
+Please contact the code owners of each model to merge your pull request.
--- a/TensorFlow2x/Accuracy_Validation/ResNet50_Official/ISSUES.md
+++ b/TensorFlow2x/Accuracy_Validation/ResNet50_Official/ISSUES.md
+# If you open a GitHub issue, here is our policy.
+* It must be a **bug**, a **feature request**, or a significant problem
+with **documentation**.
+  * Please send a pull request instead for small documentation fixes.
+* The required form must be filled out.
+* The issue should be related to the repository it is created in.
+General help and support should be sought on [Stack Overflow](https://stackoverflow.com/questions/tagged/tensorflow-model-garden) or other non-GitHub channels.
+[![](https://img.shields.io/stackexchange/stackoverflow/t/tensorflow-model-garden)](https://stackoverflow.com/questions/tagged/tensorflow-model-garden)
+TensorFlow developers respond to issues.
+We want to focus on work that benefits the whole community such as fixing bugs
+and adding new features.
+It helps us to address bugs and feature requests in a timely manner.
+--- 
+Please understand that research models in the [research directory](https://github.com/tensorflow/models/tree/master/research)
+included in this repository are experimental and research-style code.
+They are not officially supported by the TensorFlow team.
--- a/TensorFlow2x/Accuracy_Validation/ResNet50_Official/LICENSE
+++ b/TensorFlow2x/Accuracy_Validation/ResNet50_Official/LICENSE
+Copyright 2016 The TensorFlow Authors.  All rights reserved.
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+   1. Definitions.
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+   END OF TERMS AND CONDITIONS
+   APPENDIX: How to apply the Apache License to your work.
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+   Copyright 2016, The Authors.
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+       http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
--- a/TensorFlow2x/Accuracy_Validation/ResNet50_Official/README.md
+++ b/TensorFlow2x/Accuracy_Validation/ResNet50_Official/README.md
+# 介绍
+本测试用例用于图像分类ResNet50模型在ROCm平台的精度验证，测试流程如下。
+# 测试流程
+## 加载环境变量
+下载TensorFlow官方GitHub中的[models](https://github.com/tensorflow/models)仓库。
+设置PYTHONPATH环境变量：
+        export PYTHONPATH=$PYTHONPATH:/path/to/tensorflow/model
+ROCm平台使用MIOpen进行加速，以下变量设置可以参考使用：
+        export HSA_FORCE_FINE_GRAIN_PCIE=1
+        export MIOPEN_DEBUG_DISABLE_FIND_DB=3
+## 运行示例
+可以使用单卡或多卡运行，4卡运行指令如下：
+        cd official/resnet
+        python3 official/vision/image_classification/resnet/resnet_ctl_imagenet_main.py --data_dir=/path/to/{ImageNet-tensorflow_data_dir} --model_dir=/path/to/{model_save_dir} --batch_size=128 --num_gpus=4  --use_synthetic_data=false
+# 参考
+[https://github.com/tensorflow/models/tree/v2.7.0](https://github.com/tensorflow/models/tree/v2.7.0/official/vision/image_classification/resnet)
--- a/TensorFlow2x/Accuracy_Validation/ResNet50_Official/README_ori.md
+++ b/TensorFlow2x/Accuracy_Validation/ResNet50_Official/README_ori.md
+## Welcome to the Model Garden for TensorFlow
+The TensorFlow Model Garden is a repository with a number of different
+implementations of state-of-the-art (SOTA) models and modeling solutions for
+TensorFlow users. We aim to demonstrate the best practices for modeling so that
+TensorFlow users can take full advantage of TensorFlow for their research and
+product development.
+To improve the transparency and reproducibility of our models, training logs on
+[TensorBoard.dev](https://tensorboard.dev) are also provided for models to the
+extent possible though not all models are suitable.
+| Directory | Description |
+|-----------|-------------|
+| [official](official) | • A collection of example implementations for SOTA models using the latest TensorFlow 2's high-level APIs<br />• Officially maintained, supported, and kept up to date with the latest TensorFlow 2 APIs by TensorFlow<br />• Reasonably optimized for fast performance while still being easy to read |
+| [research](research) | • A collection of research model implementations in TensorFlow 1 or 2 by researchers<br />• Maintained and supported by researchers |
+| [community](community) | • A curated list of the GitHub repositories with machine learning models and implementations powered by TensorFlow 2 |
+| [orbit](orbit) | • A flexible and lightweight library that users can easily use or fork when writing customized training loop code in TensorFlow 2.x. It seamlessly integrates with `tf.distribute` and supports running on different device types (CPU, GPU, and TPU). |
+## [Announcements](https://github.com/tensorflow/models/wiki/Announcements)
+## Contributions
+[![help wanted:paper implementation](https://img.shields.io/github/issues/tensorflow/models/help%20wanted%3Apaper%20implementation)](https://github.com/tensorflow/models/labels/help%20wanted%3Apaper%20implementation)
+If you want to contribute, please review the [contribution guidelines](https://github.com/tensorflow/models/wiki/How-to-contribute).
+## License
+[Apache License 2.0](LICENSE)
+## Citing TensorFlow Model Garden
+If you use TensorFlow Model Garden in your research, please cite this repository.
+```
+@misc{tensorflowmodelgarden2020,
+  author = {Hongkun Yu and Chen Chen and Xianzhi Du and Yeqing Li and
+            Abdullah Rashwan and Le Hou and Pengchong Jin and Fan Yang and
+            Frederick Liu and Jaeyoun Kim and Jing Li},
+  title = {{TensorFlow Model Garden}},
+  howpublished = {\url{https://github.com/tensorflow/models}},
+  year = {2020}
+}
+```
--- a/TensorFlow2x/Accuracy_Validation/ResNet50_Official/community/README.md
+++ b/TensorFlow2x/Accuracy_Validation/ResNet50_Official/community/README.md
+![Logo](https://storage.googleapis.com/tf_model_garden/tf_model_garden_logo.png)
+# TensorFlow Community Models
+This repository provides a curated list of the GitHub repositories with machine learning models and implementations powered by TensorFlow 2.
+**Note**: Contributing companies or individuals are responsible for maintaining their repositories.
+## Computer Vision
+### Image Recognition
+| Model | Paper | Features | Maintainer |
+|-------|-------|----------|------------|
+| [DenseNet 169](https://github.com/IntelAI/models/tree/master/benchmarks/image_recognition/tensorflow/densenet169) | [Densely Connected Convolutional Networks](https://arxiv.org/pdf/1608.06993) | • FP32 Inference | [Intel](https://github.com/IntelAI) |
+| [Inception V3](https://github.com/IntelAI/models/tree/master/benchmarks/image_recognition/tensorflow/inceptionv3) | [Rethinking the Inception Architecture<br/>for Computer Vision](https://arxiv.org/pdf/1512.00567.pdf) | • Int8 Inference<br/>• FP32 Inference | [Intel](https://github.com/IntelAI) |
+| [Inception V4](https://github.com/IntelAI/models/tree/master/benchmarks/image_recognition/tensorflow/inceptionv4) | [Inception-v4, Inception-ResNet and the Impact<br/>of Residual Connections on Learning](https://arxiv.org/pdf/1602.07261) | • Int8 Inference<br/>• FP32 Inference | [Intel](https://github.com/IntelAI) |
+| [MobileNet V1](https://github.com/IntelAI/models/tree/master/benchmarks/image_recognition/tensorflow/mobilenet_v1) | [MobileNets: Efficient Convolutional Neural Networks<br/>for Mobile Vision Applications](https://arxiv.org/pdf/1704.04861) | • Int8 Inference<br/>• FP32 Inference | [Intel](https://github.com/IntelAI) |
+| [ResNet 101](https://github.com/IntelAI/models/tree/master/benchmarks/image_recognition/tensorflow/resnet101) | [Deep Residual Learning for Image Recognition](https://arxiv.org/pdf/1512.03385) | • Int8 Inference<br/>• FP32 Inference | [Intel](https://github.com/IntelAI) |
+| [ResNet 50](https://github.com/IntelAI/models/tree/master/benchmarks/image_recognition/tensorflow/resnet50) | [Deep Residual Learning for Image Recognition](https://arxiv.org/pdf/1512.03385) | • Int8 Inference<br/>• FP32 Inference | [Intel](https://github.com/IntelAI) |
+| [ResNet 50v1.5](https://github.com/IntelAI/models/tree/master/benchmarks/image_recognition/tensorflow/resnet50v1_5) | [Deep Residual Learning for Image Recognition](https://arxiv.org/pdf/1512.03385) | • Int8 Inference<br/>• FP32 Inference<br/>• FP32 Training | [Intel](https://github.com/IntelAI) |
+| [EfficientNet](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow2/Classification/ConvNets/efficientnet) | [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/pdf/1905.11946.pdf) | • Automatic mixed precision<br/>• Horovod Multi-GPU training (NCCL)<br/>• Multi-node training on a Pyxis/Enroot Slurm cluster<br/>• XLA | [NVIDIA](https://github.com/NVIDIA) |
+### Object Detection
+| Model | Paper | Features | Maintainer |
+|-------|-------|----------|------------|
+| [R-FCN](https://github.com/IntelAI/models/tree/master/benchmarks/object_detection/tensorflow/rfcn) | [R-FCN: Object Detection<br/>via Region-based Fully Convolutional Networks](https://arxiv.org/pdf/1605.06409) | • Int8 Inference<br/>• FP32 Inference | [Intel](https://github.com/IntelAI) |
+| [SSD-MobileNet](https://github.com/IntelAI/models/tree/master/benchmarks/object_detection/tensorflow/ssd-mobilenet) | [MobileNets: Efficient Convolutional Neural Networks<br/>for Mobile Vision Applications](https://arxiv.org/pdf/1704.04861) | • Int8 Inference<br/>• FP32 Inference | [Intel](https://github.com/IntelAI) |
+| [SSD-ResNet34](https://github.com/IntelAI/models/tree/master/benchmarks/object_detection/tensorflow/ssd-resnet34) | [SSD: Single Shot MultiBox Detector](https://arxiv.org/pdf/1512.02325) | • Int8 Inference<br/>• FP32 Inference<br/>• FP32 Training | [Intel](https://github.com/IntelAI) |
+### Segmentation
+| Model | Paper | Features | Maintainer |
+|-------|-------|----------|------------|
+| [Mask R-CNN](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow2/Segmentation/MaskRCNN) | [Mask R-CNN](https://arxiv.org/abs/1703.06870) | • Automatic Mixed Precision<br/>• Multi-GPU training support with Horovod<br/>• TensorRT | [NVIDIA](https://github.com/NVIDIA) |
+| [U-Net Medical Image Segmentation](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow2/Segmentation/UNet_Medical) | [U-Net: Convolutional Networks for Biomedical Image Segmentation](https://arxiv.org/abs/1505.04597) | • Automatic Mixed Precision<br/>• Multi-GPU training support with Horovod<br/>• TensorRT | [NVIDIA](https://github.com/NVIDIA) |
+## Natural Language Processing
+| Model | Paper | Features | Maintainer |
+|-------|-------|----------|------------|
+| [BERT](https://github.com/IntelAI/models/tree/master/benchmarks/language_modeling/tensorflow/bert_large) | [BERT: Pre-training of Deep Bidirectional Transformers<br/>for Language Understanding](https://arxiv.org/pdf/1810.04805) | • FP32 Inference<br/>• FP32 Training | [Intel](https://github.com/IntelAI) |
+| [BERT](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow2/LanguageModeling/BERT) | [BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding](https://arxiv.org/pdf/1810.04805) | • Horovod Multi-GPU<br/>• Multi-node with Horovod and Pyxis/Enroot Slurm cluster<br/>• XLA<br/>• Automatic mixed precision<br/>• LAMB | [NVIDIA](https://github.com/NVIDIA) |
+| [ELECTRA](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow2/LanguageModeling/ELECTRA) | [ELECTRA: Pre-training Text Encoders as Discriminators Rather Than Generators](https://openreview.net/forum?id=r1xMH1BtvB) | • Automatic Mixed Precision<br/>• Multi-GPU training support with Horovod<br/>• Multi-node training on a Pyxis/Enroot Slurm cluster | [NVIDIA](https://github.com/NVIDIA) |
+| [GNMT](https://github.com/IntelAI/models/tree/master/benchmarks/language_translation/tensorflow/mlperf_gnmt) | [Google’s Neural Machine Translation System:<br/>Bridging the Gap between Human and Machine Translation](https://arxiv.org/pdf/1609.08144) | • FP32 Inference | [Intel](https://github.com/IntelAI) |
+| [Transformer-LT (Official)](https://github.com/IntelAI/models/tree/master/benchmarks/language_translation/tensorflow/transformer_lt_official) | [Attention Is All You Need](https://arxiv.org/pdf/1706.03762) | • FP32 Inference | [Intel](https://github.com/IntelAI) |
+| [Transformer-LT (MLPerf)](https://github.com/IntelAI/models/tree/master/benchmarks/language_translation/tensorflow/transformer_mlperf) | [Attention Is All You Need](https://arxiv.org/pdf/1706.03762) | • FP32 Training | [Intel](https://github.com/IntelAI) |
+## Recommendation Systems
+| Model | Paper | Features | Maintainer |
+|-------|-------|----------|------------|
+| [Wide & Deep](https://github.com/IntelAI/models/tree/master/benchmarks/recommendation/tensorflow/wide_deep_large_ds) | [Wide & Deep Learning for Recommender Systems](https://arxiv.org/pdf/1606.07792) | • FP32 Inference<br/>• FP32 Training | [Intel](https://github.com/IntelAI) |
+| [Wide & Deep](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow2/Recommendation/WideAndDeep) | [Wide & Deep Learning for Recommender Systems](https://arxiv.org/pdf/1606.07792) | • Automatic mixed precision<br/>• Multi-GPU training support with Horovod<br/>• XLA | [NVIDIA](https://github.com/NVIDIA) |
+| [DLRM](https://github.com/NVIDIA/DeepLearningExamples/tree/master/TensorFlow2/Recommendation/DLRM) | [Deep Learning Recommendation Model for Personalization and Recommendation Systems](https://arxiv.org/pdf/1906.00091.pdf) | • Automatic Mixed Precision<br/>• Hybrid-parallel multiGPU training using Horovod all2all<br/>• Multinode training for Pyxis/Enroot Slurm clusters<br/>• XLA<br/>• Criteo dataset preprocessing with Spark on GPU | [NVIDIA](https://github.com/NVIDIA) |
+## Contributions
+If you want to contribute, please review the [contribution guidelines](https://github.com/tensorflow/models/wiki/How-to-contribute).
--- a/TensorFlow2x/Accuracy_Validation/ResNet50_Official/official/LICENSE
+++ b/TensorFlow2x/Accuracy_Validation/ResNet50_Official/official/LICENSE
+Copyright 2015 The TensorFlow Authors.  All rights reserved.
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+   1. Definitions.
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+   END OF TERMS AND CONDITIONS
+   APPENDIX: How to apply the Apache License to your work.
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+   Copyright 2015, The TensorFlow Authors.
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+       http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
--- a/TensorFlow2x/Accuracy_Validation/ResNet50_Official/official/README-TPU.md
+++ b/TensorFlow2x/Accuracy_Validation/ResNet50_Official/official/README-TPU.md
+# Officially Supported TensorFlow 2.1+ Models on Cloud TPU
+## Natural Language Processing
+*   [bert](nlp/bert): A powerful pre-trained language representation model:
+    BERT, which stands for Bidirectional Encoder Representations from
+    Transformers.
+    [BERT FineTuning with Cloud TPU](https://cloud.google.com/tpu/docs/tutorials/bert-2.x) provides step by step instructions on Cloud TPU training. You can look at the [Bert MNLI Tensorboard.dev metrics](https://tensorboard.dev/experiment/LijZ1IrERxKALQfr76gndA) for the MNLI fine-tuning task.
+*   [transformer](nlp/transformer): A transformer model to translate the WMT
+    English to German dataset.
+    See [Training transformer on Cloud TPU](https://cloud.google.com/tpu/docs/tutorials/transformer-2.x) for step by step instructions on Cloud TPU training.
+## Computer Vision
+*   [efficientnet](vision/image_classification): A family of convolutional
+    neural networks that scale by balancing network depth, width, and
+    resolution and can be used to classify ImageNet's dataset of 1000 classes.
+    See [Tensorboard.dev training metrics](https://tensorboard.dev/experiment/KnaWjrq5TXGfv0NW5m7rpg/#scalars).
+*   [mnist](vision/image_classification): A basic model to classify digits
+    from the MNIST dataset. See [Running MNIST on Cloud TPU](https://cloud.google.com/tpu/docs/tutorials/mnist-2.x) tutorial and [Tensorboard.dev metrics](https://tensorboard.dev/experiment/mIah5lppTASvrHqWrdr6NA).
+*   [mask-rcnn](vision/detection): An object detection and instance segmentation model. See [Tensorboard.dev training metrics](https://tensorboard.dev/experiment/LH7k0fMsRwqUAcE09o9kPA).
+*   [resnet](vision/image_classification): A deep residual network that can
+    be used to classify ImageNet's dataset of 1000 classes.
+    See [Training ResNet on Cloud TPU](https://cloud.google.com/tpu/docs/tutorials/resnet-2.x) tutorial and [Tensorboard.dev metrics](https://tensorboard.dev/experiment/CxlDK8YMRrSpYEGtBRpOhg).
+*   [retinanet](vision/detection): A fast and powerful object detector. See [Tensorboard.dev training metrics](https://tensorboard.dev/experiment/b8NRnWU3TqG6Rw0UxueU6Q).
+*   [shapemask](vision/detection): An object detection and instance segmentation model using shape priors. See [Tensorboard.dev training metrics](https://tensorboard.dev/experiment/ZbXgVoc6Rf6mBRlPj0JpLA).
+## Recommendation
+*   [dlrm](recommendation/ranking): [Deep Learning Recommendation Model for
+Personalization and Recommendation Systems](https://arxiv.org/abs/1906.00091).
+*   [dcn v2](recommendation/ranking): [Improved Deep & Cross Network and Practical Lessons for Web-scale Learning to Rank Systems](https://arxiv.org/abs/2008.13535).
+*   [ncf](recommendation): Neural Collaborative Filtering. See [Tensorboard.dev training metrics](https://tensorboard.dev/experiment/0k3gKjZlR1ewkVTRyLB6IQ).
--- a/TensorFlow2x/Accuracy_Validation/ResNet50_Official/official/README.md
+++ b/TensorFlow2x/Accuracy_Validation/ResNet50_Official/official/README.md
+![Logo](https://storage.googleapis.com/model_garden_artifacts/TF_Model_Garden.png)
+# TensorFlow Official Models
+The TensorFlow official models are a collection of models
+that use TensorFlow’s high-level APIs.
+They are intended to be well-maintained, tested, and kept up to date
+with the latest TensorFlow API.
+They should also be reasonably optimized for fast performance while still
+being easy to read.
+These models are used as end-to-end tests, ensuring that the models run
+with the same or improved speed and performance with each new TensorFlow build.
+## More models to come!
+The team is actively developing new models.
+In the near future, we will add:
+* State-of-the-art language understanding models.
+* State-of-the-art image classification models.
+* State-of-the-art object detection and instance segmentation models.
+## Table of Contents
+- [Models and Implementations](#models-and-implementations)
+  * [Computer Vision](#computer-vision)
+    + [Image Classification](#image-classification)
+    + [Object Detection and Segmentation](#object-detection-and-segmentation)
+  * [Natural Language Processing](#natural-language-processing)
+  * [Recommendation](#recommendation)
+- [How to get started with the official models](#how-to-get-started-with-the-official-models)
+## Models and Implementations
+### Computer Vision
+#### Image Classification
+| Model | Reference (Paper) |
+|-------|-------------------|
+| [MNIST](vision/image_classification) | A basic model to classify digits from the [MNIST dataset](http://yann.lecun.com/exdb/mnist/) |
+| [ResNet](vision/beta/MODEL_GARDEN.md) | [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385) |
+| [ResNet-RS](vision/beta/MODEL_GARDEN.md) | [Revisiting ResNets: Improved Training and Scaling Strategies](https://arxiv.org/abs/2103.07579) |
+| [EfficientNet](vision/image_classification) | [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/abs/1905.11946) |
+#### Object Detection and Segmentation
+| Model | Reference (Paper) |
+|-------|-------------------|
+| [RetinaNet](vision/beta/MODEL_GARDEN.md) | [Focal Loss for Dense Object Detection](https://arxiv.org/abs/1708.02002) |
+| [Mask R-CNN](vision/beta/MODEL_GARDEN.md) | [Mask R-CNN](https://arxiv.org/abs/1703.06870) |
+| [ShapeMask](vision/detection) | [ShapeMask: Learning to Segment Novel Objects by Refining Shape Priors](https://arxiv.org/abs/1904.03239) |
+| [SpineNet](vision/beta/MODEL_GARDEN.md) | [SpineNet: Learning Scale-Permuted Backbone for Recognition and Localization](https://arxiv.org/abs/1912.05027) |
+| [Cascade RCNN-RS and RetinaNet-RS](vision/beta/MODEL_GARDEN.md) | [Simple Training Strategies and Model Scaling for Object Detection](https://arxiv.org/abs/2107.00057)|
+### Natural Language Processing
+| Model | Reference (Paper) |
+|-------|-------------------|
+| [ALBERT (A Lite BERT)](nlp/albert) | [ALBERT: A Lite BERT for Self-supervised Learning of Language Representations](https://arxiv.org/abs/1909.11942) |
+| [BERT (Bidirectional Encoder Representations from Transformers)](nlp/bert) | [BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding](https://arxiv.org/abs/1810.04805) |
+| [NHNet (News Headline generation model)](projects/nhnet) | [Generating Representative Headlines for News Stories](https://arxiv.org/abs/2001.09386) |
+| [Transformer](nlp/transformer) | [Attention Is All You Need](https://arxiv.org/abs/1706.03762) |
+| [XLNet](nlp/xlnet) | [XLNet: Generalized Autoregressive Pretraining for Language Understanding](https://arxiv.org/abs/1906.08237) |
+| [MobileBERT](nlp/projects/mobilebert) | [MobileBERT: a Compact Task-Agnostic BERT for Resource-Limited Devices](https://arxiv.org/abs/2004.02984) |
+### Recommendation
+Model                            | Reference (Paper)
+-------------------------------- | -----------------
+[DLRM](recommendation/ranking)   | [Deep Learning Recommendation Model for Personalization and Recommendation Systems](https://arxiv.org/abs/1906.00091)
+[DCN v2](recommendation/ranking) | [Improved Deep & Cross Network and Practical Lessons for Web-scale Learning to Rank Systems](https://arxiv.org/abs/2008.13535)
+[NCF](recommendation)            | [Neural Collaborative Filtering](https://arxiv.org/abs/1708.05031)
+## How to get started with the official models
+* The models in the master branch are developed using TensorFlow 2,
+and they target the TensorFlow [nightly binaries](https://github.com/tensorflow/tensorflow#installation)
+built from the
+[master branch of TensorFlow](https://github.com/tensorflow/tensorflow/tree/master).
+* The stable versions targeting releases of TensorFlow are available
+as tagged branches or [downloadable releases](https://github.com/tensorflow/models/releases).
+* Model repository version numbers match the target TensorFlow release,
+such that
+[release v2.5.0](https://github.com/tensorflow/models/releases/tag/v2.5.0)
+is compatible with
+[TensorFlow v2.5.0](https://github.com/tensorflow/tensorflow/releases/tag/v2.5.0).
+Please follow the below steps before running models in this repository.
+### Requirements
+* The latest TensorFlow Model Garden release and TensorFlow 2
+  * If you are on a version of TensorFlow earlier than 2.2, please
+upgrade your TensorFlow to [the latest TensorFlow 2](https://www.tensorflow.org/install/).
+```shell
+pip3 install tf-nightly
+```
+* Python 3.7+
+Our integration tests run with Python 3.7. Although Python 3.6 should work, we
+don't recommend earlier versions.
+### Installation
+#### Method 1: Install the TensorFlow Model Garden pip package
+**tf-models-official** is the stable Model Garden package.
+pip will install all models and dependencies automatically.
+```shell
+pip install tf-models-official
+```
+If you are using nlp packages, please also install **tensorflow-text**:
+```shell
+pip install tensorflow-text
+```
+Please check out our [example](colab/fine_tuning_bert.ipynb)
+to learn how to use a PIP package.
+Note that **tf-models-official** may not include the latest changes in this
+GitHub repo. To include the latest changes, you may install **tf-models-nightly**,
+which is the nightly Model Garden package created daily automatically.
+```shell
+pip install tf-models-nightly
+```
+#### Method 2: Clone the source
+1. Clone the GitHub repository:
+```shell
+git clone https://github.com/tensorflow/models.git
+```
+2. Add the top-level ***/models*** folder to the Python path.
+```shell
+export PYTHONPATH=$PYTHONPATH:/path/to/models
+```
+If you are using a Colab notebook, please set the Python path with os.environ.
+```python
+import os
+os.environ['PYTHONPATH'] += ":/path/to/models"
+```
+3. Install other dependencies
+```shell
+pip3 install --user -r official/requirements.txt
+```
+Finally, if you are using nlp packages, please also install
+**tensorflow-text-nightly**:
+```shell
+pip3 install tensorflow-text-nightly
+```
+## Contributions
+If you want to contribute, please review the [contribution guidelines](https://github.com/tensorflow/models/wiki/How-to-contribute).
--- a/TensorFlow2x/Accuracy_Validation/ResNet50_Official/official/__init__.py
+++ b/TensorFlow2x/Accuracy_Validation/ResNet50_Official/official/__init__.py
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.