dcuai / dlexamples, commit a32ffa95
authored Feb 03, 2023 by qianyj
parent e286da17

update TensorFlow2x test method

Changes: 268 files. Showing 20 changed files with 0 additions and 6183 deletions (+0, -6183).
All 20 files shown are under TensorFlow2x/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/:

benchmark_cnn_distributed_test.py           +0  -493
benchmark_cnn_distributed_test_runner.py    +0  -122
benchmark_cnn_test.py                       +0  -1493
cnn_util.py                                 +0  -253
cnn_util_test.py                            +0  -129
coco_metric.py                              +0  -198
constants.py                                +0  -67
convnet_builder.py                          +0  -498
datasets.py                                 +0  -251
flags.py                                    +0  -93
leading_indicators_test.py                  +0  -1003
mlperf.py                                   +0  -260
mlperf_test.py                              +0  -189
models/alexnet_model.py                     +0  -93
models/densenet_model.py                    +0  -100
models/experimental/deepspeech.py           +0  -449
models/experimental/official_ncf_model.py   +0  -172
models/googlenet_model.py                   +0  -63
models/inception_model.py                   +0  -213
models/lenet_model.py                       +0  -44
TensorFlow2x/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/benchmark_cnn_distributed_test.py
deleted, 100644 → 0 (contents as of parent e286da17):
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Tests running benchmark_cnn in distributed mode.

This is done by spawning one process per task. Each process runs
benchmark_cnn_distributed_test_runner.py.

The output for each process is written to disk and can be viewed to debug tests.
See get_test_output_dir() in platforms/default/util.py for more info.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from collections import namedtuple
import os
import subprocess
import time
import unittest

from absl import flags as absl_flags
import portpicker
import six
import tensorflow.compat.v1 as tf

import flags
import test_util
from platforms import util as platforms_util

FLAGS = absl_flags.FLAGS


def _convert_params_to_flags_list(params):
  """Converts Params to a list of flags. Skips default-valued parameters.

  E.g., converts
    benchmark_cnn.make_params(batch_size=32, model='resnet50')
  to
    ['--batch_size=32', '--model=resnet50']

  Args:
    params: Params for BenchmarkCNN.
  Returns:
    A list of flags.
  """
  return [
      '--%s=%s' % (k, str(v)) for k, v in six.iteritems(params._asdict())
      if v != flags.param_specs[k].default_value
  ]


# When outputting a process's output in the log, maximum number of characters
# to output. The log system does not allow us to output more than this in a
# single log message, but this limit is also useful to avoid the logs from
# becoming too large (the full process output is written to disk).
MAX_OUTPUT_CHARS = 15000


# A process. name is a string identifying the process in logs. stdout and
# stderr are file objects of the process's stdout and stderr, respectively.
_ProcessInfo = namedtuple('_ProcessInfo', ['name', 'popen', 'stdout', 'stderr'])


def _create_task_process(job_name, task_index, args, env, output_dir):
  """Creates a process for a single task for benchmark_cnn.

  Args:
    job_name: 'worker' or 'ps' or ''. Empty string used for non-distributed
      mode.
    task_index: The index of the task within the cluster.
    args: A list of arguments to pass to the task. This function additionally
      sets --task_index and --job_name
    env: The environment to use for the task.
    output_dir: Where to place the output files, storing the task's stdout and
      stderr.
  Returns:
    A _ProcessInfo namedtuple of the running process. The stdout and stderr
    fields of this tuple must be closed by the caller once the process ends.
  """
  args = args[:]
  args += ['--task_index=%s' % task_index, '--job_name=%s' % job_name]
  name_prefix = job_name or 'local'
  process_name = '%s_%s' % (name_prefix, task_index)
  tf.logging.info('Spawning %s process: %s' % (process_name, ' '.join(args)))
  stdout_filename = os.path.join(output_dir, '%s_stdout.txt' % process_name)
  stderr_filename = os.path.join(output_dir, '%s_stderr.txt' % process_name)
  stdout_file = open(stdout_filename, 'w+')
  stderr_file = open(stderr_filename, 'w+')
  popen = subprocess.Popen(
      args, stdout=stdout_file, stderr=stderr_file, env=env)
  return _ProcessInfo(process_name, popen, stdout_file, stderr_file)


def _wait_for_processes(wait_processes, kill_processes):
  """Waits until all `wait_processes` finish, then kills `kill_processes`.

  Fails an assert if a process in `wait_processes` finishes unsuccessfully.
  The processes in `kill_processes` are assumed to never finish so they are
  killed.

  Args:
    wait_processes: A list of _ProcessInfo tuples. This function will wait
      for each to finish.
    kill_processes: A list of _ProcessInfo tuples. Each will be killed once
      every process in `wait_processes` is finished.
  Returns:
    A list of strings, each which is a string of the stdout of a wait process.
  """
  wait_process_stdouts = [None] * len(wait_processes)
  finished_wait_processes = set()
  while len(finished_wait_processes) < len(wait_processes):
    for i, wait_process in enumerate(wait_processes):
      if i in finished_wait_processes:
        continue
      ret_code = wait_process.popen.poll()
      if ret_code is None:
        continue
      tf.logging.info('{} finished'.format(wait_process.name))
      wait_process.stdout.seek(0)
      wait_process_stdouts[i] = wait_process.stdout.read()
      tf.logging.info('stdout for {} (last {} chars): {}\n'.format(
          wait_process.name, MAX_OUTPUT_CHARS,
          wait_process_stdouts[i][-MAX_OUTPUT_CHARS:]))
      wait_process.stderr.seek(0)
      tf.logging.info('stderr for {} (last {} chars): {}\n'.format(
          wait_process.name, MAX_OUTPUT_CHARS,
          wait_process.stderr.read()[-MAX_OUTPUT_CHARS:]))
      assert ret_code == 0, 'Process failed with return code %d' % ret_code
      finished_wait_processes.add(i)
    for kill_process in kill_processes:
      ret_code = kill_process.popen.poll()
      # kill processes should not end until we kill them.
      assert ret_code is None, 'Process returned early with code %d' % ret_code
    time.sleep(0.25)
  tf.logging.info('All wait processes finished')
  for i, kill_process in enumerate(kill_processes):
    # Kill each kill process.
    kill_process.popen.kill()
    kill_process.popen.wait()
    kill_process.stdout.seek(0)
    tf.logging.info('stdout for {} (last {} chars): {}\n'.format(
        kill_process.name, MAX_OUTPUT_CHARS,
        kill_process.stdout.read()[-MAX_OUTPUT_CHARS:]))
    kill_process.stderr.seek(0)
    tf.logging.info('stderr for {} (last {} chars): {}\n'.format(
        kill_process.name, MAX_OUTPUT_CHARS,
        kill_process.stderr.read()[-MAX_OUTPUT_CHARS:]))
  return wait_process_stdouts


def _spawn_benchmark_processes(output_dir_path, num_workers, num_ps,
                               num_controllers, params):
  """Run training or evaluation in spawned processes.

  Runs locally if num_workers == 1, num_ps == 0, and num_controllers == 0,
  otherwise runs in distributed mode. In either case, one process is spawned
  per worker and ps. Waits for training/evaluation to finish before returning.

  Args:
    output_dir_path: Relative path where stdout and stderr files will be
      placed.
    num_workers: Number of workers to spawn.
    num_ps: Number of ps processes to spawn.
    num_controllers: Number of controller processes to spawn (must be 0 or 1).
    params: Params for BenchmarkCNN in each subprocess.
  Returns:
    A list output_list of outputs from all processes that output the
    images/sec and accuracy. This process is the controller host in
    distributed_all_reduce, and the workers otherwise. output_list[i] is a
    list of lines from the ith worker's stdout.
  """
  run_distributed = num_workers != 1 or num_ps != 0 or num_controllers != 0
  if params.variable_update == 'distributed_all_reduce':
    assert num_controllers == 1 or not run_distributed
    assert num_ps == 0
  else:
    assert num_controllers == 0
  output_base_dir = platforms_util.get_test_output_dir()
  output_dir = os.path.join(output_base_dir, output_dir_path)
  os.makedirs(output_dir)
  tf.logging.info('Outputs of processes will be outputted to: %s' % output_dir)

  args = platforms_util.get_command_to_run_python_module(
      'benchmark_cnn_distributed_test_runner')
  args += _convert_params_to_flags_list(params)
  if run_distributed:
    worker_ports = [portpicker.pick_unused_port() for _ in range(num_workers)]
    ps_ports = [portpicker.pick_unused_port() for _ in range(num_ps)]
    controller_ports = [
        portpicker.pick_unused_port() for _ in range(num_controllers)
    ]
    # The numerator is 0.7 instead of 1 to leave some memory for the Cuda
    # runtime, etc.
    gpu_memory_frac = 0.7 / num_workers
    args += [
        '--gpu_memory_frac_for_testing=%f' % gpu_memory_frac,
        '--worker_hosts=' + ','.join('localhost:%d' % p for p in worker_ports)
    ]
    if num_ps > 0:
      ps_hosts_str = ','.join('localhost:%d' % p for p in ps_ports)
      args.append('--ps_hosts=' + ps_hosts_str)
    else:
      controller_host_str = ','.join(
          'localhost:%d' % p for p in controller_ports)
      args.append('--controller_host=' + controller_host_str)
  env = os.environ.copy()
  # Allow stdout to be viewed before the process ends.
  env['PYTHONUNBUFFERED'] = '1'

  worker_processes = []
  ps_processes = []
  controller_processes = []
  try:
    for i in range(num_workers):
      job_name = 'worker' if run_distributed else ''
      process = _create_task_process(job_name, i, args, env, output_dir)
      worker_processes.append(process)
    # Don't let ps or controller processes use the gpu.
    env['CUDA_VISIBLE_DEVICES'] = ''
    for i in range(num_ps):
      process = _create_task_process('ps', i, args, env, output_dir)
      ps_processes.append(process)
    for i in range(num_controllers):
      process = _create_task_process('controller', i, args, env, output_dir)
      controller_processes.append(process)
    # If all distributed all reduce mode is being used, the controller process
    # finishes and the worker processes block forever. Otherwise, the worker
    # processes finish and the ps processes block forever. We set
    # wait_processes and kill_processes accordingly.
    if controller_processes:
      wait_processes = controller_processes
      kill_processes = worker_processes
    else:
      wait_processes = worker_processes
      kill_processes = ps_processes
    outputs = _wait_for_processes(wait_processes, kill_processes)
  finally:
    for process in worker_processes + ps_processes + controller_processes:
      try:
        process.popen.kill()
      except OSError:
        pass  # It's OK (and expected) if the process already exited.
      process.stdout.close()
      process.stderr.close()
  return [output.splitlines() for output in outputs]


# When this test class is run, a method will fail about 0.3% of the time with a
# gRPC error. It is not clear why this occurs.
# TODO(reedwm): Fix this test class.
class TfCnnBenchmarksDistributedTest(tf.test.TestCase):
  """Tests running benchmark_cnn in distributed mode."""

  # We cannot check for a GPU via tf.test.is_gpu_available() before the tests
  # in this class because it allocates all the GPU memory which would cause the
  # spawned processes to run out of GPU memory.

  def _test_distributed(self,
                        test_name,
                        num_workers,
                        num_ps,
                        params,
                        num_controllers=0,
                        check_output_values=False,
                        skip=None):
    # TODO(reedwm): check_output_values should default to True and be enabled
    # on every test. See the TODO in benchmark_cnn_test.py.
    def run_fn(run_type, inner_params):
      output_dir_path = os.path.join(test_name, run_type)
      if run_type == 'Evaluation':
        # Distributed evaluation is not supported, so we use a single process.
        # We still must spawn another process, because if we evaluate in the
        # current process, it would allocate the GPU memory causing future test
        # methods to fail.
        if inner_params.variable_update == 'distributed_replicated':
          inner_params = inner_params._replace(variable_update='replicated')
        return _spawn_benchmark_processes(
            output_dir_path, num_workers=1, num_ps=0, num_controllers=0,
            params=inner_params)
      else:
        return _spawn_benchmark_processes(output_dir_path, num_workers, num_ps,
                                          num_controllers, inner_params)

    return test_util.train_and_eval(self, run_fn, params,
                                    check_output_values=check_output_values,
                                    skip=skip)

  def testParameterServer(self):
    test_name = 'testParameterServer'
    params = test_util.get_params(test_name)
    self._test_distributed(test_name, 2, 2, params)

  def testParameterServerStaged(self):
    test_name = 'testParameterServerStaged'
    params = test_util.get_params(test_name)._replace(staged_vars=True)
    self._test_distributed(test_name, 2, 2, params)

  def testReplicated(self):
    test_name = 'testReplicated'
    params = test_util.get_params(test_name)._replace(
        variable_update='distributed_replicated')
    self._test_distributed(test_name, 2, 2, params)

  def testAllReducePsgpu(self):
    test_name = 'testAllReducePsgpu'
    flags_dict = test_util.get_params(test_name)._replace(
        variable_update='distributed_all_reduce',
        all_reduce_spec='psgpu#4')
    self._test_distributed(test_name, 2, 0, flags_dict, num_controllers=1)

  def testAllReducePscpuXring(self):
    test_name = 'testAllReducePscpuXring'
    flags_dict = test_util.get_params(test_name)._replace(
        variable_update='distributed_all_reduce',
        all_reduce_spec='pscpu:2k:xring')
    self._test_distributed(test_name, 2, 0, flags_dict, num_controllers=1)

  def testForwardOnly(self):
    test_name = 'testForwardOnly'
    params = test_util.get_params(test_name)._replace(forward_only=True)
    # Evaluation is not supported with --forward_only, so we set skip='eval'.
    self._test_distributed(test_name, 2, 2, params, skip='eval')

  def testSingleWorkerAndPs(self):
    test_name = 'testSingleWorkerAndPs'
    params = test_util.get_params(test_name)
    self._test_distributed(test_name, 1, 1, params)

  def testThreeWorkersAndPses(self):
    test_name = 'testThreeWorkersAndPses'
    params = test_util.get_params(test_name)
    self._test_distributed(test_name, 3, 3, params)

  def testOneWorkerThreePses(self):
    test_name = 'testOneWorkerThreePses'
    params = test_util.get_params(test_name)
    self._test_distributed(test_name, 1, 3, params)

  def testThreeWorkersOnePs(self):
    test_name = 'testThreeWorkersOnePs'
    params = test_util.get_params(test_name)
    self._test_distributed(test_name, 3, 1, params)

  def testNoPrintTrainingAccuracy(self):
    test_name = 'testNoPrintTrainingAccuracy'
    params = test_util.get_params(test_name)._replace(
        print_training_accuracy=False)
    self._test_distributed(test_name, 2, 2, params)

  def testRmspropParameterServer(self):
    test_name = 'testRmspropParameterServer'
    params = test_util.get_params(test_name)._replace(optimizer='rmsprop')
    self._test_distributed(test_name, 2, 2, params)

  def testMomentumReplicated(self):
    test_name = 'testMomentumReplicated'
    params = test_util.get_params(test_name)._replace(
        optimizer='momentum', variable_update='distributed_replicated')
    self._test_distributed(test_name, 2, 2, params)

  def testNoCrossReplicaSyncParameterServerStaged(self):
    test_name = 'testNoCrossReplicaSyncParameterServerStaged'
    params = test_util.get_params(test_name)._replace(
        staged_vars=True, cross_replica_sync=False)
    self._test_distributed(test_name, 2, 2, params)

  def testSingleGpu(self):
    test_name = 'testSingleGpu'
    params = test_util.get_params(test_name)._replace(num_gpus=1)
    self._test_distributed(test_name, 2, 2, params)

  def testBatchGroupSize(self):
    test_name = 'testBatchGroupSize'
    params = test_util.get_params(test_name)._replace(
        batch_group_size=4, num_batches=100, num_warmup_batches=5)
    self._test_distributed(test_name, 2, 2, params)

  def testFp16WithFp32Vars(self):
    test_name = 'testFp16WithFp32Vars'
    params = test_util.get_params(test_name)._replace(
        use_fp16=True, fp16_vars=False)
    self._test_distributed(test_name, 2, 2, params)

  def testFp16WithFp16Vars(self):
    test_name = 'testFp16WithFp16Vars'
    params = test_util.get_params(test_name)._replace(
        use_fp16=True, fp16_vars=True, fp16_loss_scale=1.)
    self._test_distributed(test_name, 2, 2, params)

  def testFp16Replicated(self):
    test_name = 'testFp16Replicated'
    params = test_util.get_params(test_name)._replace(
        use_fp16=True, variable_update='distributed_replicated')
    self._test_distributed(test_name, 2, 2, params)

  @unittest.skip('b/147310862: Fails for unknown reason')
  def testReplicatedRealData(self):
    test_name = 'testReplicatedRealData'
    imagenet_dir = os.path.join(platforms_util.get_test_data_dir(),
                                'fake_tf_record_data')
    params = test_util.get_params(test_name)._replace(
        variable_update='distributed_replicated',
        data_dir=imagenet_dir,
        data_name='imagenet')
    self._test_distributed(test_name, 2, 2, params)


class DistributedVariableUpdateTest(tf.test.TestCase):
  """Tests that variables are updated correctly in distributed mode."""

  def _test_variable_update(self,
                            test_name,
                            num_workers,
                            num_ps,
                            params,
                            num_controllers=0):
    """Tests variables are updated correctly when the given params are used."""
    output_dir_path = os.path.join(test_name, 'variable_update')
    logs = _spawn_benchmark_processes(output_dir_path, num_workers, num_ps,
                                      num_controllers, params)
    actual_losses = []
    for worker_logs in logs:
      outputs = test_util.get_training_outputs_from_logs(
          worker_logs, params.print_training_accuracy)
      actual_losses.append([x.loss for x in outputs])

    inputs = test_util.get_fake_var_update_inputs()
    expected_losses = test_util.TestCNNModel().manually_compute_losses(
        inputs, num_workers, params)
    if params.variable_update == 'distributed_all_reduce':
      # In distributed all reduce, each step, the controller outputs the
      # average of the loss from each worker. So we modify expected losses
      # accordingly. E.g, we change [[1, 2], [4, 5]] to [[2.5, 3.5]]
      expected_losses = [[
          sum(losses) / num_workers for losses in zip(*expected_losses)
      ]]
    rtol = 3e-2 if params.use_fp16 else 1e-5
    for worker_actual_losses, worker_expected_losses in zip(
        actual_losses, expected_losses):
      self.assertAllClose(worker_actual_losses[:len(worker_expected_losses)],
                          worker_expected_losses, rtol=rtol, atol=0.)

  def _test_variable_updates(self, test_name, params):
    """Tests variables are updated correctly with various variable updates."""

    # Unfortunately, distributed parameter server is non-deterministic with
    # multiple workers, because one worker may write to a variable before
    # another worker reads it. This probably does not harm training, but it
    # does mean we cannot easily test that case. So, we use one worker.
    self._test_variable_update(
        test_name + '_ps', num_workers=1, num_ps=2, num_controllers=0,
        params=params._replace(variable_update='parameter_server'))
    self._test_variable_update(
        test_name + '_rep', num_workers=2, num_ps=1, num_controllers=0,
        params=params._replace(variable_update='distributed_replicated'))
    self._test_variable_update(
        test_name + '_allreduce', num_workers=2, num_ps=0, num_controllers=1,
        params=params._replace(variable_update='distributed_all_reduce',
                               all_reduce_spec='psgpu#%d' % params.num_gpus))

  def testVarUpdateDefault(self):
    params = test_util.get_var_update_params()
    self._test_variable_updates('testVarUpdateDefault', params)

  def testVarUpdateCpuAsLocalParamDevice(self):
    params = test_util.get_var_update_params()._replace(
        local_parameter_device='cpu')
    self._test_variable_updates('testVarUpdateCpuAsLocalParamDevice', params)

  def testVarUpdateFp16(self):
    params = test_util.get_var_update_params()._replace(use_fp16=True)
    self._test_variable_updates('testVarUpdateFp16', params)

  def testVarUpdateResourceVars(self):
    params = test_util.get_var_update_params()._replace(use_resource_vars=True)
    self._test_variable_updates('testVarUpdateResourceVars', params)


if __name__ == '__main__':
  tf.disable_v2_behavior()
  tf.test.main()
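A note on _convert_params_to_flags_list above: it serializes only the non-default fields of a Params namedtuple into --name=value strings, which are then appended to the spawned runner's command line. A minimal standalone sketch of the same idea, with a hypothetical Params type and default table standing in for benchmark_cnn's real param_specs:

from collections import namedtuple

# Hypothetical stand-ins for benchmark_cnn's Params and flags.param_specs.
Params = namedtuple('Params', ['batch_size', 'model'])
DEFAULT_VALUES = {'batch_size': 32, 'model': 'trivial'}


def to_flags_list(params):
  # Emit --name=value only for fields that differ from their defaults,
  # mirroring _convert_params_to_flags_list.
  return ['--%s=%s' % (k, str(v)) for k, v in params._asdict().items()
          if v != DEFAULT_VALUES[k]]


print(to_flags_list(Params(batch_size=64, model='trivial')))
# prints ['--batch_size=64']: the default-valued model field is skipped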
TensorFlow2x/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/benchmark_cnn_distributed_test_runner.py
deleted, 100644 → 0 (contents as of parent e286da17):
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Used to run benchmark_cnn for distributed tests.

In distributed tests, we spawn processes to run tf_cnn_benchmark tasks. We
could directly spawn tf_cnn_benchmark processes, but we want some added
functionality, such as being able to inject custom images during training. So
instead, this file is spawned as a Python process, which supports the added
functionality.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from absl import flags as absl_flags
import numpy as np
import tensorflow.compat.v1 as tf

import benchmark_cnn
import flags
import preprocessing
import test_util

absl_flags.DEFINE_string('fake_input', 'none',
                         """What fake input to inject into benchmark_cnn. This
                         is ignored if --model=test_model.
                         Options are:
                         none: Do not inject any fake input.
                         zeros_and_ones: Half the images will be all 0s with
                         a label of 0. Half the images will be all 1s with a
                         label of 1.""")

flags.define_flags()
FLAGS = flags.FLAGS


def get_test_image_preprocessor(batch_size, params):
  """Returns the preprocessing.TestImagePreprocessor that should be injected.

  Returns None if no preprocessor should be injected.

  Args:
    batch_size: The batch size across all GPUs.
    params: BenchmarkCNN's parameters.
  Returns:
    Returns the preprocessing.TestImagePreprocessor that should be injected.
  Raises:
    ValueError: Flag --fake_input is an invalid value.
  """
  if FLAGS.fake_input == 'none':
    return None
  elif FLAGS.fake_input == 'zeros_and_ones':
    half_batch_size = batch_size // 2
    images = np.zeros((batch_size, 227, 227, 3), dtype=np.float32)
    images[half_batch_size:, :, :, :] = 1
    labels = np.array([0] * half_batch_size + [1] * half_batch_size,
                      dtype=np.int32)
    preprocessor = preprocessing.TestImagePreprocessor(
        batch_size, [227, 227, 3], params.num_gpus,
        benchmark_cnn.get_data_type(params))
    preprocessor.set_fake_data(images, labels)
    preprocessor.expected_subset = 'validation' if params.eval else 'train'
    return preprocessor
  else:
    raise ValueError('Invalid --fake_input: %s' % FLAGS.fake_input)


def run_with_real_model(params):
  """Runs tf_cnn_benchmarks with a real model."""
  bench = benchmark_cnn.BenchmarkCNN(params)
  bench.print_info()
  preprocessor = get_test_image_preprocessor(bench.batch_size, params)
  if preprocessor is not None:
    # The test image preprocessor requires queue runners. Since this file is
    # used for testing, it is OK to access protected members.
    # pylint: disable=protected-access
    bench.dataset._queue_runner_required = True
    # pylint: enable=protected-access
    bench.input_preprocessor = preprocessor
  bench.run()


def run_with_test_model(params):
  """Runs tf_cnn_benchmarks with a test model."""
  model = test_util.TestCNNModel()
  inputs = test_util.get_fake_var_update_inputs()
  with test_util.monkey_patch(benchmark_cnn,
                              LOSS_AND_ACCURACY_DIGITS_TO_SHOW=15):
    bench = benchmark_cnn.BenchmarkCNN(params,
                                       dataset=test_util.TestDataSet(),
                                       model=model)
    # The test model does not use labels when computing loss, so the label
    # values do not matter as long as it's the right shape.
    labels = np.array([1] * inputs.shape[0])
    bench.input_preprocessor.set_fake_data(inputs, labels)
    bench.run()


def main(_):
  params = benchmark_cnn.make_params_from_flags()
  params = benchmark_cnn.setup(params)
  if params.model == 'test_model':
    run_with_test_model(params)
  else:
    run_with_real_model(params)


if __name__ == '__main__':
  tf.disable_v2_behavior()
  tf.app.run()
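The tests in benchmark_cnn_distributed_test.py launch this runner as subprocesses rather than importing it, so each task gets its own process and GPU memory fraction. As a rough illustration, a hypothetical hand-rolled single-task launch (the non-distributed path of _spawn_benchmark_processes) might look like the sketch below; the real tests instead build the command via platforms_util.get_command_to_run_python_module() and pick free ports with portpicker:

import os
import subprocess

# Hypothetical local launch of the runner above with the test model.
env = dict(os.environ, PYTHONUNBUFFERED='1')  # let stdout be read early
args = [
    'python', 'benchmark_cnn_distributed_test_runner.py',
    '--model=test_model',  # exercises run_with_test_model()
    '--num_batches=4',
    '--job_name=',         # empty job name selects local, non-distributed mode
    '--task_index=0',
]
proc = subprocess.Popen(args, env=env)
proc.wait()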
TensorFlow2x/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/benchmark_cnn_test.py
deleted, 100644 → 0 (contents as of parent e286da17):
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for benchmark_cnn."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
glob
import
os
import
re
import
unittest
import
mock
import
numpy
as
np
import
tensorflow.compat.v1
as
tf
from
google.protobuf
import
text_format
from
tensorflow.core.framework
import
step_stats_pb2
from
tensorflow.core.profiler
import
tfprof_log_pb2
from
tensorflow.python.platform
import
test
import
benchmark_cnn
import
datasets
import
flags
import
preprocessing
import
test_util
import
variable_mgr_util
from
platforms
import
util
as
platforms_util
def
_check_has_gpu
():
if
not
test
.
is_gpu_available
(
cuda_only
=
True
):
raise
ValueError
(
"""You have asked to run part or all of this on GPU, but it appears
that no GPU is available. If your machine has GPUs it is possible you
do not have a version of TensorFlow with GPU support. To build with GPU
support, add --config=cuda to the build flags.
\n
"""
)
class
TfCnnBenchmarksModelTest
(
tf
.
test
.
TestCase
):
"""Tests which are run with multiple models."""
def
setUp
(
self
):
super
(
TfCnnBenchmarksModelTest
,
self
).
setUp
()
benchmark_cnn
.
setup
(
benchmark_cnn
.
make_params
())
def
get_model_name
(
self
):
return
None
# Return true to run tests that don't need to be run on every model.
# This should be done for one or two cheap models.
def
extended_tests
(
self
):
return
False
# Return false to suppress actually running the model; this is useful
# for tests that are large.
def
model_execution_test
(
self
):
return
False
# Return false to suppress actually saving and loading the model.
def
model_save_load_test
(
self
):
return
False
def
testSaveLoadModel
(
self
):
_check_has_gpu
()
if
not
self
.
get_model_name
()
or
not
self
.
model_save_load_test
():
return
params
=
benchmark_cnn
.
make_params
(
model
=
self
.
get_model_name
(),
num_batches
=
1
,
num_intra_threads
=
0
,
num_inter_threads
=
0
,
distortions
=
False
,
batch_size
=
2
,
variable_update
=
'replicated'
,
num_warmup_batches
=
0
,
num_gpus
=
2
,
train_dir
=
test_util
.
get_temp_dir
(
'testSaveLoadModel_'
+
self
.
get_model_name
()))
# Run one batch and save the model.
# Note that this uses a non-test session.
bench
=
benchmark_cnn
.
BenchmarkCNN
(
params
)
bench
.
run
()
self
.
assertEqual
(
bench
.
init_global_step
,
0
)
# Clear the default graph.
tf
.
reset_default_graph
()
# Test if checkpoint had been saved.
ckpt
=
tf
.
train
.
get_checkpoint_state
(
params
.
train_dir
)
match
=
re
.
match
(
os
.
path
.
join
(
params
.
train_dir
,
r
'model.ckpt-(\d+).index'
),
ckpt
.
model_checkpoint_path
+
'.index'
)
self
.
assertTrue
(
match
)
self
.
assertGreaterEqual
(
int
(
match
.
group
(
1
)),
params
.
num_batches
)
params
=
params
.
_replace
(
num_batches
=
2
)
# Reload the model
bench
=
benchmark_cnn
.
BenchmarkCNN
(
params
)
bench
.
run
()
# Check if global step has been restored.
self
.
assertNotEqual
(
bench
.
init_global_step
,
0
)
ckpt
=
tf
.
train
.
get_checkpoint_state
(
params
.
train_dir
)
match
=
re
.
match
(
os
.
path
.
join
(
params
.
train_dir
,
r
'model.ckpt-(\d+).index'
),
ckpt
.
model_checkpoint_path
+
'.index'
)
self
.
assertTrue
(
match
)
self
.
assertGreaterEqual
(
int
(
match
.
group
(
1
)),
params
.
num_batches
)
# Check that the batch norm moving averages are restored from checkpoints
with
tf
.
Graph
().
as_default
():
bench
=
benchmark_cnn
.
BenchmarkCNN
(
params
)
bench
.
_build_model
()
saver
=
tf
.
train
.
Saver
(
bench
.
variable_mgr
.
savable_variables
())
with
tf
.
Session
(
config
=
benchmark_cnn
.
create_config_proto
(
params
))
as
sess
:
benchmark_cnn
.
load_checkpoint
(
saver
,
sess
,
params
.
train_dir
)
sess
.
run
(
bench
.
variable_mgr
.
get_post_init_ops
())
bn_moving_vars
=
[
v
for
v
in
tf
.
global_variables
()
if
'/batchnorm'
in
v
.
name
and
'/moving'
in
v
.
name
]
self
.
assertGreater
(
len
(
bn_moving_vars
),
0
)
for
moving_var
in
bn_moving_vars
:
moving_var_value
=
sess
.
run
(
moving_var
)
# Check that the moving means and moving variances have been restored
# by asserting they are not their default values of 0 and 1,
# respectively
if
'/moving_mean'
in
moving_var
.
name
:
self
.
assertFalse
(
np
.
array_equal
(
moving_var_value
,
np
.
zeros
(
moving_var_value
.
shape
,
moving_var_value
.
dtype
)))
else
:
self
.
assertIn
(
'/moving_variance'
,
moving_var
.
name
)
self
.
assertFalse
(
np
.
array_equal
(
moving_var_value
,
np
.
ones
(
moving_var_value
.
shape
,
moving_var_value
.
dtype
)))
def
testModel
(
self
):
_check_has_gpu
()
if
not
self
.
get_model_name
()
or
not
self
.
model_execution_test
():
return
params
=
benchmark_cnn
.
make_params
(
model
=
self
.
get_model_name
(),
num_batches
=
1
,
num_intra_threads
=
1
,
num_inter_threads
=
12
,
batch_size
=
2
,
distortions
=
False
)
# Run this one; note that this uses a non-test session.
bench
=
benchmark_cnn
.
BenchmarkCNN
(
params
)
bench
.
run
()
def
testSendRecvVariables
(
self
):
self
.
_testVariables
(
'parameter_server'
)
if
self
.
extended_tests
():
self
.
_testVariables
(
'parameter_server'
,
local_parameter_device
=
'CPU'
)
self
.
_testVariables
(
'parameter_server'
,
optimizer
=
'sgd'
)
def
testReplicatedVariables
(
self
):
self
.
_testVariables
(
'replicated'
)
if
self
.
extended_tests
():
self
.
_testVariables
(
'replicated'
,
all_reduce_spec
=
None
)
self
.
_testVariables
(
'replicated'
,
use_fp16
=
True
,
fp16_vars
=
False
)
self
.
_testVariables
(
'replicated'
,
all_reduce_spec
=
None
,
use_fp16
=
True
,
fp16_vars
=
False
,
fp16_enable_auto_loss_scale
=
True
,
fp16_inc_loss_scale_every_n
=
4
)
def
testIndependentVariables
(
self
):
self
.
_testVariables
(
'independent'
)
self
.
_testVariables
(
'independent'
,
all_reduce_spec
=
None
,
use_fp16
=
True
,
fp16_vars
=
False
,
fp16_enable_auto_loss_scale
=
True
,
fp16_inc_loss_scale_every_n
=
4
)
def
testSummaryVerbosity
(
self
):
self
.
_testVariables
(
'parameter_server'
,
summary_verbosity
=
1
)
if
self
.
extended_tests
():
self
.
_testVariables
(
'parameter_server'
,
summary_verbosity
=
2
)
self
.
_testVariables
(
'parameter_server'
,
summary_verbosity
=
3
)
def
testStagedVariables
(
self
):
self
.
_testVariables
(
'parameter_server'
,
staged_vars
=
True
)
if
self
.
extended_tests
():
self
.
_testVariables
(
'parameter_server'
,
staged_vars
=
True
,
local_parameter_device
=
'CPU'
)
self
.
_testVariables
(
'parameter_server'
,
staged_vars
=
True
,
use_fp16
=
True
,
fp16_vars
=
True
)
def
_assert_correct_var_type
(
self
,
var
,
params
):
if
'gpu_cached_inputs'
not
in
var
.
name
:
if
params
.
use_fp16
and
params
.
fp16_vars
and
'batchnorm'
not
in
var
.
name
:
expected_type
=
tf
.
float16
else
:
expected_type
=
tf
.
float32
self
.
assertEqual
(
var
.
dtype
.
base_dtype
,
expected_type
)
def
_testVariables
(
self
,
variable_update
,
summary_verbosity
=
0
,
local_parameter_device
=
'GPU'
,
staged_vars
=
False
,
optimizer
=
'momentum'
,
# TODO(b/80125832): Enable nccl in tests
# all_reduce_spec='nccl',
all_reduce_spec
=
''
,
use_fp16
=
False
,
fp16_vars
=
False
,
fp16_enable_auto_loss_scale
=
False
,
fp16_inc_loss_scale_every_n
=
10
):
if
not
self
.
get_model_name
():
return
_check_has_gpu
()
params
=
benchmark_cnn
.
make_params
(
model
=
self
.
get_model_name
(),
num_batches
=
1
,
num_intra_threads
=
1
,
num_inter_threads
=
12
,
distortions
=
False
,
variable_update
=
variable_update
,
local_parameter_device
=
local_parameter_device
,
num_gpus
=
2
,
summary_verbosity
=
summary_verbosity
,
staged_vars
=
staged_vars
,
optimizer
=
optimizer
,
all_reduce_spec
=
all_reduce_spec
,
compact_gradient_transfer
=
False
if
all_reduce_spec
==
'nccl'
else
True
,
use_fp16
=
use_fp16
,
fp16_loss_scale
=
2.
,
fp16_vars
=
fp16_vars
,
fp16_enable_auto_loss_scale
=
fp16_enable_auto_loss_scale
,
fp16_inc_loss_scale_every_n
=
fp16_inc_loss_scale_every_n
,
)
# Test building models using multiple GPUs, but don't
# run them.
with
self
.
test_session
(
graph
=
tf
.
Graph
()):
bench
=
benchmark_cnn
.
BenchmarkCNN
(
params
)
bench
.
_build_model
()
# Rough validation of variable type and placement, depending on mode.
all_vars
=
tf
.
global_variables
()
+
tf
.
local_variables
()
if
params
.
variable_update
==
'parameter_server'
:
for
v
in
all_vars
:
tf
.
logging
.
debug
(
'var: %s'
%
v
.
name
)
match
=
re
.
match
(
r
'tower_(\d+)/v/gpu_cached_inputs:0'
,
v
.
name
)
if
match
:
self
.
assertEqual
(
v
.
device
,
'/device:GPU:%s'
%
match
.
group
(
1
))
elif
v
.
name
.
startswith
(
'v/'
):
self
.
assertEqual
(
v
.
device
,
'/device:%s:0'
%
local_parameter_device
)
self
.
_assert_correct_var_type
(
v
,
params
)
elif
v
.
name
in
(
'input_processing/images:0'
,
'input_processing/labels:0'
,
'init_learning_rate:0'
,
'global_step:0'
,
'loss_scale:0'
,
'loss_scale_normal_steps:0'
):
self
.
assertEqual
(
v
.
device
,
'/device:CPU:0'
)
else
:
raise
ValueError
(
'Unexpected variable %s'
%
v
.
name
)
else
:
v0_count
=
0
v1_count
=
0
for
v
in
all_vars
:
if
v
.
name
.
startswith
(
'tower_0/v0/'
):
self
.
assertEqual
(
v
.
name
,
'tower_0/v0/gpu_cached_inputs:0'
)
self
.
assertEqual
(
v
.
device
,
'/device:GPU:0'
)
elif
v
.
name
.
startswith
(
'tower_1/v1/'
):
self
.
assertEqual
(
v
.
name
,
'tower_1/v1/gpu_cached_inputs:0'
)
self
.
assertEqual
(
v
.
device
,
'/device:GPU:1'
)
elif
v
.
name
.
startswith
(
'v0/'
):
v0_count
+=
1
self
.
assertEqual
(
v
.
device
,
'/device:GPU:0'
)
self
.
_assert_correct_var_type
(
v
,
params
)
elif
v
.
name
.
startswith
(
'v1/'
):
v1_count
+=
1
self
.
assertEqual
(
v
.
device
,
'/device:GPU:1'
)
self
.
_assert_correct_var_type
(
v
,
params
)
elif
v
.
name
in
(
'input_processing/images:0'
,
'input_processing/labels:0'
,
'init_learning_rate:0'
,
'global_step:0'
,
'loss_scale:0'
,
'loss_scale_normal_steps:0'
):
self
.
assertEqual
(
v
.
device
,
'/device:CPU:0'
)
else
:
raise
ValueError
(
'Unexpected variable %s'
%
v
.
name
)
self
.
assertEqual
(
v0_count
,
v1_count
)
# Validate summary ops in the model depending on verbosity level
summary_ops
=
tf
.
get_collection
(
tf
.
GraphKeys
.
SUMMARIES
)
num_summary_ops
=
len
(
summary_ops
)
self
.
assertEqual
(
num_summary_ops
>
0
,
summary_verbosity
>
0
)
if
summary_verbosity
>
0
:
has_affine_histogram
=
False
has_gradient_histogram
=
False
has_log_gradients_histogram
=
False
for
op
in
summary_ops
:
if
'/gradients'
in
op
.
name
:
has_gradient_histogram
=
True
elif
'/affine'
in
op
.
name
:
has_affine_histogram
=
True
elif
'log_gradients'
in
op
.
name
:
has_log_gradients_histogram
=
True
self
.
assertEqual
(
summary_verbosity
>=
3
,
has_affine_histogram
)
self
.
assertEqual
(
summary_verbosity
>=
3
,
has_gradient_histogram
)
self
.
assertEqual
(
summary_verbosity
>=
2
,
has_log_gradients_histogram
)
if
summary_verbosity
==
1
:
self
.
assertLess
(
num_summary_ops
,
10
)
class
TrivialModelTest
(
TfCnnBenchmarksModelTest
):
def
get_model_name
(
self
):
return
'trivial'
class
TestVgg1Model
(
TfCnnBenchmarksModelTest
):
def
get_model_name
(
self
):
return
'vgg11'
class
TestVgg19Model
(
TfCnnBenchmarksModelTest
):
def
get_model_name
(
self
):
return
'vgg19'
class
TestLenet5Model
(
TfCnnBenchmarksModelTest
):
def
get_model_name
(
self
):
return
'lenet'
class
TestGooglenetModel
(
TfCnnBenchmarksModelTest
):
def
get_model_name
(
self
):
return
'googlenet'
class
TestOverfeatModel
(
TfCnnBenchmarksModelTest
):
def
get_model_name
(
self
):
return
'overfeat'
class
TestAlexnetModel
(
TfCnnBenchmarksModelTest
):
def
get_model_name
(
self
):
return
'alexnet'
def
extended_tests
(
self
):
return
True
class
TestTrivialModel
(
TfCnnBenchmarksModelTest
):
def
get_model_name
(
self
):
return
'trivial'
class
TestInceptionv3Model
(
TfCnnBenchmarksModelTest
):
def
get_model_name
(
self
):
return
'inception3'
def
extended_tests
(
self
):
return
True
class
TestInceptionv4Model
(
TfCnnBenchmarksModelTest
):
def
get_model_name
(
self
):
return
'inception4'
class
TestResnet50Model
(
TfCnnBenchmarksModelTest
):
def
get_model_name
(
self
):
return
'resnet50'
def
model_save_load_test
(
self
):
return
True
class
TestResnet101Model
(
TfCnnBenchmarksModelTest
):
def
get_model_name
(
self
):
return
'resnet101'
class
TestResnet152Model
(
TfCnnBenchmarksModelTest
):
def
get_model_name
(
self
):
return
'resnet152'
class
TestResnet50V2Model
(
TfCnnBenchmarksModelTest
):
def
get_model_name
(
self
):
return
'resnet50_v2'
class
TestResnet101V2Model
(
TfCnnBenchmarksModelTest
):
def
get_model_name
(
self
):
return
'resnet101_v2'
class
TestResnet152V2Model
(
TfCnnBenchmarksModelTest
):
def
get_model_name
(
self
):
return
'resnet152_v2'
class
TfCnnBenchmarksTest
(
tf
.
test
.
TestCase
):
"""Tests that benchmark_cnn runs correctly."""
def
setUp
(
self
):
super
(
TfCnnBenchmarksTest
,
self
).
setUp
()
_check_has_gpu
()
benchmark_cnn
.
setup
(
benchmark_cnn
.
make_params
())
def
_run_benchmark_cnn
(
self
,
params
):
logs
=
[]
benchmark_cnn
.
log_fn
=
test_util
.
print_and_add_to_list
(
logs
)
benchmark_cnn
.
BenchmarkCNN
(
params
).
run
()
return
logs
def
_run_benchmark_cnn_with_fake_images
(
self
,
params
,
images
,
labels
):
logs
=
[]
benchmark_cnn
.
log_fn
=
test_util
.
print_and_add_to_list
(
logs
)
bench
=
benchmark_cnn
.
BenchmarkCNN
(
params
)
bench
.
input_preprocessor
=
preprocessing
.
TestImagePreprocessor
(
params
.
batch_size
*
params
.
num_gpus
,
[[
params
.
batch_size
,
227
,
227
,
3
],
[
params
.
batch_size
]],
params
.
num_gpus
,
bench
.
model
.
data_type
)
bench
.
dataset
.
_queue_runner_required
=
True
bench
.
input_preprocessor
.
set_fake_data
(
images
,
labels
)
bench
.
input_preprocessor
.
expected_subset
=
(
'validation'
if
params
.
eval
else
'train'
)
bench
.
run
()
return
logs
def
_run_benchmark_cnn_with_black_and_white_images
(
self
,
params
):
"""Runs BenchmarkCNN with black and white images.
A BenchmarkCNN is created and run with black and white images as input. Half
the images are black (i.e., filled with 0s) and half are white (i.e., filled
with 255s).
Args:
params: Params for BenchmarkCNN.
Returns:
A list of lines from the output of BenchmarkCNN.
"""
# TODO(reedwm): Instead of generating images here, use black and white
# tfrecords by calling test_util.create_black_and_white_images().
effective_batch_size
=
params
.
batch_size
*
params
.
num_gpus
half_batch_size
=
effective_batch_size
//
2
images
=
np
.
zeros
((
effective_batch_size
,
227
,
227
,
3
),
dtype
=
np
.
float32
)
images
[
half_batch_size
:,
:,
:,
:]
=
255
labels
=
np
.
array
([
0
]
*
half_batch_size
+
[
1
]
*
half_batch_size
,
dtype
=
np
.
int32
)
return
self
.
_run_benchmark_cnn_with_fake_images
(
params
,
images
,
labels
)
def
_train_and_eval_local
(
self
,
params
,
check_output_values
=
False
,
max_final_loss
=
10.
,
skip
=
None
,
use_test_preprocessor
=
True
):
# TODO(reedwm): check_output_values should default to True and be enabled
# on every test. Currently, if check_output_values=True and the calls to
# tf.set_random_seed(...) and np.seed(...) are passed certain seed values in
# benchmark_cnn.py, then most tests will fail. This indicates the tests
# are brittle and could fail with small changes when
# check_output_values=True, so check_output_values defaults to False for
# now.
def
run_fn
(
run_type
,
inner_params
):
del
run_type
if
use_test_preprocessor
:
return
[
self
.
_run_benchmark_cnn_with_black_and_white_images
(
inner_params
)
]
else
:
return
[
self
.
_run_benchmark_cnn
(
inner_params
)]
return
test_util
.
train_and_eval
(
self
,
run_fn
,
params
,
check_output_values
=
check_output_values
,
max_final_loss
=
max_final_loss
,
skip
=
skip
)
def
testAlexnet
(
self
):
params
=
test_util
.
get_params
(
'testAlexnet'
).
_replace
(
num_batches
=
30
,
init_learning_rate
=
0.01
,
model
=
'alexnet'
)
self
.
_train_and_eval_local
(
params
)
def
testNoPrintAccuracy
(
self
):
params
=
test_util
.
get_params
(
'testNoPrintAccuracy'
).
_replace
(
print_training_accuracy
=
False
)
self
.
_train_and_eval_local
(
params
)
def
testLowAccuracy
(
self
):
params
=
test_util
.
get_params
(
'testLowAccuracy'
).
_replace
(
print_training_accuracy
=
True
,
batch_size
=
5
,
num_batches
=
10
)
# We force low accuracy by having each batch containing 10 identical images,
# each with a different label. This guarantees a top-1 accuracy of exactly
# 0.1 and a top-5 accuracy of exactly 0.5.
images
=
np
.
zeros
((
10
,
227
,
227
,
3
),
dtype
=
np
.
float32
)
labels
=
np
.
arange
(
10
,
dtype
=
np
.
int32
)
logs
=
self
.
_run_benchmark_cnn_with_fake_images
(
params
,
images
,
labels
)
training_outputs
=
test_util
.
get_training_outputs_from_logs
(
logs
,
params
.
print_training_accuracy
)
last_output
=
training_outputs
[
-
1
]
# TODO(reedwm): These should be assertEqual but for some reason,
# occasionally the accuracies are lower (Running this test 500 times, these
# asserts failed twice). Investigate this problem.
self
.
assertLessEqual
(
last_output
.
top_1_accuracy
,
0.1
)
self
.
assertLessEqual
(
last_output
.
top_5_accuracy
,
0.5
)
def
testParameterServer
(
self
):
params
=
test_util
.
get_params
(
'testParameterServer'
)
self
.
_train_and_eval_local
(
params
)
def
testParameterServerStaged
(
self
):
params
=
test_util
.
get_params
(
'testParameterServerStaged'
).
_replace
(
staged_vars
=
True
)
self
.
_train_and_eval_local
(
params
)
def
testReplicated
(
self
):
params
=
test_util
.
get_params
(
'testReplicated'
).
_replace
(
variable_update
=
'replicated'
)
self
.
_train_and_eval_local
(
params
)
def
testIndependent
(
self
):
params
=
test_util
.
get_params
(
'testIndependent'
).
_replace
(
variable_update
=
'independent'
)
self
.
_train_and_eval_local
(
params
)
def
testForwardOnly
(
self
):
params
=
test_util
.
get_params
(
'testForwardOnly'
).
_replace
(
forward_only
=
True
)
# Evaluation is not supported with --forward_only, so we set skip='eval'.
self
.
_train_and_eval_local
(
params
,
skip
=
'eval'
)
def
testForwardOnlyAndFreeze
(
self
):
params
=
test_util
.
get_params
(
'testForwardOnlyAndFreeze'
).
_replace
(
forward_only
=
True
,
freeze_when_forward_only
=
True
,
train_dir
=
None
)
# Training is not supported with --freeze_when_forward_only.
self
.
_train_and_eval_local
(
params
,
skip
=
'eval_and_train_from_checkpoint'
)
def
testNoDistortions
(
self
):
params
=
test_util
.
get_params
(
'testNoDistortions'
).
_replace
(
distortions
=
False
)
self
.
_train_and_eval_local
(
params
)
def
testCpuAsLocalParamDevice
(
self
):
params
=
test_util
.
get_params
(
'testCpuAsLocalParamDevice'
).
_replace
(
local_parameter_device
=
'cpu'
)
self
.
_train_and_eval_local
(
params
)
def
testNHWC
(
self
):
params
=
test_util
.
get_params
(
'testNHWC'
).
_replace
(
data_format
=
'NHWC'
)
self
.
_train_and_eval_local
(
params
)
def
testCpuAsDevice
(
self
):
params
=
test_util
.
get_params
(
'testCpuAsDevice'
).
_replace
(
device
=
'cpu'
,
data_format
=
'NHWC'
)
# NHWC required when --device=cpu
self
.
_train_and_eval_local
(
params
)
def
testMomentumParameterServer
(
self
):
params
=
test_util
.
get_params
(
'testMomentumParameterServer'
).
_replace
(
optimizer
=
'momentum'
,
momentum
=
0.8
)
self
.
_train_and_eval_local
(
params
)
def
testRmspropReplicated
(
self
):
params
=
test_util
.
get_params
(
'testRmspropReplicated'
).
_replace
(
variable_update
=
'replicated'
,
optimizer
=
'rmsprop'
,
rmsprop_decay
=
0.8
,
rmsprop_momentum
=
0.6
,
rmsprop_epsilon
=
0.7
,
init_learning_rate
=
0.01
)
self
.
_train_and_eval_local
(
params
)
def
testBatchGroupSize
(
self
):
params
=
test_util
.
get_params
(
'testBatchGroupSize'
).
_replace
(
batch_group_size
=
4
,
num_batches
=
100
,
num_warmup_batches
=
5
)
self
.
_train_and_eval_local
(
params
)
def
testGradientClip
(
self
):
params
=
test_util
.
get_params
(
'testGradientClip'
).
_replace
(
gradient_clip
=
100.0
)
self
.
_train_and_eval_local
(
params
)
def
testWeightDecay
(
self
):
params
=
test_util
.
get_params
(
'testWeightDecay'
).
_replace
(
weight_decay
=
0.0001
)
self
.
_train_and_eval_local
(
params
)
def
testNoLayers
(
self
):
params
=
test_util
.
get_params
(
'testNoLayers'
).
_replace
(
use_tf_layers
=
False
)
self
.
_train_and_eval_local
(
params
)
def
testSaveModelSteps
(
self
):
params
=
test_util
.
get_params
(
'testSaveModelSteps'
).
_replace
(
save_model_steps
=
2
,
num_warmup_batches
=
0
,
num_batches
=
10
,
max_ckpts_to_keep
=
3
)
self
.
_train_and_eval_local
(
params
)
for
i
in
range
(
1
,
20
+
1
):
# We train for 20 steps, since self._train_and_eval_local() does two
# training runs of 10 steps each. We save a checkpoint every 2 steps and
# keep the last 3 checkpoints, so at the end, we should have checkpoints
# for steps 16, 18, and 20.
matches
=
glob
.
glob
(
os
.
path
.
join
(
params
.
train_dir
,
'model.ckpt-{}.*'
.
format
(
i
)))
if
i
in
(
16
,
18
,
20
):
self
.
assertTrue
(
matches
)
else
:
self
.
assertFalse
(
matches
)
def
testFp16WithFp32Vars
(
self
):
params
=
test_util
.
get_params
(
'testFp16WithFp32Vars'
).
_replace
(
use_fp16
=
True
,
fp16_vars
=
False
,
fp16_loss_scale
=
1.
)
self
.
_train_and_eval_local
(
params
)
def
testFp16WithFp16Vars
(
self
):
params
=
test_util
.
get_params
(
'testFp16WithFp16Vars'
).
_replace
(
use_fp16
=
True
,
fp16_vars
=
True
)
self
.
_train_and_eval_local
(
params
)
def
testXlaCompile
(
self
):
params
=
test_util
.
get_params
(
'testXlaCompile'
).
_replace
(
xla_compile
=
True
)
self
.
_train_and_eval_local
(
params
)
@
unittest
.
skip
(
'Fails for unknown reason'
)
def
testXlaCompileWithFp16
(
self
):
params
=
test_util
.
get_params
(
'testXlaCompileWithFp16'
).
_replace
(
use_fp16
=
True
,
xla_compile
=
True
)
self
.
_train_and_eval_local
(
params
)
def
testGradientRepacking
(
self
):
params
=
test_util
.
get_params
(
'testGradientRepacking1'
).
_replace
(
gradient_repacking
=
2
)
self
.
_train_and_eval_local
(
params
,
skip
=
'eval_and_train_from_checkpoint'
)
params
=
test_util
.
get_params
(
'testGradientRepacking2'
).
_replace
(
gradient_repacking
=
2
,
use_fp16
=
True
)
self
.
_train_and_eval_local
(
params
,
skip
=
'eval_and_train_from_checkpoint'
)
def
testTraceFileChromeTraceFormat
(
self
):
trace_file
=
os
.
path
.
join
(
self
.
get_temp_dir
(),
'testTraceFileChromeTraceFormat_tracefile'
)
params
=
test_util
.
get_params
(
'testTraceFileChromeTraceFormat'
).
_replace
(
trace_file
=
trace_file
,
use_chrome_trace_format
=
True
)
self
.
_train_and_eval_local
(
params
)
self
.
assertGreater
(
os
.
stat
(
trace_file
).
st_size
,
0
)
def
testTraceFileStepStatsProto
(
self
):
trace_file
=
os
.
path
.
join
(
self
.
get_temp_dir
(),
'testTraceFileStepStatsProto_tracefile'
)
params
=
test_util
.
get_params
(
'testTraceFileStepStatsProto'
).
_replace
(
trace_file
=
trace_file
,
use_chrome_trace_format
=
False
)
self
.
_train_and_eval_local
(
params
)
self
.
assertGreater
(
os
.
stat
(
trace_file
).
st_size
,
0
)
with
open
(
trace_file
)
as
f
:
step_stats
=
step_stats_pb2
.
StepStats
()
# The following statement should not raise an exception.
contents
=
f
.
read
()
text_format
.
Merge
(
contents
,
step_stats
)
def
testTfprofFile
(
self
):
tfprof_file
=
os
.
path
.
join
(
self
.
get_temp_dir
(),
'testTfprofFile_tfproffile'
)
params
=
test_util
.
get_params
(
'testTfprofFile'
).
_replace
(
tfprof_file
=
tfprof_file
)
self
.
_train_and_eval_local
(
params
,
skip
=
'eval_and_train_from_checkpoint'
)
self
.
assertGreater
(
os
.
stat
(
tfprof_file
).
st_size
,
0
)
with
open
(
tfprof_file
,
'rb'
)
as
f
:
profile_proto
=
tfprof_log_pb2
.
ProfileProto
()
# The following statement should not raise an exception.
profile_proto
.
ParseFromString
(
f
.
read
())
@
unittest
.
skip
(
'Fails for unknown reason'
)
def
testMoveTrainDir
(
self
):
params
=
test_util
.
get_params
(
'testMoveTrainDir'
)
self
.
_train_and_eval_local
(
params
)
new_train_dir
=
params
.
train_dir
+
'_moved'
os
.
rename
(
params
.
train_dir
,
new_train_dir
)
params
=
params
.
_replace
(
train_dir
=
new_train_dir
,
eval
=
True
)
self
.
_run_benchmark_cnn_with_black_and_white_images
(
params
)
@
mock
.
patch
(
'tensorflow.compat.v1.train.Saver'
)
@
mock
.
patch
(
'benchmark_cnn._get_checkpoint_to_load'
)
def
testLoadCheckpoint
(
self
,
mock_checkpoint_to_load
,
mock_saver
):
"""Tests load checkpoint with full path to checkpoint."""
expected_checkpoint
=
'/path/to/checkpoints/model.ckpt-1243'
mock_checkpoint_to_load
.
return_value
=
expected_checkpoint
global_batch
=
benchmark_cnn
.
load_checkpoint
(
mock_saver
,
None
,
expected_checkpoint
)
self
.
assertEqual
(
global_batch
,
1243
)
def
testGetCheckpointToLoadFullPath
(
self
):
"""Tests passing full path."""
ckpt_path
=
'/foo/bar/model.ckpt-189'
full_path
=
benchmark_cnn
.
_get_checkpoint_to_load
(
ckpt_path
)
self
.
assertEqual
(
full_path
,
ckpt_path
)
def
testGetCheckpointToLoadException
(
self
):
"""Tests exception for directory without a checkpoint."""
ckpt_path
=
'/foo/bar/checkpoints'
self
.
assertRaises
(
benchmark_cnn
.
CheckpointNotFoundException
,
benchmark_cnn
.
_get_checkpoint_to_load
,
ckpt_path
)
@
mock
.
patch
(
'tensorflow.compat.v1.train.get_checkpoint_state'
)
def
testGetCheckpointToLoad
(
self
,
mock_checkpoint_state
):
"""Tests passing path to checkpoint folder."""
expected_checkpoint
=
'/path/to/checkpoints/model.ckpt-1243'
mock_checkpoint_state
.
return_value
=
mock
.
Mock
(
model_checkpoint_path
=
expected_checkpoint
)
ckpt_path
=
'/path/to/checkpoints/'
full_path
=
benchmark_cnn
.
_get_checkpoint_to_load
(
ckpt_path
)
self
.
assertEqual
(
full_path
,
expected_checkpoint
)
def
testImagenetPreprocessor
(
self
):
imagenet_dir
=
os
.
path
.
join
(
platforms_util
.
get_test_data_dir
(),
'fake_tf_record_data'
)
params
=
test_util
.
get_params
(
'testImagenetPreprocessor'
).
_replace
(
data_dir
=
imagenet_dir
,
data_name
=
'imagenet'
)
self
.
_train_and_eval_local
(
params
,
use_test_preprocessor
=
False
)
def
testImagenetPreprocessorNoDistortions
(
self
):
imagenet_dir
=
os
.
path
.
join
(
platforms_util
.
get_test_data_dir
(),
'fake_tf_record_data'
)
params
=
test_util
.
get_params
(
'testImagenetPreprocessorNoDistortions'
).
_replace
(
data_dir
=
imagenet_dir
,
data_name
=
'imagenet'
,
distortions
=
False
)
self
.
_train_and_eval_local
(
params
,
use_test_preprocessor
=
False
)
def
testImagenetPreprocessorVerboseSummary
(
self
):
imagenet_dir
=
os
.
path
.
join
(
platforms_util
.
get_test_data_dir
(),
'fake_tf_record_data'
)
params
=
test_util
.
get_params
(
'testImagenetPreprocessorVerboseSummary'
).
_replace
(
data_dir
=
imagenet_dir
,
data_name
=
'imagenet'
,
distortions
=
False
,
summary_verbosity
=
2
)
self
.
_train_and_eval_local
(
params
,
use_test_preprocessor
=
False
)
def
testCifar10SyntheticData
(
self
):
params
=
test_util
.
get_params
(
'testCifar10SyntheticData'
).
_replace
(
data_name
=
'cifar10'
)
self
.
_train_and_eval_local
(
params
)
def
testShiftRatio
(
self
):
test_util
.
monkey_patch_base_cluster_manager
()
params
=
benchmark_cnn
.
make_params
(
data_name
=
'imagenet'
,
data_dir
=
os
.
path
.
join
(
platforms_util
.
get_test_data_dir
(),
'fake_tf_record_data'
),
job_name
=
'worker'
,
worker_hosts
=
'w1,w2,w3,w4'
,
ps_hosts
=
'p1'
,
task_index
=
0
)
self
.
assertEqual
(
benchmark_cnn
.
BenchmarkCNN
(
params
).
input_preprocessor
.
shift_ratio
,
0.0
)
params
=
params
.
_replace
(
task_index
=
3
)
self
.
assertEqual
(
benchmark_cnn
.
BenchmarkCNN
(
params
).
input_preprocessor
.
shift_ratio
,
0.75
)
  def testDistributedReplicatedSavableVars(self):
    test_util.monkey_patch_base_cluster_manager()
    params = benchmark_cnn.make_params(
        variable_update='distributed_replicated',
        model='inception4',
        data_name='imagenet',
        data_dir=os.path.join(platforms_util.get_test_data_dir(),
                              'fake_tf_record_data'),
        job_name='worker',
        worker_hosts='w1,w2,w3,w4',
        ps_hosts='p1',
        datasets_use_prefetch=False)
    bench = benchmark_cnn.BenchmarkCNN(params)
    with tf.Graph().as_default():
      bench._build_model()
      savable_vars = bench.variable_mgr.savable_variables()
      # Assert all global variables are in savable_vars
      for v in tf.global_variables():
        if not v.name.startswith(
            variable_mgr_util.PS_SHADOW_VAR_PREFIX + '/v0'):
          self.assertEqual(v.name, 'global_step:0')
        name = bench.variable_mgr._strip_port(v.name)
        if name.startswith(variable_mgr_util.PS_SHADOW_VAR_PREFIX):
          name = name[len(variable_mgr_util.PS_SHADOW_VAR_PREFIX + '/'):]
        self.assertIn(name, savable_vars)
        self.assertIn(savable_vars[name], tf.global_variables())
      # Assert all local variables on the first tower are in savable_vars
      for v in tf.local_variables():
        if v.name.startswith('v0/'):
          name = bench.variable_mgr._strip_port(v.name)
          self.assertIn(name, savable_vars)

  def _test_preprocessing_eval(self, image_height, image_width, output_height,
                               output_width):
    image = tf.fill((image_height, image_width, 3),
                    tf.constant(128, dtype=tf.uint8))
    params = benchmark_cnn.make_params()
    new_image = preprocessing.eval_image(image, output_height, output_width, 0,
                                         'bilinear', params.summary_verbosity)
    with self.test_session() as sess:
      new_image_value = sess.run(new_image)
    self.assertAllEqual(new_image_value,
                        np.full((output_height, output_width, 3), 128,
                                dtype=np.uint8))

  def testPreprocessingEval(self):
    self._test_preprocessing_eval(10, 10, 4, 4)
    self._test_preprocessing_eval(4, 4, 10, 10)
    self._test_preprocessing_eval(1, 100, 100, 1)
    self._test_preprocessing_eval(100, 1, 1, 100)
    self._test_preprocessing_eval(1, 100, 1, 100)

  def _test_preprocessing_traing(self, image_buf, image_color, output_height,
                                 output_width, bbox, batch_position,
                                 resize_method, distortions, summary_verbosity,
                                 fuse_decode_and_crop):
    new_image = preprocessing.train_image(
        image_buf,
        output_height,
        output_width,
        bbox,
        batch_position,
        resize_method,
        distortions,
        summary_verbosity=summary_verbosity,
        fuse_decode_and_crop=fuse_decode_and_crop)
    self.assertEqual(new_image.shape, [output_height, output_width, 3])
    with self.test_session(use_gpu=True) as sess:
      new_image_value = sess.run(new_image)
    self.assertAllClose(
        new_image_value,
        np.full([output_height, output_width, 3], image_color,
                dtype=np.float32),
        atol=50., rtol=0.)

  def testPreprocessingTrain(self):
    test_data_dir = os.path.join(platforms_util.get_test_data_dir(), 'images')
    black_file = os.path.join(test_data_dir, 'black_image.jpg')
    with open(black_file, 'rb') as f:
      black_jpg_buffer = f.read()
    white_file = os.path.join(test_data_dir, 'white_image.jpg')
    with open(white_file, 'rb') as f:
      white_jpg_buffer = f.read()
    bbox = tf.zeros((1, 0, 4), dtype=tf.float32)
    batch_position = 0
    # Each size config is (output_height, output_width, resize_method)
    size_configs = [(100, 100, 'round_robin'), (150, 10, 'bilinear'),
                    (10, 150, 'nearest')]
    # Each image config is (image_buf, image_color)
    image_configs = [(white_jpg_buffer, 255), (black_jpg_buffer, 0)]
    for (image_buf, image_color) in image_configs:
      for output_height, output_width, resize_method in size_configs:
        for distortions in [True, False]:
          for summary_verbosity in [0, 2]:
            for fuse_decode_and_crop in [True, False]:
              self._test_preprocessing_traing(
                  image_buf, image_color, output_height, output_width, bbox,
                  batch_position, resize_method, distortions,
                  summary_verbosity, fuse_decode_and_crop)

  def _test_learning_rate(self, params, global_step_to_expected_learning_rate):
    self.longMessage = True  # pylint: disable=invalid-name
    bench = benchmark_cnn.BenchmarkCNN(params)
    with tf.Graph().as_default() as graph:
      bench._build_model()
      global_step = graph.get_tensor_by_name('global_step:0')
      learning_rate = graph.get_tensor_by_name('learning_rate_tensor:0')
      with self.test_session(graph=graph, use_gpu=True) as sess:
        items = global_step_to_expected_learning_rate.items()
        for global_step_val, expected_learning_rate in items:
          self.assertAlmostEqual(
              sess.run(learning_rate, {global_step: global_step_val}),
              expected_learning_rate,
              msg='at global_step:{}'.format(global_step_val))

  def testLearningRateModelSpecificResNet(self):
    params = benchmark_cnn.make_params(model='resnet50',
                                       batch_size=256,
                                       variable_update='parameter_server',
                                       num_gpus=1)
    self._test_learning_rate(params, {
        0: 0,
        150136: 0.128,
        150137: 0.0128,
        300273: 0.0128,
        300274: 0.00128,
        10000000: 0.0000128
    })

  def testLearningRateUserProvidedInitLr(self):
    params = benchmark_cnn.make_params(model='resnet50',
                                       batch_size=256,
                                       variable_update='replicated',
                                       init_learning_rate=1.)
    self._test_learning_rate(params, {
        0: 1.,
        10000000: 1.
    })

  def testLearningRateUserProvidedInitLrAndWarmup(self):
    params = benchmark_cnn.make_params(model='resnet50',
                                       batch_size=256,
                                       variable_update='replicated',
                                       init_learning_rate=1.,
                                       num_learning_rate_warmup_epochs=5)
    self._test_learning_rate(params, {
        0: 0.,
        12511: 0.5,
        25022: 1.,
        10000000: 1.
    })

  def testLearningRateUserProvidedDecayInfo(self):
    params = benchmark_cnn.make_params(model='resnet50',
                                       init_learning_rate=1.,
                                       learning_rate_decay_factor=0.5,
                                       num_epochs_per_decay=2,
                                       minimum_learning_rate=0.3750,
                                       batch_size=32)
    self._test_learning_rate(params, {
        0: 1.,
        80071: 1.,
        80072: 0.5,
        160143: 0.5,
        160144: 0.375,
        10000000: 0.375
    })
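  # Illustrative note (not part of the original test): the step boundaries
  # above are consistent with a staircase decay whose period is two ImageNet
  # epochs measured in steps, e.g. int(2 * 1281167 / 32) == 80072, so the
  # first decayed step is 80072 and after a second decay the rate is clamped
  # at minimum_learning_rate=0.375.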
  def testLearningRateUserProvidedZeroDecay(self):
    params = benchmark_cnn.make_params(model='resnet50',
                                       num_learning_rate_warmup_epochs=0,
                                       learning_rate_decay_factor=0.5,
                                       num_epochs_per_decay=0,
                                       minimum_learning_rate=0.3750,
                                       batch_size=32)
    with self.assertRaises(ValueError):
      with tf.Graph().as_default():
        # This will fail because params.learning_rate_decay_factor cannot be
        # nonzero if params.num_epochs_per_decay is zero.
        benchmark_cnn.BenchmarkCNN(params)._build_model()

  def testLearningRateUserProvidedSchedule(self):
    params = benchmark_cnn.make_params(
        model='trivial',
        batch_size=32,
        piecewise_learning_rate_schedule='1;3;.1;5;.01')
    self._test_learning_rate(params, {
        0: 1.,
        120108: 1.,
        120109: 0.1,
        200181: 0.1,
        200182: 0.01,
        100000000: 0.01
    })

  def testNumBatchesAndEpochs(self):
    params = benchmark_cnn.make_params()
    batches, epochs = benchmark_cnn.get_num_batches_and_epochs(params, 10, 100)
    self.assertEqual(batches, benchmark_cnn._DEFAULT_NUM_BATCHES)
    self.assertAlmostEqual(epochs,
                           float(benchmark_cnn._DEFAULT_NUM_BATCHES) / 10)
    params = benchmark_cnn.make_params(num_batches=21)
    batches, epochs = benchmark_cnn.get_num_batches_and_epochs(params, 25, 50)
    self.assertEqual(batches, 21)
    self.assertAlmostEqual(epochs, 10.5)
    params = benchmark_cnn.make_params(num_epochs=3)
    batches, epochs = benchmark_cnn.get_num_batches_and_epochs(params, 2, 3)
    self.assertEqual(batches, 5)
    self.assertAlmostEqual(epochs, 10. / 3.)
    params = benchmark_cnn.make_params(num_epochs=4)
    batches, epochs = benchmark_cnn.get_num_batches_and_epochs(params, 2, 3)
    self.assertEqual(batches, 6)
    self.assertAlmostEqual(epochs, 4)
    with self.assertRaises(ValueError):
      params = benchmark_cnn.make_params(num_batches=100, num_epochs=100)
      benchmark_cnn.get_num_batches_and_epochs(params, 1, 1)
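  # Illustrative note (not part of the original test): with num_epochs=3, a
  # batch size of 2 and 3 examples per epoch, the requested 3 epochs need
  # 3 * 3 / 2 = 4.5 steps, which rounds up to batches=5; the realized epoch
  # count is then 5 * 2 / 3 = 10/3, matching the assertions above.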
  def _testEvalDuringTraining(self, params, expected_num_eval_batches_found):
    # The idea of this test is that all train images are black and all eval
    # images are white. We pass the images through the TestModel, and ensure
    # the outputs are as expected.
    batch_size = params.batch_size
    eval_batch_size = params.eval_batch_size or params.batch_size

    class TestModel(test_util.TestCNNModel):

      def __init__(self):
        super(TestModel, self).__init__()
        self.depth = 3

      def add_inference(self, cnn):
        if cnn.phase_train:
          # This will allow us to test that 100 is only added during training
          # and not during eval.
          cnn.top_layer += 100
          assert cnn.top_layer.shape[0] == batch_size
        else:
          assert cnn.top_layer.shape[0] == eval_batch_size
        # Reduce the image to a single number. The number should be (-1 + 100)
        # during training and 1 during testing.
        cnn.top_layer = tf.reshape(cnn.top_layer,
                                   (cnn.top_layer.shape[0], -1))
        cnn.top_layer = tf.reduce_mean(cnn.top_layer, axis=1)
        cnn.top_layer = tf.reshape(cnn.top_layer,
                                   (cnn.top_layer.shape[0], 1, 1, 1))
        cnn.top_size = 1
        trainable_vars = tf.trainable_variables()
        # The super method will compute image*A*B, where A=1 and B=2.
        super(TestModel, self).add_inference(cnn)
        if not cnn.phase_train:
          # Assert no new variables were added, since they should be reused
          # from training.
          assert len(trainable_vars) == len(tf.trainable_variables())

    model = TestModel()
    dataset = datasets.ImagenetDataset(params.data_dir)
    logs = []
    bench_cnn = benchmark_cnn.BenchmarkCNN(params, model=model,
                                           dataset=dataset)
    with test_util.monkey_patch(benchmark_cnn,
                                log_fn=test_util.print_and_add_to_list(logs)):
      bench_cnn.run()
    training_outputs = test_util.get_training_outputs_from_logs(
        logs, print_training_accuracy=False)
    self.assertEqual(len(training_outputs), params.num_batches)
    expected_training_output = (-1 + 100) * 1 * 2
    for training_output in training_outputs:
      self.assertEqual(training_output.loss, expected_training_output)
    eval_outputs = test_util.get_evaluation_outputs_from_logs(logs)
    self.assertTrue(eval_outputs)
    expected_eval_output = 1 * 1 * 2
    for eval_output in eval_outputs:
      self.assertEqual(eval_output.top_1_accuracy, expected_eval_output)
      self.assertEqual(eval_output.top_5_accuracy, expected_eval_output)
    num_eval_batches_found = 0
    eval_batch_regex = re.compile(r'^\d+\t[0-9.]+ examples/sec$')
    for log in logs:
      if eval_batch_regex.match(log):
        num_eval_batches_found += 1
    self.assertEqual(num_eval_batches_found, expected_num_eval_batches_found)

  def testEvalDuringTraining(self):
    data_dir = test_util.create_black_and_white_images()
    base_params = test_util.get_params('testEvalDuringTraining')
    train_dir = base_params.train_dir
    base_params = base_params._replace(
        train_dir=None, print_training_accuracy=False, num_warmup_batches=0,
        num_batches=7, num_eval_batches=2, display_every=1,
        init_learning_rate=0, weight_decay=0, distortions=False,
        data_dir=data_dir)
    expected_num_eval_batches_found = (
        base_params.num_eval_batches * (base_params.num_batches // 2 + 1))

    # Test --eval_during_training_every_n_steps
    self._testEvalDuringTraining(
        base_params._replace(eval_during_training_every_n_steps=2,
                             variable_update='parameter_server'),
        expected_num_eval_batches_found)
    self._testEvalDuringTraining(
        base_params._replace(eval_during_training_every_n_steps=2,
                             variable_update='replicated'),
        expected_num_eval_batches_found)
    self._testEvalDuringTraining(
        base_params._replace(eval_during_training_every_n_steps=2,
                             variable_update='replicated',
                             summary_verbosity=2,
                             save_summaries_steps=2,
                             datasets_use_prefetch=False),
        expected_num_eval_batches_found)
    self._testEvalDuringTraining(
        base_params._replace(eval_during_training_every_n_steps=2,
                             variable_update='replicated',
                             use_fp16=True,
                             train_dir=train_dir,
                             eval_batch_size=base_params.batch_size + 2),
        expected_num_eval_batches_found)

    # Test --eval_during_training_every_n_epochs
    every_n_epochs = (2 * base_params.batch_size * base_params.num_gpus /
                      datasets.IMAGENET_NUM_TRAIN_IMAGES)
    self._testEvalDuringTraining(
        base_params._replace(
            eval_during_training_every_n_epochs=every_n_epochs,
            variable_update='replicated'),
        expected_num_eval_batches_found)

    # Test --eval_during_training_at_specified_steps
    list_steps = [2, 3, 5, 7, 1000]
    num_eval_steps = 1 + sum(1 for step in list_steps
                             if step < base_params.num_batches)
    expected_num_eval_batches_found = (
        base_params.num_eval_batches * num_eval_steps)
    self._testEvalDuringTraining(
        base_params._replace(
            eval_during_training_at_specified_steps=list_steps,
            variable_update='replicated'),
        expected_num_eval_batches_found)

    # Test --eval_during_training_at_specified_epochs
    list_epochs = [(step * base_params.batch_size * base_params.num_gpus /
                    datasets.IMAGENET_NUM_TRAIN_IMAGES)
                   for step in list_steps]
    self._testEvalDuringTraining(
        base_params._replace(
            eval_during_training_at_specified_epochs=list_epochs,
            variable_update='replicated'),
        expected_num_eval_batches_found)

    # Test --eval_during_training_every_n_steps runs with synthetic data.
    params = base_params._replace(variable_update='replicated', data_dir=None,
                                  eval_during_training_every_n_steps=2,
                                  num_batches=2)
    benchmark_cnn.BenchmarkCNN(params).run()
  def testEvalDuringTrainingNumEpochs(self):
    params = benchmark_cnn.make_params(
        batch_size=1, eval_batch_size=2, eval_during_training_every_n_steps=1,
        num_batches=30, num_eval_epochs=100 / datasets.IMAGENET_NUM_VAL_IMAGES)
    bench_cnn = benchmark_cnn.BenchmarkCNN(params)
    self.assertEqual(bench_cnn.num_batches, 30)
    self.assertAlmostEqual(bench_cnn.num_epochs,
                           30 / datasets.IMAGENET_NUM_TRAIN_IMAGES)
    self.assertAlmostEqual(bench_cnn.num_eval_batches, 50)
    self.assertAlmostEqual(bench_cnn.num_eval_epochs,
                           100 / datasets.IMAGENET_NUM_VAL_IMAGES)

  def testEarlyStopping(self):
    params = benchmark_cnn.make_params(
        batch_size=2,
        display_every=1,
        num_batches=100,
        eval_during_training_every_n_steps=2,
        stop_at_top_1_accuracy=0.4,
    )
    with mock.patch.object(benchmark_cnn.BenchmarkCNN, '_eval_once',
                           side_effect=[(0.1, 0.1), (0.5, 0.5),
                                        (0.2, 0.2)]) as mock_eval_once:
      logs = []
      bench_cnn = benchmark_cnn.BenchmarkCNN(params)
      with test_util.monkey_patch(
          benchmark_cnn, log_fn=test_util.print_and_add_to_list(logs)):
        bench_cnn.run()
      training_outputs = test_util.get_training_outputs_from_logs(
          logs, print_training_accuracy=False)
      # We should stop after the second evaluation, and we evaluate every 2
      # steps. So there should be 2 * 2 = 4 training outputs.
      self.assertEqual(len(training_outputs), 4)
      self.assertEqual(mock_eval_once.call_count, 2)

  def testOutOfRangeErrorsAreNotIgnored(self):
    error_msg = 'Fake OutOfRangeError error message'
    with mock.patch.object(benchmark_cnn.BenchmarkCNN,
                           'benchmark_with_session',
                           side_effect=tf.errors.OutOfRangeError(
                               None, None, error_msg)):
      with self.assertRaisesRegex(RuntimeError, error_msg):
        benchmark_cnn.BenchmarkCNN(benchmark_cnn.make_params()).run()

  def testInvalidFlags(self):
    params = benchmark_cnn.make_params(device='cpu', data_format='NCHW')
    with self.assertRaises(ValueError):
      benchmark_cnn.BenchmarkCNN(params)

    params = benchmark_cnn.make_params(use_fp16=True, fp16_vars=True,
                                       variable_update='replicated',
                                       all_reduce_spec='nccl')
    with self.assertRaises(ValueError):
      benchmark_cnn.BenchmarkCNN(params)

    # Automatic loss scaling is only supported for 'replicated', 'ps',
    # and 'independent' variable_updates.
    invalid_variable_updates = ['distributed_replicated',
                                'distributed_all_reduce']
    for variable_update in invalid_variable_updates:
      params = benchmark_cnn.make_params(use_fp16=True, fp16_vars=True,
                                         fp16_enable_auto_loss_scale=True,
                                         variable_update=variable_update)
      with self.assertRaises(ValueError):
        benchmark_cnn.BenchmarkCNN(params)

    # Automatic loss scaling is not supported for 'nccl'.
    params = benchmark_cnn.make_params(use_fp16=True, fp16_vars=True,
                                       fp16_enable_auto_loss_scale=True,
                                       all_reduce_spec='nccl')
    with self.assertRaises(ValueError):
      benchmark_cnn.BenchmarkCNN(params)

    # Automatic loss scaling is not supported for 'staged_vars'.
    params = benchmark_cnn.make_params(use_fp16=True, fp16_vars=True,
                                       fp16_enable_auto_loss_scale=True,
                                       staged_vars=True)
    with self.assertRaises(ValueError):
      benchmark_cnn.BenchmarkCNN(params)

  def testMakeParams(self):
    default_params = benchmark_cnn.make_params()
    self.assertEqual(default_params.model,
                     flags.param_specs['model'].default_value)
    params = benchmark_cnn.make_params(model='foo')
    self.assertEqual(params.model, 'foo')
    with self.assertRaises(ValueError):
      benchmark_cnn.make_params(job_name='foo')
    with self.assertRaises(ValueError):
      benchmark_cnn.make_params(gpu_memory_frac_for_testing=-1.)
class VariableUpdateTest(tf.test.TestCase):
  """Tests that variables are updated correctly.

  These tests use a very simple deterministic model. For example, some tests
  use the model

    loss = image * A * B

  where image is a 1x1 image (with a single scalar value), and A and B are
  scalar variables. Tests will run tf_cnn_benchmarks with such a model, on a
  sequence of scalar images, and assert that the losses are the correct value.
  Since the losses depend on the variables, this indirectly tests variables
  are updated correctly.
  """

  def setUp(self):
    super(VariableUpdateTest, self).setUp()
    _check_has_gpu()
    benchmark_cnn.setup(benchmark_cnn.make_params())

  def _get_benchmark_cnn_losses(self, inputs, params):
    """Returns the losses of BenchmarkCNN on the given inputs and params."""
    logs = []
    model = test_util.TestCNNModel()
    with test_util.monkey_patch(
        benchmark_cnn,
        log_fn=test_util.print_and_add_to_list(logs),
        LOSS_AND_ACCURACY_DIGITS_TO_SHOW=15):
      bench = benchmark_cnn.BenchmarkCNN(
          params, dataset=test_util.TestDataSet(), model=model)
      # The test model does not use labels when computing loss, so the label
      # values do not matter as long as it's the right shape.
      labels = np.array([1] * inputs.shape[0])
      bench.input_preprocessor.set_fake_data(inputs, labels)
      if bench.eval_input_preprocessor:
        bench.eval_input_preprocessor.set_fake_data(inputs, labels)
      bench.run()
    outputs = test_util.get_training_outputs_from_logs(
        logs, params.print_training_accuracy)
    return [x.loss for x in outputs]

  def _test_variable_update(self, params):
    """Tests variables are updated correctly when the given params are used.

    A BenchmarkCNN is created with a TestCNNModel, and is run with some scalar
    images. The losses are then compared with the losses obtained with
    TestCNNModel().manually_compute_losses()

    Args:
      params: a Params tuple used to create BenchmarkCNN.
    """
    inputs = test_util.get_fake_var_update_inputs()
    actual_losses = self._get_benchmark_cnn_losses(inputs, params)
    expected_losses, = test_util.TestCNNModel().manually_compute_losses(
        inputs, 1, params)
    rtol = 3e-2 if params.use_fp16 else 1e-5
    self.assertAllClose(actual_losses[:len(expected_losses)],
                        expected_losses, rtol=rtol, atol=0.)

  def _test_variable_updates(self, params,
                             var_updates=('parameter_server', 'replicated')):
    for var_update in var_updates:
      self._test_variable_update(params._replace(variable_update=var_update))

  def testDefault(self):
    params = test_util.get_var_update_params()
    self._test_variable_updates(params)

  # For some reason, this test doesn't always pass
  # def testCpuAsDevice(self):
  #   params = test_util.get_var_update_params()._replace(
  #       device='cpu',
  #       data_format='NHWC')  # NHWC required when --device=cpu
  #   self._test_variable_updates(params)

  def testCpuAsLocalParamDevice(self):
    params = test_util.get_var_update_params()._replace(
        local_parameter_device='cpu')
    self._test_variable_updates(params)

  def testFp16(self):
    params = test_util.get_var_update_params()._replace(use_fp16=True)
    self._test_variable_updates(params)

  def testMomentum(self):
    params = test_util.get_var_update_params()._replace(optimizer='momentum')
    self._test_variable_updates(params)

  def testRmsprop(self):
    params = test_util.get_var_update_params()._replace(optimizer='rmsprop')
    self._test_variable_updates(params)

  def testNoLayers(self):
    params = test_util.get_var_update_params()._replace(use_tf_layers=False)
    self._test_variable_updates(params)

  def testVariousAllReduceSpecs(self):
    # We do not test xring, because it requires all Variables to have at
    # least two elements.
    params = test_util.get_var_update_params()._replace(
        all_reduce_spec='pscpu')
    self._test_variable_updates(params, var_updates=('replicated',))
    params = params._replace(all_reduce_spec='psgpu')
    self._test_variable_updates(params, var_updates=('replicated',))
    # TODO(b/80125832): Enable nccl in tests
    # params = params._replace(all_reduce_spec='nccl',
    #                          compact_gradient_transfer=False)
    # self._test_variable_updates(params, var_updates=('replicated',))

  def testPrintBaseLoss(self):
    params = test_util.get_var_update_params()._replace(
        loss_type_to_report='base_loss')
    self._test_variable_updates(params)

  def testSingleL2LossOp(self):
    params = test_util.get_var_update_params()._replace(
        single_l2_loss_op=True)
    self._test_variable_updates(params)

  def testResourceVars(self):
    params = test_util.get_var_update_params()._replace(
        use_resource_vars=True)
    self._test_variable_updates(params)

  def testEvalDuringTrainingEveryNSteps(self):
    # TODO(reedwm): Test that the eval results are correct. This only tests
    # that training results are correct.
    params = test_util.get_var_update_params()._replace(
        eval_during_training_every_n_steps=1)
    self._test_variable_updates(params, var_updates=('replicated',))
class VariableMgrLocalReplicatedTest(tf.test.TestCase):

  def _test_grad_aggregation_with_var_mgr(self, variable_mgr, num_towers,
                                          num_vars, deferred_grads):
    tower_devices = ['/gpu:%d' % i for i in range(num_towers)]
    tower_grads = []
    expected_sums = [0.] * num_vars
    for i, tower_device in enumerate(tower_devices):
      with tf.device(tower_device):
        grad_vars = []
        for j in range(num_vars):
          n = num_towers * i + j
          grad_vars.append((tf.constant(n, dtype=tf.float32),
                            tf.Variable(n, dtype=tf.float32)))
          expected_sums[j] += n
      tower_grads.append(grad_vars)

    _, agg_device_grads = variable_mgr.preprocess_device_grads(tower_grads)
    expected_device_grads = []
    for i in range(num_towers):
      expected_grad_vars = []
      for j in range(num_vars):
        expected_grad_and_var = [expected_sums[j], num_towers * i + j]
        if isinstance(agg_device_grads[i][j], tuple):
          # agg_device_grads[i][j] can be a list or tuple.
          expected_grad_and_var = tuple(expected_grad_and_var)
        expected_grad_vars.append(expected_grad_and_var)
      if isinstance(agg_device_grads[i], tuple):
        # agg_device_grads[i] can be a list or tuple.
        expected_grad_vars = tuple(expected_grad_vars)
      expected_device_grads.append(expected_grad_vars)
    config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=config) as sess:
      sess.run(tf.initialize_all_variables())
      sess.run(variable_mgr._warmup_ops)
      if deferred_grads:
        # With deferred grads, the result of a session run is always the
        # summed gradients from the previous session run.
        sess.run(agg_device_grads)
        feed_dict = {g: 0 for grad_vars in tower_grads for g, _ in grad_vars}
        agg_device_grads_ = sess.run(agg_device_grads, feed_dict)
      else:
        agg_device_grads_ = sess.run(agg_device_grads)
    self.assertEqual(agg_device_grads_, expected_device_grads)

  def _test_grad_aggregation(self, params, num_vars):
    bench = benchmark_cnn.BenchmarkCNN(params)
    deferred_grads = (params.variable_consistency == 'relaxed')
    self._test_grad_aggregation_with_var_mgr(bench.variable_mgr,
                                             bench.num_gpus, num_vars,
                                             deferred_grads)

  def test_grad_aggregation(self):
    base_params = benchmark_cnn.make_params(num_gpus=10,
                                            variable_update='replicated',
                                            use_fp16=True)
    params = base_params
    self._test_grad_aggregation(params, 10)
    params = base_params._replace(gradient_repacking=3)
    self._test_grad_aggregation(params, 10)
    params = base_params._replace(variable_consistency='relaxed')
    self._test_grad_aggregation(params, 10)
    params = base_params._replace(compact_gradient_transfer=False)
    self._test_grad_aggregation(params, 10)
    params = base_params._replace(gradient_repacking=3,
                                  variable_consistency='relaxed')
    self._test_grad_aggregation(params, 10)
    params = base_params._replace(gradient_repacking=3,
                                  compact_gradient_transfer=False)
    self._test_grad_aggregation(params, 10)
    params = base_params._replace(variable_consistency='relaxed',
                                  compact_gradient_transfer=False)
    self._test_grad_aggregation(params, 10)
    params = base_params._replace(gradient_repacking=3,
                                  variable_consistency='relaxed',
                                  compact_gradient_transfer=False)
    self._test_grad_aggregation(params, 10)
    params = base_params._replace(num_gpus=8, hierarchical_copy=True)
    self._test_grad_aggregation(params, 10)
    # TODO(b/80125832): Enable nccl in tests
    # params = base_params._replace(all_reduce_spec='nccl',
    #                               compact_gradient_transfer=False,
    #                               # For some reason, this test freezes when
    #                               # num_gpus=10
    #                               num_gpus=8)
    # self._test_grad_aggregation(params, 10)
    params = base_params._replace(all_reduce_spec='pscpu')
    self._test_grad_aggregation(params, 10)
    params = base_params._replace(num_gpus=8,
                                  gradient_repacking=3,
                                  variable_consistency='relaxed',
                                  hierarchical_copy=True)
    self._test_grad_aggregation(params, 10)
    # TODO(b/80125832): Enable nccl in tests
    # params = base_params._replace(num_gpus=8,
    #                               gradient_repacking=3,
    #                               variable_consistency='relaxed',
    #                               all_reduce_spec='nccl',
    #                               compact_gradient_transfer=False)
    # self._test_grad_aggregation(params, 10)
    params = base_params._replace(gradient_repacking=3,
                                  variable_consistency='relaxed',
                                  all_reduce_spec='pscpu')
    self._test_grad_aggregation(params, 10)
    params = base_params._replace(gradient_repacking=3,
                                  variable_consistency='relaxed',
                                  all_reduce_spec='xring')
    self._test_grad_aggregation(params, 10)


if __name__ == '__main__':
  tf.disable_v2_behavior()
  tf.test.main()
TensorFlow2x/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/cnn_util.py
deleted 100644 → 0
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utilities for CNN benchmarks."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import sys
import threading

import numpy as np
import tensorflow.compat.v1 as tf


def tensorflow_version_tuple():
  v = tf.__version__
  major, minor, patch = v.split('.')
  return (int(major), int(minor), patch)


def tensorflow_version():
  vt = tensorflow_version_tuple()
  return vt[0] * 1000 + vt[1]
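
# Illustrative note (not part of the original file): tensorflow_version()
# packs the major and minor versions into one integer. For example, with
# tf.__version__ == '1.15.0', tensorflow_version_tuple() == (1, 15, '0')
# and tensorflow_version() == 1 * 1000 + 15 == 1015.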

def log_fn(log):
  print(log, flush=True)


def roll_numpy_batches(array, batch_size, shift_ratio):
  """Moves a proportion of batches from the start to the end of the array.

  This function moves a proportion of batches, specified by `shift_ratio`,
  from the start of the array to the end. The number of batches moved is
  rounded down to the nearest integer. For example,

  ```
  roll_numpy_batches([1, 2, 3, 4, 5, 6], 2, 0.34) == [3, 4, 5, 6, 1, 2]
  ```

  Args:
    array: A Numpy array whose first dimension is the batch dimension.
    batch_size: The batch size.
    shift_ratio: Proportion of batches to move from the start of the array
      to the end of the array.
  Returns:
    A new Numpy array, with a proportion of the batches at the start of
    `array` moved to the end.
  """
  num_items = array.shape[0]
  assert num_items % batch_size == 0
  num_batches = num_items // batch_size
  starting_batch = int(num_batches * shift_ratio)
  starting_item = starting_batch * batch_size
  return np.roll(array, -starting_item, axis=0)


# For Python 2.7 compatibility, we do not use threading.Barrier.
class Barrier(object):
  """Implements a lightweight Barrier.

  Useful for synchronizing a fixed number of threads at known synchronization
  points. Threads block on 'wait()' and simultaneously return once they have
  all made that call.

  # Implementation adopted from boost/thread/barrier.hpp
  """

  def __init__(self, parties):
    """Create a barrier, initialised to 'parties' threads."""
    self.cond = threading.Condition(threading.Lock())
    self.parties = parties
    # Indicates the number of waiting parties.
    self.waiting = 0
    # generation is needed to deal with spurious wakeups. If self.cond.wait()
    # wakes up for other reasons, generation will force it go back to wait().
    self.generation = 0
    self.broken = False

  def wait(self):
    """Wait for the barrier."""
    with self.cond:
      # Check if the barrier has been disabled or not.
      if self.broken:
        return
      gen = self.generation
      self.waiting += 1
      if self.waiting == self.parties:
        self.waiting = 0
        self.generation += 1
        self.cond.notify_all()
      # loop because of spurious wakeups
      while gen == self.generation:
        self.cond.wait()

  # TODO(huangyp): Remove this method once we find a way to know which step
  # is the last barrier.
  def abort(self):
    """Clear existing barrier and disable this barrier."""
    with self.cond:
      if self.waiting > 0:
        self.generation += 1
        self.cond.notify_all()
      self.broken = True
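
# Example usage (illustrative sketch, not part of the original file): two
# threads repeatedly meet at the barrier; neither can begin iteration i + 1
# until both have finished iteration i.
#
#   barrier = Barrier(2)
#
#   def worker(name):
#     for i in range(3):
#       print('%s finished step %d' % (name, i))
#       barrier.wait()
#
#   threads = [threading.Thread(target=worker, args=(n,)) for n in ('a', 'b')]
#   for t in threads:
#     t.start()
#   for t in threads:
#     t.join()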

class ImageProducer(object):
  """An image producer that puts images into a staging area periodically.

  This class is useful for periodically running a set of ops, `put_ops` on a
  different thread every `batch_group_size` steps.

  The notify_image_consumption() method is used to increment an internal
  counter so that every `batch_group_size` times it is called, `put_ops` is
  executed. A barrier is placed so that notify_image_consumption() will block
  until the previous call to `put_ops` has been executed.

  The start() method is used to start the thread that runs `put_ops`.

  The done() method waits until the last put_ops is executed and stops the
  thread.

  The purpose of this class is to fill an image input pipeline every
  `batch_group_size` steps. Suppose `put_ops` supplies `batch_group_size`
  images to the input pipeline when run, and that every step, 1 batch of
  images is consumed. Then, by calling notify_image_consumption() every step,
  images are supplied to the input pipeline at the same rate they are
  consumed.

  Example usage:
  ```
  put_ops = ...  # Enqueues `batch_group_size` batches to a StagingArea
  get_op = ...   # Dequeues 1 batch, and does some operations on it
  batch_group_size = 4
  with tf.Session() as sess:
    image_producer = cnn_util.ImageProducer(sess, put_op, batch_group_size)
    image_producer.start()
    for _ in range(100):
      sess.run(get_op)
      image_producer.notify_image_consumption()
  ```
  """

  def __init__(self, sess, put_ops, batch_group_size, use_python32_barrier):
    self.sess = sess
    self.num_gets = 0
    self.put_ops = put_ops
    self.batch_group_size = batch_group_size
    self.done_event = threading.Event()
    if (use_python32_barrier and
        sys.version_info[0] == 3 and sys.version_info[1] >= 2):
      self.put_barrier = threading.Barrier(2)
    else:
      self.put_barrier = Barrier(2)

  def _should_put(self):
    return (self.num_gets + 1) % self.batch_group_size == 0
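  # Illustrative note (not part of the original file): with
  # batch_group_size=4, _should_put() is True when num_gets is 3, 7, 11, ...,
  # i.e. a put is triggered once per group of 4 consumed batches.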
  def done(self):
    """Stop the image producer."""
    self.done_event.set()
    self.put_barrier.abort()
    self.thread.join()

  def start(self):
    """Start the image producer."""
    self.sess.run([self.put_ops])
    self.thread = threading.Thread(target=self._loop_producer)
    # Set daemon to true to allow Ctrl + C to terminate all threads.
    self.thread.daemon = True
    self.thread.start()

  def notify_image_consumption(self):
    """Increment the counter of image_producer by 1.

    This should only be called by the main thread that consumes images and
    runs the model computation. One batch of images should be consumed
    between calling start() and the first call to this method. Then, one
    batch of images should be consumed between any two successive calls to
    this method.
    """
    if self._should_put():
      self.put_barrier.wait()
    self.num_gets += 1

  def _loop_producer(self):
    while not self.done_event.isSet():
      self.sess.run([self.put_ops])
      self.put_barrier.wait()


class BaseClusterManager(object):
  """The manager for the cluster of servers running the benchmark."""

  def __init__(self, params):
    worker_hosts = params.worker_hosts.split(',')
    ps_hosts = params.ps_hosts.split(',') if params.ps_hosts else []
    cluster = {'worker': worker_hosts}
    if ps_hosts:
      cluster['ps'] = ps_hosts
    self._cluster_spec = tf.train.ClusterSpec(cluster)

  def get_target(self):
    """Returns a target to be passed to tf.Session()."""
    raise NotImplementedError('get_target must be implemented by subclass')

  def join_server(self):
    raise NotImplementedError('join must be implemented by subclass')

  def get_cluster_spec(self):
    return self._cluster_spec

  def num_workers(self):
    return len(self._cluster_spec.job_tasks('worker'))

  def num_ps(self):
    if 'ps' in self._cluster_spec.jobs:
      return len(self._cluster_spec.job_tasks('ps'))
    else:
      return 0


class GrpcClusterManager(BaseClusterManager):
  """A cluster manager for a cluster networked with gRPC."""

  def __init__(self, params, config_proto):
    super(GrpcClusterManager, self).__init__(params)
    if params.job_name == 'controller':
      self._target = 'grpc://%s' % self._cluster_spec.job_tasks('worker')[0]
    else:
      self._server = tf.train.Server(self._cluster_spec,
                                     job_name=params.job_name,
                                     task_index=params.task_index,
                                     config=config_proto,
                                     protocol=params.server_protocol)
      self._target = self._server.target

  def get_target(self):
    return self._target

  def join_server(self):
    return self._server.join()
TensorFlow2x/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/cnn_util_test.py
deleted 100644 → 0
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for tf_cnn_benchmarks.cnn_util."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import threading
import time

import tensorflow.compat.v1 as tf

import cnn_util


class CnnUtilBarrierTest(tf.test.TestCase):

  def testBarrier(self):
    num_tasks = 20
    num_waits = 4
    barrier = cnn_util.Barrier(num_tasks)
    threads = []
    sync_matrix = []
    for i in range(num_tasks):
      sync_times = [0] * num_waits
      thread = threading.Thread(
          target=self._run_task, args=(barrier, sync_times))
      thread.start()
      threads.append(thread)
      sync_matrix.append(sync_times)
    for thread in threads:
      thread.join()
    for wait_index in range(num_waits - 1):
      # Max of times at iteration i < min of times at iteration i + 1
      self.assertLessEqual(
          max([sync_matrix[i][wait_index] for i in range(num_tasks)]),
          min([sync_matrix[i][wait_index + 1] for i in range(num_tasks)]))

  def _run_task(self, barrier, sync_times):
    for wait_index in range(len(sync_times)):
      sync_times[wait_index] = time.time()
      barrier.wait()

  def testBarrierAbort(self):
    num_tasks = 2
    num_waits = 1
    sync_times = [0] * num_waits
    barrier = cnn_util.Barrier(num_tasks)
    thread = threading.Thread(
        target=self._run_task, args=(barrier, sync_times))
    thread.start()
    barrier.abort()
    # thread won't be blocked by done barrier.
    thread.join()


class ImageProducerTest(tf.test.TestCase):

  def _slow_tensorflow_op(self):
    """Returns a TensorFlow op that takes approximately 0.1s to complete."""
    def slow_func(v):
      time.sleep(0.1)
      return v
    return tf.py_func(slow_func, [tf.constant(0.)], tf.float32).op

  def _test_image_producer(self, batch_group_size, put_slower_than_get):
    # We use the variable x to simulate a staging area of images. x
    # represents the number of batches in the staging area.
    x = tf.Variable(0, dtype=tf.int32)
    if put_slower_than_get:
      put_dep = self._slow_tensorflow_op()
      get_dep = tf.no_op()
    else:
      put_dep = tf.no_op()
      get_dep = self._slow_tensorflow_op()
    with tf.control_dependencies([put_dep]):
      put_op = x.assign_add(batch_group_size, use_locking=True)
    with tf.control_dependencies([get_dep]):
      get_op = x.assign_sub(1, use_locking=True)
    with self.test_session() as sess:
      sess.run(tf.variables_initializer([x]))
      image_producer = cnn_util.ImageProducer(sess, put_op, batch_group_size,
                                              use_python32_barrier=False)
      image_producer.start()
      for _ in range(5 * batch_group_size):
        sess.run(get_op)
        # We assert x is nonnegative, to ensure image_producer never causes
        # an unstage op to block. We assert x is at most
        # 2 * batch_group_size, to ensure it doesn't use too much memory by
        # storing too many batches in the staging area.
        self.assertGreaterEqual(sess.run(x), 0)
        self.assertLessEqual(sess.run(x), 2 * batch_group_size)
        image_producer.notify_image_consumption()
        self.assertGreaterEqual(sess.run(x), 0)
        self.assertLessEqual(sess.run(x), 2 * batch_group_size)
      image_producer.done()
      time.sleep(0.1)
      self.assertGreaterEqual(sess.run(x), 0)
      self.assertLessEqual(sess.run(x), 2 * batch_group_size)

  def test_image_producer(self):
    self._test_image_producer(1, False)
    self._test_image_producer(1, True)
    self._test_image_producer(2, False)
    self._test_image_producer(2, True)
    self._test_image_producer(3, False)
    self._test_image_producer(3, True)
    self._test_image_producer(8, False)
    self._test_image_producer(8, True)


if __name__ == '__main__':
  tf.disable_v2_behavior()
  tf.test.main()
TensorFlow2x/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/coco_metric.py
deleted 100644 → 0
# Copyright 2018 Google. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""COCO-style evaluation metrics.

Forked from reference model implementation.

COCO API: github.com/cocodataset/cocoapi/
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import atexit
import tempfile

from absl import flags
import numpy as np
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import six
import tensorflow.compat.v1 as tf

import mlperf
import ssd_constants

FLAGS = flags.FLAGS

# https://github.com/cocodataset/cocoapi/issues/49
if six.PY3:
  import pycocotools.coco
  pycocotools.coco.unicode = str


def async_eval_runner(queue_predictions, queue_results, val_json_file):
  """Load intermediate eval results and get COCO metrics."""
  while True:
    message = queue_predictions.get()
    if message == 'STOP':  # poison pill
      break
    step, predictions = message
    results = compute_map(predictions, val_json_file)
    queue_results.put((step, results))


def compute_map(predictions, val_json_file):
  """Use model predictions to compute mAP.

  Args:
    predictions: a list of tuples returned by the decode_predictions
      function, each containing the following elements:
      image source_id, box coordinates in XYWH order, probability score,
      label
    val_json_file: path to COCO annotation file
  Returns:
    A dictionary that maps all COCO metrics (keys) to their values
  """
  if val_json_file.startswith("gs://"):
    _, local_val_json = tempfile.mkstemp(suffix=".json")
    tf.gfile.Remove(local_val_json)
    tf.gfile.Copy(val_json_file, local_val_json)
    atexit.register(tf.gfile.Remove, local_val_json)
  else:
    local_val_json = val_json_file

  cocoGt = COCO(local_val_json)
  cocoDt = cocoGt.loadRes(np.array(predictions))
  E = COCOeval(cocoGt, cocoDt, iouType='bbox')
  E.evaluate()
  E.accumulate()
  E.summarize()
  print("Current AP: {:.5f}".format(E.stats[0]))
  metric_names = ['AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'ARmax1',
                  'ARmax10', 'ARmax100', 'ARs', 'ARm', 'ARl']

  # Prefix with "COCO" to group in TensorBoard.
  return {"COCO/" + key: value for key, value in zip(metric_names, E.stats)}

def calc_iou(target, candidates):
  target_tiled = np.tile(target[np.newaxis, :], (candidates.shape[0], 1))
  # Left Top & Right Bottom
  lt = np.maximum(target_tiled[:, :2], candidates[:, :2])
  rb = np.minimum(target_tiled[:, 2:], candidates[:, 2:])

  delta = np.maximum(rb - lt, 0)
  intersect = delta[:, 0] * delta[:, 1]

  # Area of the target box and of each candidate box.
  delta1 = target_tiled[:, 2:] - target_tiled[:, :2]
  area1 = delta1[:, 0] * delta1[:, 1]
  delta2 = candidates[:, 2:] - candidates[:, :2]
  area2 = delta2[:, 0] * delta2[:, 1]

  iou = intersect / (area1 + area2 - intersect)
  return iou
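
# Worked example (illustrative sketch, not part of the original file): boxes
# are given as [min, min, max, max] coordinate pairs in the convention used
# above. A target covering [0, 0, 2, 2] and a candidate covering [1, 1, 3, 3]
# intersect in a 1x1 square, so IoU = 1 / (4 + 4 - 1) = 1/7 ≈ 0.1429:
#
#   calc_iou(np.array([0., 0., 2., 2.]),
#            np.array([[1., 1., 3., 3.]]))  # -> array([0.14285714])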

# TODO(haoyuzhang): Rewrite this NumPy based implementation to TensorFlow
# based implementation under ssd_model.py accuracy_function.
def decode_predictions(labels_and_predictions):
  """Decode predictions and remove unused boxes and labels."""
  predictions = []
  for example in labels_and_predictions:
    source_id = int(example[ssd_constants.SOURCE_ID])
    pred_box = example[ssd_constants.PRED_BOXES]
    pred_scores = example[ssd_constants.PRED_SCORES]

    locs, labels, probs = decode_single(
        pred_box, pred_scores, ssd_constants.OVERLAP_CRITERIA,
        ssd_constants.MAX_NUM_EVAL_BOXES, ssd_constants.MAX_NUM_EVAL_BOXES)

    raw_height, raw_width, _ = example[ssd_constants.RAW_SHAPE]
    for loc, label, prob in zip(locs, labels, probs):
      # Ordering convention differs, hence [1], [0] rather than [0], [1]
      x, y = loc[1] * raw_width, loc[0] * raw_height
      w, h = ((loc[3] - loc[1]) * raw_width,
              (loc[2] - loc[0]) * raw_height)
      predictions.append(
          [source_id, x, y, w, h, prob, ssd_constants.CLASS_INV_MAP[label]])

  mlperf.logger.log(key=mlperf.tags.NMS_THRESHOLD,
                    value=ssd_constants.OVERLAP_CRITERIA)
  mlperf.logger.log(key=mlperf.tags.NMS_MAX_DETECTIONS,
                    value=ssd_constants.MAX_NUM_EVAL_BOXES)
  return predictions


def decode_single(bboxes_in, scores_in, criteria, max_output, max_num=200):
  # Reference to https://github.com/amdegroot/ssd.pytorch
  bboxes_out = []
  scores_out = []
  labels_out = []

  for i, score in enumerate(np.split(scores_in, scores_in.shape[1], 1)):
    score = np.squeeze(score, 1)

    # skip background
    if i == 0:
      continue

    mask = score > ssd_constants.MIN_SCORE
    if not np.any(mask):
      continue

    bboxes, score = bboxes_in[mask, :], score[mask]

    score_idx_sorted = np.argsort(score)
    score_sorted = score[score_idx_sorted]
    score_idx_sorted = score_idx_sorted[-max_num:]
    candidates = []

    # perform non-maximum suppression
    while len(score_idx_sorted):
      idx = score_idx_sorted[-1]
      bboxes_sorted = bboxes[score_idx_sorted, :]
      bboxes_idx = bboxes[idx, :]
      iou = calc_iou(bboxes_idx, bboxes_sorted)

      score_idx_sorted = score_idx_sorted[iou < criteria]
      candidates.append(idx)

    bboxes_out.append(bboxes[candidates, :])
    scores_out.append(score[candidates])
    labels_out.extend([i] * len(candidates))

  if len(scores_out) == 0:
    tf.logging.info("No objects detected. Returning dummy values.")
    return (
        np.zeros(shape=(1, 4), dtype=np.float32),
        np.zeros(shape=(1,), dtype=np.int32),
        np.ones(shape=(1,), dtype=np.float32) * ssd_constants.DUMMY_SCORE,
    )

  bboxes_out = np.concatenate(bboxes_out, axis=0)
  scores_out = np.concatenate(scores_out, axis=0)
  labels_out = np.array(labels_out)

  max_ids = np.argsort(scores_out)[-max_output:]
  return bboxes_out[max_ids, :], labels_out[max_ids], scores_out[max_ids]
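
# Illustrative usage sketch (not part of the original file): per class,
# decode_single keeps the highest-scoring box and drops every box whose IoU
# with it reaches `criteria`, repeating until no candidates remain. Assuming
# ssd_constants.MIN_SCORE is below 0.8, two heavily-overlapping boxes for
# class 1 collapse to a single detection:
#
#   scores = np.array([[0.0, 0.9],    # column 0 is background, skipped
#                      [0.0, 0.8]], dtype=np.float32)
#   boxes = np.array([[0.1, 0.1, 0.5, 0.5],
#                     [0.1, 0.1, 0.5, 0.45]], dtype=np.float32)
#   decode_single(boxes, scores, criteria=0.5, max_output=10)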
TensorFlow2x/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/constants.py
deleted 100644 → 0
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Constants used in tf_cnn_benchmarks."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from enum import Enum

# Results fetched with this prefix will not be reduced. Instead, they will be
# passed as matrices to model's postprocess function.
UNREDUCED_ACCURACY_OP_PREFIX = "tensor:"

# Eval result values with this name prefix will be included in summary.
SIMPLE_VALUE_RESULT_PREFIX = "simple_value:"


class BenchmarkMode(object):
  """Benchmark running mode."""
  TRAIN = "training"
  EVAL = "evaluation"
  TRAIN_AND_EVAL = "training + evaluation"
  FORWARD_ONLY = "forward only"


class NetworkTopology(str, Enum):
  """Network topology describes how multiple GPUs are inter-connected."""

  # DGX-1 uses hybrid cube mesh topology with the following device peer to
  # peer matrix:
  #   DMA: 0 1 2 3 4 5 6 7
  #   0:   Y Y Y Y Y N N N
  #   1:   Y Y Y Y N Y N N
  #   2:   Y Y Y Y N N Y N
  #   3:   Y Y Y Y N N N Y
  #   4:   Y N N N Y Y Y Y
  #   5:   N Y N N Y Y Y Y
  #   6:   N N Y N Y Y Y Y
  #   7:   N N N Y Y Y Y Y
  DGX1 = "dgx1"

  # V100 in GCP are connected with the following device peer to peer matrix.
  # In this topology, bandwidth of the connection depends on if it uses
  # NVLink or PCIe link.
  #   DMA: 0 1 2 3 4 5 6 7
  #   0:   Y Y Y Y N Y N N
  #   1:   Y Y Y Y N N N N
  #   2:   Y Y Y Y N N N Y
  #   3:   Y Y Y Y N N N N
  #   4:   N N N N Y Y Y Y
  #   5:   Y N N N Y Y Y Y
  #   6:   N N N N Y Y Y Y
  #   7:   N N Y N Y Y Y Y
  GCP_V100 = "gcp_v100"
TensorFlow2x/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/convnet_builder.py
deleted 100644 → 0
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""CNN builder."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
collections
import
defaultdict
import
contextlib
import
numpy
as
np
import
tensorflow.compat.v1
as
tf
# pylint: disable=g-direct-tensorflow-import
import
mlperf
from
tensorflow.python.layers
import
convolutional
as
conv_layers
from
tensorflow.python.layers
import
core
as
core_layers
from
tensorflow.python.layers
import
normalization
as
normalization_layers
from
tensorflow.python.layers
import
pooling
as
pooling_layers
from
tensorflow.python.training
import
moving_averages
_data_format_to_channel_axis
=
{
'NCHW'
:
1
,
'NHWC'
:
3
}
class
ConvNetBuilder
(
object
):
"""Builder of cnn net."""
def
__init__
(
self
,
input_op
,
input_nchan
,
phase_train
,
use_tf_layers
,
data_format
=
'NCHW'
,
dtype
=
tf
.
float32
,
variable_dtype
=
tf
.
float32
):
self
.
top_layer
=
input_op
self
.
top_size
=
input_nchan
self
.
phase_train
=
phase_train
self
.
use_tf_layers
=
use_tf_layers
self
.
data_format
=
data_format
self
.
dtype
=
dtype
self
.
variable_dtype
=
variable_dtype
self
.
counts
=
defaultdict
(
lambda
:
0
)
self
.
use_batch_norm
=
False
self
.
batch_norm_config
=
{}
# 'decay': 0.997, 'scale': True}
self
.
channel_pos
=
(
'channels_last'
if
data_format
==
'NHWC'
else
'channels_first'
)
self
.
aux_top_layer
=
None
self
.
aux_top_size
=
0
def
get_custom_getter
(
self
):
"""Returns a custom getter that this class's methods must be called under.
All methods of this class must be called under a variable scope that was
passed this custom getter. Example:
```python
network = ConvNetBuilder(...)
with tf.variable_scope('cg', custom_getter=network.get_custom_getter()):
network.conv(...)
# Call more methods of network here
```
Currently, this custom getter only does anything if self.use_tf_layers is
True. In that case, it causes variables to be stored as dtype
self.variable_type, then casted to the requested dtype, instead of directly
storing the variable as the requested dtype.
"""
def
inner_custom_getter
(
getter
,
*
args
,
**
kwargs
):
"""Custom getter that forces variables to have type self.variable_type."""
if
not
self
.
use_tf_layers
:
return
getter
(
*
args
,
**
kwargs
)
requested_dtype
=
kwargs
[
'dtype'
]
if
not
(
requested_dtype
==
tf
.
float32
and
self
.
variable_dtype
==
tf
.
float16
):
# Only change the variable dtype if doing so does not decrease variable
# precision.
kwargs
[
'dtype'
]
=
self
.
variable_dtype
var
=
getter
(
*
args
,
**
kwargs
)
# This if statement is needed to guard the cast, because batch norm
# assigns directly to the return value of this custom getter. The cast
# makes the return value not a variable so it cannot be assigned. Batch
# norm variables are always in fp32 so this if statement is never
# triggered for them.
if
var
.
dtype
.
base_dtype
!=
requested_dtype
:
var
=
tf
.
cast
(
var
,
requested_dtype
)
return
var
return
inner_custom_getter
@
contextlib
.
contextmanager
def
switch_to_aux_top_layer
(
self
):
"""Context that construct cnn in the auxiliary arm."""
if
self
.
aux_top_layer
is
None
:
raise
RuntimeError
(
'Empty auxiliary top layer in the network.'
)
saved_top_layer
=
self
.
top_layer
saved_top_size
=
self
.
top_size
self
.
top_layer
=
self
.
aux_top_layer
self
.
top_size
=
self
.
aux_top_size
yield
self
.
aux_top_layer
=
self
.
top_layer
self
.
aux_top_size
=
self
.
top_size
self
.
top_layer
=
saved_top_layer
self
.
top_size
=
saved_top_size
def
get_variable
(
self
,
name
,
shape
,
dtype
,
cast_dtype
,
*
args
,
**
kwargs
):
# TODO(reedwm): Currently variables and gradients are transferred to other
# devices and machines as type `dtype`, not `cast_dtype`. In particular,
# this means in fp16 mode, variables are transferred as fp32 values, not
# fp16 values, which uses extra bandwidth.
var
=
tf
.
get_variable
(
name
,
shape
,
dtype
,
*
args
,
**
kwargs
)
return
tf
.
cast
(
var
,
cast_dtype
)
def
_conv2d_impl
(
self
,
input_layer
,
num_channels_in
,
filters
,
kernel_size
,
strides
,
padding
,
kernel_initializer
):
if
self
.
use_tf_layers
:
return
conv_layers
.
conv2d
(
input_layer
,
filters
,
kernel_size
,
strides
,
padding
,
self
.
channel_pos
,
kernel_initializer
=
kernel_initializer
,
use_bias
=
False
)
else
:
weights_shape
=
[
kernel_size
[
0
],
kernel_size
[
1
],
num_channels_in
,
filters
]
# We use the name 'conv2d/kernel' so the variable has the same name as its
# tf.layers equivalent. This way, if a checkpoint is written when
# self.use_tf_layers == True, it can be loaded when
# self.use_tf_layers == False, and vice versa.
weights
=
self
.
get_variable
(
'conv2d/kernel'
,
weights_shape
,
self
.
variable_dtype
,
self
.
dtype
,
initializer
=
kernel_initializer
)
if
self
.
data_format
==
'NHWC'
:
strides
=
[
1
]
+
strides
+
[
1
]
else
:
strides
=
[
1
,
1
]
+
strides
return
tf
.
nn
.
conv2d
(
input_layer
,
weights
,
strides
,
padding
,
data_format
=
self
.
data_format
)
  def conv(self,
           num_out_channels,
           k_height,
           k_width,
           d_height=1,
           d_width=1,
           mode='SAME',
           input_layer=None,
           num_channels_in=None,
           use_batch_norm=None,
           stddev=None,
           activation='relu',
           bias=0.0,
           kernel_initializer=None):
    """Construct a conv2d layer on top of cnn."""
    if input_layer is None:
      input_layer = self.top_layer
    if num_channels_in is None:
      num_channels_in = self.top_size
    if stddev is not None and kernel_initializer is None:
      kernel_initializer = tf.truncated_normal_initializer(stddev=stddev)
    if kernel_initializer is None:
      kernel_initializer = tf.variance_scaling_initializer()
    name = 'conv' + str(self.counts['conv'])
    self.counts['conv'] += 1
    with tf.variable_scope(name):
      strides = [1, d_height, d_width, 1]
      if self.data_format == 'NCHW':
        strides = [strides[0], strides[3], strides[1], strides[2]]
      if mode != 'SAME_RESNET':
        conv = self._conv2d_impl(input_layer, num_channels_in,
                                 num_out_channels,
                                 kernel_size=[k_height, k_width],
                                 strides=[d_height, d_width], padding=mode,
                                 kernel_initializer=kernel_initializer)
      else:  # Special padding mode for ResNet models
        if d_height == 1 and d_width == 1:
          conv = self._conv2d_impl(input_layer, num_channels_in,
                                   num_out_channels,
                                   kernel_size=[k_height, k_width],
                                   strides=[d_height, d_width], padding='SAME',
                                   kernel_initializer=kernel_initializer)
        else:
          rate = 1  # Unused (for 'a trous' convolutions)
          kernel_height_effective = k_height + (k_height - 1) * (rate - 1)
          pad_h_beg = (kernel_height_effective - 1) // 2
          pad_h_end = kernel_height_effective - 1 - pad_h_beg
          kernel_width_effective = k_width + (k_width - 1) * (rate - 1)
          pad_w_beg = (kernel_width_effective - 1) // 2
          pad_w_end = kernel_width_effective - 1 - pad_w_beg
          padding = [[0, 0], [pad_h_beg, pad_h_end],
                     [pad_w_beg, pad_w_end], [0, 0]]
          if self.data_format == 'NCHW':
            padding = [padding[0], padding[3], padding[1], padding[2]]
          padded_input_layer = tf.pad(input_layer, padding)
          conv = self._conv2d_impl(padded_input_layer, num_channels_in,
                                   num_out_channels,
                                   kernel_size=[k_height, k_width],
                                   strides=[d_height, d_width],
                                   padding='VALID',
                                   kernel_initializer=kernel_initializer)
      if use_batch_norm is None:
        use_batch_norm = self.use_batch_norm
      mlperf.logger.log_conv2d(input_tensor=input_layer, output_tensor=conv,
                               stride_height=d_height, stride_width=d_width,
                               filters=num_out_channels,
                               initializer=kernel_initializer,
                               use_bias=not use_batch_norm and bias is not None)
      if not use_batch_norm:
        if bias is not None:
          biases = self.get_variable('biases', [num_out_channels],
                                     self.variable_dtype, self.dtype,
                                     initializer=tf.constant_initializer(bias))
          biased = tf.reshape(
              tf.nn.bias_add(conv, biases, data_format=self.data_format),
              conv.get_shape())
        else:
          biased = conv
      else:
        self.top_layer = conv
        self.top_size = num_out_channels
        biased = self.batch_norm(**self.batch_norm_config)
      if activation == 'relu':
        mlperf.logger.log(key=mlperf.tags.MODEL_HP_RELU)
        conv1 = tf.nn.relu(biased)
      elif activation == 'linear' or activation is None:
        conv1 = biased
      elif activation == 'tanh':
        conv1 = tf.nn.tanh(biased)
      else:
        raise KeyError('Invalid activation type \'%s\'' % activation)
      self.top_layer = conv1
      self.top_size = num_out_channels
      return conv1
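The 'SAME_RESNET' branch pads explicitly so strided convolutions match the ResNet paper's symmetric padding rather than TensorFlow's 'SAME' rule, which biases padding toward the bottom/right. A standalone sketch of the arithmetic, with a hypothetical helper name:

# Sketch: explicit ResNet-style padding for a k x k kernel, as computed
# in conv() above. With rate == 1 the effective kernel size equals k.
def resnet_pad_amounts(k, rate=1):
  k_effective = k + (k - 1) * (rate - 1)
  pad_beg = (k_effective - 1) // 2
  pad_end = k_effective - 1 - pad_beg
  return pad_beg, pad_end

# For a 7x7 stride-2 stem convolution, 6 pixels of padding split 3/3:
assert resnet_pad_amounts(7) == (3, 3)
# Even kernels split asymmetrically, favoring the end:
assert resnet_pad_amounts(4) == (1, 2)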
  def _pool(self,
            pool_name,
            pool_function,
            k_height,
            k_width,
            d_height,
            d_width,
            mode,
            input_layer,
            num_channels_in):
    """Construct a pooling layer."""
    if input_layer is None:
      input_layer = self.top_layer
    else:
      self.top_size = num_channels_in
    name = pool_name + str(self.counts[pool_name])
    self.counts[pool_name] += 1
    if self.use_tf_layers:
      pool = pool_function(
          input_layer, [k_height, k_width], [d_height, d_width],
          padding=mode,
          data_format=self.channel_pos,
          name=name)
    else:
      if self.data_format == 'NHWC':
        ksize = [1, k_height, k_width, 1]
        strides = [1, d_height, d_width, 1]
      else:
        ksize = [1, 1, k_height, k_width]
        strides = [1, 1, d_height, d_width]
      pool = tf.nn.max_pool(input_layer, ksize, strides, padding=mode,
                            data_format=self.data_format, name=name)
    if pool_name == 'mpool':
      mlperf.logger.log_max_pool(input_tensor=input_layer, output_tensor=pool)
    self.top_layer = pool
    return pool
  def mpool(self,
            k_height,
            k_width,
            d_height=2,
            d_width=2,
            mode='VALID',
            input_layer=None,
            num_channels_in=None):
    """Construct a max pooling layer."""
    return self._pool('mpool', pooling_layers.max_pooling2d, k_height,
                      k_width, d_height, d_width, mode, input_layer,
                      num_channels_in)
  def apool(self,
            k_height,
            k_width,
            d_height=2,
            d_width=2,
            mode='VALID',
            input_layer=None,
            num_channels_in=None):
    """Construct an average pooling layer."""
    return self._pool('apool', pooling_layers.average_pooling2d, k_height,
                      k_width, d_height, d_width, mode, input_layer,
                      num_channels_in)
  def reshape(self, shape, input_layer=None):
    if input_layer is None:
      input_layer = self.top_layer
    self.top_layer = tf.reshape(input_layer, shape)
    self.top_size = shape[-1]  # HACK This may not always work
    return self.top_layer
  def affine(self,
             num_out_channels,
             input_layer=None,
             num_channels_in=None,
             bias=0.0,
             stddev=None,
             activation='relu'):
    if input_layer is None:
      input_layer = self.top_layer
    if num_channels_in is None:
      num_channels_in = self.top_size
    name = 'affine' + str(self.counts['affine'])
    self.counts['affine'] += 1
    with tf.variable_scope(name):
      init_factor = 2. if activation == 'relu' else 1.
      stddev = stddev or np.sqrt(init_factor / num_channels_in)
      kernel = self.get_variable(
          'weights', [num_channels_in, num_out_channels],
          self.variable_dtype, self.dtype,
          initializer=tf.truncated_normal_initializer(stddev=stddev))
      biases = self.get_variable('biases', [num_out_channels],
                                 self.variable_dtype, self.dtype,
                                 initializer=tf.constant_initializer(bias))
      mlperf.logger.log(key=mlperf.tags.MODEL_HP_DENSE, value=num_out_channels)
      logits = tf.nn.xw_plus_b(input_layer, kernel, biases)
      if activation == 'relu':
        mlperf.logger.log(key=mlperf.tags.MODEL_HP_RELU)
        affine1 = tf.nn.relu(logits, name=name)
      elif activation == 'linear' or activation is None:
        affine1 = logits
      else:
        raise KeyError('Invalid activation type \'%s\'' % activation)
      self.top_layer = affine1
      self.top_size = num_out_channels
      return affine1
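Taken together, conv, mpool, reshape, and affine let a model definition chain layers through the builder's top_layer/top_size bookkeeping: each call consumes the current top layer and replaces it with its own output. A hedged sketch of a hypothetical AlexNet-style stem (cnn is a ConvNetBuilder instance; the flattened size depends on the input resolution and is illustrative):

# Sketch: layer chaining through the builder methods above.
def add_inference(cnn):
  cnn.conv(64, 11, 11, 4, 4)        # 64 filters, 11x11 kernel, stride 4
  cnn.mpool(3, 3, 2, 2)             # 3x3 max pool, stride 2
  cnn.conv(192, 5, 5)               # defaults: stride 1, 'SAME', relu
  cnn.mpool(3, 3, 2, 2)
  cnn.reshape([-1, 192 * 13 * 13])  # flatten; top_size becomes last dim
  cnn.affine(4096)                  # fully connected + relu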
  def inception_module(self, name, cols, input_layer=None, in_size=None):
    if input_layer is None:
      input_layer = self.top_layer
    if in_size is None:
      in_size = self.top_size
    name += str(self.counts[name])
    self.counts[name] += 1
    with tf.variable_scope(name):
      col_layers = []
      col_layer_sizes = []
      for c, col in enumerate(cols):
        col_layers.append([])
        col_layer_sizes.append([])
        for l, layer in enumerate(col):
          ltype, args = layer[0], layer[1:]
          kwargs = {
              'input_layer': input_layer,
              'num_channels_in': in_size
          } if l == 0 else {}
          if ltype == 'conv':
            self.conv(*args, **kwargs)
          elif ltype == 'mpool':
            self.mpool(*args, **kwargs)
          elif ltype == 'apool':
            self.apool(*args, **kwargs)
          elif ltype == 'share':
            # Share matching layer from previous column
            self.top_layer = col_layers[c - 1][l]
            self.top_size = col_layer_sizes[c - 1][l]
          else:
            raise KeyError(
                'Invalid layer type for inception module: \'%s\'' % ltype)
          col_layers[c].append(self.top_layer)
          col_layer_sizes[c].append(self.top_size)
      catdim = 3 if self.data_format == 'NHWC' else 1
      self.top_layer = tf.concat([layers[-1] for layers in col_layers], catdim)
      self.top_size = sum([sizes[-1] for sizes in col_layer_sizes])
      return self.top_layer
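The cols argument is a list of columns, each column a list of (layer_type, *args) tuples; the first layer of every column reads from the module input, and the final layer of each column is concatenated along the channel axis. A hedged example of a GoogLeNet-style mixed block (filter counts are illustrative only; cnn is a ConvNetBuilder instance):

# Sketch: four parallel columns -- 1x1 conv, 1x1->3x3, 1x1->5x5, and
# pool->1x1 -- concatenated on the channel dimension by inception_module.
cols = [
    [('conv', 64, 1, 1)],
    [('conv', 96, 1, 1), ('conv', 128, 3, 3)],
    [('conv', 16, 1, 1), ('conv', 32, 5, 5)],
    [('mpool', 3, 3, 1, 1, 'SAME'), ('conv', 32, 1, 1)],
]
cnn.inception_module('incept', cols)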
  def spatial_mean(self, keep_dims=False):
    name = 'spatial_mean' + str(self.counts['spatial_mean'])
    self.counts['spatial_mean'] += 1
    axes = [1, 2] if self.data_format == 'NHWC' else [2, 3]
    self.top_layer = tf.reduce_mean(
        self.top_layer, axes, keepdims=keep_dims, name=name)
    return self.top_layer
  def dropout(self, keep_prob=0.5, input_layer=None):
    if input_layer is None:
      input_layer = self.top_layer
    else:
      self.top_size = None
    name = 'dropout' + str(self.counts['dropout'])
    with tf.variable_scope(name):
      if not self.phase_train:
        keep_prob = 1.0
      if self.use_tf_layers:
        dropout = core_layers.dropout(input_layer, 1. - keep_prob,
                                      training=self.phase_train)
      else:
        dropout = tf.nn.dropout(input_layer, keep_prob)
      self.top_layer = dropout
      return dropout
  def _batch_norm_without_layers(self, input_layer, decay, use_scale, epsilon):
    """Batch normalization on `input_layer` without tf.layers."""
    # We make this function as similar as possible to
    # tf.contrib.layers.batch_norm, to minimize the differences between using
    # layers and not using layers.
    shape = input_layer.shape
    num_channels = shape[3] if self.data_format == 'NHWC' else shape[1]
    beta = self.get_variable('beta', [num_channels], tf.float32, tf.float32,
                             initializer=tf.zeros_initializer())
    if use_scale:
      gamma = self.get_variable('gamma', [num_channels], tf.float32,
                                tf.float32, initializer=tf.ones_initializer())
    else:
      gamma = tf.constant(1.0, tf.float32, [num_channels])
    # For moving variables, we use tf.get_variable instead of self.get_variable,
    # since self.get_variable returns the result of tf.cast which we cannot
    # assign to.
    moving_mean = tf.get_variable('moving_mean', [num_channels],
                                  tf.float32,
                                  initializer=tf.zeros_initializer(),
                                  trainable=False)
    moving_variance = tf.get_variable('moving_variance', [num_channels],
                                      tf.float32,
                                      initializer=tf.ones_initializer(),
                                      trainable=False)
    if self.phase_train:
      bn, batch_mean, batch_variance = tf.nn.fused_batch_norm(
          input_layer, gamma, beta, epsilon=epsilon,
          data_format=self.data_format, is_training=True)
      mean_update = moving_averages.assign_moving_average(
          moving_mean, batch_mean, decay=decay, zero_debias=False)
      variance_update = moving_averages.assign_moving_average(
          moving_variance, batch_variance, decay=decay, zero_debias=False)
      tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, mean_update)
      tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, variance_update)
    else:
      bn, _, _ = tf.nn.fused_batch_norm(
          input_layer, gamma, beta, mean=moving_mean,
          variance=moving_variance, epsilon=epsilon,
          data_format=self.data_format, is_training=False)
    return bn
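assign_moving_average applies the standard exponential moving average, so at inference time the frozen statistics approximate the long-run batch statistics. A plain-Python sketch of the update applied to moving_mean and moving_variance (decay = 0.999 matches batch_norm's default below):

# Sketch: the update performed by moving_averages.assign_moving_average
# with zero_debias=False:
#   moving <- moving * decay + batch_value * (1 - decay)
def ema_update(moving, batch_value, decay=0.999):
  return moving * decay + batch_value * (1 - decay)

m = 0.0
for batch_mean in [1.0, 1.0, 1.0]:
  m = ema_update(m, batch_mean)
print(m)  # ~0.002997; converges toward 1.0 over many steps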
  def batch_norm(self, input_layer=None, decay=0.999, scale=False,
                 epsilon=0.001):
    """Adds a Batch Normalization layer."""
    if input_layer is None:
      input_layer = self.top_layer
    else:
      self.top_size = None
    name = 'batchnorm' + str(self.counts['batchnorm'])
    self.counts['batchnorm'] += 1
    center = True
    with tf.variable_scope(name) as scope:
      if self.use_tf_layers:
        layer_obj = normalization_layers.BatchNormalization(
            momentum=decay,
            scale=scale,
            epsilon=epsilon,
            fused=True,
            axis=_data_format_to_channel_axis[self.data_format],
            # We pass this 'scope' argument for compatibility with checkpoints
            # created with the contrib version of batch norm. tf_cnn_benchmarks
            # used to use the contrib version.
            _scope=scope,
            center=center,
            name=scope.name)
        bn = layer_obj.apply(input_layer, training=self.phase_train)
      else:
        bn = self._batch_norm_without_layers(input_layer, decay, scale,
                                             epsilon)
    self.top_layer = bn
    self.top_size = bn.shape[3] if self.data_format == 'NHWC' else bn.shape[1]
    self.top_size = int(self.top_size)
    mlperf.logger.log_batch_norm(
        input_tensor=input_layer, output_tensor=bn, momentum=decay,
        epsilon=epsilon, center=center, scale=scale,
        training=self.phase_train)
    return bn
  def lrn(self, depth_radius, bias, alpha, beta):
    """Adds a local response normalization layer."""
    name = 'lrn' + str(self.counts['lrn'])
    self.counts['lrn'] += 1
    self.top_layer = tf.nn.lrn(
        self.top_layer, depth_radius, bias, alpha, beta, name=name)
    return self.top_layer
TensorFlow2x/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/datasets.py
deleted 100644 → 0
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Benchmark dataset utilities.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from abc import abstractmethod
import os

import numpy as np
import six
from six.moves import cPickle
from six.moves import xrange  # pylint: disable=redefined-builtin
import tensorflow.compat.v1 as tf

from tensorflow.python.platform import gfile
import preprocessing

IMAGENET_NUM_TRAIN_IMAGES = 1281167
IMAGENET_NUM_VAL_IMAGES = 50000
COCO_NUM_TRAIN_IMAGES = 118287
COCO_NUM_VAL_IMAGES = 4952


class Dataset(object):
  """Abstract class for cnn benchmarks dataset."""

  def __init__(self, name, data_dir=None, queue_runner_required=False,
               num_classes=None):
    self.name = name
    self.data_dir = data_dir
    self._queue_runner_required = queue_runner_required
    self._num_classes = num_classes

  def tf_record_pattern(self, subset):
    return os.path.join(self.data_dir, '%s-*-of-*' % subset)

  def reader(self):
    return tf.TFRecordReader()

  @property
  def num_classes(self):
    return self._num_classes

  @num_classes.setter
  def num_classes(self, val):
    self._num_classes = val

  @abstractmethod
  def num_examples_per_epoch(self, subset):
    pass

  def __str__(self):
    return self.name

  def get_input_preprocessor(self, input_preprocessor='default'):
    assert not self.use_synthetic_gpu_inputs()
    return _SUPPORTED_INPUT_PREPROCESSORS[self.name][input_preprocessor]

  def queue_runner_required(self):
    return self._queue_runner_required

  def use_synthetic_gpu_inputs(self):
    return not self.data_dir


class LibrispeechDataset(Dataset):
  """Configuration for LibriSpeech dataset."""

  def __init__(self, data_dir=None):
    super(LibrispeechDataset, self).__init__(
        'librispeech', data_dir, num_classes=29)

  def tf_record_pattern(self, subset):
    if subset == 'train':
      return os.path.join(self.data_dir, 'train-clean-*.tfrecords')
    elif subset == 'validation':
      return os.path.join(self.data_dir, 'test-clean.tfrecords')
    else:
      return ''

  def num_examples_per_epoch(self, subset='train'):
    del subset
    return 2  # TODO(laigd): currently this is an arbitrary number.


class ImageDataset(Dataset):
  """Abstract class for image datasets."""

  def __init__(self, name, height, width, depth=None, data_dir=None,
               queue_runner_required=False, num_classes=1001):
    super(ImageDataset, self).__init__(name, data_dir, queue_runner_required,
                                       num_classes)
    self.height = height
    self.width = width
    self.depth = depth or 3


class ImagenetDataset(ImageDataset):
  """Configuration for Imagenet dataset."""

  def __init__(self, data_dir=None):
    super(ImagenetDataset, self).__init__('imagenet', 300, 300,
                                          data_dir=data_dir)

  def num_examples_per_epoch(self, subset='train'):
    if subset == 'train':
      return IMAGENET_NUM_TRAIN_IMAGES
    elif subset == 'validation':
      return IMAGENET_NUM_VAL_IMAGES
    else:
      raise ValueError('Invalid data subset "%s"' % subset)


class Cifar10Dataset(ImageDataset):
  """Configuration for cifar 10 dataset.

  It will load all the input images into memory.
  """

  def __init__(self, data_dir=None):
    super(Cifar10Dataset, self).__init__('cifar10', 32, 32,
                                         data_dir=data_dir,
                                         queue_runner_required=True,
                                         num_classes=11)

  def read_data_files(self, subset='train'):
    """Reads from data file and returns images and labels in a numpy array."""
    assert self.data_dir, ('Cannot call `read_data_files` when using '
                           'synthetic data')
    if subset == 'train':
      filenames = [
          os.path.join(self.data_dir, 'data_batch_%d' % i)
          for i in xrange(1, 6)
      ]
    elif subset == 'validation':
      filenames = [os.path.join(self.data_dir, 'test_batch')]
    else:
      raise ValueError('Invalid data subset "%s"' % subset)

    inputs = []
    for filename in filenames:
      with gfile.Open(filename, 'rb') as f:
        # python2 does not have the encoding parameter
        encoding = {} if six.PY2 else {'encoding': 'bytes'}
        inputs.append(cPickle.load(f, **encoding))
    # See http://www.cs.toronto.edu/~kriz/cifar.html for a description of the
    # input format.
    all_images = np.concatenate(
        [each_input[b'data'] for each_input in inputs]).astype(np.float32)
    all_labels = np.concatenate(
        [each_input[b'labels'] for each_input in inputs])
    return all_images, all_labels

  def num_examples_per_epoch(self, subset='train'):
    if subset == 'train':
      return 50000
    elif subset == 'validation':
      return 10000
    else:
      raise ValueError('Invalid data subset "%s"' % subset)


class COCODataset(ImageDataset):
  """Configuration for COCO dataset."""

  def __init__(self, data_dir=None, image_size=300):
    super(COCODataset, self).__init__('coco', image_size, image_size,
                                      data_dir=data_dir, num_classes=81)

  def num_examples_per_epoch(self, subset='train'):
    if subset == 'train':
      return COCO_NUM_TRAIN_IMAGES
    elif subset == 'validation':
      return COCO_NUM_VAL_IMAGES
    else:
      raise ValueError('Invalid data subset "%s"' % subset)


_SUPPORTED_DATASETS = {
    'imagenet': ImagenetDataset,
    'cifar10': Cifar10Dataset,
    'librispeech': LibrispeechDataset,
    'coco': COCODataset,
}

_SUPPORTED_INPUT_PREPROCESSORS = {
    'imagenet': {
        'default': preprocessing.RecordInputImagePreprocessor,
        'official_models_imagenet': preprocessing.ImagenetPreprocessor,
    },
    'cifar10': {
        'default': preprocessing.Cifar10ImagePreprocessor
    },
    'librispeech': {
        'default': preprocessing.LibrispeechPreprocessor
    },
    'coco': {
        'default': preprocessing.COCOPreprocessor
    },
}


def create_dataset(data_dir, data_name):
  """Create a Dataset instance based on data_dir and data_name."""
  if not data_dir and not data_name:
    # When using synthetic data, use synthetic imagenet images by default.
    data_name = 'imagenet'

  # Infer dataset name from data_dir if data_name is not provided.
  if data_name is None:
    for supported_name in _SUPPORTED_DATASETS:
      if supported_name in data_dir:
        data_name = supported_name
        break
    else:  # Failed to identify dataset name from data dir.
      raise ValueError('Could not identify name of dataset. '
                       'Please specify with --data_name option.')
  if data_name not in _SUPPORTED_DATASETS:
    raise ValueError('Unknown dataset. Must be one of %s' % ', '.join(
        [key for key in sorted(_SUPPORTED_DATASETS.keys())]))

  return _SUPPORTED_DATASETS[data_name](data_dir)
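A hedged usage sketch of create_dataset (the data path is a placeholder): the dataset name can be given explicitly or inferred from the directory path, and an empty data_dir selects synthetic inputs.

# Sketch: both calls return an ImagenetDataset; with data_dir=None the
# dataset reports use_synthetic_gpu_inputs() == True.
train_ds = create_dataset('/data/imagenet-2012-tfrecord', None)  # inferred
synth_ds = create_dataset(None, 'imagenet')                      # synthetic
print(train_ds.num_examples_per_epoch('train'))  # 1281167
print(synth_ds.use_synthetic_gpu_inputs())       # True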
TensorFlow2x/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/flags.py
deleted 100644 → 0
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains functions to define flags and params.
Calling a DEFINE_* function will add a ParamSpec namedtuple to the param_spec
dict. The DEFINE_* arguments match those in absl. Calling define_flags() creates
a command-line flag for every ParamSpec defined by a DEFINE_* functions.
The reason we don't use absl flags directly is that we want to be able to use
tf_cnn_benchmarks as a library. When using it as a library, we don't want to
define any flags, but instead pass parameters to the BenchmarkCNN constructor.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from collections import namedtuple

from absl import flags as absl_flags
import six

FLAGS = absl_flags.FLAGS

# ParamSpec describes one of benchmark_cnn.BenchmarkCNN's parameters.
ParamSpec = namedtuple(
    '_ParamSpec', ['flag_type', 'default_value', 'description', 'kwargs'])

# Maps from parameter name to its ParamSpec.
param_specs = {}


def DEFINE_string(name, default, help):  # pylint: disable=invalid-name,redefined-builtin
  param_specs[name] = ParamSpec('string', default, help, {})


def DEFINE_boolean(name, default, help):  # pylint: disable=invalid-name,redefined-builtin
  param_specs[name] = ParamSpec('boolean', default, help, {})


def DEFINE_integer(name, default, help, lower_bound=None, upper_bound=None):  # pylint: disable=invalid-name,redefined-builtin
  kwargs = {'lower_bound': lower_bound, 'upper_bound': upper_bound}
  param_specs[name] = ParamSpec('integer', default, help, kwargs)


def DEFINE_float(name, default, help, lower_bound=None, upper_bound=None):  # pylint: disable=invalid-name,redefined-builtin
  kwargs = {'lower_bound': lower_bound, 'upper_bound': upper_bound}
  param_specs[name] = ParamSpec('float', default, help, kwargs)


def DEFINE_enum(name, default, enum_values, help):  # pylint: disable=invalid-name,redefined-builtin
  kwargs = {'enum_values': enum_values}
  param_specs[name] = ParamSpec('enum', default, help, kwargs)


def DEFINE_list(name, default, help):  # pylint: disable=invalid-name,redefined-builtin
  param_specs[name] = ParamSpec('list', default, help, {})


def define_flags(specs=None):
  """Define a command line flag for each ParamSpec in flags.param_specs."""
  specs = specs or param_specs
  define_flag = {
      'boolean': absl_flags.DEFINE_boolean,
      'float': absl_flags.DEFINE_float,
      'integer': absl_flags.DEFINE_integer,
      'string': absl_flags.DEFINE_string,
      'enum': absl_flags.DEFINE_enum,
      'list': absl_flags.DEFINE_list
  }
  for name, param_spec in six.iteritems(specs):
    if param_spec.flag_type not in define_flag:
      raise ValueError('Unknown flag_type %s' % param_spec.flag_type)
    else:
      define_flag[param_spec.flag_type](name, param_spec.default_value,
                                        help=param_spec.description,
                                        **param_spec.kwargs)
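A short sketch of the intended flow: modules call the DEFINE_* wrappers to register ParamSpecs, and a binary that wants real command-line flags calls define_flags() once before parsing. The parameter names here are illustrative, not flags this repo necessarily defines.

# Sketch: register two parameters, then materialize them as absl flags.
DEFINE_integer('batch_size', 32, 'batch size per device', lower_bound=1)
DEFINE_enum('precision', 'fp32', ['fp32', 'fp16'], 'numeric precision')

define_flags()        # creates --batch_size and --precision absl flags
# absl.app.run(main)  # FLAGS.batch_size etc. available after parsing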
TensorFlow2x/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/leading_indicators_test.py
deleted 100644 → 0
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Benchmark various leading indicators CNNs.
The purpose of these tests is to test each model as a high level baseline and
to ensure the various variable_update options have not regressed. Not all
options are tested. The tests focus on the most viable options.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import ctypes
import logging
import os
import sys

from absl import flags
from absl.testing import absltest  # pylint: disable=unused-import
import tensorflow.compat.v1 as tf

# pylint: disable=g-bad-import-order
import benchmark_cnn
from platforms import util as platforms_util

flags.DEFINE_integer('num_batches', None,
                     'number of batches to run, excluding warmup')


class BenchmarkBase(tf.test.Benchmark):
  """Base class for all benchmarks in this file."""

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    """Base class for all benchmarks in this file.

    Args:
      output_dir: directory where to output e.g. log files
      root_data_dir: directory under which to look for dataset
      **kwargs: arbitrary named arguments. This is needed to make the
        constructor forward compatible in case PerfZero provides more
        named arguments before updating the constructor.
    """
    # Load default values if the benchmark is not run with absl.app.run()
    if not flags.FLAGS.is_parsed():
      flags.FLAGS.mark_as_parsed()

    self.fake_data_dir = os.path.join(platforms_util.get_test_data_dir(),
                                      'fake_tf_record_data')
    self.output_dir = output_dir
    if root_data_dir is None:
      self.data_dir = ('/readahead/200M/placer/prod/home/distbelief/'
                       'imagenet-tensorflow/imagenet-2012-tfrecord')
    else:
      self.data_dir = os.path.join(root_data_dir, 'imagenet')

  def _run_benchmark(self, params):
    """Run a CNN benchmark and report its results.

    Args:
      params: Params tuple, typically created by benchmark_cnn.make_params or
        benchmark_cnn.make_params_from_flags.
    """
    logging.info('Running benchmark [%s]', self._get_name())
    params = benchmark_cnn.setup(params)
    bench = benchmark_cnn.BenchmarkCNN(params)
    bench.print_info()
    stats = bench.run()
    extras = {}
    extras['examples_per_sec'] = stats.get('images_per_sec')
    if 'last_average_loss' in stats:
      extras['last_average_loss'] = stats['last_average_loss']
    if 'top_1_accuracy' in stats:
      extras['top_1_accuracy'] = stats['top_1_accuracy']
    if 'top_5_accuracy' in stats:
      extras['top_5_accuracy'] = stats['top_5_accuracy']
    self.report_benchmark(
        iters=stats.get('num_steps'),
        wall_time=stats.get('average_wall_time'),
        extras=extras)

  def _shared_params(self):
    """Returns shared parameters for all benchmarks in this file."""
    params = {}
    if flags.FLAGS.num_batches is not None:
      params['num_batches'] = flags.FLAGS.num_batches
    if self.output_dir is not None:
      params['benchmark_log_dir'] = self.output_dir
    return benchmark_cnn.make_params(**params)

  def _binary_search_batch_size(self, params, init_batch_size):
    """Find the max batch_size using binary search."""
    assert init_batch_size > 0
    low_batch_size = 0
    high_batch_size = None
    batch_size = init_batch_size

    # No need to run a warmup or many batches; if it doesn't OOM after 10
    # batches, it should work in general.
    params = params._replace(num_batches=10, num_warmup_batches=0)

    # Find high_batch_size first.
    tf.logging.info('Looking for upper bound to batch size, starting with %d' %
                    batch_size)
    while high_batch_size is None:
      tf.logging.info('Trying batch_size %d' % batch_size)
      params = params._replace(batch_size=batch_size)
      bench = benchmark_cnn.BenchmarkCNN(params)
      bench.print_info()
      try:
        bench.run()
        low_batch_size = batch_size
        batch_size *= 2
      except tf.errors.ResourceExhaustedError:
        high_batch_size = batch_size - 1

    # Binary Search
    tf.logging.info(
        'Max batch size is in range (%d, %d]. Starting binary search to find '
        'exact max batch size.' % (low_batch_size, batch_size))
    while low_batch_size < high_batch_size:
      batch_size = (low_batch_size + high_batch_size + 1) // 2
      tf.logging.info('Trying batch_size %d' % batch_size)
      params = params._replace(batch_size=batch_size)
      bench = benchmark_cnn.BenchmarkCNN(params)
      bench.print_info()
      try:
        bench.run()
        low_batch_size = batch_size
      except tf.errors.ResourceExhaustedError:
        high_batch_size = batch_size - 1

    self.report_benchmark(extras={'max_batch_size': low_batch_size})
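The same doubling-then-bisect search can be written independently of BenchmarkCNN; a minimal sketch, with a hypothetical oracle fits(batch_size) standing in for "ran 10 batches without a ResourceExhaustedError":

# Sketch: find the largest batch size accepted by `fits`, mirroring
# _binary_search_batch_size above.
def max_batch_size(fits, init_batch_size=128):
  low, high, batch = 0, None, init_batch_size
  while high is None:               # doubling phase: bracket the maximum
    if fits(batch):
      low, batch = batch, batch * 2
    else:
      high = batch - 1
  while low < high:                 # bisection phase
    mid = (low + high + 1) // 2
    if fits(mid):
      low = mid
    else:
      high = mid - 1
  return low

assert max_batch_size(lambda b: b <= 300) == 300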

class Resnet50BenchmarksInferenceCpu(BenchmarkBase):
  """Benchmarks for ResNet50 inference on CPU."""

  def _shared_params(self):
    """Returns shared parameters for all ResNet50 benchmarks."""
    return BenchmarkBase._shared_params(self)._replace(
        num_gpus=1,
        model='resnet50',
        num_warmup_batches=5,
        num_batches=50,
        distortions=False,
        forward_only=True,
        device='cpu',
        data_format='NHWC',
        num_intra_threads=0)

  def benchmark_synth_forward_batch1(self):
    """Tests 1 CPU batch size 1."""
    params = self._shared_params()._replace(batch_size=1)
    self._run_benchmark(params)

  def benchmark_synth_forward_batch16(self):
    """Tests 1 CPU batch size 16."""
    params = self._shared_params()._replace(batch_size=16)
    self._run_benchmark(params)


class FrozenResnet50BenchmarksInferenceCpu(Resnet50BenchmarksInferenceCpu):
  """Benchmarks for ResNet50 frozen graph inference on CPU."""

  def _shared_params(self):
    return super(FrozenResnet50BenchmarksInferenceCpu,
                 self)._shared_params()._replace(freeze_when_forward_only=True)


class Resnet50BenchmarksInference(BenchmarkBase):
  """Benchmarks for ResNet50 inference."""

  def _shared_params(self):
    """Returns shared parameters for all ResNet50 benchmarks."""
    return BenchmarkBase._shared_params(self)._replace(
        num_gpus=1, model='resnet50', distortions=False, forward_only=True)

  def benchmark_synth_forward_batch128(self):
    """Tests 1 GPU batch size 128."""
    params = self._shared_params()._replace(batch_size=128)
    self._run_benchmark(params)

  def benchmark_fp16_synth_forward_batch128(self):
    """Tests 1 GPU batch size 128 FP16."""
    params = self._shared_params()._replace(batch_size=128, use_fp16=True)
    self._run_benchmark(params)

  def benchmark_fp16_synth_forward_batch16(self):
    """Tests 1 GPU batch size 16 FP16."""
    params = self._shared_params()._replace(batch_size=16, use_fp16=True)
    self._run_benchmark(params)

  def benchmark_xla_synth_forward_batch128(self):
    """Tests 1 GPU batch size 128 with XLA."""
    params = self._shared_params()._replace(batch_size=128, xla=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_synth_forward_batch128(self):
    """Tests 1 GPU batch size 128 FP16 with XLA."""
    params = self._shared_params()._replace(
        batch_size=128, use_fp16=True, xla=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_synth_forward_batch16(self):
    """Tests 1 GPU batch size 16 FP16 with XLA."""
    params = self._shared_params()._replace(
        batch_size=16, use_fp16=True, xla=True)
    self._run_benchmark(params)


class FrozenResnet50BenchmarksInference(Resnet50BenchmarksInference):
  """Benchmarks for ResNet50 frozen graph inference."""

  def _shared_params(self):
    return super(FrozenResnet50BenchmarksInference,
                 self)._shared_params()._replace(freeze_when_forward_only=True)

  def benchmark_trt_synth_forward_batch128(self):
    """Tests 1 GPU batch size 128."""
    params = self._shared_params()._replace(batch_size=128, trt_mode='FP32')
    self._run_benchmark(params)

  # TODO(laigd): enable fp16 tests for TF-TRT, it's currently not supported yet.
  # def benchmark_fp16_trt_synth_forward_batch128(self):
  #   """Tests 1 GPU batch size 128 FP16."""
  #   params = self._shared_params()._replace(
  #       batch_size=128, use_fp16=True, trt_mode='FP16')
  #   self._run_benchmark(params)
  #
  # Test with batch size 16 to compare with native TF GPU implementation and
  # XLA.
  # def benchmark_fp16_trt_synth_forward_batch16(self):
  #   """Tests 1 GPU batch size 16 FP16."""
  #   params = self._shared_params()._replace(
  #       batch_size=16, use_fp16=True, trt_mode='FP16')
  #   self._run_benchmark(params)


class Resnet50Benchmarks(BenchmarkBase):
  """Benchmark resnet50 configurations."""

  def _shared_params(self):
    """Returns shared parameters for all ResNet50 benchmarks."""
    return BenchmarkBase._shared_params(self)._replace(
        model='resnet50', batch_size=128, distortions=False,
        optimizer='momentum')

  def _shared_params_fp16(self):
    """Returns shared parameters for all ResNet50 FP16 benchmarks."""
    return BenchmarkBase._shared_params(self)._replace(
        model='resnet50',
        batch_size=256,
        distortions=False,
        use_fp16=True,
        optimizer='momentum',
        loss_type_to_report='base_loss',
        compute_lr_on_cpu=True,
        single_l2_loss_op=True)

  def benchmark_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data."""
    params = self._shared_params()._replace(num_gpus=1)
    self._run_benchmark(params)

  def benchmark_fake_1gpu_gpuparams(self):
    """Tests 1 gpu with fake data."""
    params = self._shared_params()._replace(
        num_gpus=1, data_dir=self.fake_data_dir, data_name='imagenet')
    self._run_benchmark(params)

  def benchmark_synth_1gpu_max_batch_size(self):
    """Finds largest batch size that can be run with 1 gpu using synth data."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server')
    self._binary_search_batch_size(params, init_batch_size=128)

  def benchmark_synth_4gpu_gpureplicated(self):
    """Tests 4 gpu with synthetic data with parameters replicated."""
    params = self._shared_params()._replace(
        num_gpus=4,
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2)
    self._run_benchmark(params)

  def benchmark_synth_8gpu_gpureplicated(self):
    """Tests 8 gpu with synthetic data with parameters replicated."""
    params = self._shared_params()._replace(
        num_gpus=8,
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2)
    self._run_benchmark(params)

  def benchmark_fake_8gpu_gpureplicated(self):
    """Tests 8 gpu with fake data with parameters replicated."""
    params = self._shared_params()._replace(
        num_gpus=8,
        data_dir=self.fake_data_dir,
        data_name='imagenet',
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2)
    self._run_benchmark(params)

  # FP16 mixed-precision tests.
  def benchmark_fp16_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data with parameters on the gpu."""
    params = self._shared_params_fp16()._replace(
        num_gpus=1, variable_update='parameter_server')
    self._run_benchmark(params)

  def benchmark_fp16_synth_1gpu_gpuparams_batch128(self):
    """Tests 1 gpu with synthetic data with parameters on the gpu."""
    params = self._shared_params_fp16()._replace(
        num_gpus=1, batch_size=128, variable_update='parameter_server')
    self._run_benchmark(params)

  def benchmark_fp16_synth_4gpu_gpureplicated(self):
    """Tests 4 gpu with synthetic data with nccl and all_reduce."""
    params = self._shared_params_fp16()._replace(
        num_gpus=4,
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2)
    self._run_benchmark(params)

  def benchmark_fp16_synth_8gpu_gpureplicated(self):
    """Tests 8 gpu with synthetic with nccl and all_reduce."""
    params = self._shared_params_fp16()._replace(
        num_gpus=8,
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2)
    self._run_benchmark(params)

  def benchmark_fp16_fake_1gpu_gpuparams(self):
    """Tests 1 gpus with fake data."""
    params = self._shared_params_fp16()._replace(
        num_gpus=1,
        data_dir=self.fake_data_dir,
        data_name='imagenet',
        variable_update='parameter_server')
    self._run_benchmark(params)

  def benchmark_fp16_fake_8gpu_gpureplicated(self):
    """Tests 8 gpus with fake data."""
    params = self._shared_params_fp16()._replace(
        num_gpus=8,
        data_dir=self.fake_data_dir,
        data_name='imagenet',
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2)
    self._run_benchmark(params)

  def benchmark_fp16_fakedistort_8gpu_gpureplicated(self):
    """Tests 8 gpus with fake distorted data."""
    params = self._shared_params_fp16()._replace(
        num_gpus=8,
        data_dir=self.fake_data_dir,
        data_name='imagenet',
        distortions=True,
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2)
    self._run_benchmark(params)

  # XLA versions of Resnet50 tests only for single GPU.
  def benchmark_xla_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data with XLA."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server', xla=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with fp16, synthetic data with XLA."""
    params = self._shared_params_fp16()._replace(
        num_gpus=1, variable_update='parameter_server', xla=True)
    self._run_benchmark(params)

  # Test does not run as part of continuous testing on guitar.
  def benchmark_ng_xla_batch64_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with XLA, synth data, and batch 64."""
    params = self._shared_params()._replace(
        num_gpus=1, batch_size=64, variable_update='parameter_server',
        xla=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_batch64_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with fp16, XLA, synth data, and batch 64."""
    params = self._shared_params_fp16()._replace(
        num_gpus=1, batch_size=64, variable_update='parameter_server',
        xla=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_batch128_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with fp16, XLA, and synth data."""
    params = self._shared_params_fp16()._replace(
        num_gpus=1, batch_size=128, variable_update='parameter_server',
        xla=True)
    self._run_benchmark(params)

  def benchmark_xla_synth_1gpu_max_batch_size(self):
    """Finds largest batch that can be run with XLA, 1 gpu, and synth data."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server', xla=True)
    self._binary_search_batch_size(params, init_batch_size=128)

  def benchmark_xla_real_1gpu_gpuparams(self):
    """Tests 1 gpu with real data with XLA."""
    params = self._shared_params()._replace(
        num_gpus=1, data_dir=self.data_dir,
        variable_update='parameter_server', xla=True)
    self._run_benchmark(params)

  # Test does not run as part of continuous testing.
  def benchmark_xla_fake_1gpu_gpuparams(self):
    """Tests 1 gpu with fake data with XLA."""
    params = self._shared_params()._replace(
        num_gpus=1, data_dir=self.fake_data_dir, data_name='imagenet',
        variable_update='parameter_server', xla=True)
    self._run_benchmark(params)

  # Test does not run as part of continuous testing.
  def benchmark_xla_fakedistort_1gpu_gpuparams(self):
    """Tests 1 gpu with fake distorted data with XLA."""
    params = self._shared_params()._replace(
        num_gpus=1, data_dir=self.fake_data_dir, data_name='imagenet',
        distortions=True, variable_update='parameter_server', xla=True)
    self._run_benchmark(params)


class Resnet50v15Benchmarks(BenchmarkBase):
  """Benchmark various ResNet50V1.5 configurations.

  ResNetV1.5 differs from V1 in that stride 2 is used in the first 3x3
  convolution of each block instead of the first 1x1 convolution.
  """

  def _shared_params_fp16(self):
    """Returns shared parameters for all ResNet50v1.5 FP16 benchmarks."""
    return BenchmarkBase._shared_params(self)._replace(
        model='resnet50_v1.5',
        batch_size=256,
        distortions=False,
        use_fp16=True,
        optimizer='momentum',
        loss_type_to_report='base_loss',
        compute_lr_on_cpu=True,
        single_l2_loss_op=True)

  def benchmark_fp16_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data."""
    params = self._shared_params_fp16()._replace(num_gpus=1)
    self._run_benchmark(params)

  def benchmark_fp16_batch256_synth_8gpu_gpuparams(self):
    """Tests 8 gpus with synthetic data at batch 256."""
    params = self._shared_params_fp16()._replace(num_gpus=8)
    self._run_benchmark(params)

  def benchmark_fp16_batch128_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data at batch 128 (useful for small GPUs)."""
    params = self._shared_params_fp16()._replace(num_gpus=1, batch_size=128)
    self._run_benchmark(params)

  def benchmark_fp16_fake_1gpu_gpuparams(self):
    """Tests 1 gpu with fake data."""
    params = self._shared_params_fp16()._replace(
        num_gpus=1, data_dir=self.fake_data_dir, data_name='imagenet')
    self._run_benchmark(params)

  def benchmark_fp16_synth_8gpu_gpureplicated(self):
    """Tests 8 gpu with synthetic data with parameters replicated."""
    params = self._shared_params_fp16()._replace(
        num_gpus=8,
        num_batches=200,
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2)
    self._run_benchmark(params)

  def benchmark_fp16_fake_8gpu_gpureplicated(self):
    """Tests 8 gpu with fake data with parameters replicated."""
    params = self._shared_params_fp16()._replace(
        num_gpus=8,
        num_batches=200,
        data_dir=self.fake_data_dir,
        data_name='imagenet',
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2)
    self._run_benchmark(params)

  # XLA versions of Resnet50v1.5 tests.
  def benchmark_fp16_xla_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with fp16, synthetic data with XLA."""
    params = self._shared_params_fp16()._replace(num_gpus=1, xla=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_batch128_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with fp16, batch128, synthetic data with XLA."""
    params = self._shared_params_fp16()._replace(
        num_gpus=1, batch_size=128, xla=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_compile_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data."""
    params = self._shared_params_fp16()._replace(num_gpus=1, xla_compile=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_compile_batch128_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data at batch 128 (useful for small GPUs)."""
    params = self._shared_params_fp16()._replace(
        num_gpus=1, num_batches=200, batch_size=128, xla_compile=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_batch256_synth_8gpu_gpuparams(self):
    """Tests 8 gpu with synthetic data and xla autojit."""
    params = self._shared_params_fp16()._replace(
        num_gpus=8, num_batches=200, batch_size=256, xla=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_compile_fake_1gpu_gpuparams(self):
    """Tests 1 gpu with fake data."""
    params = self._shared_params_fp16()._replace(
        num_gpus=1, data_dir=self.fake_data_dir, data_name='imagenet',
        xla_compile=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_compile_synth_8gpu_gpureplicated(self):
    """Tests 8 gpu with synthetic data with parameters replicated."""
    params = self._shared_params_fp16()._replace(
        num_gpus=8,
        num_batches=200,
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2,
        xla_compile=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_synth_8gpu_gpureplicated(self):
    """Tests 8 gpu with synthetic data with parameters replicated."""
    params = self._shared_params_fp16()._replace(
        num_gpus=8,
        num_batches=200,
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2,
        xla=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_compile_fake_8gpu_gpureplicated(self):
    """Tests 8 gpu with fake data with parameters replicated."""
    params = self._shared_params_fp16()._replace(
        num_gpus=8,
        num_batches=200,
        data_dir=self.fake_data_dir,
        data_name='imagenet',
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2,
        xla_compile=True)
    self._run_benchmark(params)


class Vgg16Benchmarks(BenchmarkBase):
  """Benchmark various vgg16 configurations."""

  def _shared_params(self):
    """Returns shared parameters for all vgg16 benchmarks."""
    return BenchmarkBase._shared_params(self)._replace(
        model='vgg16', batch_size=128, distortions=False)

  def benchmark_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data with parameters on gpu."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server')
    self._run_benchmark(params)

  def benchmark_fp16_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data with parameters on gpu."""
    params = self._shared_params()._replace(
        num_gpus=1, use_fp16=True, variable_update='parameter_server')
    self._run_benchmark(params)

  def benchmark_synth_8gpu_gpureplicated(self):
    """Tests 8 gpu with synthetic data with parameters replicated."""
    params = self._shared_params()._replace(
        num_gpus=8,
        all_reduce_spec='nccl',
        variable_update='replicated',
        compact_gradient_transfer=False,
        gradient_repacking=2)
    self._run_benchmark(params)

  # XLA versions of VGG16 tests only for single GPU.
  def benchmark_xla_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data and XLA."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server', xla=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with fp16, synthetic data, and XLA."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server', xla=True,
        use_fp16=True)
    self._run_benchmark(params)

  # Test does not run as part of continuous testing.
  def benchmark_xla_fake_1gpu_gpuparams(self):
    """Tests 1 gpu with fake data and XLA."""
    params = self._shared_params()._replace(
        num_gpus=1, data_dir=self.fake_data_dir, data_name='imagenet',
        variable_update='parameter_server', xla=True)
    self._run_benchmark(params)

  def benchmark_xla_real_1gpu_gpuparams(self):
    """Tests 1 gpu with real data and XLA."""
    params = self._shared_params()._replace(
        num_gpus=1, data_dir=self.data_dir,
        variable_update='parameter_server', xla=True)
    self._run_benchmark(params)


class TrivialBenchmarks(BenchmarkBase):
  """Benchmarks for trivial model.

  The purpose of these tests is to verify the upper bound for the input
  pipeline. Fake data creates an upper bound on the input pipeline throughput.
  """

  def _shared_params(self):
    """Returns shared parameters for all trivial benchmarks."""
    return BenchmarkBase._shared_params(self)._replace(
        model='trivial',
        num_gpus=8,
        distortions=False,
        variable_update='independent',
        data_dir=self.fake_data_dir)

  def benchmark_fake_64batch(self):
    params = self._shared_params()._replace(batch_size=64,
                                            data_name='imagenet')
    self._run_benchmark(params)

  def benchmark_fake_128batch(self):
    params = self._shared_params()._replace(batch_size=128,
                                            data_name='imagenet')
    self._run_benchmark(params)

  def benchmark_fake_256batch(self):
    params = self._shared_params()._replace(batch_size=256,
                                            data_name='imagenet')
    self._run_benchmark(params)

  def benchmark_fakedistort_128batch(self):
    params = self._shared_params()._replace(
        batch_size=128, data_name='imagenet', distortions=True)
    self._run_benchmark(params)


class AlexnetBenchmarks(BenchmarkBase):
  """Benchmarks for alexnet."""

  def _shared_params(self):
    """Returns shared parameters for all alexnet benchmarks."""
    return BenchmarkBase._shared_params(self)._replace(
        model='alexnet', batch_size=512, distortions=False)

  def benchmark_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data with parameters on gpu."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server')
    self._run_benchmark(params)

  def benchmark_fp16_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data with parameters on gpu."""
    params = self._shared_params()._replace(
        num_gpus=1, use_fp16=True, variable_update='parameter_server')
    self._run_benchmark(params)

  def benchmark_synth_8gpu_gpureplicated(self):
    """Tests 8 gpus with synthetic data with parameters replicated."""
    params = self._shared_params()._replace(
        num_gpus=8,
        variable_update='replicated',
        all_reduce_spec='nccl',
        compact_gradient_transfer=False,
        gradient_repacking=2)
    self._run_benchmark(params)

  def benchmark_fake_8gpu_gpureplicated(self):
    """Tests 8 gpus with fake data with parameters replicated."""
    params = self._shared_params()._replace(
        num_gpus=8,
        data_dir=self.fake_data_dir,
        data_name='imagenet',
        variable_update='replicated',
        all_reduce_spec='nccl',
        compact_gradient_transfer=False,
        gradient_repacking=2)
    self._run_benchmark(params)

  # XLA Benchmark tests for AlexNet.
  def benchmark_xla_synth_1gpuparams(self):
    """Tests 1 gpu with synthetic data and XLA."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server', xla=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with fp16, synthetic data and XLA."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server', xla=True,
        use_fp16=True)
    self._run_benchmark(params)

  # Test does not run as part of continuous testing.
  def benchmark_xla_fake_1gpuparams(self):
    """Tests 1 gpu with fake data and XLA."""
    params = self._shared_params()._replace(
        num_gpus=1, data_dir=self.fake_data_dir, data_name='imagenet',
        variable_update='parameter_server', xla=True)
    self._run_benchmark(params)

  def benchmark_xla_real_1gpuparams(self):
    """Tests 1 gpu with real data and XLA."""
    params = self._shared_params()._replace(
        num_gpus=1, data_dir=self.data_dir,
        variable_update='parameter_server', xla=True)
    self._run_benchmark(params)


class InceptionV3Benchmarks(BenchmarkBase):
  """Benchmark for InceptionV3."""

  def _shared_params(self):
    """Returns shared parameters for all InceptionV3 benchmarks."""
    return BenchmarkBase._shared_params(self)._replace(
        model='inception3', batch_size=64, distortions=False)

  def benchmark_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server')
    self._run_benchmark(params)

  def benchmark_fp16_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data."""
    params = self._shared_params()._replace(
        num_gpus=1, use_fp16=True, variable_update='parameter_server')
    self._run_benchmark(params)

  def benchmark_synth_1gpu_max_batch_size(self):
    """Finds largest batch size that can be run with 1 gpu using synth data."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server')
    self._binary_search_batch_size(params, init_batch_size=128)

  def benchmark_xla_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic and XLA."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server', xla=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with fp16, XLA and synthetic data."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server', xla=True,
        use_fp16=True)
    self._run_benchmark(params)

  def benchmark_xla_synth_1gpu_max_batch_size(self):
    """Finds largest batch that can be run with XLA, 1 gpu, and synth data."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server', xla=True)
    self._binary_search_batch_size(params, init_batch_size=128)

  # Test does not run as part of continuous testing.
  def benchmark_xla_fake_1gpu_gpuparams(self):
    """Tests 1 gpu with fake data with XLA."""
    params = self._shared_params()._replace(
        num_gpus=1, data_dir=self.fake_data_dir, data_name='imagenet',
        variable_update='parameter_server', xla=True)
    self._run_benchmark(params)

  def benchmark_xla_real_1gpu_gpuparams(self):
    """Tests 1 gpu with real data with XLA."""
    params = self._shared_params()._replace(
        num_gpus=1, data_dir=self.data_dir,
        variable_update='parameter_server', xla=True)
    self._run_benchmark(params)


class NcfBenchmarks(BenchmarkBase):
  """Benchmarks for neural collaborative filtering."""

  def _shared_params(self):
    return BenchmarkBase._shared_params(self)._replace(
        model='ncf', batch_size=64 * 1024, num_gpus=1, num_warmup_batches=1)

  def benchmark_synth_1gpu_gpuparams(self):
    params = self._shared_params()._replace(
        variable_update='parameter_server')
    self._run_benchmark(params)

  def benchmark_fp16_synth_1gpu_gpuparams(self):
    params = self._shared_params()._replace(
        variable_update='parameter_server', use_fp16=True)
    self._run_benchmark(params)

  def benchmark_xla_synth_1gpu_gpuparams(self):
    params = self._shared_params()._replace(
        variable_update='parameter_server', xla=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_synth_1gpu_gpuparams(self):
    params = self._shared_params()._replace(
        variable_update='parameter_server', xla=True, use_fp16=True)
    self._run_benchmark(params)

  def benchmark_xla_compile_synth_1gpu_gpuparams(self):
    params = self._shared_params()._replace(
        variable_update='parameter_server', xla_compile=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_compile_synth_1gpu_gpuparams(self):
    params = self._shared_params()._replace(
        variable_update='parameter_server', xla_compile=True, use_fp16=True)
    self._run_benchmark(params)


class DeepSpeech2Benchmarks(BenchmarkBase):
  """Benchmarks for DeepSpeech2 model."""

  def _shared_params(self):
    return BenchmarkBase._shared_params(self)._replace(
        model='deepspeech2', batch_size=32, num_gpus=1,
        data_name='librispeech')

  def benchmark_synth_1gpu_gpuparams(self):
    params = self._shared_params()._replace(
        variable_update='parameter_server')
    self._run_benchmark(params)

  def benchmark_xla_synth_1gpu_gpuparams(self):
    params = self._shared_params()._replace(
        variable_update='parameter_server', xla=True)
    self._run_benchmark(params)

  def benchmark_xla_compile_synth_1gpu_gpuparams(self):
    params = self._shared_params()._replace(
        variable_update='parameter_server', xla_compile=True)
    self._run_benchmark(params)


class SsdBenchmarks(BenchmarkBase):
  """Benchmarks for SSD model."""

  def _cudnn_version(self):
    if sys.platform == 'win32':
      return None
    lib = ctypes.cdll.LoadLibrary(None)
    if hasattr(lib, 'cudnnGetErrorString'):
      version = lib.cudnnGetVersion()
      return version
    return None

  def _shared_params(self):
    cudnn_version = self._cudnn_version()
    if cudnn_version is None or cudnn_version < 7300:
      raise RuntimeError(
          'Needs at least cuDNN 7.3 to work with fp16 (b/112048183). '
          'Build with --define=use_experimental_cudnn=1')
    return BenchmarkBase._shared_params(self)._replace(
        # TODO(b/115672206): Replace backbone model and data dir with
        # replicated placer location for better performance.
        backbone_model_path=platforms_util.get_ssd_backborn_model_file(),  # pylint: disable=line-too-long
        data_dir=platforms_util.get_ssd_backboard_data_dir(),
        batch_size=128,
        data_name='coco',
        model='ssd300',
        num_batches=10,
        num_warmup_batches=1,
        num_gpus=1,
        optimizer='momentum',
        momentum=0.9,
        weight_decay=5e-4,
        loss_type_to_report='base_loss',
        single_l2_loss_op=True,
        compute_lr_on_cpu=True,
    )

  def benchmark_xla_compile_real_1gpu_gpuparams(self):
    params = self._shared_params()._replace(
        num_gpus=1,
        xla_compile=True,
    )
    self._run_benchmark(params)

  def benchmark_real_1gpu_gpuparams(self):
    params = self._shared_params()._replace(num_gpus=1,)
    self._run_benchmark(params)

  def benchmark_xla_compile_fp16_real_1gpu_gpuparams(self):
    params = self._shared_params()._replace(
        num_gpus=1, xla_compile=True, use_fp16=True)
    self._run_benchmark(params)

  def benchmark_fp16_real_1gpu_gpuparams(self):
    params = self._shared_params()._replace(num_gpus=1, use_fp16=True)
    self._run_benchmark(params)

  def benchmark_xla_compile_real_8gpu_gpuparams(self):
    params = self._shared_params()._replace(
        num_gpus=8,
        xla_compile=True,
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2,
        num_batches=50,
    )
    self._run_benchmark(params)

  def benchmark_real_8gpu_gpuparams(self):
    params = self._shared_params()._replace(
        num_gpus=8,
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2,
        num_batches=50,
    )
    self._run_benchmark(params)

  def benchmark_xla_compile_fp16_real_8gpu_gpuparams(self):
    params = self._shared_params()._replace(
        num_gpus=8,
        xla_compile=True,
        use_fp16=True,
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2,
        num_batches=50,
    )
    self._run_benchmark(params)

  def benchmark_fp16_real_8gpu_gpuparams(self):
    params = self._shared_params()._replace(
        num_gpus=8,
        use_fp16=True,
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2,
        num_batches=50,
    )
    self._run_benchmark(params)


if __name__ == '__main__':
  tf.disable_v2_behavior()
  tf.test.main()
TensorFlow2x/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/mlperf.py
deleted 100644 → 0
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains functions related to MLPerf compliance.
MLPerf requires submissions to log what the benchmark does, in order to verify
that the benchmark meets the MLPerf requirements. This module contains a global
object `logger` that is used by other files to log what tf_cnn_benchmarks does
for compliance.
By default, `logger` does nothing, as the MLPerf compliance logs are verbose and
unnecessary if one is not concerned about MLPerf compliance. The logger can be
enabled by using the `mlperf_logger` context manager.
To enable the logger with `mlperf_logger`, the MLPerf compliance library at
https://github.com/mlperf/training/tree/master/compliance is required. If
the logger is not enabled, the library is not needed.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from collections import namedtuple
import contextlib
import os
import sys

import tensorflow.compat.v1 as tf

# pylint: disable=g-import-not-at-top
try:
  # Not all users have the MLPerf compliance library, so we don't want to
  # unconditionally crash if these imports fail.
  from mlperf_compliance import mlperf_log
  from mlperf_compliance import resnet_log_helper
  from mlperf_compliance import tags
  import_successful = True
except ImportError:
  # The logger cannot be enabled in this case since the MLPerf library isn't
  # found. We return empty strings from the `tags` attribute so that
  # the benchmark can still run without crashing. These empty tags are passed
  # to an instance of `NullMlPerfLogger`, which does not log anything and
  # ignores the tag values.
  class _Tags(object):
    def __getattr__(self, item):
      return ''
  tags = _Tags()
  import_successful = False
# pylint: enable=g-import-not-at-top


_ModelInfo = namedtuple('_ModelInfo',
                        ['print_fn', 'tag_set', 'mlperf_model_name'])


_MLPERF_LOG_PREFIX = ':::MLPv0.5.0'


class MlPerfLogger(object):
  """Logs various aspects about a benchmark run for MLPerf compliance."""

  def __init__(self, model):
    self._root_dir = os.path.split(os.path.abspath(__file__))[0]
    mlperf_log.ROOT_DIR_RESNET = self._root_dir
    mlperf_log.ROOT_DIR_SSD = self._root_dir
    self.model = model
    model_to_info = {
        'resnet50_v1.5': _ModelInfo(mlperf_log.resnet_print,
                                    mlperf_log.RESNET_TAG_SET, tags.RESNET),
        'ssd300': _ModelInfo(mlperf_log.ssd_print, mlperf_log.SSD_TAG_SET,
                             tags.SSD)
    }
    try:
      self._log_fn, self.tag_set, self.mlperf_model_name = (
          model_to_info[model])
    except KeyError:
      raise ValueError('--ml_perf_compliance_logging is only compatible when '
                       '--model is one of the following: ' +
                       ', '.join(model_to_info.keys()))

  def log(self, key, value=None, stack_offset=2):
    if key in self.tag_set:
      self._log_fn(key, value, stack_offset)
    else:
      print('Ignoring MLPerf logging item key=%s, value=%s for model %s' %
            (key, value, self.model))

  def log_deferred_tensor_value(self, key, tensor_value, global_step,
                                stack_offset=2, every_n=1):
    """Logs the value of a tensor when the graph is run."""
    caller = '(%s)' % mlperf_log.get_caller(stack_offset, self._root_dir)
    def create_print_op():
      return tf.print(_MLPERF_LOG_PREFIX, self.mlperf_model_name,
                      tf.timestamp(), caller, key,
                      ': { "deferred": true, "value":', tensor_value, '}',
                      output_stream=sys.stdout)
    maybe_print = tf.cond(tf.equal(global_step % every_n, 0), create_print_op,
                          tf.no_op)
    with tf.control_dependencies([maybe_print]):
      return tf.identity(tensor_value)

  def log_max_pool(self, input_tensor, output_tensor):
    if self.model == 'resnet50_v1.5':
      resnet_log_helper.log_max_pool(input_tensor, output_tensor)

  def log_begin_block(self, input_tensor, block_type):
    if self.model == 'resnet50_v1.5':
      resnet_log_helper.log_begin_block(input_tensor, block_type)

  def log_end_block(self, output_tensor):
    if self.model == 'resnet50_v1.5':
      resnet_log_helper.log_end_block(output_tensor)

  def log_projection(self, input_tensor, output_tensor):
    if self.model == 'resnet50_v1.5':
      resnet_log_helper.log_projection(input_tensor, output_tensor)

  def log_conv2d(self, input_tensor, output_tensor, stride_height,
                 stride_width, filters, initializer, use_bias):
    """Log a conv2d call."""
    if self.model == 'resnet50_v1.5':
      assert stride_height == stride_width, (
          '--ml_perf_compliance_logging does not support convolutions where '
          'the stride height is not equal to the stride width. '
          'stride_height=%d, stride_width=%d' % (stride_height, stride_width))
      if isinstance(initializer, tf.truncated_normal_initializer) or (
          isinstance(initializer, tf.variance_scaling_initializer) and
          initializer.distribution == 'truncated_normal'):
        initializer = tags.TRUNCATED_NORMAL
      elif (isinstance(initializer, tf.glorot_uniform_initializer) or
            initializer is None):
        initializer = 'glorot_uniform'
      resnet_log_helper.log_conv2d(input_tensor, output_tensor, stride_width,
                                   filters, initializer, use_bias)

  def log_batch_norm(self, input_tensor, output_tensor, momentum, epsilon,
                     center, scale, training):
    if self.model == 'resnet50_v1.5':
      resnet_log_helper.log_batch_norm(input_tensor, output_tensor, momentum,
                                       epsilon, center, scale, training)

  def log_train_epochs(self, num_epochs):
    """Logs all the TRAIN_EPOCHs log lines."""
    num_epochs_int = int(num_epochs)
    for i in range(num_epochs_int):
      # MLPerf allows us to print all the train epochs at once instead of
      # printing them as we do them.
      self.log(key=mlperf_log.TRAIN_EPOCH, value=i, stack_offset=3)
    if num_epochs_int != num_epochs:
      value = (str(num_epochs_int) +
               ', but this epoch only has {}% of the examples of a normal '
               'epoch'.format(100 * (num_epochs - num_epochs_int)))
      self.log(key=mlperf_log.TRAIN_EPOCH, value=value, stack_offset=3)

  def log_input_resize_aspect_preserving(self, height, width, scale_factor):
    assert height == width, (
        '--ml_perf_compliance_logging does not support models with nonsquare '
        'images. Cannot process image with height=%d and width=%d' %
        (height, width))
    self.log(key=tags.INPUT_RESIZE_ASPECT_PRESERVING,
             value={'min': int(height * scale_factor)})

  def log_eval_epoch(self, tag, global_step, batch_size, stack_offset=2):
    if self.model == 'resnet50_v1.5':
      self.log(key=tag, stack_offset=stack_offset + 1)
    elif self.model == 'ssd300':
      epoch = int(global_step * batch_size / 118287)
      self.log(key=tag, value=epoch, stack_offset=stack_offset + 1)

  def log_eval_accuracy(self, accuracy, global_step, batch_size,
                        examples_per_epoch, stack_offset=2):
    """Logs eval accuracy."""
    epoch = int(global_step * batch_size / examples_per_epoch)
    eval_accuracy = {'epoch': epoch, 'value': accuracy}
    eval_iteration_accuracy = {'iteration': global_step, 'value': accuracy}
    self.log(key=tags.EVAL_ACCURACY, value=eval_accuracy,
             stack_offset=stack_offset + 1)
    self.log(key=tags.EVAL_ITERATION_ACCURACY,
             value=eval_iteration_accuracy, stack_offset=stack_offset + 1)


def _empty_fn(*args, **kwargs):
  del args, kwargs


class NullMlPerfLogger(object):
  """A version of `MlPerfLogger` that does not log anything.

  This class has the same interface as `MlPerfLogger`, but does not actually
  do anything. This is used when logging is disabled, which is the default
  behavior.
  """

  def __getattr__(self, item):
    return _empty_fn

  def log_deferred_tensor_value(self, key, tensor_value, *args, **kwargs):
    del key, args, kwargs
    return tensor_value


# A global singleton logger. By default, it's the null logger but can be
# switched to an MlPerfLogger with `mlperf_logger()`.
logger = NullMlPerfLogger()


@contextlib.contextmanager
def mlperf_logger(use_mlperf_logger, model):
  """Optionally enable the mlperf logger.

  If `use_mlperf_logger` is True, sets the `logger` global variable to an
  instance of MlPerfLogger that will print logs for MLPerf compliance. If
  `use_mlperf_logger` is False, does nothing.

  Args:
    use_mlperf_logger: If True, enables the mlperf logger. If False, this
      function does nothing.
    model: The model that will be logged. Required, because different models
      must log different things for MLPerf compliance.

  Yields:
    Nothing.

  Raises:
    ImportError: If `use_mlperf_logger` is True but the MLPerf compliance
      library cannot be imported
  """
  global logger
  if use_mlperf_logger:
    if not import_successful:
      raise ImportError('Failed to import MLPerf compliance library, which is '
                        'required when --ml_perf_compliance_logging is '
                        'specified. Clone this repo and add this directory '
                        'https://github.com/mlperf/training/tree/master/'
                        'compliance to the PYTHONPATH environmental variable.')
    logger_ = MlPerfLogger(model)
    old_logger = logger
    try:
      logger = logger_
      yield
    finally:
      logger = old_logger
  else:
    yield
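A usage sketch for the module above, assuming the mlperf_compliance library is on the PYTHONPATH and this file is importable as `mlperf`: inside the context manager the global `logger` is swapped to an `MlPerfLogger`, and outside it stays the `NullMlPerfLogger`, so the same logging call becomes a no-op.

import mlperf

with mlperf.mlperf_logger(use_mlperf_logger=True, model='resnet50_v1.5'):
    # Inside the context: printed as an MLPerf compliance log line.
    mlperf.logger.log(key=mlperf.tags.RUN_START)
# Outside the context: silently ignored by NullMlPerfLogger.
mlperf.logger.log(key=mlperf.tags.RUN_START)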
TensorFlow2x/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/mlperf_test.py
deleted
100644 → 0
View file @
e286da17
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains tests related to MLPerf.
Note this test only passes if the MLPerf compliance library is installed.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from collections import Counter
import logging
import re

import six
import tensorflow.compat.v1 as tf

import benchmark_cnn
import datasets
import mlperf
import test_util
from models import model
from mlperf_compliance import mlperf_log


class _MlPerfTestModel(model.CNNModel):
  """A model to test the MLPerf compliance logging on."""

  def __init__(self):
    super(_MlPerfTestModel, self).__init__(
        'mlperf_test_model', image_size=224, batch_size=2, learning_rate=1)

  def add_inference(self, cnn):
    assert cnn.top_layer.shape[1:] == (3, 224, 224)
    cnn.conv(1, 1, 1, 1, 1, use_batch_norm=True)
    cnn.mpool(1, 1, 1, 1, num_channels_in=1)
    cnn.reshape([-1, 224 * 224])
    cnn.affine(1, activation=None)

    # Assert that the batch norm variables are filtered out for L2 loss.
    variables = tf.global_variables() + tf.local_variables()
    assert len(variables) > len(self.filter_l2_loss_vars(variables))


class MlPerfComplianceTest(tf.test.TestCase):
  """Tests the MLPerf compliance logs.

  This serves as a quick check that we probably didn't break the compliance
  logging. It is not meant to be as comprehensive as the official MLPerf
  compliance checker will be.
  """

  def setUp(self):
    super(MlPerfComplianceTest, self).setUp()
    benchmark_cnn.setup(benchmark_cnn.make_params())

  # Map between regex and the number of times we expect to see that regex in
  # the logs. Entries commented out with the comment FIXME indicate that
  # tf_cnn_benchmarks currently fails compliance in that regard, and needs to
  # be fixed to be MLPerf compliant.
  EXPECTED_LOG_REGEXES = {
      # Preprocessing tags
      mlperf.tags.INPUT_ORDER: 2,  # 1 for training, 1 for eval
      # We pass --tf_random_seed=9876 in the test.
      r'%s: 9876' % mlperf.tags.RUN_SET_RANDOM_SEED: 2,
      # The Numpy random seed is hardcoded to 4321.
      r'%s: 4321' % mlperf.tags.RUN_SET_RANDOM_SEED: 2,
      r'%s: %d' % (mlperf.tags.PREPROC_NUM_TRAIN_EXAMPLES,
                   datasets.IMAGENET_NUM_TRAIN_IMAGES): 1,
      r'%s: %d' % (mlperf.tags.PREPROC_NUM_EVAL_EXAMPLES,
                   datasets.IMAGENET_NUM_VAL_IMAGES): 1,
      mlperf.tags.PREPROC_NUM_EVAL_EXAMPLES + '.*': 1,
      mlperf.tags.INPUT_DISTORTED_CROP_MIN_OBJ_COV + '.*': 1,
      mlperf.tags.INPUT_DISTORTED_CROP_RATIO_RANGE + '.*': 1,
      mlperf.tags.INPUT_DISTORTED_CROP_AREA_RANGE + '.*': 1,
      mlperf.tags.INPUT_DISTORTED_CROP_MAX_ATTEMPTS + '.*': 1,
      mlperf.tags.INPUT_RANDOM_FLIP + '.*': 1,
      r'%s: \[224, 224\].*' % mlperf.tags.INPUT_CENTRAL_CROP: 1,
      r'%s: \[123.68, 116.78, 103.94\].*' %
      mlperf.tags.INPUT_MEAN_SUBTRACTION: 2,
      r'%s: {"min": 256}.*' % mlperf.tags.INPUT_RESIZE_ASPECT_PRESERVING: 1,
      # 1 for training, 1 for eval
      r'%s: \[224, 224\].*' % mlperf.tags.INPUT_RESIZE: 2,

      # Resnet model tags
      mlperf.tags.MODEL_HP_BATCH_NORM + '.*': 2,
      # 2 for training, 2 for eval. Although there's only 1 conv2d, each
      # conv2d produces 2 logs.
      mlperf.tags.MODEL_HP_CONV2D_FIXED_PADDING + '.*': 4,
      mlperf.tags.MODEL_HP_RELU + '.*': 2,
      mlperf.tags.MODEL_HP_INITIAL_MAX_POOL + '.*': 2,
      mlperf.tags.MODEL_HP_DENSE + '.*': 4,
      # Note that tags our test model does not emit, like
      # MODEL_HP_SHORTCUT_ADD, are omitted here.
      r'%s: "categorical_cross_entropy".*' % mlperf.tags.MODEL_HP_LOSS_FN: 1,

      # 1 for training, 2 because the _MlPerfTestModel calls this when
      # building the model for both training and eval
      r'%s: true' % mlperf.tags.MODEL_EXCLUDE_BN_FROM_L2: 3,

      r'%s: 0.5.*' % mlperf.tags.MODEL_L2_REGULARIZATION: 1,

      # Note we do not handle OPT_LR, since that is printed to stderr using
      # tf.Print, which we cannot easily intercept.

      # Other tags
      '%s: "%s"' % (mlperf.tags.OPT_NAME, mlperf.tags.SGD_WITH_MOMENTUM): 1,
      '%s: 0.5' % mlperf.tags.OPT_MOMENTUM: 1,
      mlperf.tags.RUN_START: 1,
      '%s: 2' % mlperf.tags.INPUT_BATCH_SIZE: 1,
      mlperf.tags.TRAIN_LOOP: 1,
      mlperf.tags.TRAIN_EPOCH + '.*': 1,
      '%s: 2' % mlperf.tags.INPUT_SIZE: 2,
      mlperf.tags.EVAL_START: 2,
      mlperf.tags.EVAL_STOP: 2,
      '%s: 6' % mlperf.tags.EVAL_SIZE: 2,
      mlperf.tags.EVAL_ACCURACY + '.*': 2,
      '%s: 2.0' % mlperf.tags.EVAL_TARGET: 2,
      mlperf.tags.RUN_STOP + '.*': 1,
      mlperf.tags.RUN_FINAL: 1
  }
  EXPECTED_LOG_REGEXES = Counter({re.compile(k): v
                                  for k, v in EXPECTED_LOG_REGEXES.items()})

  def testMlPerfCompliance(self):
    string_io = six.StringIO()
    handler = logging.StreamHandler(string_io)
    data_dir = test_util.create_black_and_white_images()
    try:
      mlperf_log.LOGGER.addHandler(handler)
      params = benchmark_cnn.make_params(data_dir=data_dir,
                                         data_name='imagenet',
                                         batch_size=2,
                                         num_warmup_batches=0,
                                         num_batches=2,
                                         num_eval_batches=3,
                                         eval_during_training_every_n_steps=1,
                                         distortions=False,
                                         weight_decay=0.5,
                                         optimizer='momentum',
                                         momentum=0.5,
                                         stop_at_top_1_accuracy=2.0,
                                         tf_random_seed=9876,
                                         ml_perf=True)
      with mlperf.mlperf_logger(use_mlperf_logger=True,
                                model='resnet50_v1.5'):
        bench_cnn = benchmark_cnn.BenchmarkCNN(params,
                                               model=_MlPerfTestModel())
        bench_cnn.run()
      logs = string_io.getvalue().splitlines()
      log_regexes = Counter()
      for log in logs:
        for regex in self.EXPECTED_LOG_REGEXES:
          if regex.search(log):
            log_regexes[regex] += 1
      if log_regexes != self.EXPECTED_LOG_REGEXES:
        diff_counter = Counter(log_regexes)
        diff_counter.subtract(self.EXPECTED_LOG_REGEXES)
        differences = []
        for regex in (k for k in diff_counter.keys() if diff_counter[k]):
          found_count = log_regexes[regex]
          expected_count = self.EXPECTED_LOG_REGEXES[regex]
          differences.append('  For regex %s: Found %d lines matching but '
                             'expected to find %d' %
                             (regex.pattern, found_count, expected_count))
        raise AssertionError('Logs did not match expected logs. '
                             'Differences:\n%s' % '\n'.join(differences))
    finally:
      mlperf_log.LOGGER.removeHandler(handler)


if __name__ == '__main__':
  tf.disable_v2_behavior()
  tf.test.main()
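The verification technique in `testMlPerfCompliance` above is generic: count how many log lines match each expected regex, then diff two `Counter`s to surface only the mismatches. A standalone sketch with made-up log lines and expectations:

import re
from collections import Counter

expected = Counter({re.compile(r'run_start'): 1,
                    re.compile(r'eval_accuracy.*'): 2})
logs = [':::MLPv0.5.0 run_start',
        ':::MLPv0.5.0 eval_accuracy {"value": 0.1}']

found = Counter()
for line in logs:
    for regex in expected:
        if regex.search(line):
            found[regex] += 1

# subtract() keeps zero and negative counts, so nonzero entries are exactly
# the regexes whose observed count differs from the expected count.
diff = Counter(found)
diff.subtract(expected)
mismatches = [r.pattern for r in diff if diff[r]]
print(mismatches)  # ['eval_accuracy.*'] -- one match found, two expected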
TensorFlow2x/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/models/alexnet_model.py
deleted
100644 → 0
View file @
e286da17
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Alexnet model configuration.
References:
Krizhevsky, Alex, Ilya Sutskever, and Geoffrey E. Hinton
ImageNet Classification with Deep Convolutional Neural Networks
Advances in Neural Information Processing Systems. 2012
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow.compat.v1 as tf

from models import model


class AlexnetModel(model.CNNModel):
  """Alexnet cnn model."""

  def __init__(self, params=None):
    super(AlexnetModel, self).__init__(
        'alexnet', 224 + 3, 512, 0.005, params=params)

  def add_inference(self, cnn):
    # Note: VALID requires padding the images by 3 in width and height
    cnn.conv(64, 11, 11, 4, 4, 'VALID')
    cnn.mpool(3, 3, 2, 2)
    cnn.conv(192, 5, 5)
    cnn.mpool(3, 3, 2, 2)
    cnn.conv(384, 3, 3)
    cnn.conv(384, 3, 3)
    cnn.conv(256, 3, 3)
    cnn.mpool(3, 3, 2, 2)
    cnn.reshape([-1, 256 * 6 * 6])
    cnn.affine(4096)
    cnn.dropout()
    cnn.affine(4096)
    cnn.dropout()


class AlexnetCifar10Model(model.CNNModel):
  """Alexnet cnn model for cifar datasets.

  The model architecture follows the one defined in the tensorflow tutorial
  model.
  Reference model: tensorflow/models/tutorials/image/cifar10/cifar10.py
  Paper: http://www.cs.toronto.edu/~kriz/learning-features-2009-TR.pdf
  """

  def __init__(self, params=None):
    super(AlexnetCifar10Model, self).__init__(
        'alexnet', 32, 128, 0.1, params=params)

  def add_inference(self, cnn):
    cnn.conv(64, 5, 5, 1, 1, 'SAME', stddev=5e-2)
    cnn.mpool(3, 3, 2, 2, mode='SAME')
    cnn.lrn(depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
    cnn.conv(64, 5, 5, 1, 1, 'SAME', bias=0.1, stddev=5e-2)
    cnn.lrn(depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
    cnn.mpool(3, 3, 2, 2, mode='SAME')
    shape = cnn.top_layer.get_shape().as_list()
    flat_dim = shape[1] * shape[2] * shape[3]
    cnn.reshape([-1, flat_dim])
    cnn.affine(384, stddev=0.04, bias=0.1)
    cnn.affine(192, stddev=0.04, bias=0.1)

  def get_learning_rate(self, global_step, batch_size):
    num_examples_per_epoch = 50000
    num_epochs_per_decay = 100
    decay_steps = (num_epochs_per_decay * num_examples_per_epoch //
                   batch_size)
    decay_factor = 0.1
    return tf.train.exponential_decay(
        self.learning_rate, global_step, decay_steps, decay_factor,
        staircase=True)
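A quick check of the schedule that `get_learning_rate` above defines: with 50000 examples per epoch, 100 epochs per decay and batch_size=128, the decay interval is 39062 steps, and with staircase=True the rate drops by 10x at each interval boundary. The helper below is a sketch of the equivalent arithmetic, not the TF op itself:

def staircase_exponential_decay(lr, global_step, decay_steps, decay_factor):
    # Same arithmetic as tf.train.exponential_decay(..., staircase=True).
    return lr * decay_factor ** (global_step // decay_steps)

decay_steps = 100 * 50000 // 128  # 39062
print(staircase_exponential_decay(0.1, 0, decay_steps, 0.1))          # 0.1
print(staircase_exponential_decay(0.1, 39062, decay_steps, 0.1))      # 0.01
print(staircase_exponential_decay(0.1, 2 * 39062, decay_steps, 0.1))  # ~0.001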
TensorFlow2x/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/models/densenet_model.py
deleted
100644 → 0
View file @
e286da17
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Densenet model configuration.
References:
"Densely Connected Convolutional Networks": https://arxiv.org/pdf/1608.06993
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
from six.moves import xrange  # pylint: disable=redefined-builtin
import tensorflow.compat.v1 as tf

from models import model as model_lib


class DensenetCifar10Model(model_lib.CNNModel):
  """Densenet cnn network configuration."""

  def __init__(self, model, layer_counts, growth_rate, params=None):
    self.growth_rate = growth_rate
    super(DensenetCifar10Model, self).__init__(
        model, 32, 64, 0.1, layer_counts=layer_counts, params=params)
    self.batch_norm_config = {'decay': 0.9, 'epsilon': 1e-5, 'scale': True}

  def dense_block(self, cnn, growth_rate):
    input_layer = cnn.top_layer
    c = cnn.batch_norm(input_layer, **self.batch_norm_config)
    c = tf.nn.relu(c)
    c = cnn.conv(growth_rate, 3, 3, 1, 1,
                 stddev=np.sqrt(2.0 / 9 / growth_rate),
                 activation=None, input_layer=c)
    channel_index = 3 if cnn.channel_pos == 'channels_last' else 1
    cnn.top_layer = tf.concat([input_layer, c], channel_index)
    cnn.top_size += growth_rate

  def transition_layer(self, cnn):
    in_size = cnn.top_size
    cnn.batch_norm(**self.batch_norm_config)
    cnn.top_layer = tf.nn.relu(cnn.top_layer)
    cnn.conv(in_size, 1, 1, 1, 1, stddev=np.sqrt(2.0 / 9 / in_size))
    cnn.apool(2, 2, 2, 2)

  def add_inference(self, cnn):
    if self.layer_counts is None:
      raise ValueError('Layer counts not specified for %s' %
                       self.get_model())
    if self.growth_rate is None:
      raise ValueError('Growth rate not specified for %s' % self.get_model())

    cnn.conv(16, 3, 3, 1, 1, activation=None)
    # Block 1
    for _ in xrange(self.layer_counts[0]):
      self.dense_block(cnn, self.growth_rate)
    self.transition_layer(cnn)
    # Block 2
    for _ in xrange(self.layer_counts[1]):
      self.dense_block(cnn, self.growth_rate)
    self.transition_layer(cnn)
    # Block 3
    for _ in xrange(self.layer_counts[2]):
      self.dense_block(cnn, self.growth_rate)
    cnn.batch_norm(**self.batch_norm_config)
    cnn.top_layer = tf.nn.relu(cnn.top_layer)
    channel_index = 3 if cnn.channel_pos == 'channels_last' else 1
    cnn.top_size = cnn.top_layer.get_shape().as_list()[channel_index]
    cnn.spatial_mean()

  def get_learning_rate(self, global_step, batch_size):
    num_batches_per_epoch = 50000 // batch_size
    boundaries = num_batches_per_epoch * np.array([150, 225, 300],
                                                  dtype=np.int64)
    boundaries = [x for x in boundaries]
    values = [0.1, 0.01, 0.001, 0.0001]
    return tf.train.piecewise_constant(global_step, boundaries, values)


def create_densenet40_k12_model():
  return DensenetCifar10Model('densenet40_k12', (12, 12, 12), 12)


def create_densenet100_k12_model():
  return DensenetCifar10Model('densenet100_k12', (32, 32, 32), 12)


def create_densenet100_k24_model():
  return DensenetCifar10Model('densenet100_k24', (32, 32, 32), 24)
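A shape-only sketch of the dense-block concatenation above: every `dense_block` call appends `growth_rate` channels to the running feature map, so after n blocks the channel count is the initial count plus n times the growth rate. For densenet40_k12, one stage of 12 blocks on top of the initial 16-channel conv gives 160 channels entering the first transition layer:

initial_channels = 16
growth_rate = 12
channels = initial_channels
for _ in range(12):  # one dense-block stage of densenet40_k12
    channels += growth_rate  # tf.concat adds growth_rate channels per block
print(channels)  # 160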
TensorFlow2x/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/models/experimental/deepspeech.py
deleted
100644 → 0
View file @
e286da17
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""DeepSpeech2 model configuration.
References:
https://arxiv.org/abs/1512.02595
Deep Speech 2: End-to-End Speech Recognition in English and Mandarin
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import itertools

import numpy as np
from six.moves import xrange  # pylint: disable=redefined-builtin
import tensorflow.compat.v1 as tf

import constants
from cnn_util import log_fn
from models import model as model_lib
from tensorflow.python.ops import variables  # pylint: disable=g-direct-tensorflow-import


class DeepSpeechDecoder(object):
  """Greedy decoder implementation for Deep Speech model."""

  def __init__(self, labels, blank_index=28):
    """Decoder initialization.

    Args:
      labels: a string specifying the speech labels for the decoder to use.
      blank_index: an integer specifying index for the blank character.
        Defaults to 28.
    """
    self.labels = labels
    self.blank_index = blank_index
    self.int_to_char = dict([(i, c) for (i, c) in enumerate(labels)])

  def convert_to_string(self, sequence):
    """Convert a sequence of indexes into corresponding string."""
    return ''.join([self.int_to_char[i] for i in sequence])

  def wer(self, decode, target):
    """Computes the Word Error Rate (WER).

    WER is defined as the edit distance between the two provided sentences
    after tokenizing to words.

    Args:
      decode: string of the decoded output.
      target: a string for the ground truth label.

    Returns:
      A float number for the WER of the current decode-target pair.
    """
    try:
      from nltk.metrics import distance  # pylint: disable=g-import-not-at-top
    except ImportError as e:
      if 'nltk.metrics' not in e.message:
        raise
      raise ImportError('To use the experimental deepspeech model, you must '
                        'pip install -U nltk')

    # Map each word to a new char.
    words = set(decode.split() + target.split())
    word2char = dict(zip(words, range(len(words))))

    new_decode = [chr(word2char[w]) for w in decode.split()]
    new_target = [chr(word2char[w]) for w in target.split()]

    return distance.edit_distance(''.join(new_decode), ''.join(new_target))

  def cer(self, decode, target):
    """Computes the Character Error Rate (CER).

    CER is defined as the edit distance between the two given strings.

    Args:
      decode: a string of the decoded output.
      target: a string for the ground truth label.

    Returns:
      A float number denoting the CER for the current sentence pair.
    """
    try:
      from nltk.metrics import distance  # pylint: disable=g-import-not-at-top
    except ImportError as e:
      if 'nltk.metrics' not in e.message:
        raise
      raise ImportError('To use the experimental deepspeech model, you must '
                        'pip install -U nltk')

    return distance.edit_distance(decode, target)

  def decode(self, char_indexes):
    """Decode the best guess from logits using greedy algorithm."""
    # Merge repeated chars.
    merge = [k for k, _ in itertools.groupby(char_indexes)]
    # Remove the blank index in the decoded sequence.
    merge_remove_blank = []
    for k in merge:
      if k != self.blank_index:
        merge_remove_blank.append(k)
    return self.convert_to_string(merge_remove_blank)

  def decode_logits(self, logits):
    """Decode the best guess from logits using greedy algorithm."""
    # Choose the class with maximum probability.
    best = list(np.argmax(logits, axis=1))
    return self.decode(best)


class DeepSpeech2Model(model_lib.Model):
  """Define DeepSpeech2 model."""

  # Supported rnn cells.
  SUPPORTED_RNNS = {
      'lstm': tf.nn.rnn_cell.BasicLSTMCell,
      'rnn': tf.nn.rnn_cell.RNNCell,
      'gru': tf.nn.rnn_cell.GRUCell,
  }

  # Parameters for batch normalization.
  BATCH_NORM_EPSILON = 1e-5
  BATCH_NORM_DECAY = 0.997

  # Filters of convolution layer
  CONV_FILTERS = 32

  def __init__(self,
               num_rnn_layers=5,
               rnn_type='lstm',
               is_bidirectional=True,
               rnn_hidden_size=800,
               use_bias=True,
               params=None):
    """Initialize DeepSpeech2 model.

    Args:
      num_rnn_layers: an integer, the number of rnn layers (default: 5).
      rnn_type: a string, one of the supported rnn cells: gru, rnn or lstm.
      is_bidirectional: a boolean to indicate if the rnn layer is
        bidirectional.
      rnn_hidden_size: an integer for the number of hidden units in the RNN
        cell.
      use_bias: a boolean specifying whether to use a bias in the last fc
        layer.
      params: the params from BenchmarkCNN.
    """
    super(DeepSpeech2Model, self).__init__(
        'deepspeech2',
        batch_size=128,
        learning_rate=0.0005,
        fp16_loss_scale=128,
        params=params)
    self.num_rnn_layers = num_rnn_layers
    self.rnn_type = rnn_type
    self.is_bidirectional = is_bidirectional
    self.rnn_hidden_size = rnn_hidden_size
    self.use_bias = use_bias

    self.num_feature_bins = 161
    self.max_time_steps = 3494
    self.max_label_length = 576

  def _batch_norm(self, inputs, training):
    """Batch normalization layer.

    Note that the momentum to use will affect validation accuracy over time.
    Batch norm has different behaviors during training/evaluation. With a
    large momentum, the model takes longer to get a near-accurate estimation
    of the moving mean/variance over the entire training dataset, which means
    we need more iterations to see good evaluation results. If the training
    data is evenly distributed over the feature space, we can also try
    setting a smaller momentum (such as 0.1) to get good evaluation result
    sooner.

    Args:
      inputs: input data for batch norm layer.
      training: a boolean to indicate if it is in training stage.

    Returns:
      tensor output from batch norm layer.
    """
    return tf.layers.batch_normalization(
        inputs=inputs,
        momentum=DeepSpeech2Model.BATCH_NORM_DECAY,
        epsilon=DeepSpeech2Model.BATCH_NORM_EPSILON,
        fused=True,
        training=training)

  def _conv_bn_layer(self, inputs, padding, filters, kernel_size, strides,
                     layer_id, training):
    """Defines 2D convolutional + batch normalization layer.

    Args:
      inputs: input data for convolution layer.
      padding: padding to be applied before convolution layer.
      filters: an integer, number of output filters in the convolution.
      kernel_size: a tuple specifying the height and width of the 2D
        convolution window.
      strides: a tuple specifying the stride length of the convolution.
      layer_id: an integer specifying the layer index.
      training: a boolean to indicate which stage we are in (training/eval).

    Returns:
      tensor output from the current layer.
    """
    # Perform symmetric padding on the feature dimension of time_step.
    # This step is required to avoid issues when RNN output sequence is
    # shorter than the label length.
    inputs = tf.pad(
        inputs,
        [[0, 0], [padding[0], padding[0]], [padding[1], padding[1]], [0, 0]])
    inputs = tf.layers.conv2d(
        inputs=inputs,
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        padding='valid',
        use_bias=False,
        activation=tf.nn.relu6,
        name='cnn_{}'.format(layer_id))
    return self._batch_norm(inputs, training)

  def _rnn_layer(self, inputs, rnn_cell, rnn_hidden_size, layer_id,
                 use_batch_norm, is_bidirectional, training):
    """Defines a batch normalization + rnn layer.

    Args:
      inputs: input tensors for the current layer.
      rnn_cell: RNN cell instance to use.
      rnn_hidden_size: an integer for the dimensionality of the rnn output
        space.
      layer_id: an integer for the index of current layer.
      use_batch_norm: a boolean specifying whether to perform batch
        normalization on input states.
      is_bidirectional: a boolean specifying whether the rnn layer is
        bi-directional.
      training: a boolean to indicate which stage we are in (training/eval).

    Returns:
      tensor output for the current layer.
    """
    if use_batch_norm:
      inputs = self._batch_norm(inputs, training)

    # Construct forward/backward RNN cells.
    fw_cell = rnn_cell(num_units=rnn_hidden_size,
                       name='rnn_fw_{}'.format(layer_id))
    if is_bidirectional:
      bw_cell = rnn_cell(num_units=rnn_hidden_size,
                         name='rnn_bw_{}'.format(layer_id))
      outputs, _ = tf.nn.bidirectional_dynamic_rnn(
          cell_fw=fw_cell,
          cell_bw=bw_cell,
          inputs=inputs,
          dtype=tf.float32,
          swap_memory=True)
      rnn_outputs = tf.concat(outputs, -1)
    else:
      rnn_outputs = tf.nn.dynamic_rnn(
          fw_cell, inputs, dtype=tf.float32, swap_memory=True)
    return rnn_outputs

  def get_input_data_types(self, subset):
    """Returns the list of data types of the inputs."""
    del subset  # Same data types for both train and validation subsets.
    return [self.data_type, tf.int32, tf.int32, tf.int32]

  def get_input_shapes(self, subset):
    """Returns the list of shapes of the padded inputs."""
    del subset  # Same shapes for both train and validation subsets
    return [
        [self.batch_size, self.max_time_steps, self.num_feature_bins, 1],
        [self.batch_size, self.max_label_length],
        [self.batch_size, 1],
        [self.batch_size, 1],
    ]

  def get_synthetic_inputs(self, input_name, nclass):
    inputs = tf.random_uniform(self.get_input_shapes('train')[0],
                               dtype=self.get_input_data_types('train')[0])
    inputs = variables.VariableV1(inputs, trainable=False,
                                  collections=[tf.GraphKeys.LOCAL_VARIABLES],
                                  name=input_name)
    labels = tf.convert_to_tensor(
        np.random.randint(28, size=[self.batch_size, self.max_label_length]))
    input_lengths = tf.convert_to_tensor(
        [self.max_time_steps] * self.batch_size)
    label_lengths = tf.convert_to_tensor(
        [self.max_label_length] * self.batch_size)
    return [inputs, labels, input_lengths, label_lengths]

  # TODO(laigd): support fp16.
  # TODO(laigd): support multiple gpus.
  def build_network(self, inputs, phase_train=True, nclass=29):
    """Builds the forward pass of the deepspeech2 model.

    Args:
      inputs: The input list of the model.
      phase_train: True during training. False during evaluation.
      nclass: Number of classes that the input spectrogram can belong to.

    Returns:
      A BuildNetworkResult which contains the logits and model-specific extra
        information.
    """
    inputs = inputs[0]  # Get the spectrogram feature.

    # Two cnn layers.
    inputs = self._conv_bn_layer(
        inputs, padding=(20, 5), filters=DeepSpeech2Model.CONV_FILTERS,
        kernel_size=(41, 11), strides=(2, 2), layer_id=1,
        training=phase_train)

    inputs = self._conv_bn_layer(
        inputs, padding=(10, 5), filters=DeepSpeech2Model.CONV_FILTERS,
        kernel_size=(21, 11), strides=(2, 1), layer_id=2,
        training=phase_train)

    # output of conv_layer2 with the shape of
    # [batch_size (N), times (T), features (F), channels (C)].
    # Convert the conv output to rnn input.
    # batch_size = tf.shape(inputs)[0]
    feat_size = inputs.get_shape().as_list()[2]
    inputs = tf.reshape(
        inputs,
        [self.batch_size, -1, feat_size * DeepSpeech2Model.CONV_FILTERS])

    # RNN layers.
    rnn_cell = DeepSpeech2Model.SUPPORTED_RNNS[self.rnn_type]
    for layer_counter in xrange(self.num_rnn_layers):
      # No batch normalization on the first layer.
      use_batch_norm = (layer_counter != 0)
      inputs = self._rnn_layer(
          inputs, rnn_cell, self.rnn_hidden_size, layer_counter + 1,
          use_batch_norm, self.is_bidirectional, phase_train)

    # FC layer with batch norm.
    inputs = self._batch_norm(inputs, phase_train)
    logits = tf.layers.dense(inputs, nclass, use_bias=self.use_bias)

    return model_lib.BuildNetworkResult(logits=logits, extra_info=None)

  def loss_function(self, inputs, build_network_result):
    """Computes the ctc loss for the current batch of predictions.

    Args:
      inputs: the input list of the model.
      build_network_result: a BuildNetworkResult returned by build_network().

    Returns:
      The loss tensor of the model.
    """
    logits = build_network_result.logits
    actual_time_steps = inputs[2]
    probs = tf.nn.softmax(logits)
    ctc_time_steps = tf.shape(probs)[1]
    ctc_input_length = tf.to_float(
        tf.multiply(actual_time_steps, ctc_time_steps))
    ctc_input_length = tf.to_int32(
        tf.floordiv(ctc_input_length, tf.to_float(self.max_time_steps)))

    label_length = inputs[3]
    label_length = tf.to_int32(tf.squeeze(label_length))
    ctc_input_length = tf.to_int32(tf.squeeze(ctc_input_length))

    labels = inputs[1]
    sparse_labels = tf.to_int32(
        tf.keras.backend.ctc_label_dense_to_sparse(labels, label_length))
    y_pred = tf.log(
        tf.transpose(probs, perm=[1, 0, 2]) + tf.keras.backend.epsilon())

    losses = tf.expand_dims(
        tf.nn.ctc_loss(
            labels=sparse_labels,
            inputs=y_pred,
            sequence_length=ctc_input_length,
            ignore_longer_outputs_than_inputs=True),
        axis=1)
    loss = tf.reduce_mean(losses)
    return loss

  PROBABILITY_TENSOR = 'deepspeech2_prob'
  LABEL_TENSOR = 'deepspeech2_label'

  def accuracy_function(self, inputs, logits):
    """Returns the ops to evaluate the model performance."""
    # Get probabilities of each predicted class
    probs = tf.nn.softmax(logits)
    assert probs.shape.as_list()[0] == self.batch_size
    return {
        (constants.UNREDUCED_ACCURACY_OP_PREFIX + self.PROBABILITY_TENSOR):
            probs,
        (constants.UNREDUCED_ACCURACY_OP_PREFIX + self.LABEL_TENSOR):
            inputs[1],
    }

  def postprocess(self, results):
    """Postprocess results returned from model in Python."""
    probs = results[self.PROBABILITY_TENSOR]

    total_wer, total_cer = 0, 0
    speech_labels = " abcdefghijklmnopqrstuvwxyz'-"
    greedy_decoder = DeepSpeechDecoder(speech_labels)

    # Evaluate the performance using WER (Word Error Rate) and CER (Character
    # Error Rate) as metrics.
    targets = results[self.LABEL_TENSOR]  # The ground truth transcript
    for i in range(self.batch_size):
      # Decode string.
      predicted_str = greedy_decoder.decode_logits(probs[i])
      expected_str = greedy_decoder.decode(targets[i])
      # Compute CER.
      total_cer += (greedy_decoder.cer(predicted_str, expected_str) /
                    len(expected_str))
      # Compute WER.
      total_wer += (greedy_decoder.wer(predicted_str, expected_str) /
                    len(expected_str.split()))

    # Get mean value
    total_cer /= self.batch_size
    total_wer /= self.batch_size

    log_fn('total CER: {:f}; total WER: {:f}; total example: {:d}.'.format(
        total_cer, total_wer, self.batch_size))
    # TODO(laigd): get rid of top_N_accuracy bindings in benchmark_cnn.py
    return {'top_1_accuracy': 0., 'top_5_accuracy': 0.}
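`DeepSpeechDecoder.decode` above implements greedy CTC decoding: collapse runs of repeated indices, then drop the blank index. A pure-Python sketch using the same 29-character label string and blank index as the model:

import itertools

labels = " abcdefghijklmnopqrstuvwxyz'-"
blank_index = 28

def greedy_ctc_decode(char_indexes):
    merged = [k for k, _ in itertools.groupby(char_indexes)]  # collapse repeats
    return ''.join(labels[i] for i in merged if i != blank_index)

# 'h h e <blank> l l <blank> l o' decodes to "hello"; the blank between the
# two l-runs is what keeps the double letter from being collapsed away.
print(greedy_ctc_decode([8, 8, 5, 28, 12, 12, 28, 12, 15]))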
TensorFlow2x/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/models/experimental/official_ncf_model.py
deleted
100644 → 0
View file @
e286da17
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Wrap the official recommendation model in a tf_cnn_benchmarks Model.
This allows the recommendation NCF model to be used in tf_cnn_benchmarks.
Currently, the implementation is fairly hacky, because tf_cnn_benchmarks is
intended to be used only with CNNs.
Only synthetic data with 1 GPU is currently supported.
"""
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow.compat.v1 as tf

from models import model

# Obtained by running the official NCF model with the following command:
# python ncf_main.py --dataset ml-20m
# and printing the number of users and items here:
# https://github.com/tensorflow/models/blob/d089975f630a8a01be63e45ef08a31be14bb96b4/official/recommendation/data_preprocessing.py#L68
_NUM_USERS_20M = 138493
_NUM_ITEMS_20M = 26744


# TODO(reedwm): Support multi-GPU. Currently keras layers, which this model
# uses, ignore variable_scopes, which we rely on for multi-GPU support.
# TODO(reedwm): Support real data. This will require a significant refactor.
# TODO(reedwm): All-reduce IndexedSlices more effectively.
# TODO(reedwm): Support the 1M variant of this model.


class NcfModel(model.Model):
  r"""A model.Model wrapper around the official NCF recommendation model.

  To do an NCF run with synthetic data that roughly matches what the official
  model does, run:

  python tf_cnn_benchmarks.py --optimizer=adam --model=ncf --batch_size=65536 \
      --weight_decay=0 --sparse_to_dense_grads
  """

  def __init__(self, params=None):
    super(NcfModel, self).__init__(
        'official_ncf', batch_size=2048, learning_rate=0.0005,
        fp16_loss_scale=128, params=params)
    if self.fp16_vars:
      raise ValueError('NCF model only supports float32 variables for now.')

  def build_network(self, inputs, phase_train=True, nclass=1001):
    try:
      from official.recommendation import neumf_model  # pylint: disable=g-import-not-at-top
    except ImportError as e:
      if 'neumf_model' not in e.message:
        raise
      raise ImportError('To use the experimental NCF model, you must clone '
                        'the repo https://github.com/tensorflow/models and '
                        'add tensorflow/models to the PYTHONPATH.')
    del nclass

    users, items, _ = inputs
    params = {
        'num_users': _NUM_USERS_20M,
        'num_items': _NUM_ITEMS_20M,
        'model_layers': (256, 256, 128, 64),
        'mf_dim': 64,
        'mf_regularization': 0,
        'mlp_reg_layers': (0, 0, 0, 0),
        'use_tpu': False
    }
    user_input = tf.keras.layers.Input(tensor=users, name='user_input')
    item_input = tf.keras.layers.Input(tensor=items, name='item_input')
    if self.data_type == tf.float32:
      keras_model = neumf_model.construct_model(user_input, item_input,
                                                params)
      logits = keras_model.output
    else:
      assert self.data_type == tf.float16
      old_floatx = tf.keras.backend.floatx()
      try:
        tf.keras.backend.set_floatx('float16')
        # We cannot rely on the variable_scope's fp16 custom getter here,
        # because the NCF model uses keras layers, which ignore variable
        # scopes. So we use a variable_creator_scope instead.
        with tf.variable_creator_scope(_fp16_variable_creator):
          keras_model = neumf_model.construct_model(user_input, item_input,
                                                    params)
        logits = tf.cast(keras_model.output, tf.float32)
      finally:
        tf.keras.backend.set_floatx(old_floatx)
    return model.BuildNetworkResult(logits=logits, extra_info=None)

  def loss_function(self, inputs, build_network_result):
    logits = build_network_result.logits

    # Softmax with the first column of ones is equivalent to sigmoid.
    # TODO(reedwm): Actually, the first column should be zeros to be
    # equivalent to sigmoid. But, we keep it at ones to match the official
    # models.
    logits = tf.concat([tf.ones(logits.shape, dtype=logits.dtype), logits],
                       axis=1)

    return tf.losses.sparse_softmax_cross_entropy(labels=inputs[2],
                                                  logits=logits)

  def get_synthetic_inputs(self, input_name, nclass):
    """Returns the ops to generate synthetic inputs and labels."""
    def users_init_val():
      return tf.random_uniform((self.batch_size, 1), minval=0,
                               maxval=_NUM_USERS_20M, dtype=tf.int32)
    users = tf.Variable(users_init_val, dtype=tf.int32, trainable=False,
                        collections=[tf.GraphKeys.LOCAL_VARIABLES],
                        name='synthetic_users')
    def items_init_val():
      return tf.random_uniform((self.batch_size, 1), minval=0,
                               maxval=_NUM_ITEMS_20M, dtype=tf.int32)
    items = tf.Variable(items_init_val, dtype=tf.int32, trainable=False,
                        collections=[tf.GraphKeys.LOCAL_VARIABLES],
                        name='synthetic_items')
    def labels_init_val():
      return tf.random_uniform((self.batch_size,), minval=0, maxval=2,
                               dtype=tf.int32)
    labels = tf.Variable(labels_init_val, dtype=tf.int32, trainable=False,
                         collections=[tf.GraphKeys.LOCAL_VARIABLES],
                         name='synthetic_labels')
    return [users, items, labels]

  def get_input_shapes(self, subset):
    del subset
    return [[self.batch_size, 1], [self.batch_size, 1], [self.batch_size]]

  def get_input_data_types(self, subset):
    del subset
    return [tf.int32, tf.int32, tf.int32]


def _fp16_variable_creator(next_creator, **kwargs):
  """Variable creator to create variables in fp32 and cast them to fp16."""
  dtype = kwargs.get('dtype', None)
  initial_value = kwargs.get('initial_value', None)
  if dtype is None:
    if initial_value is not None and not callable(initial_value):
      dtype = initial_value.dtype
  if dtype == tf.float16:
    if callable(initial_value):
      new_initial_value = lambda: tf.cast(initial_value(), tf.float32)
    else:
      new_initial_value = tf.cast(initial_value, tf.float32)
    kwargs['dtype'] = tf.float32
    kwargs['initial_value'] = new_initial_value
    var = next_creator(**kwargs)
    return tf.cast(var, dtype=tf.float16)
  else:
    return next_creator(**kwargs)
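The TODO in `loss_function` above can be checked numerically: prepending a zeros column makes the two-class softmax match sigmoid exactly, while the ones column actually used shifts the logit by 1, i.e. softmax([1, z])[1] == sigmoid(z - 1). A NumPy sketch of both identities:

import numpy as np

def softmax(x):
    e = np.exp(x - x.max())  # subtract max for numerical stability
    return e / e.sum()

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

z = 0.7
print(softmax(np.array([0.0, z]))[1], sigmoid(z))        # equal
print(softmax(np.array([1.0, z]))[1], sigmoid(z - 1.0))  # equal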
TensorFlow2x/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/models/googlenet_model.py
deleted
100644 → 0
View file @
e286da17
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Googlenet model configuration.
References:
Szegedy, Christian, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed,
Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, and Andrew Rabinovich
Going deeper with convolutions
arXiv preprint arXiv:1409.4842 (2014)
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from models import model


class GooglenetModel(model.CNNModel):
  """GoogLeNet."""

  def __init__(self, params=None):
    super(GooglenetModel, self).__init__(
        'googlenet', 224, 32, 0.005, params=params)

  def add_inference(self, cnn):
    def inception_v1(cnn, k, l, m, n, p, q):
      cols = [[('conv', k, 1, 1)],
              [('conv', l, 1, 1), ('conv', m, 3, 3)],
              [('conv', n, 1, 1), ('conv', p, 5, 5)],
              [('mpool', 3, 3, 1, 1, 'SAME'), ('conv', q, 1, 1)]]
      cnn.inception_module('incept_v1', cols)

    cnn.conv(64, 7, 7, 2, 2)
    cnn.mpool(3, 3, 2, 2, mode='SAME')
    cnn.conv(64, 1, 1)
    cnn.conv(192, 3, 3)
    cnn.mpool(3, 3, 2, 2, mode='SAME')
    inception_v1(cnn, 64, 96, 128, 16, 32, 32)
    inception_v1(cnn, 128, 128, 192, 32, 96, 64)
    cnn.mpool(3, 3, 2, 2, mode='SAME')
    inception_v1(cnn, 192, 96, 208, 16, 48, 64)
    inception_v1(cnn, 160, 112, 224, 24, 64, 64)
    inception_v1(cnn, 128, 128, 256, 24, 64, 64)
    inception_v1(cnn, 112, 144, 288, 32, 64, 64)
    inception_v1(cnn, 256, 160, 320, 32, 128, 128)
    cnn.mpool(3, 3, 2, 2, mode='SAME')
    inception_v1(cnn, 256, 160, 320, 32, 128, 128)
    inception_v1(cnn, 384, 192, 384, 48, 128, 128)
    cnn.apool(7, 7, 1, 1, mode='VALID')
    cnn.reshape([-1, 1024])
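A shape-only sketch for the `inception_v1` helper above: the four parallel columns end in convs with k, m, p and q output channels respectively, and `inception_module` concatenates them along the channel axis, so the module's output depth is k + m + p + q (l and n only size the 1x1 bottlenecks inside the columns). The two calls below reproduce the classic GoogLeNet 3a/3b depths:

def inception_v1_output_channels(k, l, m, n, p, q):
    del l, n  # bottleneck widths do not appear in the concatenated output
    return k + m + p + q

print(inception_v1_output_channels(64, 96, 128, 16, 32, 32))    # 256
print(inception_v1_output_channels(128, 128, 192, 32, 96, 64))  # 480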
TensorFlow2x/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/models/inception_model.py
deleted
100644 → 0
View file @
e286da17
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Inception model configuration.
Includes multiple models: inception3, inception4, inception-resnet2.
References:
Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi
Inception-v4, Inception-ResNet and the Impact of Residual Connections on
Learning
Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed,
Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, Andrew Rabinovich
Going Deeper with Convolutions
http://arxiv.org/pdf/1409.4842v1.pdf
Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens,
Zbigniew Wojna
Rethinking the Inception Architecture for Computer Vision
arXiv preprint arXiv:1512.00567 (2015)
Inception v3 model: http://arxiv.org/abs/1512.00567
Inception v4 and Resnet V2 architectures: http://arxiv.org/abs/1602.07261
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from six.moves import xrange  # pylint: disable=redefined-builtin
from models import model


class Inceptionv3Model(model.CNNModel):
  """InceptionV3."""

  def __init__(self, auxiliary=False, params=None):
    self._auxiliary = auxiliary
    super(Inceptionv3Model, self).__init__(
        'inception3', 299, 32, 0.005, params=params)

  def add_inference(self, cnn):
    def inception_v3_a(cnn, n):
      cols = [[('conv', 64, 1, 1)],
              [('conv', 48, 1, 1), ('conv', 64, 5, 5)],
              [('conv', 64, 1, 1), ('conv', 96, 3, 3), ('conv', 96, 3, 3)],
              [('apool', 3, 3, 1, 1, 'SAME'), ('conv', n, 1, 1)]]
      cnn.inception_module('incept_v3_a', cols)

    def inception_v3_b(cnn):
      cols = [[('conv', 384, 3, 3, 2, 2, 'VALID')],
              [('conv', 64, 1, 1), ('conv', 96, 3, 3),
               ('conv', 96, 3, 3, 2, 2, 'VALID')],
              [('mpool', 3, 3, 2, 2, 'VALID')]]
      cnn.inception_module('incept_v3_b', cols)

    def inception_v3_c(cnn, n):
      cols = [[('conv', 192, 1, 1)],
              [('conv', n, 1, 1), ('conv', n, 1, 7), ('conv', 192, 7, 1)],
              [('conv', n, 1, 1), ('conv', n, 7, 1), ('conv', n, 1, 7),
               ('conv', n, 7, 1), ('conv', 192, 1, 7)],
              [('apool', 3, 3, 1, 1, 'SAME'), ('conv', 192, 1, 1)]]
      cnn.inception_module('incept_v3_c', cols)

    def inception_v3_d(cnn):
      cols = [[('conv', 192, 1, 1), ('conv', 320, 3, 3, 2, 2, 'VALID')],
              [('conv', 192, 1, 1), ('conv', 192, 1, 7), ('conv', 192, 7, 1),
               ('conv', 192, 3, 3, 2, 2, 'VALID')],
              [('mpool', 3, 3, 2, 2, 'VALID')]]
      cnn.inception_module('incept_v3_d', cols)

    def inception_v3_e(cnn, pooltype):
      cols = [[('conv', 320, 1, 1)],
              [('conv', 384, 1, 1), ('conv', 384, 1, 3)],
              [('share',), ('conv', 384, 3, 1)],
              [('conv', 448, 1, 1), ('conv', 384, 3, 3),
               ('conv', 384, 1, 3)],
              [('share',), ('share',), ('conv', 384, 3, 1)],
              [('mpool' if pooltype == 'max' else 'apool', 3, 3, 1, 1,
                'SAME'), ('conv', 192, 1, 1)]]
      cnn.inception_module('incept_v3_e', cols)

    def incept_v3_aux(cnn):
      assert cnn.aux_top_layer is None
      cnn.aux_top_layer = cnn.top_layer
      cnn.aux_top_size = cnn.top_size
      with cnn.switch_to_aux_top_layer():
        cnn.apool(5, 5, 3, 3, mode='VALID')
        cnn.conv(128, 1, 1, mode='SAME')
        cnn.conv(768, 5, 5, mode='VALID', stddev=0.01)
        cnn.reshape([-1, 768])

    cnn.use_batch_norm = True
    cnn.conv(32, 3, 3, 2, 2, mode='VALID')   # 299 x 299 x 3
    cnn.conv(32, 3, 3, 1, 1, mode='VALID')   # 149 x 149 x 32
    cnn.conv(64, 3, 3, 1, 1, mode='SAME')    # 147 x 147 x 64
    cnn.mpool(3, 3, 2, 2, mode='VALID')      # 147 x 147 x 64
    cnn.conv(80, 1, 1, 1, 1, mode='VALID')   # 73 x 73 x 80
    cnn.conv(192, 3, 3, 1, 1, mode='VALID')  # 71 x 71 x 192
    cnn.mpool(3, 3, 2, 2, 'VALID')           # 35 x 35 x 192
    inception_v3_a(cnn, 32)                  # 35 x 35 x 256 mixed.
    inception_v3_a(cnn, 64)                  # 35 x 35 x 288 mixed_1.
    inception_v3_a(cnn, 64)                  # 35 x 35 x 288 mixed_2
    inception_v3_b(cnn)                      # 17 x 17 x 768 mixed_3
    inception_v3_c(cnn, 128)                 # 17 x 17 x 768 mixed_4
    inception_v3_c(cnn, 160)                 # 17 x 17 x 768 mixed_5
    inception_v3_c(cnn, 160)                 # 17 x 17 x 768 mixed_6
    inception_v3_c(cnn, 192)                 # 17 x 17 x 768 mixed_7
    if self._auxiliary:
      incept_v3_aux(cnn)                     # Auxiliary Head logits
    inception_v3_d(cnn)                      # 17 x 17 x 1280 mixed_8
    inception_v3_e(cnn, 'avg')               # 8 x 8 x 2048 mixed_9
    inception_v3_e(cnn, 'max')               # 8 x 8 x 2048 mixed_10
    cnn.apool(8, 8, 1, 1, 'VALID')           # 8 x 8 x 2048
    cnn.reshape([-1, 2048])                  # 1 x 1 x 2048


# Stem functions
def inception_v4_sa(cnn):
  cols = [[('mpool', 3, 3, 2, 2, 'VALID')],
          [('conv', 96, 3, 3, 2, 2, 'VALID')]]
  cnn.inception_module('incept_v4_sa', cols)


def inception_v4_sb(cnn):
  cols = [[('conv', 64, 1, 1), ('conv', 96, 3, 3, 1, 1, 'VALID')],
          [('conv', 64, 1, 1), ('conv', 64, 7, 1), ('conv', 64, 1, 7),
           ('conv', 96, 3, 3, 1, 1, 'VALID')]]
  cnn.inception_module('incept_v4_sb', cols)


def inception_v4_sc(cnn):
  cols = [[('conv', 192, 3, 3, 2, 2, 'VALID')],
          [('mpool', 3, 3, 2, 2, 'VALID')]]
  cnn.inception_module('incept_v4_sc', cols)


# Reduction functions
def inception_v4_ra(cnn, k, l, m, n):
  cols = [
      [('mpool', 3, 3, 2, 2, 'VALID')],
      [('conv', n, 3, 3, 2, 2, 'VALID')],
      [('conv', k, 1, 1), ('conv', l, 3, 3),
       ('conv', m, 3, 3, 2, 2, 'VALID')]
  ]
  cnn.inception_module('incept_v4_ra', cols)


def inception_v4_rb(cnn):
  cols = [[('mpool', 3, 3, 2, 2, 'VALID')],
          [('conv', 192, 1, 1), ('conv', 192, 3, 3, 2, 2, 'VALID')],
          [('conv', 256, 1, 1), ('conv', 256, 1, 7), ('conv', 320, 7, 1),
           ('conv', 320, 3, 3, 2, 2, 'VALID')]]
  cnn.inception_module('incept_v4_rb', cols)


class Inceptionv4Model(model.CNNModel):
  """Inceptionv4."""

  def __init__(self, params=None):
    super(Inceptionv4Model, self).__init__(
        'inception4', 299, 32, 0.005, params=params)

  def add_inference(self, cnn):
    def inception_v4_a(cnn):
      cols = [[('apool', 3, 3, 1, 1, 'SAME'), ('conv', 96, 1, 1)],
              [('conv', 96, 1, 1)],
              [('conv', 64, 1, 1), ('conv', 96, 3, 3)],
              [('conv', 64, 1, 1), ('conv', 96, 3, 3), ('conv', 96, 3, 3)]]
      cnn.inception_module('incept_v4_a', cols)

    def inception_v4_b(cnn):
      cols = [[('apool', 3, 3, 1, 1, 'SAME'), ('conv', 128, 1, 1)],
              [('conv', 384, 1, 1)],
              [('conv', 192, 1, 1), ('conv', 224, 1, 7),
               ('conv', 256, 7, 1)],
              [('conv', 192, 1, 1), ('conv', 192, 1, 7), ('conv', 224, 7, 1),
               ('conv', 224, 1, 7), ('conv', 256, 7, 1)]]
      cnn.inception_module('incept_v4_b', cols)

    def inception_v4_c(cnn):
      cols = [[('apool', 3, 3, 1, 1, 'SAME'), ('conv', 256, 1, 1)],
              [('conv', 256, 1, 1)],
              [('conv', 384, 1, 1), ('conv', 256, 1, 3)],
              [('share',), ('conv', 256, 3, 1)],
              [('conv', 384, 1, 1), ('conv', 448, 1, 3),
               ('conv', 512, 3, 1), ('conv', 256, 3, 1)],
              [('share',), ('share',), ('share',), ('conv', 256, 1, 3)]]
      cnn.inception_module('incept_v4_c', cols)

    cnn.use_batch_norm = True
    cnn.conv(32, 3, 3, 2, 2, mode='VALID')
    cnn.conv(32, 3, 3, 1, 1, mode='VALID')
    cnn.conv(64, 3, 3)
    inception_v4_sa(cnn)
    inception_v4_sb(cnn)
    inception_v4_sc(cnn)
    for _ in xrange(4):
      inception_v4_a(cnn)
    inception_v4_ra(cnn, 192, 224, 256, 384)
    for _ in xrange(7):
      inception_v4_b(cnn)
    inception_v4_rb(cnn)
    for _ in xrange(3):
      inception_v4_c(cnn)
    cnn.spatial_mean()
    cnn.dropout(0.8)
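The spatial sizes annotated in `Inceptionv3Model.add_inference` above follow from the 'VALID' padding rule: the output size is ceil((in - kernel + 1) / stride). A sketch reproducing the first two steps of the stem (the exact rounding convention is assumed to match TF's VALID padding):

import math

def valid_out(size, kernel, stride):
    return math.ceil((size - kernel + 1) / stride)

size = 299
size = valid_out(size, 3, 2)  # conv 3x3 stride 2 -> 149
size = valid_out(size, 3, 1)  # conv 3x3 stride 1 -> 147
print(size)  # 147, matching the '147 x 147 x 64' annotation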
TensorFlow2x/ComputeVision/Classification/benchmarks-master/scripts/tf_cnn_benchmarks/models/lenet_model.py
deleted
100644 → 0
View file @
e286da17
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Lenet model configuration.
References:
LeCun, Yann, Leon Bottou, Yoshua Bengio, and Patrick Haffner
Gradient-based learning applied to document recognition
Proceedings of the IEEE (1998)
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from models import model


class Lenet5Model(model.CNNModel):
  """Lenet5."""

  def __init__(self, params=None):
    super(Lenet5Model, self).__init__('lenet5', 28, 32, 0.005, params=params)

  def add_inference(self, cnn):
    # Note: This matches TF's MNIST tutorial model
    cnn.conv(32, 5, 5)
    cnn.mpool(2, 2)
    cnn.conv(64, 5, 5)
    cnn.mpool(2, 2)
    cnn.reshape([-1, 64 * 7 * 7])
    cnn.affine(512)
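A quick check of the reshape above, assuming the convs use SAME padding (as in the MNIST tutorial model this mirrors): a 28x28 input through two 2x2 stride-2 max pools ends at 7x7 with 64 channels, hence the flattened width of 64 * 7 * 7 = 3136.

size = 28
for _ in range(2):  # two mpool(2, 2) layers, each halving the spatial size
    size //= 2
print(size, 64 * size * size)  # 7 3136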