Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
dcuai
dlexamples
Commits
f0d87682
Commit
f0d87682
authored
Aug 01, 2022
by
qianyj
Browse files
update TF code
parent
eaff6662
Changes
129
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
5568 additions
and
0 deletions
+5568
-0
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/benchmark_cnn_test.py
...ks-master/scripts/tf_cnn_benchmarks/benchmark_cnn_test.py
+1493
-0
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/cnn_util.py
...n/benchmarks-master/scripts/tf_cnn_benchmarks/cnn_util.py
+253
-0
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/cnn_util_test.py
...chmarks-master/scripts/tf_cnn_benchmarks/cnn_util_test.py
+129
-0
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/coco_metric.py
...enchmarks-master/scripts/tf_cnn_benchmarks/coco_metric.py
+198
-0
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/constants.py
.../benchmarks-master/scripts/tf_cnn_benchmarks/constants.py
+67
-0
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/convnet_builder.py
...marks-master/scripts/tf_cnn_benchmarks/convnet_builder.py
+498
-0
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/datasets.py
...n/benchmarks-master/scripts/tf_cnn_benchmarks/datasets.py
+251
-0
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/flags.py
...tion/benchmarks-master/scripts/tf_cnn_benchmarks/flags.py
+93
-0
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/leading_indicators_test.py
...ster/scripts/tf_cnn_benchmarks/leading_indicators_test.py
+1003
-0
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/mlperf.py
...ion/benchmarks-master/scripts/tf_cnn_benchmarks/mlperf.py
+260
-0
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/mlperf_test.py
...enchmarks-master/scripts/tf_cnn_benchmarks/mlperf_test.py
+189
-0
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/models/__init__.py
...marks-master/scripts/tf_cnn_benchmarks/models/__init__.py
+0
-0
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/models/alexnet_model.py
...-master/scripts/tf_cnn_benchmarks/models/alexnet_model.py
+93
-0
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/models/densenet_model.py
...master/scripts/tf_cnn_benchmarks/models/densenet_model.py
+100
-0
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/models/experimental/__init__.py
...scripts/tf_cnn_benchmarks/models/experimental/__init__.py
+0
-0
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/models/experimental/deepspeech.py
...ripts/tf_cnn_benchmarks/models/experimental/deepspeech.py
+449
-0
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/models/experimental/official_ncf_model.py
..._cnn_benchmarks/models/experimental/official_ncf_model.py
+172
-0
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/models/googlenet_model.py
...aster/scripts/tf_cnn_benchmarks/models/googlenet_model.py
+63
-0
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/models/inception_model.py
...aster/scripts/tf_cnn_benchmarks/models/inception_model.py
+213
-0
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/models/lenet_model.py
...ks-master/scripts/tf_cnn_benchmarks/models/lenet_model.py
+44
-0
No files found.
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/benchmark_cnn_test.py
0 → 100644
View file @
f0d87682
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for benchmark_cnn."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
glob
import
os
import
re
import
unittest
import
mock
import
numpy
as
np
import
tensorflow.compat.v1
as
tf
from
google.protobuf
import
text_format
from
tensorflow.core.framework
import
step_stats_pb2
from
tensorflow.core.profiler
import
tfprof_log_pb2
from
tensorflow.python.platform
import
test
import
benchmark_cnn
import
datasets
import
flags
import
preprocessing
import
test_util
import
variable_mgr_util
from
platforms
import
util
as
platforms_util
def _check_has_gpu():
  """Raises ValueError if no CUDA-capable GPU is available.

  Called at the top of tests that build or run models, so they fail fast
  with an actionable message instead of a confusing placement error.

  Raises:
    ValueError: if `test.is_gpu_available(cuda_only=True)` is False.
  """
  if not test.is_gpu_available(cuda_only=True):
    raise ValueError(
        """You have asked to run part or all of this on GPU, but it appears
that no GPU is available. If your machine has GPUs it is possible you
do not have a version of TensorFlow with GPU support. To build with GPU
support, add --config=cuda to the build flags.\n""")
class TfCnnBenchmarksModelTest(tf.test.TestCase):
  """Tests which are run with multiple models.

  Subclasses override get_model_name() (and optionally the three
  capability predicates below) to select which model the shared tests
  exercise. The base class itself returns None from get_model_name(),
  which makes every test in this class a no-op when run directly.
  """

  def setUp(self):
    """Resets global benchmark_cnn state before each test."""
    super(TfCnnBenchmarksModelTest, self).setUp()
    benchmark_cnn.setup(benchmark_cnn.make_params())

  def get_model_name(self):
    """Returns the model name to test, or None to skip all tests."""
    return None

  # Return true to run tests that don't need to be run on every model.
  # This should be done for one or two cheap models.
  def extended_tests(self):
    return False

  # Return false to suppress actually running the model; this is useful
  # for tests that are large.
  def model_execution_test(self):
    return False

  # Return false to suppress actually saving and loading the model.
  def model_save_load_test(self):
    return False

  def testSaveLoadModel(self):
    """Trains, checkpoints, restores, and checks restored state."""
    _check_has_gpu()
    if not self.get_model_name() or not self.model_save_load_test():
      return

    params = benchmark_cnn.make_params(
        model=self.get_model_name(),
        num_batches=1,
        num_intra_threads=0,
        num_inter_threads=0,
        distortions=False,
        batch_size=2,
        variable_update='replicated',
        num_warmup_batches=0,
        num_gpus=2,
        train_dir=test_util.get_temp_dir('testSaveLoadModel_' +
                                         self.get_model_name()))

    # Run one batch and save the model.
    # Note that this uses a non-test session.
    bench = benchmark_cnn.BenchmarkCNN(params)
    bench.run()
    self.assertEqual(bench.init_global_step, 0)
    # Clear the default graph.
    tf.reset_default_graph()
    # Test if checkpoint had been saved.
    ckpt = tf.train.get_checkpoint_state(params.train_dir)
    # NOTE(review): the '.' characters in this pattern are unescaped regex
    # dots (match any character), not literal dots — presumably harmless
    # here, but confirm before reusing the pattern elsewhere.
    match = re.match(os.path.join(params.train_dir,
                                  r'model.ckpt-(\d+).index'),
                     ckpt.model_checkpoint_path + '.index')
    self.assertTrue(match)
    self.assertGreaterEqual(int(match.group(1)), params.num_batches)

    params = params._replace(num_batches=2)
    # Reload the model
    bench = benchmark_cnn.BenchmarkCNN(params)
    bench.run()
    # Check if global step has been restored.
    self.assertNotEqual(bench.init_global_step, 0)
    ckpt = tf.train.get_checkpoint_state(params.train_dir)
    match = re.match(os.path.join(params.train_dir,
                                  r'model.ckpt-(\d+).index'),
                     ckpt.model_checkpoint_path + '.index')
    self.assertTrue(match)
    self.assertGreaterEqual(int(match.group(1)), params.num_batches)
    # Check that the batch norm moving averages are restored from checkpoints
    with tf.Graph().as_default():
      bench = benchmark_cnn.BenchmarkCNN(params)
      bench._build_model()
      saver = tf.train.Saver(bench.variable_mgr.savable_variables())
      with tf.Session(config=benchmark_cnn.create_config_proto(params)) as sess:
        benchmark_cnn.load_checkpoint(saver, sess, params.train_dir)
        sess.run(bench.variable_mgr.get_post_init_ops())

        bn_moving_vars = [
            v for v in tf.global_variables()
            if '/batchnorm' in v.name and '/moving' in v.name
        ]
        self.assertGreater(len(bn_moving_vars), 0)
        for moving_var in bn_moving_vars:
          moving_var_value = sess.run(moving_var)
          # Check that the moving means and moving variances have been restored
          # by asserting they are not their default values of 0 and 1,
          # respectively
          if '/moving_mean' in moving_var.name:
            self.assertFalse(
                np.array_equal(moving_var_value,
                               np.zeros(moving_var_value.shape,
                                        moving_var_value.dtype)))
          else:
            self.assertIn('/moving_variance', moving_var.name)
            self.assertFalse(
                np.array_equal(moving_var_value,
                               np.ones(moving_var_value.shape,
                                       moving_var_value.dtype)))

  def testModel(self):
    """Runs one training batch of the model end to end."""
    _check_has_gpu()
    if not self.get_model_name() or not self.model_execution_test():
      return

    params = benchmark_cnn.make_params(
        model=self.get_model_name(),
        num_batches=1,
        num_intra_threads=1,
        num_inter_threads=12,
        batch_size=2,
        distortions=False)

    # Run this one; note that this uses a non-test session.
    bench = benchmark_cnn.BenchmarkCNN(params)
    bench.run()

  def testSendRecvVariables(self):
    """Builds the model with parameter_server variable placement."""
    self._testVariables('parameter_server')
    if self.extended_tests():
      self._testVariables('parameter_server', local_parameter_device='CPU')
      self._testVariables('parameter_server', optimizer='sgd')

  def testReplicatedVariables(self):
    """Builds the model with replicated variable placement."""
    self._testVariables('replicated')
    if self.extended_tests():
      self._testVariables('replicated', all_reduce_spec=None)
      self._testVariables('replicated', use_fp16=True, fp16_vars=False)
      self._testVariables(
          'replicated',
          all_reduce_spec=None,
          use_fp16=True,
          fp16_vars=False,
          fp16_enable_auto_loss_scale=True,
          fp16_inc_loss_scale_every_n=4)

  def testIndependentVariables(self):
    """Builds the model with independent variable placement."""
    self._testVariables('independent')
    self._testVariables(
        'independent',
        all_reduce_spec=None,
        use_fp16=True,
        fp16_vars=False,
        fp16_enable_auto_loss_scale=True,
        fp16_inc_loss_scale_every_n=4)

  def testSummaryVerbosity(self):
    """Checks summary-op creation at each verbosity level."""
    self._testVariables('parameter_server', summary_verbosity=1)
    if self.extended_tests():
      self._testVariables('parameter_server', summary_verbosity=2)
      self._testVariables('parameter_server', summary_verbosity=3)

  def testStagedVariables(self):
    """Builds the model with staged variables."""
    self._testVariables('parameter_server', staged_vars=True)
    if self.extended_tests():
      self._testVariables('parameter_server', staged_vars=True,
                          local_parameter_device='CPU')
      self._testVariables('parameter_server', staged_vars=True,
                          use_fp16=True, fp16_vars=True)

  def _assert_correct_var_type(self, var, params):
    """Asserts `var` has the dtype implied by the fp16 params.

    gpu_cached_inputs variables are exempt; batchnorm variables stay
    float32 even when fp16_vars is set.
    """
    if 'gpu_cached_inputs' not in var.name:
      if params.use_fp16 and params.fp16_vars and 'batchnorm' not in var.name:
        expected_type = tf.float16
      else:
        expected_type = tf.float32
      self.assertEqual(var.dtype.base_dtype, expected_type)

  def _testVariables(self,
                     variable_update,
                     summary_verbosity=0,
                     local_parameter_device='GPU',
                     staged_vars=False,
                     optimizer='momentum',
                     # TODO(b/80125832): Enable nccl in tests
                     # all_reduce_spec='nccl',
                     all_reduce_spec='',
                     use_fp16=False,
                     fp16_vars=False,
                     fp16_enable_auto_loss_scale=False,
                     fp16_inc_loss_scale_every_n=10):
    """Builds (but does not run) a 2-GPU model and validates variables.

    Checks each global/local variable's device placement and dtype
    against the chosen variable_update mode, and that the number/kind
    of summary ops matches summary_verbosity.
    """
    if not self.get_model_name():
      return
    _check_has_gpu()

    params = benchmark_cnn.make_params(
        model=self.get_model_name(),
        num_batches=1,
        num_intra_threads=1,
        num_inter_threads=12,
        distortions=False,
        variable_update=variable_update,
        local_parameter_device=local_parameter_device,
        num_gpus=2,
        summary_verbosity=summary_verbosity,
        staged_vars=staged_vars,
        optimizer=optimizer,
        all_reduce_spec=all_reduce_spec,
        compact_gradient_transfer=False if all_reduce_spec == 'nccl' else True,
        use_fp16=use_fp16,
        fp16_loss_scale=2.,
        fp16_vars=fp16_vars,
        fp16_enable_auto_loss_scale=fp16_enable_auto_loss_scale,
        fp16_inc_loss_scale_every_n=fp16_inc_loss_scale_every_n,
    )

    # Test building models using multiple GPUs, but don't
    # run them.
    with self.test_session(graph=tf.Graph()):
      bench = benchmark_cnn.BenchmarkCNN(params)
      bench._build_model()

      # Rough validation of variable type and placement, depending on mode.
      all_vars = tf.global_variables() + tf.local_variables()
      if params.variable_update == 'parameter_server':
        for v in all_vars:
          tf.logging.debug('var: %s' % v.name)
          match = re.match(r'tower_(\d+)/v/gpu_cached_inputs:0', v.name)
          if match:
            self.assertEqual(v.device, '/device:GPU:%s' % match.group(1))
          elif v.name.startswith('v/'):
            self.assertEqual(v.device, '/device:%s:0' % local_parameter_device)
            self._assert_correct_var_type(v, params)
          elif v.name in ('input_processing/images:0',
                          'input_processing/labels:0', 'init_learning_rate:0',
                          'global_step:0', 'loss_scale:0',
                          'loss_scale_normal_steps:0'):
            self.assertEqual(v.device, '/device:CPU:0')
          else:
            raise ValueError('Unexpected variable %s' % v.name)
      else:
        # replicated/independent modes: each tower owns a full copy, so the
        # per-GPU variable counts must balance.
        v0_count = 0
        v1_count = 0
        for v in all_vars:
          if v.name.startswith('tower_0/v0/'):
            self.assertEqual(v.name, 'tower_0/v0/gpu_cached_inputs:0')
            self.assertEqual(v.device, '/device:GPU:0')
          elif v.name.startswith('tower_1/v1/'):
            self.assertEqual(v.name, 'tower_1/v1/gpu_cached_inputs:0')
            self.assertEqual(v.device, '/device:GPU:1')
          elif v.name.startswith('v0/'):
            v0_count += 1
            self.assertEqual(v.device, '/device:GPU:0')
            self._assert_correct_var_type(v, params)
          elif v.name.startswith('v1/'):
            v1_count += 1
            self.assertEqual(v.device, '/device:GPU:1')
            self._assert_correct_var_type(v, params)
          elif v.name in ('input_processing/images:0',
                          'input_processing/labels:0', 'init_learning_rate:0',
                          'global_step:0', 'loss_scale:0',
                          'loss_scale_normal_steps:0'):
            self.assertEqual(v.device, '/device:CPU:0')
          else:
            raise ValueError('Unexpected variable %s' % v.name)
        self.assertEqual(v0_count, v1_count)

      # Validate summary ops in the model depending on verbosity level
      summary_ops = tf.get_collection(tf.GraphKeys.SUMMARIES)
      num_summary_ops = len(summary_ops)
      self.assertEqual(num_summary_ops > 0, summary_verbosity > 0)
      if summary_verbosity > 0:
        has_affine_histogram = False
        has_gradient_histogram = False
        has_log_gradients_histogram = False
        for op in summary_ops:
          if '/gradients' in op.name:
            has_gradient_histogram = True
          elif '/affine' in op.name:
            has_affine_histogram = True
          elif 'log_gradients' in op.name:
            has_log_gradients_histogram = True
        self.assertEqual(summary_verbosity >= 3, has_affine_histogram)
        self.assertEqual(summary_verbosity >= 3, has_gradient_histogram)
        self.assertEqual(summary_verbosity >= 2, has_log_gradients_histogram)
        if summary_verbosity == 1:
          self.assertLess(num_summary_ops, 10)
# Per-model instantiations of TfCnnBenchmarksModelTest. Each subclass only
# selects the model name; a few also opt in to the extended / save-load
# test variants.
#
# NOTE(review): TestVgg1Model returns 'vgg11' — the class name looks like a
# typo for TestVgg11Model; confirm before renaming (test runners select by
# class name). Also, 'trivial' is registered twice (TrivialModelTest and
# TestTrivialModel) — presumably one is redundant; verify with the owners.


class TrivialModelTest(TfCnnBenchmarksModelTest):
  """Runs the shared model tests against the 'trivial' model."""

  def get_model_name(self):
    return 'trivial'


class TestVgg1Model(TfCnnBenchmarksModelTest):
  """Runs the shared model tests against the 'vgg11' model."""

  def get_model_name(self):
    return 'vgg11'


class TestVgg19Model(TfCnnBenchmarksModelTest):
  """Runs the shared model tests against the 'vgg19' model."""

  def get_model_name(self):
    return 'vgg19'


class TestLenet5Model(TfCnnBenchmarksModelTest):
  """Runs the shared model tests against the 'lenet' model."""

  def get_model_name(self):
    return 'lenet'


class TestGooglenetModel(TfCnnBenchmarksModelTest):
  """Runs the shared model tests against the 'googlenet' model."""

  def get_model_name(self):
    return 'googlenet'


class TestOverfeatModel(TfCnnBenchmarksModelTest):
  """Runs the shared model tests against the 'overfeat' model."""

  def get_model_name(self):
    return 'overfeat'


class TestAlexnetModel(TfCnnBenchmarksModelTest):
  """Runs the shared model tests (including extended) against 'alexnet'."""

  def get_model_name(self):
    return 'alexnet'

  def extended_tests(self):
    return True


class TestTrivialModel(TfCnnBenchmarksModelTest):
  """Runs the shared model tests against the 'trivial' model."""

  def get_model_name(self):
    return 'trivial'


class TestInceptionv3Model(TfCnnBenchmarksModelTest):
  """Runs the shared model tests (including extended) against 'inception3'."""

  def get_model_name(self):
    return 'inception3'

  def extended_tests(self):
    return True


class TestInceptionv4Model(TfCnnBenchmarksModelTest):
  """Runs the shared model tests against the 'inception4' model."""

  def get_model_name(self):
    return 'inception4'


class TestResnet50Model(TfCnnBenchmarksModelTest):
  """Runs the shared model tests (including save/load) against 'resnet50'."""

  def get_model_name(self):
    return 'resnet50'

  def model_save_load_test(self):
    return True


class TestResnet101Model(TfCnnBenchmarksModelTest):
  """Runs the shared model tests against the 'resnet101' model."""

  def get_model_name(self):
    return 'resnet101'


class TestResnet152Model(TfCnnBenchmarksModelTest):
  """Runs the shared model tests against the 'resnet152' model."""

  def get_model_name(self):
    return 'resnet152'


class TestResnet50V2Model(TfCnnBenchmarksModelTest):
  """Runs the shared model tests against the 'resnet50_v2' model."""

  def get_model_name(self):
    return 'resnet50_v2'


class TestResnet101V2Model(TfCnnBenchmarksModelTest):
  """Runs the shared model tests against the 'resnet101_v2' model."""

  def get_model_name(self):
    return 'resnet101_v2'


class TestResnet152V2Model(TfCnnBenchmarksModelTest):
  """Runs the shared model tests against the 'resnet152_v2' model."""

  def get_model_name(self):
    return 'resnet152_v2'
class
TfCnnBenchmarksTest
(
tf
.
test
.
TestCase
):
"""Tests that benchmark_cnn runs correctly."""
  def setUp(self):
    """Requires a GPU and resets global benchmark_cnn state per test."""
    super(TfCnnBenchmarksTest, self).setUp()
    _check_has_gpu()
    benchmark_cnn.setup(benchmark_cnn.make_params())
def
_run_benchmark_cnn
(
self
,
params
):
logs
=
[]
benchmark_cnn
.
log_fn
=
test_util
.
print_and_add_to_list
(
logs
)
benchmark_cnn
.
BenchmarkCNN
(
params
).
run
()
return
logs
  def _run_benchmark_cnn_with_fake_images(self, params, images, labels):
    """Runs BenchmarkCNN feeding it `images`/`labels` and returns log lines.

    Swaps the benchmark's input preprocessor for a TestImagePreprocessor
    primed with the given data. The preprocessor is built for 227x227x3
    images split across params.num_gpus.

    Args:
      params: Params for BenchmarkCNN.
      images: the image data to feed (assumed compatible with shape
        [batch, 227, 227, 3] — see the shapes passed below).
      labels: the label data to feed.

    Returns:
      A list of lines from the output of BenchmarkCNN.
    """
    logs = []
    benchmark_cnn.log_fn = test_util.print_and_add_to_list(logs)
    bench = benchmark_cnn.BenchmarkCNN(params)
    bench.input_preprocessor = preprocessing.TestImagePreprocessor(
        params.batch_size * params.num_gpus,
        [[params.batch_size, 227, 227, 3], [params.batch_size]],
        params.num_gpus,
        bench.model.data_type)
    # Force queue-runner-based input so the fake data is actually consumed.
    bench.dataset._queue_runner_required = True
    bench.input_preprocessor.set_fake_data(images, labels)
    bench.input_preprocessor.expected_subset = (
        'validation' if params.eval else 'train')
    bench.run()
    return logs
  def _run_benchmark_cnn_with_black_and_white_images(self, params):
    """Runs BenchmarkCNN with black and white images.

    A BenchmarkCNN is created and run with black and white images as input.
    Half the images are black (i.e., filled with 0s) and half are white
    (i.e., filled with 255s).

    Args:
      params: Params for BenchmarkCNN.

    Returns:
      A list of lines from the output of BenchmarkCNN.
    """
    # TODO(reedwm): Instead of generating images here, use black and white
    # tfrecords by calling test_util.create_black_and_white_images().
    effective_batch_size = params.batch_size * params.num_gpus
    half_batch_size = effective_batch_size // 2
    images = np.zeros((effective_batch_size, 227, 227, 3), dtype=np.float32)
    # First half stays 0 (black); second half becomes 255 (white).
    images[half_batch_size:, :, :, :] = 255
    labels = np.array([0] * half_batch_size + [1] * half_batch_size,
                      dtype=np.int32)
    return self._run_benchmark_cnn_with_fake_images(params, images, labels)
  def _train_and_eval_local(self,
                            params,
                            check_output_values=False,
                            max_final_loss=10.,
                            skip=None,
                            use_test_preprocessor=True):
    """Trains and evaluates locally via test_util.train_and_eval.

    Args:
      params: Params for BenchmarkCNN.
      check_output_values: whether train_and_eval should compare output
        values exactly (see TODO below for why this defaults to False).
      max_final_loss: upper bound on the final loss.
      skip: passed through to train_and_eval to skip phases
        (e.g. 'eval', 'eval_and_train_from_checkpoint').
      use_test_preprocessor: if True, feed synthetic black-and-white
        images; otherwise run with the params' own input pipeline.
    """
    # TODO(reedwm): check_output_values should default to True and be enabled
    # on every test. Currently, if check_output_values=True and the calls to
    # tf.set_random_seed(...) and np.seed(...) are passed certain seed values
    # in benchmark_cnn.py, then most tests will fail. This indicates the tests
    # are brittle and could fail with small changes when
    # check_output_values=True, so check_output_values defaults to False for
    # now.

    def run_fn(run_type, inner_params):
      # run_type is unused: the same runner serves train and eval phases.
      del run_type
      if use_test_preprocessor:
        return [
            self._run_benchmark_cnn_with_black_and_white_images(inner_params)
        ]
      else:
        return [self._run_benchmark_cnn(inner_params)]

    return test_util.train_and_eval(self, run_fn, params,
                                    check_output_values=check_output_values,
                                    max_final_loss=max_final_loss,
                                    skip=skip)
  def testAlexnet(self):
    """Trains/evals alexnet for 30 batches at learning rate 0.01."""
    params = test_util.get_params('testAlexnet')._replace(
        num_batches=30, init_learning_rate=0.01, model='alexnet')
    self._train_and_eval_local(params)

  def testNoPrintAccuracy(self):
    """Trains/evals with training-accuracy printing disabled."""
    params = test_util.get_params('testNoPrintAccuracy')._replace(
        print_training_accuracy=False)
    self._train_and_eval_local(params)
  def testLowAccuracy(self):
    """Checks reported top-1/top-5 accuracy on a degenerate batch."""
    params = test_util.get_params('testLowAccuracy')._replace(
        print_training_accuracy=True, batch_size=5, num_batches=10)
    # We force low accuracy by having each batch containing 10 identical
    # images, each with a different label. This guarantees a top-1 accuracy
    # of exactly 0.1 and a top-5 accuracy of exactly 0.5.
    images = np.zeros((10, 227, 227, 3), dtype=np.float32)
    labels = np.arange(10, dtype=np.int32)
    logs = self._run_benchmark_cnn_with_fake_images(params, images, labels)
    training_outputs = test_util.get_training_outputs_from_logs(
        logs, params.print_training_accuracy)
    last_output = training_outputs[-1]
    # TODO(reedwm): These should be assertEqual but for some reason,
    # occasionally the accuracies are lower (Running this test 500 times,
    # these asserts failed twice). Investigate this problem.
    self.assertLessEqual(last_output.top_1_accuracy, 0.1)
    self.assertLessEqual(last_output.top_5_accuracy, 0.5)
def
testParameterServer
(
self
):
params
=
test_util
.
get_params
(
'testParameterServer'
)
self
.
_train_and_eval_local
(
params
)
def
testParameterServerStaged
(
self
):
params
=
test_util
.
get_params
(
'testParameterServerStaged'
).
_replace
(
staged_vars
=
True
)
self
.
_train_and_eval_local
(
params
)
def
testReplicated
(
self
):
params
=
test_util
.
get_params
(
'testReplicated'
).
_replace
(
variable_update
=
'replicated'
)
self
.
_train_and_eval_local
(
params
)
def
testIndependent
(
self
):
params
=
test_util
.
get_params
(
'testIndependent'
).
_replace
(
variable_update
=
'independent'
)
self
.
_train_and_eval_local
(
params
)
  def testForwardOnly(self):
    """Runs forward passes only (no training)."""
    params = test_util.get_params('testForwardOnly')._replace(
        forward_only=True)
    # Evaluation is not supported with --forward_only, so we set skip='eval'.
    self._train_and_eval_local(params, skip='eval')

  def testForwardOnlyAndFreeze(self):
    """Runs forward-only with graph freezing and no train_dir."""
    params = test_util.get_params('testForwardOnlyAndFreeze')._replace(
        forward_only=True, freeze_when_forward_only=True, train_dir=None)
    # Training is not supported with --freeze_when_forward_only.
    self._train_and_eval_local(params, skip='eval_and_train_from_checkpoint')
  def testNoDistortions(self):
    """Trains/evals with image distortions disabled."""
    params = test_util.get_params('testNoDistortions')._replace(
        distortions=False)
    self._train_and_eval_local(params)

  def testCpuAsLocalParamDevice(self):
    """Trains/evals with the CPU as the local parameter device."""
    params = test_util.get_params('testCpuAsLocalParamDevice')._replace(
        local_parameter_device='cpu')
    self._train_and_eval_local(params)

  def testNHWC(self):
    """Trains/evals using NHWC data format."""
    params = test_util.get_params('testNHWC')._replace(data_format='NHWC')
    self._train_and_eval_local(params)

  def testCpuAsDevice(self):
    """Trains/evals entirely on the CPU."""
    params = test_util.get_params('testCpuAsDevice')._replace(
        device='cpu',
        data_format='NHWC')  # NHWC required when --device=cpu
    self._train_and_eval_local(params)
  def testMomentumParameterServer(self):
    """Trains/evals with the momentum optimizer (momentum=0.8)."""
    params = test_util.get_params('testMomentumParameterServer')._replace(
        optimizer='momentum', momentum=0.8)
    self._train_and_eval_local(params)

  def testRmspropReplicated(self):
    """Trains/evals replicated with the RMSProp optimizer."""
    params = test_util.get_params('testRmspropReplicated')._replace(
        variable_update='replicated',
        optimizer='rmsprop',
        rmsprop_decay=0.8,
        rmsprop_momentum=0.6,
        rmsprop_epsilon=0.7,
        init_learning_rate=0.01)
    self._train_and_eval_local(params)
  def testBatchGroupSize(self):
    """Trains/evals with input batches grouped 4 at a time."""
    params = test_util.get_params('testBatchGroupSize')._replace(
        batch_group_size=4, num_batches=100, num_warmup_batches=5)
    self._train_and_eval_local(params)

  def testGradientClip(self):
    """Trains/evals with gradient clipping at 100.0."""
    params = test_util.get_params('testGradientClip')._replace(
        gradient_clip=100.0)
    self._train_and_eval_local(params)

  def testWeightDecay(self):
    """Trains/evals with weight decay 0.0001."""
    params = test_util.get_params('testWeightDecay')._replace(
        weight_decay=0.0001)
    self._train_and_eval_local(params)

  def testNoLayers(self):
    """Trains/evals without using the tf.layers API."""
    params = test_util.get_params('testNoLayers')._replace(
        use_tf_layers=False)
    self._train_and_eval_local(params)
  def testSaveModelSteps(self):
    """Checks periodic checkpointing keeps only the last 3 checkpoints."""
    params = test_util.get_params('testSaveModelSteps')._replace(
        save_model_steps=2, num_warmup_batches=0, num_batches=10,
        max_ckpts_to_keep=3)
    self._train_and_eval_local(params)
    for i in range(1, 20 + 1):
      # We train for 20 steps, since self._train_and_eval_local() does two
      # training runs of 10 steps each. We save a checkpoint every 2 steps and
      # keep the last 3 checkpoints, so at the end, we should have checkpoints
      # for steps 16, 18, and 20.
      matches = glob.glob(os.path.join(params.train_dir,
                                       'model.ckpt-{}.*'.format(i)))
      if i in (16, 18, 20):
        self.assertTrue(matches)
      else:
        self.assertFalse(matches)
  def testFp16WithFp32Vars(self):
    """Trains/evals in fp16 math with fp32 variables, loss scale 1."""
    params = test_util.get_params('testFp16WithFp32Vars')._replace(
        use_fp16=True, fp16_vars=False, fp16_loss_scale=1.)
    self._train_and_eval_local(params)

  def testFp16WithFp16Vars(self):
    """Trains/evals in fp16 math with fp16 variables."""
    params = test_util.get_params('testFp16WithFp16Vars')._replace(
        use_fp16=True, fp16_vars=True)
    self._train_and_eval_local(params)
  def testXlaCompile(self):
    """Trains/evals with XLA compilation enabled."""
    params = test_util.get_params('testXlaCompile')._replace(xla_compile=True)
    self._train_and_eval_local(params)

  @unittest.skip('Fails for unknown reason')
  def testXlaCompileWithFp16(self):
    """Trains/evals with XLA compilation and fp16 (currently skipped)."""
    params = test_util.get_params('testXlaCompileWithFp16')._replace(
        use_fp16=True, xla_compile=True)
    self._train_and_eval_local(params)
  def testGradientRepacking(self):
    """Trains/evals with gradient repacking, in fp32 then fp16."""
    params = test_util.get_params('testGradientRepacking1')._replace(
        gradient_repacking=2)
    self._train_and_eval_local(params, skip='eval_and_train_from_checkpoint')
    params = test_util.get_params('testGradientRepacking2')._replace(
        gradient_repacking=2, use_fp16=True)
    self._train_and_eval_local(params, skip='eval_and_train_from_checkpoint')
  def testTraceFileChromeTraceFormat(self):
    """Checks a non-empty Chrome-format trace file is written."""
    trace_file = os.path.join(self.get_temp_dir(),
                              'testTraceFileChromeTraceFormat_tracefile')
    params = test_util.get_params('testTraceFileChromeTraceFormat')._replace(
        trace_file=trace_file, use_chrome_trace_format=True)
    self._train_and_eval_local(params)
    self.assertGreater(os.stat(trace_file).st_size, 0)
  def testTraceFileStepStatsProto(self):
    """Checks the trace file parses as a text-format StepStats proto."""
    trace_file = os.path.join(self.get_temp_dir(),
                              'testTraceFileStepStatsProto_tracefile')
    params = test_util.get_params('testTraceFileStepStatsProto')._replace(
        trace_file=trace_file, use_chrome_trace_format=False)
    self._train_and_eval_local(params)
    self.assertGreater(os.stat(trace_file).st_size, 0)
    with open(trace_file) as f:
      step_stats = step_stats_pb2.StepStats()
      # The following statement should not raise an exception.
      contents = f.read()
      text_format.Merge(contents, step_stats)
  def testTfprofFile(self):
    """Checks the tfprof file parses as a binary ProfileProto."""
    tfprof_file = os.path.join(self.get_temp_dir(),
                               'testTfprofFile_tfproffile')
    params = test_util.get_params('testTfprofFile')._replace(
        tfprof_file=tfprof_file)
    self._train_and_eval_local(params, skip='eval_and_train_from_checkpoint')
    self.assertGreater(os.stat(tfprof_file).st_size, 0)
    with open(tfprof_file, 'rb') as f:
      profile_proto = tfprof_log_pb2.ProfileProto()
      # The following statement should not raise an exception.
      profile_proto.ParseFromString(f.read())
  @unittest.skip('Fails for unknown reason')
  def testMoveTrainDir(self):
    """Evals from a train_dir that was renamed after training (skipped)."""
    params = test_util.get_params('testMoveTrainDir')
    self._train_and_eval_local(params)
    new_train_dir = params.train_dir + '_moved'
    os.rename(params.train_dir, new_train_dir)
    params = params._replace(train_dir=new_train_dir, eval=True)
    self._run_benchmark_cnn_with_black_and_white_images(params)
  @mock.patch('tensorflow.compat.v1.train.Saver')
  @mock.patch('benchmark_cnn._get_checkpoint_to_load')
  def testLoadCheckpoint(self, mock_checkpoint_to_load, mock_saver):
    """Tests load checkpoint with full path to checkpoint."""
    expected_checkpoint = '/path/to/checkpoints/model.ckpt-1243'
    mock_checkpoint_to_load.return_value = expected_checkpoint

    # load_checkpoint is expected to parse the global step (1243) out of the
    # checkpoint filename suffix.
    global_batch = benchmark_cnn.load_checkpoint(mock_saver,
                                                 None,
                                                 expected_checkpoint)
    self.assertEqual(global_batch, 1243)
  def testGetCheckpointToLoadFullPath(self):
    """Tests passing full path."""
    ckpt_path = '/foo/bar/model.ckpt-189'
    # A full checkpoint path should be returned unchanged.
    full_path = benchmark_cnn._get_checkpoint_to_load(ckpt_path)
    self.assertEqual(full_path, ckpt_path)

  def testGetCheckpointToLoadException(self):
    """Tests exception for directory without a checkpoint."""
    ckpt_path = '/foo/bar/checkpoints'
    self.assertRaises(benchmark_cnn.CheckpointNotFoundException,
                      benchmark_cnn._get_checkpoint_to_load, ckpt_path)
@
mock
.
patch
(
'tensorflow.compat.v1.train.get_checkpoint_state'
)
def
testGetCheckpointToLoad
(
self
,
mock_checkpoint_state
):
"""Tests passing path to checkpoint folder."""
expected_checkpoint
=
'/path/to/checkpoints/model.ckpt-1243'
mock_checkpoint_state
.
return_value
=
mock
.
Mock
(
model_checkpoint_path
=
expected_checkpoint
)
ckpt_path
=
'/path/to/checkpoints/'
full_path
=
benchmark_cnn
.
_get_checkpoint_to_load
(
ckpt_path
)
self
.
assertEqual
(
full_path
,
expected_checkpoint
)
def
testImagenetPreprocessor
(
self
):
imagenet_dir
=
os
.
path
.
join
(
platforms_util
.
get_test_data_dir
(),
'fake_tf_record_data'
)
params
=
test_util
.
get_params
(
'testImagenetPreprocessor'
).
_replace
(
data_dir
=
imagenet_dir
,
data_name
=
'imagenet'
)
self
.
_train_and_eval_local
(
params
,
use_test_preprocessor
=
False
)
def
testImagenetPreprocessorNoDistortions
(
self
):
imagenet_dir
=
os
.
path
.
join
(
platforms_util
.
get_test_data_dir
(),
'fake_tf_record_data'
)
params
=
test_util
.
get_params
(
'testImagenetPreprocessorNoDistortions'
).
_replace
(
data_dir
=
imagenet_dir
,
data_name
=
'imagenet'
,
distortions
=
False
)
self
.
_train_and_eval_local
(
params
,
use_test_preprocessor
=
False
)
def
testImagenetPreprocessorVerboseSummary
(
self
):
imagenet_dir
=
os
.
path
.
join
(
platforms_util
.
get_test_data_dir
(),
'fake_tf_record_data'
)
params
=
test_util
.
get_params
(
'testImagenetPreprocessorVerboseSummary'
).
_replace
(
data_dir
=
imagenet_dir
,
data_name
=
'imagenet'
,
distortions
=
False
,
summary_verbosity
=
2
)
self
.
_train_and_eval_local
(
params
,
use_test_preprocessor
=
False
)
def
testCifar10SyntheticData
(
self
):
params
=
test_util
.
get_params
(
'testCifar10SyntheticData'
).
_replace
(
data_name
=
'cifar10'
)
self
.
_train_and_eval_local
(
params
)
def
testShiftRatio
(
self
):
test_util
.
monkey_patch_base_cluster_manager
()
params
=
benchmark_cnn
.
make_params
(
data_name
=
'imagenet'
,
data_dir
=
os
.
path
.
join
(
platforms_util
.
get_test_data_dir
(),
'fake_tf_record_data'
),
job_name
=
'worker'
,
worker_hosts
=
'w1,w2,w3,w4'
,
ps_hosts
=
'p1'
,
task_index
=
0
)
self
.
assertEqual
(
benchmark_cnn
.
BenchmarkCNN
(
params
).
input_preprocessor
.
shift_ratio
,
0.0
)
params
=
params
.
_replace
(
task_index
=
3
)
self
.
assertEqual
(
benchmark_cnn
.
BenchmarkCNN
(
params
).
input_preprocessor
.
shift_ratio
,
0.75
)
def
testDistributedReplicatedSavableVars
(
self
):
test_util
.
monkey_patch_base_cluster_manager
()
params
=
benchmark_cnn
.
make_params
(
variable_update
=
'distributed_replicated'
,
model
=
'inception4'
,
data_name
=
'imagenet'
,
data_dir
=
os
.
path
.
join
(
platforms_util
.
get_test_data_dir
(),
'fake_tf_record_data'
),
job_name
=
'worker'
,
worker_hosts
=
'w1,w2,w3,w4'
,
ps_hosts
=
'p1'
,
datasets_use_prefetch
=
False
)
bench
=
benchmark_cnn
.
BenchmarkCNN
(
params
)
with
tf
.
Graph
().
as_default
():
bench
.
_build_model
()
savable_vars
=
bench
.
variable_mgr
.
savable_variables
()
# Assert all global variables are in savable_vars
for
v
in
tf
.
global_variables
():
if
not
v
.
name
.
startswith
(
variable_mgr_util
.
PS_SHADOW_VAR_PREFIX
+
'/v0'
):
self
.
assertEqual
(
v
.
name
,
'global_step:0'
)
name
=
bench
.
variable_mgr
.
_strip_port
(
v
.
name
)
if
name
.
startswith
(
variable_mgr_util
.
PS_SHADOW_VAR_PREFIX
):
name
=
name
[
len
(
variable_mgr_util
.
PS_SHADOW_VAR_PREFIX
+
'/'
):]
self
.
assertIn
(
name
,
savable_vars
)
self
.
assertIn
(
savable_vars
[
name
],
tf
.
global_variables
())
# Assert all local variables on the first tower are in savable_vars
for
v
in
tf
.
local_variables
():
if
v
.
name
.
startswith
(
'v0/'
):
name
=
bench
.
variable_mgr
.
_strip_port
(
v
.
name
)
self
.
assertIn
(
name
,
savable_vars
)
def
_test_preprocessing_eval
(
self
,
image_height
,
image_width
,
output_height
,
output_width
):
image
=
tf
.
fill
((
image_height
,
image_width
,
3
),
tf
.
constant
(
128
,
dtype
=
tf
.
uint8
))
params
=
benchmark_cnn
.
make_params
()
new_image
=
preprocessing
.
eval_image
(
image
,
output_height
,
output_width
,
0
,
'bilinear'
,
params
.
summary_verbosity
)
with
self
.
test_session
()
as
sess
:
new_image_value
=
sess
.
run
(
new_image
)
self
.
assertAllEqual
(
new_image_value
,
np
.
full
((
output_height
,
output_width
,
3
),
128
,
dtype
=
np
.
uint8
))
def
testPreprocessingEval
(
self
):
self
.
_test_preprocessing_eval
(
10
,
10
,
4
,
4
)
self
.
_test_preprocessing_eval
(
4
,
4
,
10
,
10
)
self
.
_test_preprocessing_eval
(
1
,
100
,
100
,
1
)
self
.
_test_preprocessing_eval
(
100
,
1
,
1
,
100
)
self
.
_test_preprocessing_eval
(
1
,
100
,
1
,
100
)
def
_test_preprocessing_traing
(
self
,
image_buf
,
image_color
,
output_height
,
output_width
,
bbox
,
batch_position
,
resize_method
,
distortions
,
summary_verbosity
,
fuse_decode_and_crop
):
new_image
=
preprocessing
.
train_image
(
image_buf
,
output_height
,
output_width
,
bbox
,
batch_position
,
resize_method
,
distortions
,
summary_verbosity
=
summary_verbosity
,
fuse_decode_and_crop
=
fuse_decode_and_crop
)
self
.
assertEqual
(
new_image
.
shape
,
[
output_height
,
output_width
,
3
])
with
self
.
test_session
(
use_gpu
=
True
)
as
sess
:
new_image_value
=
sess
.
run
(
new_image
)
self
.
assertAllClose
(
new_image_value
,
np
.
full
(
[
output_height
,
output_width
,
3
],
image_color
,
dtype
=
np
.
float32
),
atol
=
50.
,
rtol
=
0.
)
def
testPreprocessingTrain
(
self
):
test_data_dir
=
os
.
path
.
join
(
platforms_util
.
get_test_data_dir
(),
'images'
)
black_file
=
os
.
path
.
join
(
test_data_dir
,
'black_image.jpg'
)
with
open
(
black_file
,
'rb'
)
as
f
:
black_jpg_buffer
=
f
.
read
()
white_file
=
os
.
path
.
join
(
test_data_dir
,
'white_image.jpg'
)
with
open
(
white_file
,
'rb'
)
as
f
:
white_jpg_buffer
=
f
.
read
()
bbox
=
tf
.
zeros
((
1
,
0
,
4
),
dtype
=
tf
.
float32
)
batch_position
=
0
# Each size config is (output_height, output_width, resize_method)
size_configs
=
[(
100
,
100
,
'round_robin'
),
(
150
,
10
,
'bilinear'
),
(
10
,
150
,
'nearest'
)]
# Each image config is (image_buf, image_color)
image_configs
=
[(
white_jpg_buffer
,
255
),
(
black_jpg_buffer
,
0
)]
for
(
image_buf
,
image_color
)
in
image_configs
:
for
output_height
,
output_width
,
resize_method
in
size_configs
:
for
distortions
in
[
True
,
False
]:
for
summary_verbosity
in
[
0
,
2
]:
for
fuse_decode_and_crop
in
[
True
,
False
]:
self
.
_test_preprocessing_traing
(
image_buf
,
image_color
,
output_height
,
output_width
,
bbox
,
batch_position
,
resize_method
,
distortions
,
summary_verbosity
,
fuse_decode_and_crop
)
def
_test_learning_rate
(
self
,
params
,
global_step_to_expected_learning_rate
):
self
.
longMessage
=
True
# pylint: disable=invalid-name
bench
=
benchmark_cnn
.
BenchmarkCNN
(
params
)
with
tf
.
Graph
().
as_default
()
as
graph
:
bench
.
_build_model
()
global_step
=
graph
.
get_tensor_by_name
(
'global_step:0'
)
learning_rate
=
graph
.
get_tensor_by_name
(
'learning_rate_tensor:0'
)
with
self
.
test_session
(
graph
=
graph
,
use_gpu
=
True
)
as
sess
:
items
=
global_step_to_expected_learning_rate
.
items
()
for
global_step_val
,
expected_learning_rate
in
items
:
self
.
assertAlmostEqual
(
sess
.
run
(
learning_rate
,
{
global_step
:
global_step_val
}),
expected_learning_rate
,
msg
=
'at global_step:{}'
.
format
(
global_step_val
))
def
testLearningRateModelSpecificResNet
(
self
):
params
=
benchmark_cnn
.
make_params
(
model
=
'resnet50'
,
batch_size
=
256
,
variable_update
=
'parameter_server'
,
num_gpus
=
1
)
self
.
_test_learning_rate
(
params
,
{
0
:
0
,
150136
:
0.128
,
150137
:
0.0128
,
300273
:
0.0128
,
300274
:
0.00128
,
10000000
:
0.0000128
})
def
testLearningRateUserProvidedInitLr
(
self
):
params
=
benchmark_cnn
.
make_params
(
model
=
'resnet50'
,
batch_size
=
256
,
variable_update
=
'replicated'
,
init_learning_rate
=
1.
)
self
.
_test_learning_rate
(
params
,
{
0
:
1.
,
10000000
:
1.
})
def
testLearningRateUserProvidedInitLrAndWarmup
(
self
):
params
=
benchmark_cnn
.
make_params
(
model
=
'resnet50'
,
batch_size
=
256
,
variable_update
=
'replicated'
,
init_learning_rate
=
1.
,
num_learning_rate_warmup_epochs
=
5
)
self
.
_test_learning_rate
(
params
,
{
0
:
0.
,
12511
:
0.5
,
25022
:
1.
,
10000000
:
1.
})
def
testLearningRateUserProvidedDecayInfo
(
self
):
params
=
benchmark_cnn
.
make_params
(
model
=
'resnet50'
,
init_learning_rate
=
1.
,
learning_rate_decay_factor
=
0.5
,
num_epochs_per_decay
=
2
,
minimum_learning_rate
=
0.3750
,
batch_size
=
32
)
self
.
_test_learning_rate
(
params
,
{
0
:
1.
,
80071
:
1.
,
80072
:
0.5
,
160143
:
0.5
,
160144
:
0.375
,
10000000
:
0.375
})
def
testLearningRateUserProvidedZeroDecay
(
self
):
params
=
benchmark_cnn
.
make_params
(
model
=
'resnet50'
,
num_learning_rate_warmup_epochs
=
0
,
learning_rate_decay_factor
=
0.5
,
num_epochs_per_decay
=
0
,
minimum_learning_rate
=
0.3750
,
batch_size
=
32
)
with
self
.
assertRaises
(
ValueError
):
with
tf
.
Graph
().
as_default
():
# This will fail because params.learning_rate_decay_factor cannot be
# nonzero if params.num_epochs_per_decay is zero.
benchmark_cnn
.
BenchmarkCNN
(
params
).
_build_model
()
def
testLearningRateUserProvidedSchedule
(
self
):
params
=
benchmark_cnn
.
make_params
(
model
=
'trivial'
,
batch_size
=
32
,
piecewise_learning_rate_schedule
=
'1;3;.1;5;.01'
)
self
.
_test_learning_rate
(
params
,
{
0
:
1.
,
120108
:
1.
,
120109
:
0.1
,
200181
:
0.1
,
200182
:
0.01
,
100000000
:
0.01
})
def
testNumBatchesAndEpochs
(
self
):
params
=
benchmark_cnn
.
make_params
()
batches
,
epochs
=
benchmark_cnn
.
get_num_batches_and_epochs
(
params
,
10
,
100
)
self
.
assertEqual
(
batches
,
benchmark_cnn
.
_DEFAULT_NUM_BATCHES
)
self
.
assertAlmostEqual
(
epochs
,
float
(
benchmark_cnn
.
_DEFAULT_NUM_BATCHES
)
/
10
)
params
=
benchmark_cnn
.
make_params
(
num_batches
=
21
)
batches
,
epochs
=
benchmark_cnn
.
get_num_batches_and_epochs
(
params
,
25
,
50
)
self
.
assertEqual
(
batches
,
21
)
self
.
assertAlmostEqual
(
epochs
,
10.5
)
params
=
benchmark_cnn
.
make_params
(
num_epochs
=
3
)
batches
,
epochs
=
benchmark_cnn
.
get_num_batches_and_epochs
(
params
,
2
,
3
)
self
.
assertEqual
(
batches
,
5
)
self
.
assertAlmostEqual
(
epochs
,
10.
/
3.
)
params
=
benchmark_cnn
.
make_params
(
num_epochs
=
4
)
batches
,
epochs
=
benchmark_cnn
.
get_num_batches_and_epochs
(
params
,
2
,
3
)
self
.
assertEqual
(
batches
,
6
)
self
.
assertAlmostEqual
(
epochs
,
4
)
with
self
.
assertRaises
(
ValueError
):
params
=
benchmark_cnn
.
make_params
(
num_batches
=
100
,
num_epochs
=
100
)
benchmark_cnn
.
get_num_batches_and_epochs
(
params
,
1
,
1
)
def
_testEvalDuringTraining
(
self
,
params
,
expected_num_eval_batches_found
):
# The idea of this test is that all train images are black and all eval
# images are white. We pass the images through the TestModel, and ensure
# the outputs are as expected.
batch_size
=
params
.
batch_size
eval_batch_size
=
params
.
eval_batch_size
or
params
.
batch_size
class
TestModel
(
test_util
.
TestCNNModel
):
def
__init__
(
self
):
super
(
TestModel
,
self
).
__init__
()
self
.
depth
=
3
def
add_inference
(
self
,
cnn
):
if
cnn
.
phase_train
:
# This will allow us to test that 100 is only added during training
# and not during eval.
cnn
.
top_layer
+=
100
assert
cnn
.
top_layer
.
shape
[
0
]
==
batch_size
else
:
assert
cnn
.
top_layer
.
shape
[
0
]
==
eval_batch_size
# Reduce the image to a single number. The number should be (-1 + 100)
# during training and 1 during testing.
cnn
.
top_layer
=
tf
.
reshape
(
cnn
.
top_layer
,
(
cnn
.
top_layer
.
shape
[
0
],
-
1
))
cnn
.
top_layer
=
tf
.
reduce_mean
(
cnn
.
top_layer
,
axis
=
1
)
cnn
.
top_layer
=
tf
.
reshape
(
cnn
.
top_layer
,
(
cnn
.
top_layer
.
shape
[
0
],
1
,
1
,
1
))
cnn
.
top_size
=
1
trainable_vars
=
tf
.
trainable_variables
()
# The super method will compute image*A*B, where A=1 and B=2.
super
(
TestModel
,
self
).
add_inference
(
cnn
)
if
not
cnn
.
phase_train
:
# Assert no new variables were added, since they should be reused from
# training.
assert
len
(
trainable_vars
)
==
len
(
tf
.
trainable_variables
())
model
=
TestModel
()
dataset
=
datasets
.
ImagenetDataset
(
params
.
data_dir
)
logs
=
[]
bench_cnn
=
benchmark_cnn
.
BenchmarkCNN
(
params
,
model
=
model
,
dataset
=
dataset
)
with
test_util
.
monkey_patch
(
benchmark_cnn
,
log_fn
=
test_util
.
print_and_add_to_list
(
logs
)):
bench_cnn
.
run
()
training_outputs
=
test_util
.
get_training_outputs_from_logs
(
logs
,
print_training_accuracy
=
False
)
self
.
assertEqual
(
len
(
training_outputs
),
params
.
num_batches
)
expected_training_output
=
(
-
1
+
100
)
*
1
*
2
for
training_output
in
training_outputs
:
self
.
assertEqual
(
training_output
.
loss
,
expected_training_output
)
eval_outputs
=
test_util
.
get_evaluation_outputs_from_logs
(
logs
)
self
.
assertTrue
(
eval_outputs
)
expected_eval_output
=
1
*
1
*
2
for
eval_output
in
eval_outputs
:
self
.
assertEqual
(
eval_output
.
top_1_accuracy
,
expected_eval_output
)
self
.
assertEqual
(
eval_output
.
top_5_accuracy
,
expected_eval_output
)
num_eval_batches_found
=
0
eval_batch_regex
=
re
.
compile
(
r
'^\d+\t[0-9.]+ examples/sec$'
)
for
log
in
logs
:
if
eval_batch_regex
.
match
(
log
):
num_eval_batches_found
+=
1
self
.
assertEqual
(
num_eval_batches_found
,
expected_num_eval_batches_found
)
def
testEvalDuringTraining
(
self
):
data_dir
=
test_util
.
create_black_and_white_images
()
base_params
=
test_util
.
get_params
(
'testEvalDuringTraining'
)
train_dir
=
base_params
.
train_dir
base_params
=
base_params
.
_replace
(
train_dir
=
None
,
print_training_accuracy
=
False
,
num_warmup_batches
=
0
,
num_batches
=
7
,
num_eval_batches
=
2
,
display_every
=
1
,
init_learning_rate
=
0
,
weight_decay
=
0
,
distortions
=
False
,
data_dir
=
data_dir
)
expected_num_eval_batches_found
=
(
base_params
.
num_eval_batches
*
(
base_params
.
num_batches
//
2
+
1
))
# Test --eval_during_training_every_n_steps
self
.
_testEvalDuringTraining
(
base_params
.
_replace
(
eval_during_training_every_n_steps
=
2
,
variable_update
=
'parameter_server'
),
expected_num_eval_batches_found
)
self
.
_testEvalDuringTraining
(
base_params
.
_replace
(
eval_during_training_every_n_steps
=
2
,
variable_update
=
'replicated'
),
expected_num_eval_batches_found
)
self
.
_testEvalDuringTraining
(
base_params
.
_replace
(
eval_during_training_every_n_steps
=
2
,
variable_update
=
'replicated'
,
summary_verbosity
=
2
,
save_summaries_steps
=
2
,
datasets_use_prefetch
=
False
),
expected_num_eval_batches_found
)
self
.
_testEvalDuringTraining
(
base_params
.
_replace
(
eval_during_training_every_n_steps
=
2
,
variable_update
=
'replicated'
,
use_fp16
=
True
,
train_dir
=
train_dir
,
eval_batch_size
=
base_params
.
batch_size
+
2
),
expected_num_eval_batches_found
)
# Test --eval_during_training_every_n_epochs
every_n_epochs
=
(
2
*
base_params
.
batch_size
*
base_params
.
num_gpus
/
datasets
.
IMAGENET_NUM_TRAIN_IMAGES
)
self
.
_testEvalDuringTraining
(
base_params
.
_replace
(
eval_during_training_every_n_epochs
=
every_n_epochs
,
variable_update
=
'replicated'
),
expected_num_eval_batches_found
)
# Test --eval_during_training_at_specified_steps
list_steps
=
[
2
,
3
,
5
,
7
,
1000
]
num_eval_steps
=
1
+
sum
(
1
for
step
in
list_steps
if
step
<
base_params
.
num_batches
)
expected_num_eval_batches_found
=
(
base_params
.
num_eval_batches
*
num_eval_steps
)
self
.
_testEvalDuringTraining
(
base_params
.
_replace
(
eval_during_training_at_specified_steps
=
list_steps
,
variable_update
=
'replicated'
),
expected_num_eval_batches_found
)
# Test --eval_during_training_at_specified_epochs
list_epochs
=
[(
step
*
base_params
.
batch_size
*
base_params
.
num_gpus
/
datasets
.
IMAGENET_NUM_TRAIN_IMAGES
)
for
step
in
list_steps
]
self
.
_testEvalDuringTraining
(
base_params
.
_replace
(
eval_during_training_at_specified_epochs
=
list_epochs
,
variable_update
=
'replicated'
),
expected_num_eval_batches_found
)
# Test --eval_during_training_every_n_steps runs with synthetic data.
params
=
base_params
.
_replace
(
variable_update
=
'replicated'
,
data_dir
=
None
,
eval_during_training_every_n_steps
=
2
,
num_batches
=
2
)
benchmark_cnn
.
BenchmarkCNN
(
params
).
run
()
def
testEvalDuringTrainingNumEpochs
(
self
):
params
=
benchmark_cnn
.
make_params
(
batch_size
=
1
,
eval_batch_size
=
2
,
eval_during_training_every_n_steps
=
1
,
num_batches
=
30
,
num_eval_epochs
=
100
/
datasets
.
IMAGENET_NUM_VAL_IMAGES
)
bench_cnn
=
benchmark_cnn
.
BenchmarkCNN
(
params
)
self
.
assertEqual
(
bench_cnn
.
num_batches
,
30
)
self
.
assertAlmostEqual
(
bench_cnn
.
num_epochs
,
30
/
datasets
.
IMAGENET_NUM_TRAIN_IMAGES
)
self
.
assertAlmostEqual
(
bench_cnn
.
num_eval_batches
,
50
)
self
.
assertAlmostEqual
(
bench_cnn
.
num_eval_epochs
,
100
/
datasets
.
IMAGENET_NUM_VAL_IMAGES
)
def
testEarlyStopping
(
self
):
params
=
benchmark_cnn
.
make_params
(
batch_size
=
2
,
display_every
=
1
,
num_batches
=
100
,
eval_during_training_every_n_steps
=
2
,
stop_at_top_1_accuracy
=
0.4
,
)
with
mock
.
patch
.
object
(
benchmark_cnn
.
BenchmarkCNN
,
'_eval_once'
,
side_effect
=
[(
0.1
,
0.1
),
(
0.5
,
0.5
),
(
0.2
,
0.2
)]
)
as
mock_eval_once
:
logs
=
[]
bench_cnn
=
benchmark_cnn
.
BenchmarkCNN
(
params
)
with
test_util
.
monkey_patch
(
benchmark_cnn
,
log_fn
=
test_util
.
print_and_add_to_list
(
logs
)):
bench_cnn
.
run
()
training_outputs
=
test_util
.
get_training_outputs_from_logs
(
logs
,
print_training_accuracy
=
False
)
# We should stop after the second evaluation, and we evaluate every 2
# steps. So there should be 2 * 2 = 4 training outputs.
self
.
assertEqual
(
len
(
training_outputs
),
4
)
self
.
assertEqual
(
mock_eval_once
.
call_count
,
2
)
def
testOutOfRangeErrorsAreNotIgnored
(
self
):
error_msg
=
'Fake OutOfRangeError error message'
with
mock
.
patch
.
object
(
benchmark_cnn
.
BenchmarkCNN
,
'benchmark_with_session'
,
side_effect
=
tf
.
errors
.
OutOfRangeError
(
None
,
None
,
error_msg
)):
with
self
.
assertRaisesRegex
(
RuntimeError
,
error_msg
):
benchmark_cnn
.
BenchmarkCNN
(
benchmark_cnn
.
make_params
()).
run
()
def
testInvalidFlags
(
self
):
params
=
benchmark_cnn
.
make_params
(
device
=
'cpu'
,
data_format
=
'NCHW'
)
with
self
.
assertRaises
(
ValueError
):
benchmark_cnn
.
BenchmarkCNN
(
params
)
params
=
benchmark_cnn
.
make_params
(
use_fp16
=
True
,
fp16_vars
=
True
,
variable_update
=
'replicated'
,
all_reduce_spec
=
'nccl'
)
with
self
.
assertRaises
(
ValueError
):
benchmark_cnn
.
BenchmarkCNN
(
params
)
# Automatic loss scaling is only supported for 'replicated', 'ps',
# and 'independent' variable_updates.
invalid_variable_updates
=
[
'distributed_replicated'
,
'distributed_all_reduce'
]
for
variable_update
in
invalid_variable_updates
:
params
=
benchmark_cnn
.
make_params
(
use_fp16
=
True
,
fp16_vars
=
True
,
fp16_enable_auto_loss_scale
=
True
,
variable_update
=
variable_update
)
with
self
.
assertRaises
(
ValueError
):
benchmark_cnn
.
BenchmarkCNN
(
params
)
# Automatic loss scaling is not supported for 'nccl'.
params
=
benchmark_cnn
.
make_params
(
use_fp16
=
True
,
fp16_vars
=
True
,
fp16_enable_auto_loss_scale
=
True
,
all_reduce_spec
=
'nccl'
)
with
self
.
assertRaises
(
ValueError
):
benchmark_cnn
.
BenchmarkCNN
(
params
)
# Automatic loss scaling is not supported for 'staged_vars'.
params
=
benchmark_cnn
.
make_params
(
use_fp16
=
True
,
fp16_vars
=
True
,
fp16_enable_auto_loss_scale
=
True
,
staged_vars
=
True
)
with
self
.
assertRaises
(
ValueError
):
benchmark_cnn
.
BenchmarkCNN
(
params
)
def
testMakeParams
(
self
):
default_params
=
benchmark_cnn
.
make_params
()
self
.
assertEqual
(
default_params
.
model
,
flags
.
param_specs
[
'model'
].
default_value
)
params
=
benchmark_cnn
.
make_params
(
model
=
'foo'
)
self
.
assertEqual
(
params
.
model
,
'foo'
)
with
self
.
assertRaises
(
ValueError
):
benchmark_cnn
.
make_params
(
job_name
=
'foo'
)
with
self
.
assertRaises
(
ValueError
):
benchmark_cnn
.
make_params
(
gpu_memory_frac_for_testing
=-
1.
)
class VariableUpdateTest(tf.test.TestCase):
  """Tests that variables are updated correctly.

  These tests use a very simple deterministic model. For example, some tests
  use the model

    loss = image * A * B

  where image is a 1x1 images (with a single scalar value), and A and B are
  scalar variables. Tests will run tf_cnn_benchmarks with such a model, on a
  sequence of scalar images, and assert that the losses are the correct value.
  Since the losses depend on the variables, this indirectly tests variables
  are updated correctly.
  """

  def setUp(self):
    super(VariableUpdateTest, self).setUp()
    _check_has_gpu()
    benchmark_cnn.setup(benchmark_cnn.make_params())

  def _get_benchmark_cnn_losses(self, inputs, params):
    """Returns the losses of BenchmarkCNN on the given inputs and params."""
    logs = []
    model = test_util.TestCNNModel()
    with test_util.monkey_patch(
        benchmark_cnn,
        log_fn=test_util.print_and_add_to_list(logs),
        LOSS_AND_ACCURACY_DIGITS_TO_SHOW=15):
      bench = benchmark_cnn.BenchmarkCNN(
          params, dataset=test_util.TestDataSet(), model=model)
      # The test model does not use labels when computing loss, so the label
      # values do not matter as long as it's the right shape.
      labels = np.array([1] * inputs.shape[0])
      bench.input_preprocessor.set_fake_data(inputs, labels)
      if bench.eval_input_preprocessor:
        bench.eval_input_preprocessor.set_fake_data(inputs, labels)
      bench.run()
    outputs = test_util.get_training_outputs_from_logs(
        logs, params.print_training_accuracy)
    return [x.loss for x in outputs]

  def _test_variable_update(self, params):
    """Tests variables are updated correctly when the given params are used.

    A BenchmarkCNN is created with a TestCNNModel, and is run with some scalar
    images. The losses are then compared with the losses obtained with
    TestCNNModel().manually_compute_losses()

    Args:
      params: a Params tuple used to create BenchmarkCNN.
    """
    inputs = test_util.get_fake_var_update_inputs()
    actual_losses = self._get_benchmark_cnn_losses(inputs, params)
    expected_losses, = test_util.TestCNNModel().manually_compute_losses(
        inputs, 1, params)
    # fp16 arithmetic is much less precise, so relax the tolerance.
    rtol = 3e-2 if params.use_fp16 else 1e-5
    self.assertAllClose(actual_losses[:len(expected_losses)],
                        expected_losses, rtol=rtol, atol=0.)

  def _test_variable_updates(self, params,
                             var_updates=('parameter_server', 'replicated')):
    """Runs _test_variable_update for each variable_update strategy."""
    for var_update in var_updates:
      self._test_variable_update(params._replace(variable_update=var_update))

  def testDefault(self):
    params = test_util.get_var_update_params()
    self._test_variable_updates(params)

  # For some reason, this test doesn't always pass
  # def testCpuAsDevice(self):
  #   params = test_util.get_var_update_params()._replace(
  #       device='cpu',
  #       data_format='NHWC')  # NHWC required when --device=cpu
  #   self._test_variable_updates(params)

  def testCpuAsLocalParamDevice(self):
    params = test_util.get_var_update_params()._replace(
        local_parameter_device='cpu')
    self._test_variable_updates(params)

  def testFp16(self):
    params = test_util.get_var_update_params()._replace(use_fp16=True)
    self._test_variable_updates(params)

  def testMomentum(self):
    params = test_util.get_var_update_params()._replace(optimizer='momentum')
    self._test_variable_updates(params)

  def testRmsprop(self):
    params = test_util.get_var_update_params()._replace(optimizer='rmsprop')
    self._test_variable_updates(params)

  def testNoLayers(self):
    params = test_util.get_var_update_params()._replace(use_tf_layers=False)
    self._test_variable_updates(params)

  def testVariousAllReduceSpecs(self):
    # We do not test xring, because it requires all Variables to have at least
    # two elements.
    params = test_util.get_var_update_params()._replace(
        all_reduce_spec='pscpu')
    self._test_variable_updates(params, var_updates=('replicated',))
    params = params._replace(all_reduce_spec='psgpu')
    self._test_variable_updates(params, var_updates=('replicated',))
    # TODO(b/80125832): Enable nccl in tests
    # params = params._replace(all_reduce_spec='nccl',
    #                          compact_gradient_transfer=False)
    # self._test_variable_updates(params, var_updates=('replicated',))

  def testPrintBaseLoss(self):
    params = test_util.get_var_update_params()._replace(
        loss_type_to_report='base_loss')
    self._test_variable_updates(params)

  def testSingleL2LossOp(self):
    params = test_util.get_var_update_params()._replace(
        single_l2_loss_op=True)
    self._test_variable_updates(params)

  def testResourceVars(self):
    params = test_util.get_var_update_params()._replace(
        use_resource_vars=True)
    self._test_variable_updates(params)

  def testEvalDuringTrainingEveryNSteps(self):
    # TODO(reedwm): Test that the eval results are correct. This only tests
    # that training results are correct.
    params = test_util.get_var_update_params()._replace(
        eval_during_training_every_n_steps=1)
    self._test_variable_updates(params, var_updates=('replicated',))
class VariableMgrLocalReplicatedTest(tf.test.TestCase):
  """Tests gradient aggregation in the local replicated variable manager."""

  def _test_grad_aggregation_with_var_mgr(self, variable_mgr, num_towers,
                                          num_vars, deferred_grads):
    """Feeds known per-tower gradients and checks the aggregated sums.

    Tower i's gradient for variable j is the constant num_towers * i + j, so
    every tower's aggregated gradient for variable j must equal
    sum_i (num_towers * i + j).
    """
    tower_devices = ['/gpu:%d' % i for i in range(num_towers)]
    tower_grads = []
    expected_sums = [0.] * num_vars
    for i, tower_device in enumerate(tower_devices):
      with tf.device(tower_device):
        grad_vars = []
        for j in range(num_vars):
          n = num_towers * i + j
          grad_vars.append((tf.constant(n, dtype=tf.float32),
                            tf.Variable(n, dtype=tf.float32)))
          expected_sums[j] += n
      tower_grads.append(grad_vars)

    _, agg_device_grads = variable_mgr.preprocess_device_grads(tower_grads)
    expected_device_grads = []
    for i in range(num_towers):
      expected_grad_vars = []
      for j in range(num_vars):
        expected_grad_and_var = [expected_sums[j], num_towers * i + j]
        if isinstance(agg_device_grads[i][j], tuple):
          # agg_device_grads[i][j] can be a list or tuple.
          expected_grad_and_var = tuple(expected_grad_and_var)
        expected_grad_vars.append(expected_grad_and_var)
      if isinstance(agg_device_grads[i], tuple):
        # agg_device_grads[i] can be a list or tuple.
        expected_grad_vars = tuple(expected_grad_vars)
      expected_device_grads.append(expected_grad_vars)

    config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=config) as sess:
      sess.run(tf.initialize_all_variables())
      sess.run(variable_mgr._warmup_ops)
      if deferred_grads:
        # With deferred grads, the result of a session run is always the
        # summed gradients from the previous session run.
        sess.run(agg_device_grads)
        feed_dict = {g: 0 for grad_vars in tower_grads for g, _ in grad_vars}
        agg_device_grads_ = sess.run(agg_device_grads, feed_dict)
      else:
        agg_device_grads_ = sess.run(agg_device_grads)
    self.assertEqual(agg_device_grads_, expected_device_grads)

  def _test_grad_aggregation(self, params, num_vars):
    """Builds a BenchmarkCNN for params and exercises its variable manager."""
    bench = benchmark_cnn.BenchmarkCNN(params)
    deferred_grads = (params.variable_consistency == 'relaxed')
    self._test_grad_aggregation_with_var_mgr(
        bench.variable_mgr, bench.num_gpus, num_vars, deferred_grads)

  def test_grad_aggregation(self):
    """Sweeps gradient aggregation over repacking/consistency/spec combos."""
    base_params = benchmark_cnn.make_params(num_gpus=10,
                                            variable_update='replicated',
                                            use_fp16=True)
    self._test_grad_aggregation(base_params, 10)
    self._test_grad_aggregation(
        base_params._replace(gradient_repacking=3), 10)
    self._test_grad_aggregation(
        base_params._replace(variable_consistency='relaxed'), 10)
    self._test_grad_aggregation(
        base_params._replace(compact_gradient_transfer=False), 10)
    self._test_grad_aggregation(
        base_params._replace(gradient_repacking=3,
                             variable_consistency='relaxed'), 10)
    self._test_grad_aggregation(
        base_params._replace(gradient_repacking=3,
                             compact_gradient_transfer=False), 10)
    self._test_grad_aggregation(
        base_params._replace(variable_consistency='relaxed',
                             compact_gradient_transfer=False), 10)
    self._test_grad_aggregation(
        base_params._replace(gradient_repacking=3,
                             variable_consistency='relaxed',
                             compact_gradient_transfer=False), 10)
    self._test_grad_aggregation(
        base_params._replace(num_gpus=8, hierarchical_copy=True), 10)
    # TODO(b/80125832): Enable nccl in tests
    # params = base_params._replace(all_reduce_spec='nccl',
    #                               compact_gradient_transfer=False,
    #                               # For some reason, this test freezes when
    #                               # num_gpus=10
    #                               num_gpus=8)
    # self._test_grad_aggregation(params, 10)
    self._test_grad_aggregation(
        base_params._replace(all_reduce_spec='pscpu'), 10)
    self._test_grad_aggregation(
        base_params._replace(num_gpus=8,
                             gradient_repacking=3,
                             variable_consistency='relaxed',
                             hierarchical_copy=True), 10)
    # TODO(b/80125832): Enable nccl in tests
    # params = base_params._replace(num_gpus=8,
    #                               gradient_repacking=3,
    #                               variable_consistency='relaxed',
    #                               all_reduce_spec='nccl',
    #                               compact_gradient_transfer=False)
    # self._test_grad_aggregation(params, 10)
    self._test_grad_aggregation(
        base_params._replace(gradient_repacking=3,
                             variable_consistency='relaxed',
                             all_reduce_spec='pscpu'), 10)
    self._test_grad_aggregation(
        base_params._replace(gradient_repacking=3,
                             variable_consistency='relaxed',
                             all_reduce_spec='xring'), 10)
if __name__ == '__main__':
  # These benchmarks predate TF2; run the test suite under TF1 semantics.
  tf.disable_v2_behavior()
  tf.test.main()
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/cnn_util.py
0 → 100644
View file @
f0d87682
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utilities for CNN benchmarks."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
sys
import
threading
import
numpy
as
np
import
tensorflow.compat.v1
as
tf
def tensorflow_version_tuple():
  """Returns the TensorFlow version as a (major, minor, patch) tuple.

  major and minor are ints; patch is left as a string, since it may carry a
  non-numeric suffix (e.g. '0-rc1').

  Returns:
    A (int, int, str) tuple parsed from tf.__version__.
  """
  v = tf.__version__
  # Split at most twice so versions with extra dotted components (e.g.
  # nightly builds like '2.5.0.dev20201130') do not raise on unpacking;
  # everything after the second dot is folded into `patch`.
  major, minor, patch = v.split('.', 2)
  return (int(major), int(minor), patch)
def tensorflow_version():
  """Returns the TF version encoded as a single int: major * 1000 + minor."""
  major, minor = tensorflow_version_tuple()[:2]
  return major * 1000 + minor
def log_fn(log):
  """Writes `log` to stdout, followed by a newline, flushing immediately."""
  sys.stdout.write(str(log) + '\n')
  sys.stdout.flush()
def roll_numpy_batches(array, batch_size, shift_ratio):
  """Moves a proportion of batches from start to the end of the array.

  This function moves a proportion of batches, specified by `shift_ratio`,
  from the start of the array to the end. The number of batches moved is
  rounded down to the nearest integer. For example,

  ```
  roll_numpy_batches([1, 2, 3, 4, 5, 6], 2, 0.34) == [3, 4, 5, 6, 1, 2]
  ```

  Args:
    array: A Numpy array whose first dimension is the batch dimension.
    batch_size: The batch size.
    shift_ratio: Proportion of batches to move from the start of the array to
      the end of the array.
  Returns:
    A new Numpy array, with a proportion of the batches at the start of
    `array` moved to the end.
  """
  num_items = array.shape[0]
  assert num_items % batch_size == 0
  num_batches = num_items // batch_size
  # Round the batch count down, then convert back to an item count.
  items_to_shift = int(num_batches * shift_ratio) * batch_size
  return np.roll(array, -items_to_shift, axis=0)
# For Python 2.7 compatibility, we do not use threading.Barrier.
class Barrier(object):
  """Implements a lightweight Barrier.

  Useful for synchronizing a fixed number of threads at known synchronization
  points. Threads block on 'wait()' and simultaneously return once they have
  all made that call.

  # Implementation adopted from boost/thread/barrier.hpp
  """

  def __init__(self, parties):
    """Create a barrier, initialised to 'parties' threads."""
    self.cond = threading.Condition(threading.Lock())
    self.parties = parties
    # Indicates the number of waiting parties.
    self.waiting = 0
    # generation is needed to deal with spurious wakeups. If self.cond.wait()
    # wakes up for other reasons, generation will force it go back to wait().
    self.generation = 0
    # Once broken, wait() becomes a no-op; set by abort().
    self.broken = False

  def wait(self):
    """Wait for the barrier."""
    with self.cond:
      # Check if the barrier has been disabled or not.
      if self.broken:
        return
      # Remember which generation this waiter belongs to; the last arriving
      # thread bumps the generation, releasing everyone who saw the old value.
      gen = self.generation
      self.waiting += 1
      if self.waiting == self.parties:
        # Last thread to arrive: reset the count and wake all waiters.
        self.waiting = 0
        self.generation += 1
        self.cond.notify_all()
      # loop because of spurious wakeups
      while gen == self.generation:
        self.cond.wait()

  # TODO(huangyp): Remove this method once we find a way to know which step
  # is the last barrier.
  def abort(self):
    """Clear existing barrier and disable this barrier."""
    with self.cond:
      if self.waiting > 0:
        # Release any currently blocked waiters by advancing the generation.
        self.generation += 1
        self.cond.notify_all()
      self.broken = True
class ImageProducer(object):
  """An image producer that puts images into a staging area periodically.

  This class is useful for periodically running a set of ops, `put_ops` on a
  different thread every `batch_group_size` steps.

  The notify_image_consumption() method is used to increment an internal
  counter so that every `batch_group_size` times it is called, `put_ops` is
  executed. A barrier is placed so that notify_image_consumption() will block
  until the previous call to `put_ops` has been executed.

  The start() method is used to start the thread that runs `put_ops`.

  The done() method waits until the last put_ops is executed and stops the
  thread.

  The purpose of this class is to fill an image input pipeline every
  `batch_group_size` steps. Suppose `put_ops` supplies `batch_group_size`
  images to the input pipeline when run, and that every step, 1 batch of
  images is consumed. Then, by calling notify_image_consumption() every step,
  images are supplied to the input pipeline at the same amount they are
  consumed.

  Example usage:
  ```
  put_ops = ...  # Enqueues `batch_group_size` batches to a StagingArea
  get_op = ...   # Dequeues 1 batch, and does some operations on it
  batch_group_size = 4
  with tf.Session() as sess:
    image_producer = cnn_util.ImageProducer(sess, put_op, batch_group_size)
    image_producer.start()
    for _ in range(100):
      sess.run(get_op)
      image_producer.notify_image_consumption()
  ```
  """

  def __init__(self, sess, put_ops, batch_group_size, use_python32_barrier):
    self.sess = sess
    self.num_gets = 0
    self.put_ops = put_ops
    self.batch_group_size = batch_group_size
    self.done_event = threading.Event()
    if (use_python32_barrier and
        sys.version_info[0] == 3 and sys.version_info[1] >= 2):
      self.put_barrier = threading.Barrier(2)
    else:
      self.put_barrier = Barrier(2)

  def _should_put(self):
    # True on every batch_group_size-th consumption notification.
    return (self.num_gets + 1) % self.batch_group_size == 0

  def done(self):
    """Stop the image producer."""
    self.done_event.set()
    self.put_barrier.abort()
    self.thread.join()

  def start(self):
    """Start the image producer."""
    self.sess.run([self.put_ops])
    self.thread = threading.Thread(target=self._loop_producer)
    # Set daemon to true to allow Ctrl + C to terminate all threads.
    self.thread.daemon = True
    self.thread.start()

  def notify_image_consumption(self):
    """Increment the counter of image_producer by 1.

    This should only be called by the main thread that consumes images and
    runs the model computation. One batch of images should be consumed between
    calling start() and the first call to this method. Then, one batch of
    images should be consumed between any two successive calls to this method.
    """
    if self._should_put():
      self.put_barrier.wait()
    self.num_gets += 1

  def _loop_producer(self):
    # Fix: Event.isSet() is a deprecated camelCase alias (deprecated since
    # Python 3.10); is_set() is the supported spelling and behaves identically.
    while not self.done_event.is_set():
      self.sess.run([self.put_ops])
      self.put_barrier.wait()
class BaseClusterManager(object):
  """The manager for the cluster of servers running the benchmark."""

  def __init__(self, params):
    """Builds a tf.train.ClusterSpec from the host lists in `params`."""
    workers = params.worker_hosts.split(',')
    ps = params.ps_hosts.split(',') if params.ps_hosts else []
    cluster_def = {'worker': workers}
    if ps:
      cluster_def['ps'] = ps
    self._cluster_spec = tf.train.ClusterSpec(cluster_def)

  def get_target(self):
    """Returns a target to be passed to tf.Session()."""
    raise NotImplementedError('get_target must be implemented by subclass')

  def join_server(self):
    raise NotImplementedError('join must be implemented by subclass')

  def get_cluster_spec(self):
    return self._cluster_spec

  def num_workers(self):
    return len(self._cluster_spec.job_tasks('worker'))

  def num_ps(self):
    if 'ps' not in self._cluster_spec.jobs:
      return 0
    return len(self._cluster_spec.job_tasks('ps'))
class GrpcClusterManager(BaseClusterManager):
  """A cluster manager for a cluster networked with gRPC."""

  def __init__(self, params, config_proto):
    """Resolves the session target; starts a tf.train.Server unless controller."""
    super(GrpcClusterManager, self).__init__(params)
    if params.job_name != 'controller':
      # Workers and PS tasks run an in-process server and target themselves.
      self._server = tf.train.Server(self._cluster_spec,
                                     job_name=params.job_name,
                                     task_index=params.task_index,
                                     config=config_proto,
                                     protocol=params.server_protocol)
      self._target = self._server.target
    else:
      # The controller connects to the first worker instead of serving.
      self._target = 'grpc://%s' % self._cluster_spec.job_tasks('worker')[0]

  def get_target(self):
    return self._target

  def join_server(self):
    return self._server.join()
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/cnn_util_test.py
0 → 100644
View file @
f0d87682
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for tf_cnn_benchmarks.cnn_util."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
threading
import
time
import
tensorflow.compat.v1
as
tf
import
cnn_util
class CnnUtilBarrierTest(tf.test.TestCase):

  def testBarrier(self):
    num_tasks = 20
    num_waits = 4
    barrier = cnn_util.Barrier(num_tasks)
    workers = []
    sync_matrix = []
    for _ in range(num_tasks):
      sync_times = [0] * num_waits
      worker = threading.Thread(
          target=self._run_task, args=(barrier, sync_times))
      worker.start()
      workers.append(worker)
      sync_matrix.append(sync_times)
    for worker in workers:
      worker.join()
    for wait_index in range(num_waits - 1):
      # Max of times at iteration i < min of times at iteration i + 1
      latest_this_round = max(row[wait_index] for row in sync_matrix)
      earliest_next_round = min(row[wait_index + 1] for row in sync_matrix)
      self.assertLessEqual(latest_this_round, earliest_next_round)

  def _run_task(self, barrier, sync_times):
    """Records a timestamp, then blocks on the barrier, once per slot."""
    for wait_index in range(len(sync_times)):
      sync_times[wait_index] = time.time()
      barrier.wait()

  def testBarrierAbort(self):
    num_tasks = 2
    num_waits = 1
    sync_times = [0] * num_waits
    barrier = cnn_util.Barrier(num_tasks)
    worker = threading.Thread(
        target=self._run_task, args=(barrier, sync_times))
    worker.start()
    barrier.abort()
    # thread won't be blocked by done barrier.
    worker.join()
class ImageProducerTest(tf.test.TestCase):
  """Tests cnn_util.ImageProducer against a simulated staging area."""

  def _slow_tensorflow_op(self):
    """Returns a TensorFlow op that takes approximately 0.1s to complete."""
    def slow_func(v):
      time.sleep(0.1)
      return v
    return tf.py_func(slow_func, [tf.constant(0.)], tf.float32).op

  def _test_image_producer(self, batch_group_size, put_slower_than_get):
    # We use the variable x to simulate a staging area of images. x represents
    # the number of batches in the staging area.
    x = tf.Variable(0, dtype=tf.int32)
    # Make one side artificially slow so both producer-bound and
    # consumer-bound schedules are exercised.
    if put_slower_than_get:
      put_dep = self._slow_tensorflow_op()
      get_dep = tf.no_op()
    else:
      put_dep = tf.no_op()
      get_dep = self._slow_tensorflow_op()
    with tf.control_dependencies([put_dep]):
      put_op = x.assign_add(batch_group_size, use_locking=True)
    with tf.control_dependencies([get_dep]):
      get_op = x.assign_sub(1, use_locking=True)
    with self.test_session() as sess:
      sess.run(tf.variables_initializer([x]))
      image_producer = cnn_util.ImageProducer(sess, put_op, batch_group_size,
                                              use_python32_barrier=False)
      image_producer.start()
      for _ in range(5 * batch_group_size):
        sess.run(get_op)
        # We assert x is nonnegative, to ensure image_producer never causes
        # an unstage op to block. We assert x is at most 2 * batch_group_size,
        # to ensure it doesn't use too much memory by storing too many batches
        # in the staging area.
        self.assertGreaterEqual(sess.run(x), 0)
        self.assertLessEqual(sess.run(x), 2 * batch_group_size)
        image_producer.notify_image_consumption()
        self.assertGreaterEqual(sess.run(x), 0)
        self.assertLessEqual(sess.run(x), 2 * batch_group_size)
      image_producer.done()
      # Give the producer thread a moment to finish its final put.
      time.sleep(0.1)
      self.assertGreaterEqual(sess.run(x), 0)
      self.assertLessEqual(sess.run(x), 2 * batch_group_size)

  def test_image_producer(self):
    # Cover several batch group sizes in both timing regimes.
    self._test_image_producer(1, False)
    self._test_image_producer(1, True)
    self._test_image_producer(2, False)
    self._test_image_producer(2, True)
    self._test_image_producer(3, False)
    self._test_image_producer(3, True)
    self._test_image_producer(8, False)
    self._test_image_producer(8, True)
if __name__ == '__main__':
  # These tests use TF1-style graph/session APIs, so force v1 behavior.
  tf.disable_v2_behavior()
  tf.test.main()
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/coco_metric.py
0 → 100644
View file @
f0d87682
# Copyright 2018 Google. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""COCO-style evaluation metrics.
Forked from reference model implementation.
COCO API: github.com/cocodataset/cocoapi/
"""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
atexit
import
tempfile
from
absl
import
flags
import
numpy
as
np
from
pycocotools.coco
import
COCO
from
pycocotools.cocoeval
import
COCOeval
import
six
import
tensorflow.compat.v1
as
tf
import
mlperf
import
ssd_constants
FLAGS = flags.FLAGS

# https://github.com/cocodataset/cocoapi/issues/49
# pycocotools references the Python 2 `unicode` builtin; on Python 3 alias it
# to `str` so COCO annotation loading does not raise NameError.
if six.PY3:
  import pycocotools.coco
  pycocotools.coco.unicode = str
def async_eval_runner(queue_predictions, queue_results, val_json_file):
  """Load intermediate eval results and get COCO metrics."""
  while True:
    message = queue_predictions.get()
    # 'STOP' is the poison pill telling this worker loop to shut down.
    if message == 'STOP':
      break
    step, predictions = message
    queue_results.put((step, compute_map(predictions, val_json_file)))
def compute_map(predictions, val_json_file):
  """Use model predictions to compute mAP.

  Args:
    predictions: a list of tuples returned by decoded_predictions function,
      each containing the following elements:
      image source_id, box coordinates in XYWH order, probability score, label
    val_json_file: path to COCO annotation file
  Returns:
    A dictionary that maps all COCO metrics (keys) to their values
  """
  if val_json_file.startswith("gs://"):
    # COCO() reads from the local filesystem, so stage a GCS file locally.
    _, local_val_json = tempfile.mkstemp(suffix=".json")
    tf.gfile.Remove(local_val_json)
    tf.gfile.Copy(val_json_file, local_val_json)
    atexit.register(tf.gfile.Remove, local_val_json)
  else:
    local_val_json = val_json_file

  coco_gt = COCO(local_val_json)
  coco_dt = coco_gt.loadRes(np.array(predictions))
  evaluator = COCOeval(coco_gt, coco_dt, iouType='bbox')
  evaluator.evaluate()
  evaluator.accumulate()
  evaluator.summarize()
  print("Current AP: {:.5f}".format(evaluator.stats[0]))
  metric_names = ['AP', 'AP50', 'AP75', 'APs', 'APm', 'APl', 'ARmax1',
                  'ARmax10', 'ARmax100', 'ARs', 'ARm', 'ARl']

  # Prefix with "COCO" to group in TensorBoard.
  return {"COCO/" + name: stat
          for name, stat in zip(metric_names, evaluator.stats)}
def calc_iou(target, candidates):
  """Computes IoU between one target box and each of N candidate boxes.

  Boxes are in (y_min, x_min, y_max, x_max)-style corner order: the first two
  columns are the top-left corner and the last two the bottom-right corner.

  Args:
    target: 1-D array of 4 box coordinates.
    candidates: (N, 4) array of candidate box coordinates.

  Returns:
    1-D array of N IoU values in [0, 1].
  """
  target_tiled = np.tile(target[np.newaxis, :], (candidates.shape[0], 1))
  # Left Top & Right Bottom of the intersection rectangle.
  lt = np.maximum(target_tiled[:, :2], candidates[:, :2])
  rb = np.minimum(target_tiled[:, 2:], candidates[:, 2:])

  delta = np.maximum(rb - lt, 0)
  intersect = delta[:, 0] * delta[:, 1]

  # Bug fix: the original computed delta1 and delta2 with the identical
  # expression `target_tiled[:, 2:] - candidates[:, :2]`, which is neither
  # box's extent, so the union term was wrong. Correct areas are each box's
  # own (bottom-right - top-left) extent.
  delta1 = target_tiled[:, 2:] - target_tiled[:, :2]
  area1 = delta1[:, 0] * delta1[:, 1]
  delta2 = candidates[:, 2:] - candidates[:, :2]
  area2 = delta2[:, 0] * delta2[:, 1]

  # IoU = intersection / union, with union = area1 + area2 - intersection.
  iou = intersect / (area1 + area2 - intersect)
  return iou
# TODO(haoyuzhang): Rewrite this NumPy based implementation to TensorFlow based
# implementation under ssd_model.py accuracy_function.
def decode_predictions(labels_and_predictions):
  """Decode predictions and remove unused boxes and labels."""
  decoded = []
  for example in labels_and_predictions:
    image_id = int(example[ssd_constants.SOURCE_ID])
    boxes = example[ssd_constants.PRED_BOXES]
    scores = example[ssd_constants.PRED_SCORES]

    locs, labels, probs = decode_single(
        boxes, scores, ssd_constants.OVERLAP_CRITERIA,
        ssd_constants.MAX_NUM_EVAL_BOXES, ssd_constants.MAX_NUM_EVAL_BOXES)

    raw_height, raw_width, _ = example[ssd_constants.RAW_SHAPE]
    for loc, label, prob in zip(locs, labels, probs):
      # Ordering convention differs, hence [1], [0] rather than [0], [1]
      left = loc[1] * raw_width
      top = loc[0] * raw_height
      width = (loc[3] - loc[1]) * raw_width
      height = (loc[2] - loc[0]) * raw_height
      decoded.append([image_id, left, top, width, height, prob,
                      ssd_constants.CLASS_INV_MAP[label]])

  mlperf.logger.log(key=mlperf.tags.NMS_THRESHOLD,
                    value=ssd_constants.OVERLAP_CRITERIA)
  mlperf.logger.log(key=mlperf.tags.NMS_MAX_DETECTIONS,
                    value=ssd_constants.MAX_NUM_EVAL_BOXES)
  return decoded
def decode_single(bboxes_in, scores_in, criteria, max_output, max_num=200):
  """Per-image greedy non-maximum suppression over raw SSD detections.

  Args:
    bboxes_in: (num_boxes, 4) array of box coordinates.
    scores_in: (num_boxes, num_classes) array of per-class scores; class 0
      is treated as background and skipped.
    criteria: IoU threshold; boxes overlapping a kept box at or above this
      value are suppressed.
    max_output: maximum number of detections returned across all classes.
    max_num: per-class cap on the highest-scoring candidates fed to NMS.

  Returns:
    A (boxes, labels, scores) tuple of the surviving detections, or dummy
    single-entry arrays when nothing scores above ssd_constants.MIN_SCORE.
  """
  # Reference to https://github.com/amdegroot/ssd.pytorch
  bboxes_out = []
  scores_out = []
  labels_out = []

  for i, score in enumerate(np.split(scores_in, scores_in.shape[1], 1)):
    score = np.squeeze(score, 1)

    # skip background
    if i == 0:
      continue

    mask = score > ssd_constants.MIN_SCORE
    if not np.any(mask):
      continue

    bboxes, score = bboxes_in[mask, :], score[mask]

    # Fix: removed the unused local `score_sorted = score[score_idx_sorted]`
    # (it was computed and never read).
    score_idx_sorted = np.argsort(score)
    # Keep only the top max_num candidates; ascending order, best at the end.
    score_idx_sorted = score_idx_sorted[-max_num:]
    candidates = []

    # perform non-maximum suppression
    while len(score_idx_sorted):
      idx = score_idx_sorted[-1]
      bboxes_sorted = bboxes[score_idx_sorted, :]
      bboxes_idx = bboxes[idx, :]
      iou = calc_iou(bboxes_idx, bboxes_sorted)
      # Drop every candidate (including idx itself, IoU == 1) that overlaps
      # the current best too much, then keep the best.
      score_idx_sorted = score_idx_sorted[iou < criteria]
      candidates.append(idx)

    bboxes_out.append(bboxes[candidates, :])
    scores_out.append(score[candidates])
    labels_out.extend([i] * len(candidates))

  if len(scores_out) == 0:
    tf.logging.info("No objects detected. Returning dummy values.")
    return (
        np.zeros(shape=(1, 4), dtype=np.float32),
        np.zeros(shape=(1,), dtype=np.int32),
        np.ones(shape=(1,), dtype=np.float32) * ssd_constants.DUMMY_SCORE,
    )

  bboxes_out = np.concatenate(bboxes_out, axis=0)
  scores_out = np.concatenate(scores_out, axis=0)
  labels_out = np.array(labels_out)

  # Keep only the max_output highest-scoring detections overall.
  max_ids = np.argsort(scores_out)[-max_output:]
  return bboxes_out[max_ids, :], labels_out[max_ids], scores_out[max_ids]
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/constants.py
0 → 100644
View file @
f0d87682
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Constants used in tf_cnn_benchmarks."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
enum
import
Enum
# Results fetched with this prefix will not be reduced. Instead, they will be
# passed as matrices to model's postprocess function.
UNREDUCED_ACCURACY_OP_PREFIX = "tensor:"

# Eval result values with this name prefix will be included in summary.
SIMPLE_VALUE_RESULT_PREFIX = "simple_value:"
class BenchmarkMode(object):
  """Benchmark running mode.

  Plain string constants (not an Enum) naming what tf_cnn_benchmarks runs.
  """
  TRAIN = "training"
  EVAL = "evaluation"
  TRAIN_AND_EVAL = "training + evaluation"
  FORWARD_ONLY = "forward only"
class NetworkTopology(str, Enum):
  """Network topology describes how multiple GPUs are inter-connected.

  Subclasses str so members compare equal to (and serialize as) their
  string values.
  """
  # DGX-1 uses hybrid cube mesh topology with the following device peer to peer
  # matrix:
  # DMA: 0 1 2 3 4 5 6 7
  # 0:   Y Y Y Y Y N N N
  # 1:   Y Y Y Y N Y N N
  # 2:   Y Y Y Y N N Y N
  # 3:   Y Y Y Y N N N Y
  # 4:   Y N N N Y Y Y Y
  # 5:   N Y N N Y Y Y Y
  # 6:   N N Y N Y Y Y Y
  # 7:   N N N Y Y Y Y Y
  DGX1 = "dgx1"

  # V100 in GCP are connected with the following device peer to peer matrix.
  # In this topology, bandwidth of the connection depends on if it uses NVLink
  # or PCIe link.
  # DMA: 0 1 2 3 4 5 6 7
  # 0:   Y Y Y Y N Y N N
  # 1:   Y Y Y Y N N N N
  # 2:   Y Y Y Y N N N Y
  # 3:   Y Y Y Y N N N N
  # 4:   N N N N Y Y Y Y
  # 5:   Y N N N Y Y Y Y
  # 6:   N N N N Y Y Y Y
  # 7:   N N Y N Y Y Y Y
  GCP_V100 = "gcp_v100"
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/convnet_builder.py
0 → 100644
View file @
f0d87682
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""CNN builder."""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
collections
import
defaultdict
import
contextlib
import
numpy
as
np
import
tensorflow.compat.v1
as
tf
# pylint: disable=g-direct-tensorflow-import
import
mlperf
from
tensorflow.python.layers
import
convolutional
as
conv_layers
from
tensorflow.python.layers
import
core
as
core_layers
from
tensorflow.python.layers
import
normalization
as
normalization_layers
from
tensorflow.python.layers
import
pooling
as
pooling_layers
from
tensorflow.python.training
import
moving_averages
# Maps a data-format string to the index of the channel axis in a 4-D tensor
# of that layout.
_data_format_to_channel_axis = {'NCHW': 1, 'NHWC': 3}
class
ConvNetBuilder
(
object
):
"""Builder of cnn net."""
  def __init__(self,
               input_op,
               input_nchan,
               phase_train,
               use_tf_layers,
               data_format='NCHW',
               dtype=tf.float32,
               variable_dtype=tf.float32):
    """Initializes the builder.

    Args:
      input_op: tensor the first layer is built on top of.
      input_nchan: number of channels of `input_op`.
      phase_train: bool; True when building the training graph.
      use_tf_layers: if True, use tf.layers implementations instead of the
        raw tf.nn ops.
      data_format: 'NCHW' or 'NHWC'.
      dtype: dtype of activations/computation.
      variable_dtype: dtype variables are stored as.
    """
    # top_layer/top_size track the current output tensor and its channel
    # count as layers are stacked.
    self.top_layer = input_op
    self.top_size = input_nchan
    self.phase_train = phase_train
    self.use_tf_layers = use_tf_layers
    self.data_format = data_format
    self.dtype = dtype
    self.variable_dtype = variable_dtype
    # Per-layer-type counters used to generate unique layer names.
    self.counts = defaultdict(lambda: 0)
    self.use_batch_norm = False
    self.batch_norm_config = {}  # 'decay': 0.997, 'scale': True}
    self.channel_pos = ('channels_last'
                        if data_format == 'NHWC' else 'channels_first')
    # Auxiliary tower state (e.g. Inception aux classifier arm).
    self.aux_top_layer = None
    self.aux_top_size = 0
  def get_custom_getter(self):
    """Returns a custom getter that this class's methods must be called under.

    All methods of this class must be called under a variable scope that was
    passed this custom getter. Example:

    ```python
    network = ConvNetBuilder(...)
    with tf.variable_scope('cg', custom_getter=network.get_custom_getter()):
      network.conv(...)
      # Call more methods of network here
    ```

    Currently, this custom getter only does anything if self.use_tf_layers is
    True. In that case, it causes variables to be stored as dtype
    self.variable_dtype, then casted to the requested dtype, instead of
    directly storing the variable as the requested dtype.
    """
    def inner_custom_getter(getter, *args, **kwargs):
      """Custom getter that forces variables to have type self.variable_dtype."""
      if not self.use_tf_layers:
        return getter(*args, **kwargs)
      requested_dtype = kwargs['dtype']
      if not (requested_dtype == tf.float32 and
              self.variable_dtype == tf.float16):
        # Only change the variable dtype if doing so does not decrease variable
        # precision.
        kwargs['dtype'] = self.variable_dtype
      var = getter(*args, **kwargs)
      # This if statement is needed to guard the cast, because batch norm
      # assigns directly to the return value of this custom getter. The cast
      # makes the return value not a variable so it cannot be assigned. Batch
      # norm variables are always in fp32 so this if statement is never
      # triggered for them.
      if var.dtype.base_dtype != requested_dtype:
        var = tf.cast(var, requested_dtype)
      return var
    return inner_custom_getter
@
contextlib
.
contextmanager
def
switch_to_aux_top_layer
(
self
):
"""Context that construct cnn in the auxiliary arm."""
if
self
.
aux_top_layer
is
None
:
raise
RuntimeError
(
'Empty auxiliary top layer in the network.'
)
saved_top_layer
=
self
.
top_layer
saved_top_size
=
self
.
top_size
self
.
top_layer
=
self
.
aux_top_layer
self
.
top_size
=
self
.
aux_top_size
yield
self
.
aux_top_layer
=
self
.
top_layer
self
.
aux_top_size
=
self
.
top_size
self
.
top_layer
=
saved_top_layer
self
.
top_size
=
saved_top_size
  def get_variable(self, name, shape, dtype, cast_dtype, *args, **kwargs):
    """Creates/fetches a variable of `dtype`, returned cast to `cast_dtype`."""
    # TODO(reedwm): Currently variables and gradients are transferred to other
    # devices and machines as type `dtype`, not `cast_dtype`. In particular,
    # this means in fp16 mode, variables are transferred as fp32 values, not
    # fp16 values, which uses extra bandwidth.
    var = tf.get_variable(name, shape, dtype, *args, **kwargs)
    return tf.cast(var, cast_dtype)
  def _conv2d_impl(self, input_layer, num_channels_in, filters, kernel_size,
                   strides, padding, kernel_initializer):
    """Dispatches a 2-D convolution to tf.layers or raw tf.nn.conv2d."""
    if self.use_tf_layers:
      return conv_layers.conv2d(input_layer, filters, kernel_size, strides,
                                padding, self.channel_pos,
                                kernel_initializer=kernel_initializer,
                                use_bias=False)
    else:
      weights_shape = [kernel_size[0], kernel_size[1], num_channels_in, filters]
      # We use the name 'conv2d/kernel' so the variable has the same name as
      # its tf.layers equivalent. This way, if a checkpoint is written when
      # self.use_tf_layers == True, it can be loaded when
      # self.use_tf_layers == False, and vice versa.
      weights = self.get_variable('conv2d/kernel', weights_shape,
                                  self.variable_dtype, self.dtype,
                                  initializer=kernel_initializer)
      # tf.nn.conv2d needs 4-element strides matching the data layout.
      if self.data_format == 'NHWC':
        strides = [1] + strides + [1]
      else:
        strides = [1, 1] + strides
      return tf.nn.conv2d(input_layer, weights, strides, padding,
                          data_format=self.data_format)
  def conv(self,
           num_out_channels,
           k_height,
           k_width,
           d_height=1,
           d_width=1,
           mode='SAME',
           input_layer=None,
           num_channels_in=None,
           use_batch_norm=None,
           stddev=None,
           activation='relu',
           bias=0.0,
           kernel_initializer=None):
    """Construct a conv2d layer on top of cnn.

    Args:
      num_out_channels: number of output filters.
      k_height, k_width: kernel size.
      d_height, d_width: strides.
      mode: padding mode; 'SAME_RESNET' selects explicit symmetric padding
        used by ResNet models.
      input_layer: input tensor; defaults to the current top layer.
      num_channels_in: input channel count; defaults to current top size.
      use_batch_norm: whether to apply batch norm; defaults to
        self.use_batch_norm.
      stddev: if set (and no kernel_initializer), use a truncated normal
        initializer with this stddev.
      activation: 'relu', 'tanh', 'linear' or None.
      bias: bias initial value, or None for no bias.
      kernel_initializer: optional explicit kernel initializer.

    Returns:
      The activated output tensor (also stored as the new top layer).

    Raises:
      KeyError: on an unrecognized activation type.
    """
    if input_layer is None:
      input_layer = self.top_layer
    if num_channels_in is None:
      num_channels_in = self.top_size
    if stddev is not None and kernel_initializer is None:
      kernel_initializer = tf.truncated_normal_initializer(stddev=stddev)
    if kernel_initializer is None:
      kernel_initializer = tf.variance_scaling_initializer()
    name = 'conv' + str(self.counts['conv'])
    self.counts['conv'] += 1
    with tf.variable_scope(name):
      strides = [1, d_height, d_width, 1]
      if self.data_format == 'NCHW':
        strides = [strides[0], strides[3], strides[1], strides[2]]
      if mode != 'SAME_RESNET':
        conv = self._conv2d_impl(input_layer, num_channels_in,
                                 num_out_channels,
                                 kernel_size=[k_height, k_width],
                                 strides=[d_height, d_width], padding=mode,
                                 kernel_initializer=kernel_initializer)
      else:
        # Special padding mode for ResNet models
        if d_height == 1 and d_width == 1:
          conv = self._conv2d_impl(input_layer, num_channels_in,
                                   num_out_channels,
                                   kernel_size=[k_height, k_width],
                                   strides=[d_height, d_width],
                                   padding='SAME',
                                   kernel_initializer=kernel_initializer)
        else:
          # Explicitly pad so that strided convs match ResNet's reference
          # padding, then convolve with VALID padding.
          rate = 1  # Unused (for 'a trous' convolutions)
          kernel_height_effective = k_height + (k_height - 1) * (rate - 1)
          pad_h_beg = (kernel_height_effective - 1) // 2
          pad_h_end = kernel_height_effective - 1 - pad_h_beg
          kernel_width_effective = k_width + (k_width - 1) * (rate - 1)
          pad_w_beg = (kernel_width_effective - 1) // 2
          pad_w_end = kernel_width_effective - 1 - pad_w_beg
          padding = [[0, 0], [pad_h_beg, pad_h_end],
                     [pad_w_beg, pad_w_end], [0, 0]]
          if self.data_format == 'NCHW':
            padding = [padding[0], padding[3], padding[1], padding[2]]
          padded_input_layer = tf.pad(input_layer, padding)
          conv = self._conv2d_impl(padded_input_layer, num_channels_in,
                                   num_out_channels,
                                   kernel_size=[k_height, k_width],
                                   strides=[d_height, d_width],
                                   padding='VALID',
                                   kernel_initializer=kernel_initializer)
      if use_batch_norm is None:
        use_batch_norm = self.use_batch_norm
      mlperf.logger.log_conv2d(input_tensor=input_layer, output_tensor=conv,
                               stride_height=d_height, stride_width=d_width,
                               filters=num_out_channels,
                               initializer=kernel_initializer,
                               use_bias=not use_batch_norm and
                               bias is not None)
      if not use_batch_norm:
        if bias is not None:
          biases = self.get_variable('biases', [num_out_channels],
                                     self.variable_dtype, self.dtype,
                                     initializer=tf.constant_initializer(
                                         bias))
          # Reshape back to the conv's static shape, which bias_add may lose.
          biased = tf.reshape(
              tf.nn.bias_add(conv, biases, data_format=self.data_format),
              conv.get_shape())
        else:
          biased = conv
      else:
        # batch_norm() reads the current top layer, so set it first.
        self.top_layer = conv
        self.top_size = num_out_channels
        biased = self.batch_norm(**self.batch_norm_config)
      if activation == 'relu':
        mlperf.logger.log(key=mlperf.tags.MODEL_HP_RELU)
        conv1 = tf.nn.relu(biased)
      elif activation == 'linear' or activation is None:
        conv1 = biased
      elif activation == 'tanh':
        conv1 = tf.nn.tanh(biased)
      else:
        raise KeyError('Invalid activation type \'%s\'' % activation)
      self.top_layer = conv1
      self.top_size = num_out_channels
      return conv1
  def _pool(self, pool_name, pool_function, k_height, k_width, d_height,
            d_width, mode, input_layer, num_channels_in):
    """Construct a pooling layer."""
    if input_layer is None:
      input_layer = self.top_layer
    else:
      self.top_size = num_channels_in
    name = pool_name + str(self.counts[pool_name])
    self.counts[pool_name] += 1
    if self.use_tf_layers:
      pool = pool_function(
          input_layer, [k_height, k_width], [d_height, d_width],
          padding=mode,
          data_format=self.channel_pos,
          name=name)
    else:
      if self.data_format == 'NHWC':
        ksize = [1, k_height, k_width, 1]
        strides = [1, d_height, d_width, 1]
      else:
        ksize = [1, 1, k_height, k_width]
        strides = [1, 1, d_height, d_width]
      # NOTE(review): this branch ignores `pool_function` and always applies
      # max pooling, so average pooling with use_tf_layers=False appears
      # unsupported — confirm before relying on apool() in that mode.
      pool = tf.nn.max_pool(input_layer, ksize, strides, padding=mode,
                            data_format=self.data_format, name=name)
    if pool_name == 'mpool':
      mlperf.logger.log_max_pool(input_tensor=input_layer,
                                 output_tensor=pool)
    self.top_layer = pool
    return pool
def
mpool
(
self
,
k_height
,
k_width
,
d_height
=
2
,
d_width
=
2
,
mode
=
'VALID'
,
input_layer
=
None
,
num_channels_in
=
None
):
"""Construct a max pooling layer."""
return
self
.
_pool
(
'mpool'
,
pooling_layers
.
max_pooling2d
,
k_height
,
k_width
,
d_height
,
d_width
,
mode
,
input_layer
,
num_channels_in
)
def
apool
(
self
,
k_height
,
k_width
,
d_height
=
2
,
d_width
=
2
,
mode
=
'VALID'
,
input_layer
=
None
,
num_channels_in
=
None
):
"""Construct an average pooling layer."""
return
self
.
_pool
(
'apool'
,
pooling_layers
.
average_pooling2d
,
k_height
,
k_width
,
d_height
,
d_width
,
mode
,
input_layer
,
num_channels_in
)
def
reshape
(
self
,
shape
,
input_layer
=
None
):
if
input_layer
is
None
:
input_layer
=
self
.
top_layer
self
.
top_layer
=
tf
.
reshape
(
input_layer
,
shape
)
self
.
top_size
=
shape
[
-
1
]
# HACK This may not always work
return
self
.
top_layer
  def affine(self,
             num_out_channels,
             input_layer=None,
             num_channels_in=None,
             bias=0.0,
             stddev=None,
             activation='relu'):
    """Construct a fully-connected (xw + b) layer on top of cnn.

    Args:
      num_out_channels: number of output units.
      input_layer: input tensor; defaults to the current top layer.
      num_channels_in: input width; defaults to current top size.
      bias: bias initial value.
      stddev: weight-init stddev; defaults to He/Xavier-style scaling.
      activation: 'relu', 'linear' or None.

    Returns:
      The activated output tensor (also stored as the new top layer).

    Raises:
      KeyError: on an unrecognized activation type.
    """
    if input_layer is None:
      input_layer = self.top_layer
    if num_channels_in is None:
      num_channels_in = self.top_size
    name = 'affine' + str(self.counts['affine'])
    self.counts['affine'] += 1
    with tf.variable_scope(name):
      # He init (factor 2) for relu, Xavier-like (factor 1) otherwise.
      init_factor = 2. if activation == 'relu' else 1.
      stddev = stddev or np.sqrt(init_factor / num_channels_in)
      kernel = self.get_variable(
          'weights', [num_channels_in, num_out_channels],
          self.variable_dtype, self.dtype,
          initializer=tf.truncated_normal_initializer(stddev=stddev))
      biases = self.get_variable('biases', [num_out_channels],
                                 self.variable_dtype, self.dtype,
                                 initializer=tf.constant_initializer(bias))
      mlperf.logger.log(key=mlperf.tags.MODEL_HP_DENSE,
                        value=num_out_channels)
      logits = tf.nn.xw_plus_b(input_layer, kernel, biases)
      if activation == 'relu':
        mlperf.logger.log(key=mlperf.tags.MODEL_HP_RELU)
        affine1 = tf.nn.relu(logits, name=name)
      elif activation == 'linear' or activation is None:
        affine1 = logits
      else:
        raise KeyError('Invalid activation type \'%s\'' % activation)
      self.top_layer = affine1
      self.top_size = num_out_channels
      return affine1
  def inception_module(self, name, cols, input_layer=None, in_size=None):
    """Construct an Inception-style module of parallel columns.

    Args:
      name: base name for the module's variable scope.
      cols: list of columns; each column is a list of layer specs, where a
        spec is a tuple of (layer type, *args) with type one of 'conv',
        'mpool', 'apool' or 'share'.
      input_layer: input tensor; defaults to the current top layer.
      in_size: input channel count; defaults to current top size.

    Returns:
      The concatenation of all columns' outputs along the channel axis.

    Raises:
      KeyError: on an unrecognized layer type.
    """
    if input_layer is None:
      input_layer = self.top_layer
    if in_size is None:
      in_size = self.top_size
    name += str(self.counts[name])
    self.counts[name] += 1
    with tf.variable_scope(name):
      col_layers = []
      col_layer_sizes = []
      for c, col in enumerate(cols):
        col_layers.append([])
        col_layer_sizes.append([])
        for l, layer in enumerate(col):
          ltype, args = layer[0], layer[1:]
          # Only the first layer of each column reads the module's input;
          # subsequent layers read the running top layer.
          kwargs = {
              'input_layer': input_layer,
              'num_channels_in': in_size
          } if l == 0 else {}
          if ltype == 'conv':
            self.conv(*args, **kwargs)
          elif ltype == 'mpool':
            self.mpool(*args, **kwargs)
          elif ltype == 'apool':
            self.apool(*args, **kwargs)
          elif ltype == 'share':
            # Share matching layer from previous column
            self.top_layer = col_layers[c - 1][l]
            self.top_size = col_layer_sizes[c - 1][l]
          else:
            raise KeyError(
                'Invalid layer type for inception module: \'%s\'' % ltype)
          col_layers[c].append(self.top_layer)
          col_layer_sizes[c].append(self.top_size)
      # Concatenate the last layer of every column along the channel axis.
      catdim = 3 if self.data_format == 'NHWC' else 1
      self.top_layer = tf.concat([layers[-1] for layers in col_layers],
                                 catdim)
      self.top_size = sum([sizes[-1] for sizes in col_layer_sizes])
      return self.top_layer
def spatial_mean(self, keep_dims=False):
  """Averages the current top layer over its spatial (height/width) axes."""
  layer_index = self.counts['spatial_mean']
  self.counts['spatial_mean'] += 1
  name = 'spatial_mean%d' % layer_index
  # Height/width axes depend on the data layout.
  if self.data_format == 'NHWC':
    spatial_axes = [1, 2]
  else:
    spatial_axes = [2, 3]
  self.top_layer = tf.reduce_mean(
      self.top_layer, spatial_axes, keepdims=keep_dims, name=name)
  return self.top_layer
def dropout(self, keep_prob=0.5, input_layer=None):
  """Adds a dropout layer and makes it the new top layer.

  Args:
    keep_prob: probability of keeping a unit during training. Forced to 1.0
      (no-op) when not in training mode.
    input_layer: input tensor; defaults to the current top layer.

  Returns:
    The dropout output tensor.
  """
  if input_layer is None:
    input_layer = self.top_layer
  else:
    # An explicitly-supplied input invalidates the tracked channel count.
    self.top_size = None
  name = 'dropout' + str(self.counts['dropout'])
  # Bug fix: the counter was never incremented (unlike every other layer
  # type), so all dropout layers shared the name 'dropout0'. Dropout creates
  # no variables, so the rename does not affect checkpoint compatibility.
  self.counts['dropout'] += 1
  with tf.variable_scope(name):
    if not self.phase_train:
      keep_prob = 1.0
    if self.use_tf_layers:
      # tf.layers-style dropout takes a drop rate, not a keep probability.
      dropout = core_layers.dropout(input_layer, 1. - keep_prob,
                                    training=self.phase_train)
    else:
      dropout = tf.nn.dropout(input_layer, keep_prob)
    self.top_layer = dropout
    return dropout
def _batch_norm_without_layers(self, input_layer, decay, use_scale, epsilon):
  """Batch normalization on `input_layer` without tf.layers.

  Args:
    input_layer: tensor to normalize.
    decay: momentum for the moving mean/variance averages.
    use_scale: whether to learn a multiplicative `gamma` term.
    epsilon: small constant added to the variance for numerical stability.

  Returns:
    The normalized tensor.
  """
  # We make this function as similar as possible to the
  # tf.contrib.layers.batch_norm, to minimize the differences between using
  # layers and not using layers.
  shape = input_layer.shape
  # Channel axis is last for NHWC, second for NCHW.
  num_channels = shape[3] if self.data_format == 'NHWC' else shape[1]
  beta = self.get_variable('beta', [num_channels], tf.float32, tf.float32,
                           initializer=tf.zeros_initializer())
  if use_scale:
    gamma = self.get_variable('gamma', [num_channels], tf.float32,
                              tf.float32, initializer=tf.ones_initializer())
  else:
    # Fixed gamma of 1 when no learned scale is requested.
    gamma = tf.constant(1.0, tf.float32, [num_channels])
  # For moving variables, we use tf.get_variable instead of self.get_variable,
  # since self.get_variable returns the result of tf.cast which we cannot
  # assign to.
  moving_mean = tf.get_variable('moving_mean', [num_channels],
                                tf.float32,
                                initializer=tf.zeros_initializer(),
                                trainable=False)
  moving_variance = tf.get_variable('moving_variance', [num_channels],
                                    tf.float32,
                                    initializer=tf.ones_initializer(),
                                    trainable=False)
  if self.phase_train:
    # Training: normalize with batch statistics and register moving-average
    # updates in UPDATE_OPS so they run alongside the train op.
    bn, batch_mean, batch_variance = tf.nn.fused_batch_norm(
        input_layer, gamma, beta, epsilon=epsilon,
        data_format=self.data_format, is_training=True)
    mean_update = moving_averages.assign_moving_average(
        moving_mean, batch_mean, decay=decay, zero_debias=False)
    variance_update = moving_averages.assign_moving_average(
        moving_variance, batch_variance, decay=decay, zero_debias=False)
    tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, mean_update)
    tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, variance_update)
  else:
    # Evaluation: normalize with the accumulated moving statistics.
    bn, _, _ = tf.nn.fused_batch_norm(
        input_layer, gamma, beta, mean=moving_mean,
        variance=moving_variance, epsilon=epsilon,
        data_format=self.data_format, is_training=False)
  return bn
def batch_norm(self, input_layer=None, decay=0.999, scale=False,
               epsilon=0.001):
  """Adds a Batch Normalization layer.

  Args:
    input_layer: tensor to normalize; defaults to the current top layer.
    decay: momentum for the moving-average statistics.
    scale: whether to learn a multiplicative `gamma` term.
    epsilon: small constant added to the variance for numerical stability.

  Returns:
    The normalized tensor, which becomes the new top layer.
  """
  if input_layer is None:
    input_layer = self.top_layer
  else:
    # An explicitly-supplied input invalidates the tracked channel count.
    self.top_size = None
  name = 'batchnorm' + str(self.counts['batchnorm'])
  self.counts['batchnorm'] += 1
  center = True
  with tf.variable_scope(name) as scope:
    if self.use_tf_layers:
      layer_obj = normalization_layers.BatchNormalization(
          momentum=decay,
          scale=scale,
          epsilon=epsilon,
          fused=True,
          axis=_data_format_to_channel_axis[self.data_format],
          # We pass this 'scope' argument for compatibility with checkpoints
          # created with the contrib version of batch norm. tf_cnn_benchmarks
          # used to use the contrib version.
          _scope=scope,
          center=center,
          name=scope.name)
      bn = layer_obj.apply(input_layer, training=self.phase_train)
    else:
      bn = self._batch_norm_without_layers(input_layer, decay, scale, epsilon)
  self.top_layer = bn
  # Channel axis is last for NHWC, second for NCHW.
  self.top_size = bn.shape[3] if self.data_format == 'NHWC' else bn.shape[1]
  self.top_size = int(self.top_size)
  mlperf.logger.log_batch_norm(
      input_tensor=input_layer, output_tensor=bn, momentum=decay,
      epsilon=epsilon, center=center, scale=scale, training=self.phase_train)
  return bn
def lrn(self, depth_radius, bias, alpha, beta):
  """Adds a local response normalization layer."""
  index = self.counts['lrn']
  self.counts['lrn'] = index + 1
  layer_name = 'lrn%d' % index
  normalized = tf.nn.lrn(self.top_layer, depth_radius, bias, alpha, beta,
                         name=layer_name)
  self.top_layer = normalized
  return normalized
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/datasets.py
0 → 100644
View file @
f0d87682
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Benchmark dataset utilities.
"""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
abc
import
abstractmethod
import
os
import
numpy
as
np
import
six
from
six.moves
import
cPickle
from
six.moves
import
xrange
# pylint: disable=redefined-builtin
import
tensorflow.compat.v1
as
tf
from
tensorflow.python.platform
import
gfile
import
preprocessing
# Number of examples in the standard splits of each supported dataset.
IMAGENET_NUM_TRAIN_IMAGES = 1281167
IMAGENET_NUM_VAL_IMAGES = 50000
COCO_NUM_TRAIN_IMAGES = 118287
# NOTE(review): the full COCO 2017 validation split has 5000 images; 4952
# presumably matches the subset this benchmark evaluates -- confirm.
COCO_NUM_VAL_IMAGES = 4952
class Dataset(object):
  """Abstract base class describing a dataset used by the cnn benchmarks."""

  def __init__(self, name, data_dir=None, queue_runner_required=False,
               num_classes=None):
    self.name = name
    self.data_dir = data_dir
    self._queue_runner_required = queue_runner_required
    self._num_classes = num_classes

  def tf_record_pattern(self, subset):
    """Returns the glob pattern matching the TFRecord files of `subset`."""
    pattern = '%s-*-of-*' % subset
    return os.path.join(self.data_dir, pattern)

  def reader(self):
    """Returns a reader for this dataset's record files."""
    return tf.TFRecordReader()

  @property
  def num_classes(self):
    return self._num_classes

  @num_classes.setter
  def num_classes(self, val):
    self._num_classes = val

  @abstractmethod
  def num_examples_per_epoch(self, subset):
    """Returns the example count of `subset`; subclasses must override."""
    pass

  def __str__(self):
    return self.name

  def get_input_preprocessor(self, input_preprocessor='default'):
    """Looks up the preprocessor class registered for this dataset."""
    assert not self.use_synthetic_gpu_inputs()
    return _SUPPORTED_INPUT_PREPROCESSORS[self.name][input_preprocessor]

  def queue_runner_required(self):
    """Whether reading this dataset requires TF queue runners."""
    return self._queue_runner_required

  def use_synthetic_gpu_inputs(self):
    """True when no data directory was given, i.e. inputs are synthetic."""
    return not self.data_dir
class LibrispeechDataset(Dataset):
  """Configuration for LibriSpeech dataset."""

  def __init__(self, data_dir=None):
    super(LibrispeechDataset, self).__init__('librispeech', data_dir,
                                             num_classes=29)

  def tf_record_pattern(self, subset):
    """Returns the record glob for `subset`, or '' if the subset is unknown."""
    subset_files = {
        'train': 'train-clean-*.tfrecords',
        'validation': 'test-clean.tfrecords',
    }
    if subset in subset_files:
      return os.path.join(self.data_dir, subset_files[subset])
    return ''

  def num_examples_per_epoch(self, subset='train'):
    """Returns a placeholder epoch size (same for every subset)."""
    del subset
    return 2  # TODO(laigd): currently this is an arbitrary number.
class ImageDataset(Dataset):
  """Abstract base class for datasets made of images."""

  def __init__(self,
               name,
               height,
               width,
               depth=None,
               data_dir=None,
               queue_runner_required=False,
               num_classes=1001):
    super(ImageDataset, self).__init__(name, data_dir, queue_runner_required,
                                       num_classes)
    self.height = height
    self.width = width
    # Default to 3 channels (RGB) when depth is unspecified or falsy.
    self.depth = depth if depth else 3
class ImagenetDataset(ImageDataset):
  """Configuration for Imagenet dataset."""

  def __init__(self, data_dir=None):
    super(ImagenetDataset, self).__init__('imagenet', 300, 300,
                                          data_dir=data_dir)

  def num_examples_per_epoch(self, subset='train'):
    """Returns the number of images in `subset`."""
    subset_sizes = {
        'train': IMAGENET_NUM_TRAIN_IMAGES,
        'validation': IMAGENET_NUM_VAL_IMAGES,
    }
    if subset not in subset_sizes:
      raise ValueError('Invalid data subset "%s"' % subset)
    return subset_sizes[subset]
class Cifar10Dataset(ImageDataset):
  """Configuration for the cifar 10 dataset.

  All input images are loaded into memory up front.
  """

  def __init__(self, data_dir=None):
    super(Cifar10Dataset, self).__init__('cifar10', 32, 32,
                                         data_dir=data_dir,
                                         queue_runner_required=True,
                                         num_classes=11)

  def read_data_files(self, subset='train'):
    """Reads from data file and returns images and labels in a numpy array."""
    assert self.data_dir, ('Cannot call `read_data_files` when using synthetic '
                           'data')
    if subset == 'train':
      batch_names = ['data_batch_%d' % i for i in xrange(1, 6)]
    elif subset == 'validation':
      batch_names = ['test_batch']
    else:
      raise ValueError('Invalid data subset "%s"' % subset)

    loaded_batches = []
    for batch_name in batch_names:
      path = os.path.join(self.data_dir, batch_name)
      with gfile.Open(path, 'rb') as f:
        # python2 does not have the encoding parameter
        load_kwargs = {} if six.PY2 else {'encoding': 'bytes'}
        loaded_batches.append(cPickle.load(f, **load_kwargs))
    # See http://www.cs.toronto.edu/~kriz/cifar.html for a description of the
    # input format.
    all_images = np.concatenate(
        [batch[b'data'] for batch in loaded_batches]).astype(np.float32)
    all_labels = np.concatenate(
        [batch[b'labels'] for batch in loaded_batches])
    return all_images, all_labels

  def num_examples_per_epoch(self, subset='train'):
    """Returns the number of images in `subset`."""
    if subset == 'train':
      return 50000
    if subset == 'validation':
      return 10000
    raise ValueError('Invalid data subset "%s"' % subset)
class COCODataset(ImageDataset):
  """Configuration for COCO dataset."""

  def __init__(self, data_dir=None, image_size=300):
    super(COCODataset, self).__init__('coco', image_size, image_size,
                                      data_dir=data_dir, num_classes=81)

  def num_examples_per_epoch(self, subset='train'):
    """Returns the number of images in `subset`."""
    if subset == 'train':
      return COCO_NUM_TRAIN_IMAGES
    if subset == 'validation':
      return COCO_NUM_VAL_IMAGES
    raise ValueError('Invalid data subset "%s"' % subset)
# Maps each supported dataset name to the Dataset subclass implementing it.
_SUPPORTED_DATASETS = {
    'imagenet': ImagenetDataset,
    'cifar10': Cifar10Dataset,
    'librispeech': LibrispeechDataset,
    'coco': COCODataset,
}
# Maps dataset name -> {preprocessor name: preprocessor class}. Looked up by
# Dataset.get_input_preprocessor(); 'default' must exist for every dataset.
_SUPPORTED_INPUT_PREPROCESSORS = {
    'imagenet': {
        'default': preprocessing.RecordInputImagePreprocessor,
        'official_models_imagenet': preprocessing.ImagenetPreprocessor,
    },
    'cifar10': {
        'default': preprocessing.Cifar10ImagePreprocessor
    },
    'librispeech': {
        'default': preprocessing.LibrispeechPreprocessor
    },
    'coco': {
        'default': preprocessing.COCOPreprocessor
    },
}
def create_dataset(data_dir, data_name):
  """Create a Dataset instance based on data_dir and data_name.

  Args:
    data_dir: directory holding the dataset files, or None/'' to use
      synthetic data.
    data_name: name of the dataset ('imagenet', 'cifar10', ...). If None, the
      name is inferred from `data_dir`.

  Returns:
    A Dataset instance for `data_name`, constructed with `data_dir`.

  Raises:
    ValueError: if the dataset name is unknown or cannot be inferred.
  """
  if not data_dir and not data_name:
    # When using synthetic data, use synthetic imagenet images by default.
    data_name = 'imagenet'

  # Infer dataset name from data_dir if data_name is not provided.
  if data_name is None:
    # A dataset is recognized when its name appears as a substring of the
    # data directory path.
    for supported_name in _SUPPORTED_DATASETS:
      if supported_name in data_dir:
        data_name = supported_name
        break
    else:
      # Failed to identify dataset name from data dir.
      raise ValueError('Could not identify name of dataset. '
                       'Please specify with --data_name option.')
  if data_name not in _SUPPORTED_DATASETS:
    raise ValueError('Unknown dataset. Must be one of %s' %
                     ', '.join(sorted(_SUPPORTED_DATASETS)))
  return _SUPPORTED_DATASETS[data_name](data_dir)
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/flags.py
0 → 100644
View file @
f0d87682
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains functions to define flags and params.
Calling a DEFINE_* function will add a ParamSpec namedtuple to the param_spec
dict. The DEFINE_* arguments match those in absl. Calling define_flags() creates
a command-line flag for every ParamSpec defined by a DEFINE_* functions.
The reason we don't use absl flags directly is that we want to be able to use
tf_cnn_benchmarks as a library. When using it as a library, we don't want to
define any flags, but instead pass parameters to the BenchmarkCNN constructor.
"""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
collections
import
namedtuple
from
absl
import
flags
as
absl_flags
import
six
# The shared absl FLAGS object; actual flags are created by define_flags().
FLAGS = absl_flags.FLAGS

# ParamSpec describes one of benchmark_cnn.BenchmarkCNN's parameters.
ParamSpec = namedtuple('_ParamSpec',
                       ['flag_type', 'default_value', 'description', 'kwargs'])

# Maps from parameter name to its ParamSpec.
param_specs = {}
def DEFINE_string(name, default, help):  # pylint: disable=invalid-name,redefined-builtin
  """Registers a string-valued parameter spec under `name`."""
  spec = ParamSpec('string', default, help, {})
  param_specs[name] = spec
def DEFINE_boolean(name, default, help):  # pylint: disable=invalid-name,redefined-builtin
  """Registers a boolean-valued parameter spec under `name`."""
  spec = ParamSpec('boolean', default, help, {})
  param_specs[name] = spec
def DEFINE_integer(name, default, help, lower_bound=None, upper_bound=None):
  # pylint: disable=invalid-name,redefined-builtin
  """Registers an integer-valued parameter spec with optional bounds."""
  bounds = {'lower_bound': lower_bound, 'upper_bound': upper_bound}
  param_specs[name] = ParamSpec('integer', default, help, bounds)
def DEFINE_float(name, default, help, lower_bound=None, upper_bound=None):
  # pylint: disable=invalid-name,redefined-builtin
  """Registers a float-valued parameter spec with optional bounds."""
  bounds = {'lower_bound': lower_bound, 'upper_bound': upper_bound}
  param_specs[name] = ParamSpec('float', default, help, bounds)
def DEFINE_enum(name, default, enum_values, help):  # pylint: disable=invalid-name,redefined-builtin
  """Registers an enum-valued parameter spec restricted to `enum_values`."""
  spec = ParamSpec('enum', default, help, {'enum_values': enum_values})
  param_specs[name] = spec
def DEFINE_list(name, default, help):  # pylint: disable=invalid-name,redefined-builtin
  """Registers a list-valued parameter spec under `name`."""
  spec = ParamSpec('list', default, help, {})
  param_specs[name] = spec
def define_flags(specs=None):
  """Define a command line flag for each ParamSpec in flags.param_specs."""
  specs = specs or param_specs
  # Dispatch table from ParamSpec.flag_type to the matching absl definer.
  flag_definers = {
      'boolean': absl_flags.DEFINE_boolean,
      'float': absl_flags.DEFINE_float,
      'integer': absl_flags.DEFINE_integer,
      'string': absl_flags.DEFINE_string,
      'enum': absl_flags.DEFINE_enum,
      'list': absl_flags.DEFINE_list
  }
  for flag_name, spec in six.iteritems(specs):
    if spec.flag_type not in flag_definers:
      raise ValueError('Unknown flag_type %s' % spec.flag_type)
    definer = flag_definers[spec.flag_type]
    definer(flag_name, spec.default_value, help=spec.description,
            **spec.kwargs)
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/leading_indicators_test.py
0 → 100644
View file @
f0d87682
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Benchmark various leading indicators CNNs.
The purpose of these tests is to test each model as a high level baseline and
to ensure the various variable_update options have not regressing. Not all
options are tested. The tests focus on the most viable options.
"""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
ctypes
import
logging
import
os
import
sys
from
absl
import
flags
from
absl.testing
import
absltest
# pylint: disable=unused-import
import
tensorflow.compat.v1
as
tf
# pylint: disable=g-bad-import-order
import
benchmark_cnn
from
platforms
import
util
as
platforms_util
# Command-line override for the number of measured (post-warmup) batches;
# consumed by BenchmarkBase._shared_params() when set.
flags.DEFINE_integer('num_batches', None,
                     'number of batches to run, excluding warmup')
class BenchmarkBase(tf.test.Benchmark):
  """Base class for all benchmarks in this file."""

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    """Base class for all benchmarks in this file.

    Args:
      output_dir: directory where to output e.g. log files
      root_data_dir: directory under which to look for dataset
      **kwargs: arbitrary named arguments. This is needed to make the
                constructor forward compatible in case PerfZero provides more
                named arguments before updating the constructor.
    """
    # Load default values if the benchmark is not run with absl.app.run()
    if not flags.FLAGS.is_parsed():
      flags.FLAGS.mark_as_parsed()
    self.fake_data_dir = os.path.join(platforms_util.get_test_data_dir(),
                                      'fake_tf_record_data')
    self.output_dir = output_dir
    if root_data_dir is None:
      # Fall back to the internal default imagenet location.
      self.data_dir = ('/readahead/200M/placer/prod/home/distbelief/'
                       'imagenet-tensorflow/imagenet-2012-tfrecord')
    else:
      self.data_dir = os.path.join(root_data_dir, 'imagenet')

  def _run_benchmark(self, params):
    """Run a CNN benchmark and report its results.

    Args:
      params: Params tuple, typically created by benchmark_cnn.make_params or
        benchmark_cnn.make_params_from_flags.
    """
    logging.info('Running benchmark [%s]', self._get_name())
    params = benchmark_cnn.setup(params)
    bench = benchmark_cnn.BenchmarkCNN(params)
    bench.print_info()
    stats = bench.run()
    # Copy the interesting stats into the `extras` dict reported upstream;
    # accuracy/loss keys are only present for some benchmark configurations.
    extras = {}
    extras['examples_per_sec'] = stats.get('images_per_sec')
    if 'last_average_loss' in stats:
      extras['last_average_loss'] = stats['last_average_loss']
    if 'top_1_accuracy' in stats:
      extras['top_1_accuracy'] = stats['top_1_accuracy']
    if 'top_5_accuracy' in stats:
      extras['top_5_accuracy'] = stats['top_5_accuracy']
    self.report_benchmark(
        iters=stats.get('num_steps'),
        wall_time=stats.get('average_wall_time'),
        extras=extras)

  def _shared_params(self):
    """Returns shared parameters for all benchmarks in this file."""
    params = {}
    if flags.FLAGS.num_batches is not None:
      params['num_batches'] = flags.FLAGS.num_batches
    if self.output_dir is not None:
      params['benchmark_log_dir'] = self.output_dir
    return benchmark_cnn.make_params(**params)

  def _binary_search_batch_size(self, params, init_batch_size):
    """Find the max batch_size using binary search."""
    assert init_batch_size > 0
    low_batch_size = 0
    high_batch_size = None
    batch_size = init_batch_size

    # No need to run a warmup or many batches; if it doesn't OOM after 10
    # batches, it should work in general.
    params = params._replace(num_batches=10, num_warmup_batches=0)

    # Find high_batch_size first: double the batch size until an OOM
    # (ResourceExhaustedError) establishes an upper bound.
    tf.logging.info('Looking for upper bound to batch size, starting with %d'
                    % batch_size)
    while high_batch_size is None:
      tf.logging.info('Trying batch_size %d' % batch_size)
      params = params._replace(batch_size=batch_size)
      bench = benchmark_cnn.BenchmarkCNN(params)
      bench.print_info()
      try:
        bench.run()
        low_batch_size = batch_size
        batch_size *= 2
      except tf.errors.ResourceExhaustedError:
        high_batch_size = batch_size - 1

    # Binary Search
    tf.logging.info(
        'Max batch size is in range (%d, %d]. Starting binary search to find '
        'exact max batch size.' % (low_batch_size, batch_size))
    while low_batch_size < high_batch_size:
      # Round up so the loop terminates when the bounds are adjacent.
      batch_size = (low_batch_size + high_batch_size + 1) // 2
      tf.logging.info('Trying batch_size %d' % batch_size)
      params = params._replace(batch_size=batch_size)
      bench = benchmark_cnn.BenchmarkCNN(params)
      bench.print_info()
      try:
        bench.run()
        low_batch_size = batch_size
      except tf.errors.ResourceExhaustedError:
        high_batch_size = batch_size - 1

    self.report_benchmark(extras={'max_batch_size': low_batch_size})
class Resnet50BenchmarksInferenceCpu(BenchmarkBase):
  """Benchmarks for ResNet50 inference on CPU."""

  def _shared_params(self):
    """Returns shared parameters for all ResNet50 benchmarks."""
    return BenchmarkBase._shared_params(self)._replace(
        num_gpus=1, model='resnet50', num_warmup_batches=5, num_batches=50,
        distortions=False, forward_only=True, device='cpu',
        data_format='NHWC', num_intra_threads=0)

  def benchmark_synth_forward_batch1(self):
    """Tests 1 CPU batch size 1."""
    self._run_benchmark(self._shared_params()._replace(batch_size=1))

  def benchmark_synth_forward_batch16(self):
    """Tests 1 CPU batch size 16."""
    self._run_benchmark(self._shared_params()._replace(batch_size=16))
class FrozenResnet50BenchmarksInferenceCpu(Resnet50BenchmarksInferenceCpu):
  """Benchmarks for ResNet50 frozen graph inference on CPU."""

  def _shared_params(self):
    base_params = super(FrozenResnet50BenchmarksInferenceCpu,
                        self)._shared_params()
    return base_params._replace(freeze_when_forward_only=True)
class Resnet50BenchmarksInference(BenchmarkBase):
  """Benchmarks for ResNet50 inference."""

  def _shared_params(self):
    """Returns shared parameters for all ResNet50 benchmarks."""
    return BenchmarkBase._shared_params(self)._replace(
        num_gpus=1, model='resnet50', distortions=False, forward_only=True)

  def benchmark_synth_forward_batch128(self):
    """Tests 1 GPU batch size 128."""
    self._run_benchmark(self._shared_params()._replace(batch_size=128))

  def benchmark_fp16_synth_forward_batch128(self):
    """Tests 1 GPU batch size 128 FP16."""
    self._run_benchmark(
        self._shared_params()._replace(batch_size=128, use_fp16=True))

  def benchmark_fp16_synth_forward_batch16(self):
    """Tests 1 GPU batch size 16 FP16."""
    self._run_benchmark(
        self._shared_params()._replace(batch_size=16, use_fp16=True))

  def benchmark_xla_synth_forward_batch128(self):
    """Tests 1 GPU batch size 128 with XLA."""
    self._run_benchmark(
        self._shared_params()._replace(batch_size=128, xla=True))

  def benchmark_fp16_xla_synth_forward_batch128(self):
    """Tests 1 GPU batch size 128 FP16 with XLA."""
    self._run_benchmark(self._shared_params()._replace(
        batch_size=128, use_fp16=True, xla=True))

  def benchmark_fp16_xla_synth_forward_batch16(self):
    """Tests 1 GPU batch size 16 FP16 with XLA."""
    self._run_benchmark(self._shared_params()._replace(
        batch_size=16, use_fp16=True, xla=True))
class FrozenResnet50BenchmarksInference(Resnet50BenchmarksInference):
  """Benchmarks for ResNet50 frozen graph inference."""

  def _shared_params(self):
    parent_params = super(FrozenResnet50BenchmarksInference,
                          self)._shared_params()
    return parent_params._replace(freeze_when_forward_only=True)

  def benchmark_trt_synth_forward_batch128(self):
    """Tests 1 GPU batch size 128."""
    self._run_benchmark(
        self._shared_params()._replace(batch_size=128, trt_mode='FP32'))

  # TODO(laigd): enable fp16 tests for TF-TRT, it's currently not supported yet.
  # def benchmark_fp16_trt_synth_forward_batch128(self):
  #   """Tests 1 GPU batch size 128 FP16."""
  #   params = self._shared_params()._replace(
  #       batch_size=128, use_fp16=True, trt_mode='FP16')
  #   self._run_benchmark(params)

  # Test with batch size 16 to compare with native TF GPU implementation and
  # XLA.
  # def benchmark_fp16_trt_synth_forward_batch16(self):
  #   """Tests 1 GPU batch size 16 FP16."""
  #   params = self._shared_params()._replace(
  #       batch_size=16, use_fp16=True, trt_mode='FP16')
  #   self._run_benchmark(params)
class Resnet50Benchmarks(BenchmarkBase):
  """Benchmark resnet50 configurations."""

  def _shared_params(self):
    """Returns shared parameters for all ResNet50 benchmarks."""
    return BenchmarkBase._shared_params(self)._replace(
        model='resnet50', batch_size=128, distortions=False,
        optimizer='momentum')

  def _shared_params_fp16(self):
    """Returns shared parameters for all ResNet50 FP16 benchmarks."""
    return BenchmarkBase._shared_params(self)._replace(
        model='resnet50', batch_size=256, distortions=False, use_fp16=True,
        optimizer='momentum', loss_type_to_report='base_loss',
        compute_lr_on_cpu=True, single_l2_loss_op=True)

  def benchmark_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data."""
    self._run_benchmark(self._shared_params()._replace(num_gpus=1))

  def benchmark_fake_1gpu_gpuparams(self):
    """Tests 1 gpu with fake data."""
    self._run_benchmark(self._shared_params()._replace(
        num_gpus=1, data_dir=self.fake_data_dir, data_name='imagenet'))

  def benchmark_synth_1gpu_max_batch_size(self):
    """Finds largest batch size that can be run with 1 gpu using synth data."""
    self._binary_search_batch_size(
        self._shared_params()._replace(num_gpus=1,
                                       variable_update='parameter_server'),
        init_batch_size=128)

  def benchmark_synth_4gpu_gpureplicated(self):
    """Tests 4 gpu with synthetic data with parameters replicated."""
    self._run_benchmark(self._shared_params()._replace(
        num_gpus=4, variable_update='replicated', all_reduce_spec='nccl',
        gradient_repacking=2))

  def benchmark_synth_8gpu_gpureplicated(self):
    """Tests 8 gpu with synthetic data with parameters replicated."""
    self._run_benchmark(self._shared_params()._replace(
        num_gpus=8, variable_update='replicated', all_reduce_spec='nccl',
        gradient_repacking=2))

  def benchmark_fake_8gpu_gpureplicated(self):
    """Tests 8 gpu with fake data with parameters replicated."""
    self._run_benchmark(self._shared_params()._replace(
        num_gpus=8, data_dir=self.fake_data_dir, data_name='imagenet',
        variable_update='replicated', all_reduce_spec='nccl',
        gradient_repacking=2))

  # FP16 mixed-precision tests.
  def benchmark_fp16_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data with parameters on the gpu."""
    self._run_benchmark(self._shared_params_fp16()._replace(
        num_gpus=1, variable_update='parameter_server'))

  def benchmark_fp16_synth_1gpu_gpuparams_batch128(self):
    """Tests 1 gpu with synthetic data with parameters on the gpu."""
    self._run_benchmark(self._shared_params_fp16()._replace(
        num_gpus=1, batch_size=128, variable_update='parameter_server'))

  def benchmark_fp16_synth_4gpu_gpureplicated(self):
    """Tests 4 gpu with synthetic data with nccl and all_reduce."""
    self._run_benchmark(self._shared_params_fp16()._replace(
        num_gpus=4, variable_update='replicated', all_reduce_spec='nccl',
        gradient_repacking=2))

  def benchmark_fp16_synth_8gpu_gpureplicated(self):
    """Tests 8 gpu with synthetic with nccl and all_reduce."""
    self._run_benchmark(self._shared_params_fp16()._replace(
        num_gpus=8, variable_update='replicated', all_reduce_spec='nccl',
        gradient_repacking=2))

  def benchmark_fp16_fake_1gpu_gpuparams(self):
    """Tests 1 gpus with fake data."""
    self._run_benchmark(self._shared_params_fp16()._replace(
        num_gpus=1, data_dir=self.fake_data_dir, data_name='imagenet',
        variable_update='parameter_server'))

  def benchmark_fp16_fake_8gpu_gpureplicated(self):
    """Tests 8 gpus with fake data."""
    self._run_benchmark(self._shared_params_fp16()._replace(
        num_gpus=8, data_dir=self.fake_data_dir, data_name='imagenet',
        variable_update='replicated', all_reduce_spec='nccl',
        gradient_repacking=2))

  def benchmark_fp16_fakedistort_8gpu_gpureplicated(self):
    """Tests 8 gpus with fake distorted data."""
    self._run_benchmark(self._shared_params_fp16()._replace(
        num_gpus=8, data_dir=self.fake_data_dir, data_name='imagenet',
        distortions=True, variable_update='replicated',
        all_reduce_spec='nccl', gradient_repacking=2))

  # XLA versions of Resnet50 tests only for single GPU.
  def benchmark_xla_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data with XLA."""
    self._run_benchmark(self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server', xla=True))

  def benchmark_fp16_xla_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with fp16, synthetic data with XLA."""
    self._run_benchmark(self._shared_params_fp16()._replace(
        num_gpus=1, variable_update='parameter_server', xla=True))

  # Test does not run as part of continuous testing on guitar.
  def benchmark_ng_xla_batch64_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with XLA, synth data, and batch 64."""
    self._run_benchmark(self._shared_params()._replace(
        num_gpus=1, batch_size=64, variable_update='parameter_server',
        xla=True))

  def benchmark_fp16_xla_batch64_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with fp16, XLA, synth data, and batch 64."""
    self._run_benchmark(self._shared_params_fp16()._replace(
        num_gpus=1, batch_size=64, variable_update='parameter_server',
        xla=True))

  def benchmark_fp16_xla_batch128_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with fp16, XLA, and synth data."""
    self._run_benchmark(self._shared_params_fp16()._replace(
        num_gpus=1, batch_size=128, variable_update='parameter_server',
        xla=True))

  def benchmark_xla_synth_1gpu_max_batch_size(self):
    """Finds largest batch that can be run with XLA, 1 gpu, and synth data."""
    self._binary_search_batch_size(
        self._shared_params()._replace(num_gpus=1,
                                       variable_update='parameter_server',
                                       xla=True),
        init_batch_size=128)

  def benchmark_xla_real_1gpu_gpuparams(self):
    """Tests 1 gpu with real data with XLA."""
    self._run_benchmark(self._shared_params()._replace(
        num_gpus=1, data_dir=self.data_dir,
        variable_update='parameter_server', xla=True))

  # Test does not run as part of continuous testing.
  def benchmark_xla_fake_1gpu_gpuparams(self):
    """Tests 1 gpu with fake data with XLA."""
    self._run_benchmark(self._shared_params()._replace(
        num_gpus=1, data_dir=self.fake_data_dir, data_name='imagenet',
        variable_update='parameter_server', xla=True))

  # Test does not run as part of continuous testing.
  def benchmark_xla_fakedistort_1gpu_gpuparams(self):
    """Tests 1 gpu with fake distorted data with XLA."""
    self._run_benchmark(self._shared_params()._replace(
        num_gpus=1, data_dir=self.fake_data_dir, data_name='imagenet',
        distortions=True, variable_update='parameter_server', xla=True))
class Resnet50v15Benchmarks(BenchmarkBase):
  """Benchmark various ResNet50V1.5 configurations.

  ResNetV1.5 differs from V1 in stride 2 is used in the first 3x3 convolution
  of each block instead of the first 1x1 convolution.
  """

  def _shared_params_fp16(self):
    """Returns shared parameters for all ResNet50v1.5 FP16 benchmarks."""
    return BenchmarkBase._shared_params(self)._replace(
        model='resnet50_v1.5',
        batch_size=256,
        distortions=False,
        use_fp16=True,
        optimizer='momentum',
        loss_type_to_report='base_loss',
        compute_lr_on_cpu=True,
        single_l2_loss_op=True)

  def benchmark_fp16_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data."""
    params = self._shared_params_fp16()._replace(num_gpus=1)
    self._run_benchmark(params)

  def benchmark_fp16_batch256_synth_8gpu_gpuparams(self):
    """Tests 8 gpus with synthetic data at batch 256."""
    params = self._shared_params_fp16()._replace(num_gpus=8)
    self._run_benchmark(params)

  def benchmark_fp16_batch128_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data at batch 128 (useful for small GPUs)."""
    params = self._shared_params_fp16()._replace(num_gpus=1, batch_size=128)
    self._run_benchmark(params)

  def benchmark_fp16_fake_1gpu_gpuparams(self):
    """Tests 1 gpu with fake data."""
    params = self._shared_params_fp16()._replace(
        num_gpus=1, data_dir=self.fake_data_dir, data_name='imagenet')
    self._run_benchmark(params)

  def benchmark_fp16_synth_8gpu_gpureplicated(self):
    """Tests 8 gpu with synthetic data with parameters replicated."""
    params = self._shared_params_fp16()._replace(
        num_gpus=8,
        num_batches=200,
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2)
    self._run_benchmark(params)

  def benchmark_fp16_fake_8gpu_gpureplicated(self):
    """Tests 8 gpu with fake data with parameters replicated."""
    params = self._shared_params_fp16()._replace(
        num_gpus=8,
        num_batches=200,
        data_dir=self.fake_data_dir,
        data_name='imagenet',
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2)
    self._run_benchmark(params)

  # XLA versions of Resnet50v1.5 tests.
  def benchmark_fp16_xla_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with fp16, synthetic data with XLA."""
    params = self._shared_params_fp16()._replace(num_gpus=1, xla=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_batch128_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with fp16, batch128, synthetic data with XLA."""
    params = self._shared_params_fp16()._replace(
        num_gpus=1, batch_size=128, xla=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_compile_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data and XLA compile."""
    params = self._shared_params_fp16()._replace(num_gpus=1, xla_compile=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_compile_batch128_synth_1gpu_gpuparams(self):
    """Tests 1 gpu, batch 128, synthetic data and XLA compile."""
    params = self._shared_params_fp16()._replace(
        num_gpus=1, num_batches=200, batch_size=128, xla_compile=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_batch256_synth_8gpu_gpuparams(self):
    """Tests 8 gpu with synthetic data and xla autojit."""
    params = self._shared_params_fp16()._replace(
        num_gpus=8, num_batches=200, batch_size=256, xla=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_compile_fake_1gpu_gpuparams(self):
    """Tests 1 gpu with fake data and XLA compile."""
    params = self._shared_params_fp16()._replace(
        num_gpus=1,
        data_dir=self.fake_data_dir,
        data_name='imagenet',
        xla_compile=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_compile_synth_8gpu_gpureplicated(self):
    """Tests 8 gpu, synthetic data, replicated parameters, XLA compile."""
    params = self._shared_params_fp16()._replace(
        num_gpus=8,
        num_batches=200,
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2,
        xla_compile=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_synth_8gpu_gpureplicated(self):
    """Tests 8 gpu, synthetic data, replicated parameters, XLA autojit."""
    params = self._shared_params_fp16()._replace(
        num_gpus=8,
        num_batches=200,
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2,
        xla=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_compile_fake_8gpu_gpureplicated(self):
    """Tests 8 gpu, fake data, replicated parameters, XLA compile."""
    params = self._shared_params_fp16()._replace(
        num_gpus=8,
        num_batches=200,
        data_dir=self.fake_data_dir,
        data_name='imagenet',
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2,
        xla_compile=True)
    self._run_benchmark(params)
class Vgg16Benchmarks(BenchmarkBase):
  """Benchmark various vgg16 configurations."""

  def _shared_params(self):
    """Returns shared parameters for all vgg16 benchmarks."""
    return BenchmarkBase._shared_params(self)._replace(
        model='vgg16', batch_size=128, distortions=False)

  def benchmark_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data with parameters on gpu."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server')
    self._run_benchmark(params)

  def benchmark_fp16_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with fp16 and synthetic data with parameters on gpu."""
    params = self._shared_params()._replace(
        num_gpus=1, use_fp16=True, variable_update='parameter_server')
    self._run_benchmark(params)

  def benchmark_synth_8gpu_gpureplicated(self):
    """Tests 8 gpu with synthetic data with parameters replicated."""
    params = self._shared_params()._replace(
        num_gpus=8,
        all_reduce_spec='nccl',
        variable_update='replicated',
        compact_gradient_transfer=False,
        gradient_repacking=2)
    self._run_benchmark(params)

  # XLA versions of VGG16 tests only for single GPU.
  def benchmark_xla_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data and XLA."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server', xla=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with fp16, synthetic data, and XLA."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server', xla=True,
        use_fp16=True)
    self._run_benchmark(params)

  # Test does not run as part of continuous testing.
  def benchmark_xla_fake_1gpu_gpuparams(self):
    """Tests 1 gpu with fake data and XLA."""
    params = self._shared_params()._replace(
        num_gpus=1,
        data_dir=self.fake_data_dir,
        data_name='imagenet',
        variable_update='parameter_server',
        xla=True)
    self._run_benchmark(params)

  def benchmark_xla_real_1gpu_gpuparams(self):
    """Tests 1 gpu with real data and XLA."""
    params = self._shared_params()._replace(
        num_gpus=1,
        data_dir=self.data_dir,
        variable_update='parameter_server',
        xla=True)
    self._run_benchmark(params)
class TrivialBenchmarks(BenchmarkBase):
  """Benchmarks for trivial model.

  The purpose of these tests is to verify the upper bound for the input
  pipeline. Fake data creates an upper bound on the input pipeline throughput.
  """

  def _shared_params(self):
    """Returns shared parameters for all trivial benchmarks."""
    return BenchmarkBase._shared_params(self)._replace(
        model='trivial',
        num_gpus=8,
        distortions=False,
        variable_update='independent',
        data_dir=self.fake_data_dir)

  def benchmark_fake_64batch(self):
    """Tests fake data at batch size 64."""
    params = self._shared_params()._replace(
        batch_size=64, data_name='imagenet')
    self._run_benchmark(params)

  def benchmark_fake_128batch(self):
    """Tests fake data at batch size 128."""
    params = self._shared_params()._replace(
        batch_size=128, data_name='imagenet')
    self._run_benchmark(params)

  def benchmark_fake_256batch(self):
    """Tests fake data at batch size 256."""
    params = self._shared_params()._replace(
        batch_size=256, data_name='imagenet')
    self._run_benchmark(params)

  def benchmark_fakedistort_128batch(self):
    """Tests fake distorted data at batch size 128."""
    params = self._shared_params()._replace(
        batch_size=128, data_name='imagenet', distortions=True)
    self._run_benchmark(params)
class AlexnetBenchmarks(BenchmarkBase):
  """Benchmarks for alexnet."""

  def _shared_params(self):
    """Returns shared parameters for all alexnet benchmarks."""
    return BenchmarkBase._shared_params(self)._replace(
        model='alexnet', batch_size=512, distortions=False)

  def benchmark_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data with parameters on gpu."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server')
    self._run_benchmark(params)

  def benchmark_fp16_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with fp16 and synthetic data with parameters on gpu."""
    params = self._shared_params()._replace(
        num_gpus=1, use_fp16=True, variable_update='parameter_server')
    self._run_benchmark(params)

  def benchmark_synth_8gpu_gpureplicated(self):
    """Tests 8 gpus with synthetic data with parameters replicated."""
    params = self._shared_params()._replace(
        num_gpus=8,
        variable_update='replicated',
        all_reduce_spec='nccl',
        compact_gradient_transfer=False,
        gradient_repacking=2)
    self._run_benchmark(params)

  def benchmark_fake_8gpu_gpureplicated(self):
    """Tests 8 gpus with fake data with parameters replicated."""
    params = self._shared_params()._replace(
        num_gpus=8,
        data_dir=self.fake_data_dir,
        data_name='imagenet',
        variable_update='replicated',
        all_reduce_spec='nccl',
        compact_gradient_transfer=False,
        gradient_repacking=2)
    self._run_benchmark(params)

  # XLA Benchmark tests for AlexNet.
  def benchmark_xla_synth_1gpuparams(self):
    """Tests 1 gpu with synthetic data and XLA."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server', xla=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with fp16, synthetic data and XLA."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server', xla=True,
        use_fp16=True)
    self._run_benchmark(params)

  # Test does not run as part of continuous testing.
  def benchmark_xla_fake_1gpuparams(self):
    """Tests 1 gpu with fake data and XLA."""
    params = self._shared_params()._replace(
        num_gpus=1,
        data_dir=self.fake_data_dir,
        data_name='imagenet',
        variable_update='parameter_server',
        xla=True)
    self._run_benchmark(params)

  def benchmark_xla_real_1gpuparams(self):
    """Tests 1 gpu with real data and XLA."""
    params = self._shared_params()._replace(
        num_gpus=1,
        data_dir=self.data_dir,
        variable_update='parameter_server',
        xla=True)
    self._run_benchmark(params)
class InceptionV3Benchmarks(BenchmarkBase):
  """Benchmark for InceptionV3."""

  def _shared_params(self):
    """Returns shared parameters for all InceptionV3 benchmarks."""
    return BenchmarkBase._shared_params(self)._replace(
        model='inception3', batch_size=64, distortions=False)

  def benchmark_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server')
    self._run_benchmark(params)

  def benchmark_fp16_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with fp16 and synthetic data."""
    params = self._shared_params()._replace(
        num_gpus=1, use_fp16=True, variable_update='parameter_server')
    self._run_benchmark(params)

  def benchmark_synth_1gpu_max_batch_size(self):
    """Finds largest batch size that can be run with 1 gpu using synth data."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server')
    self._binary_search_batch_size(params, init_batch_size=128)

  def benchmark_xla_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with synthetic data and XLA."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server', xla=True)
    self._run_benchmark(params)

  def benchmark_fp16_xla_synth_1gpu_gpuparams(self):
    """Tests 1 gpu with fp16, XLA and synthetic data."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server', xla=True,
        use_fp16=True)
    self._run_benchmark(params)

  def benchmark_xla_synth_1gpu_max_batch_size(self):
    """Finds largest batch that can be run with XLA, 1 gpu, and synth data."""
    params = self._shared_params()._replace(
        num_gpus=1, variable_update='parameter_server', xla=True)
    self._binary_search_batch_size(params, init_batch_size=128)

  # Test does not run as part of continuous testing.
  def benchmark_xla_fake_1gpu_gpuparams(self):
    """Tests 1 gpu with fake data with XLA."""
    params = self._shared_params()._replace(
        num_gpus=1,
        data_dir=self.fake_data_dir,
        data_name='imagenet',
        variable_update='parameter_server',
        xla=True)
    self._run_benchmark(params)

  def benchmark_xla_real_1gpu_gpuparams(self):
    """Tests 1 gpu with real data with XLA."""
    params = self._shared_params()._replace(
        num_gpus=1,
        data_dir=self.data_dir,
        variable_update='parameter_server',
        xla=True)
    self._run_benchmark(params)
class NcfBenchmarks(BenchmarkBase):
  """Benchmarks for neural collaborative filtering."""

  def _shared_params(self):
    """Returns shared parameters for all NCF benchmarks."""
    return BenchmarkBase._shared_params(self)._replace(
        model='ncf', batch_size=64 * 1024, num_gpus=1, num_warmup_batches=1)

  def benchmark_synth_1gpu_gpuparams(self):
    """Tests 1 gpu, synthetic data, parameter-server variables."""
    self._run_benchmark(
        self._shared_params()._replace(variable_update='parameter_server'))

  def benchmark_fp16_synth_1gpu_gpuparams(self):
    """Tests 1 gpu, fp16, synthetic data."""
    self._run_benchmark(self._shared_params()._replace(
        variable_update='parameter_server', use_fp16=True))

  def benchmark_xla_synth_1gpu_gpuparams(self):
    """Tests 1 gpu, synthetic data, XLA autojit."""
    self._run_benchmark(self._shared_params()._replace(
        variable_update='parameter_server', xla=True))

  def benchmark_fp16_xla_synth_1gpu_gpuparams(self):
    """Tests 1 gpu, fp16, synthetic data, XLA autojit."""
    self._run_benchmark(self._shared_params()._replace(
        variable_update='parameter_server', xla=True, use_fp16=True))

  def benchmark_xla_compile_synth_1gpu_gpuparams(self):
    """Tests 1 gpu, synthetic data, XLA compile."""
    self._run_benchmark(self._shared_params()._replace(
        variable_update='parameter_server', xla_compile=True))

  def benchmark_fp16_xla_compile_synth_1gpu_gpuparams(self):
    """Tests 1 gpu, fp16, synthetic data, XLA compile."""
    self._run_benchmark(self._shared_params()._replace(
        variable_update='parameter_server', xla_compile=True, use_fp16=True))
class DeepSpeech2Benchmarks(BenchmarkBase):
  """Benchmarks for DeepSpeech2 model."""

  def _shared_params(self):
    """Returns shared parameters for all DeepSpeech2 benchmarks."""
    return BenchmarkBase._shared_params(self)._replace(
        model='deepspeech2', batch_size=32, num_gpus=1,
        data_name='librispeech')

  def benchmark_synth_1gpu_gpuparams(self):
    """Tests 1 gpu, synthetic data, parameter-server variables."""
    self._run_benchmark(
        self._shared_params()._replace(variable_update='parameter_server'))

  def benchmark_xla_synth_1gpu_gpuparams(self):
    """Tests 1 gpu, synthetic data, XLA autojit."""
    self._run_benchmark(self._shared_params()._replace(
        variable_update='parameter_server', xla=True))

  def benchmark_xla_compile_synth_1gpu_gpuparams(self):
    """Tests 1 gpu, synthetic data, XLA compile."""
    self._run_benchmark(self._shared_params()._replace(
        variable_update='parameter_server', xla_compile=True))
class SsdBenchmarks(BenchmarkBase):
  """Benchmarks for SSD model."""

  def _cudnn_version(self):
    """Returns the loaded cuDNN version as an int, or None if undetectable."""
    if sys.platform == 'win32':
      # ctypes-based symbol probing below is not done on Windows.
      return None
    # LoadLibrary(None) returns a handle to the current process; cuDNN
    # symbols are visible through it when the process linked against cuDNN.
    lib = ctypes.cdll.LoadLibrary(None)
    if hasattr(lib, 'cudnnGetErrorString'):
      version = lib.cudnnGetVersion()
      return version
    return None

  def _shared_params(self):
    """Returns shared parameters for all SSD benchmarks.

    Raises:
      RuntimeError: If the cuDNN version is unknown or below 7.3
        (versions are encoded as ints, e.g. 7300 for 7.3.0).
    """
    cudnn_version = self._cudnn_version()
    if cudnn_version is None or cudnn_version < 7300:
      raise RuntimeError(
          'Needs at least cuDNN 7.3 to work with fp16 (b/112048183). '
          'Build with --define=use_experimental_cudnn=1')
    return BenchmarkBase._shared_params(self)._replace(
        # TODO(b/115672206): Replace backbone model and data dir with replicated
        # placer location for better performance.
        backbone_model_path=platforms_util.get_ssd_backborn_model_file(),  # pylint: disable=line-too-long
        data_dir=platforms_util.get_ssd_backboard_data_dir(),
        batch_size=128,
        data_name='coco',
        model='ssd300',
        num_batches=10,
        num_warmup_batches=1,
        num_gpus=1,
        optimizer='momentum',
        momentum=0.9,
        weight_decay=5e-4,
        loss_type_to_report='base_loss',
        single_l2_loss_op=True,
        compute_lr_on_cpu=True,
    )

  def benchmark_xla_compile_real_1gpu_gpuparams(self):
    """Tests 1 gpu with real data and XLA compile."""
    params = self._shared_params()._replace(
        num_gpus=1,
        xla_compile=True,
    )
    self._run_benchmark(params)

  def benchmark_real_1gpu_gpuparams(self):
    """Tests 1 gpu with real data."""
    params = self._shared_params()._replace(
        num_gpus=1,)
    self._run_benchmark(params)

  def benchmark_xla_compile_fp16_real_1gpu_gpuparams(self):
    """Tests 1 gpu with fp16, real data and XLA compile."""
    params = self._shared_params()._replace(
        num_gpus=1, xla_compile=True, use_fp16=True)
    self._run_benchmark(params)

  def benchmark_fp16_real_1gpu_gpuparams(self):
    """Tests 1 gpu with fp16 and real data."""
    params = self._shared_params()._replace(
        num_gpus=1, use_fp16=True)
    self._run_benchmark(params)

  def benchmark_xla_compile_real_8gpu_gpuparams(self):
    """Tests 8 gpus with real data, replicated variables and XLA compile."""
    params = self._shared_params()._replace(
        num_gpus=8,
        xla_compile=True,
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2,
        num_batches=50,
    )
    self._run_benchmark(params)

  def benchmark_real_8gpu_gpuparams(self):
    """Tests 8 gpus with real data and replicated variables."""
    params = self._shared_params()._replace(
        num_gpus=8,
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2,
        num_batches=50,
    )
    self._run_benchmark(params)

  def benchmark_xla_compile_fp16_real_8gpu_gpuparams(self):
    """Tests 8 gpus with fp16, real data, replicated vars and XLA compile."""
    params = self._shared_params()._replace(
        num_gpus=8,
        xla_compile=True,
        use_fp16=True,
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2,
        num_batches=50,
    )
    self._run_benchmark(params)

  def benchmark_fp16_real_8gpu_gpuparams(self):
    """Tests 8 gpus with fp16, real data and replicated variables."""
    params = self._shared_params()._replace(
        num_gpus=8,
        use_fp16=True,
        variable_update='replicated',
        all_reduce_spec='nccl',
        gradient_repacking=2,
        num_batches=50,
    )
    self._run_benchmark(params)
if __name__ == '__main__':
  # These benchmarks are written against TF1 graph-mode APIs.
  tf.disable_v2_behavior()
  tf.test.main()
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/mlperf.py
0 → 100644
View file @
f0d87682
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains functions related to MLPerf compliance.
MLPerf requires submissions to log what the benchmark does, in order to verify
that the benchmark meets the MLPerf requirements. This module contains a global
object `logger` that is used by other files to log what tf_cnn_benchmarks does
for compliance.
By default, `logger` does nothing, as the MLPerf compliance logs are verbose and
unnecessary if one is not concerned about MLPerf compliance. The logger can be
enabled by using the `mlperf_logger` context manager.
To enable the logger with `mlperf_logger`, the MLPerf compliance library at
https://github.com/mlperf/training/tree/master/compliance is required. If
the logger is not enabled, the library is not needed.
"""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
collections
import
namedtuple
import
contextlib
import
os
import
sys
import
tensorflow.compat.v1
as
tf
# pylint: disable=g-import-not-at-top
try:
  # Not all users have the MLPerf compliance library, so we don't want to
  # unconditionally crash if these imports fail.
  from mlperf_compliance import mlperf_log
  from mlperf_compliance import resnet_log_helper
  from mlperf_compliance import tags
  import_successful = True
except ImportError:
  # The logger cannot be enabled in this case since the MLPerf library isn't
  # found. We return empty strings from the `tags` attribute so that
  # the benchmark can still run without crashing. These empty tags are passed
  # to an instance of `NullMlPerfLogger`, which does not log anything and
  # ignores the tag values.
  class _Tags(object):
    """Stand-in for `mlperf_compliance.tags` when the library is missing."""

    def __getattr__(self, item):
      # Every tag constant (e.g. `tags.RESNET`) resolves to the empty string.
      return ''
  tags = _Tags()
  import_successful = False
# pylint: enable=g-import-not-at-top
# Per-model bundle: the compliance print function, the set of tags valid for
# that model, and the model's MLPerf name (used in log lines).
_ModelInfo = namedtuple('_ModelInfo',
                       ['print_fn', 'tag_set', 'mlperf_model_name'])


# Prefix emitted on every MLPerf v0.5.0 compliance log line.
_MLPERF_LOG_PREFIX = ':::MLPv0.5.0'
class MlPerfLogger(object):
  """Logs various aspects about a benchmark run for MLPerf compliance."""

  def __init__(self, model):
    """Creates a logger for `model`.

    Args:
      model: Model name; must be 'resnet50_v1.5' or 'ssd300'.

    Raises:
      ValueError: If `model` is not a supported MLPerf model.
    """
    self._root_dir = os.path.split(os.path.abspath(__file__))[0]
    # NOTE(review): the compliance library appears to use these ROOT_DIR_*
    # globals (and get_caller below) to report call sites relative to this
    # directory — confirm against mlperf_compliance.
    mlperf_log.ROOT_DIR_RESNET = self._root_dir
    mlperf_log.ROOT_DIR_SSD = self._root_dir
    self.model = model
    model_to_info = {
        'resnet50_v1.5': _ModelInfo(mlperf_log.resnet_print,
                                    mlperf_log.RESNET_TAG_SET, tags.RESNET),
        'ssd300': _ModelInfo(mlperf_log.ssd_print, mlperf_log.SSD_TAG_SET,
                             tags.SSD)
    }

    try:
      self._log_fn, self.tag_set, self.mlperf_model_name = model_to_info[model]
    except KeyError:
      raise ValueError('--ml_perf_compliance_logging is only compatible when '
                       '--model is one of the following: ' +
                       ', '.join(model_to_info.keys()))

  def log(self, key, value=None, stack_offset=2):
    """Logs `key`/`value` if `key` is in this model's tag set.

    Keys outside the tag set are reported on stdout and otherwise ignored.
    """
    if key in self.tag_set:
      self._log_fn(key, value, stack_offset)
    else:
      print('Ignoring MLPerf logging item key=%s, value=%s for model %s' %
            (key, value, self.model))

  def log_deferred_tensor_value(self, key, tensor_value, global_step,
                                stack_offset=2, every_n=1):
    """Logs the value of a tensor when the graph is run."""
    caller = '(%s)' % mlperf_log.get_caller(stack_offset, self._root_dir)

    def create_print_op():
      # Emits a compliance-formatted line marked "deferred" because the value
      # is only known when the graph executes.
      return tf.print(_MLPERF_LOG_PREFIX, self.mlperf_model_name,
                      tf.timestamp(), caller, key,
                      ': { "deferred": true, "value":', tensor_value,
                      '}', output_stream=sys.stdout)

    # Only print on steps where global_step is a multiple of every_n.
    maybe_print = tf.cond(tf.equal(global_step % every_n, 0),
                          create_print_op, tf.no_op)
    with tf.control_dependencies([maybe_print]):
      # Return a tensor that carries the same value but depends on the print,
      # so using it forces the log to happen.
      return tf.identity(tensor_value)

  def log_max_pool(self, input_tensor, output_tensor):
    # Only ResNet50v1.5 logs max-pool layers for compliance.
    if self.model == 'resnet50_v1.5':
      resnet_log_helper.log_max_pool(input_tensor, output_tensor)

  def log_begin_block(self, input_tensor, block_type):
    if self.model == 'resnet50_v1.5':
      resnet_log_helper.log_begin_block(input_tensor, block_type)

  def log_end_block(self, output_tensor):
    if self.model == 'resnet50_v1.5':
      resnet_log_helper.log_end_block(output_tensor)

  def log_projection(self, input_tensor, output_tensor):
    if self.model == 'resnet50_v1.5':
      resnet_log_helper.log_projection(input_tensor, output_tensor)

  def log_conv2d(self, input_tensor, output_tensor, stride_height,
                 stride_width, filters, initializer, use_bias):
    """Log a conv2d call."""
    if self.model == 'resnet50_v1.5':
      # The compliance helper takes a single stride, so square strides are
      # required here.
      assert stride_height == stride_width, (
          '--ml_perf_compliance_logging does not support convolutions where '
          'the stride height is not equal to the stride width. '
          'stride_height=%d, stride_width=%d' % (stride_height, stride_width))
      # Map the TF initializer object to the compliance tag / name it expects.
      if isinstance(initializer, tf.truncated_normal_initializer) or (
          isinstance(initializer, tf.variance_scaling_initializer) and
          initializer.distribution == 'truncated_normal'):
        initializer = tags.TRUNCATED_NORMAL
      elif (isinstance(initializer, tf.glorot_uniform_initializer) or
            initializer is None):
        # None is TF's default conv2d initializer, which is glorot uniform.
        initializer = 'glorot_uniform'
      resnet_log_helper.log_conv2d(input_tensor, output_tensor, stride_width,
                                   filters, initializer, use_bias)

  def log_batch_norm(self, input_tensor, output_tensor, momentum, epsilon,
                     center, scale, training):
    if self.model == 'resnet50_v1.5':
      resnet_log_helper.log_batch_norm(input_tensor, output_tensor, momentum,
                                       epsilon, center, scale, training)

  def log_train_epochs(self, num_epochs):
    """Logs all the TRAIN_EPOCHs log lines."""
    num_epochs_int = int(num_epochs)
    for i in range(num_epochs_int):
      # MLPerf allows us to print all the train epochs at once instead of
      # printing them as we do them.
      self.log(key=mlperf_log.TRAIN_EPOCH, value=i, stack_offset=3)
    if num_epochs_int != num_epochs:
      # The fractional final epoch is logged with an explanatory suffix.
      value = (str(num_epochs_int) +
               ', but this epoch only has {}% of the examples of a normal epoch'
               .format(100 * (num_epochs - num_epochs_int)))
      self.log(key=mlperf_log.TRAIN_EPOCH, value=value, stack_offset=3)

  def log_input_resize_aspect_preserving(self, height, width, scale_factor):
    assert height == width, (
        '--ml_perf_compliance_logging does not support models with nonsquare '
        'images. Cannot process image with height=%d and width=%d' %
        (height, width))
    self.log(key=tags.INPUT_RESIZE_ASPECT_PRESERVING,
             value={'min': int(height * scale_factor)})

  def log_eval_epoch(self, tag, global_step, batch_size, stack_offset=2):
    if self.model == 'resnet50_v1.5':
      self.log(key=tag, stack_offset=stack_offset + 1)
    elif self.model == 'ssd300':
      # 118287 is presumably the number of examples per epoch for the COCO
      # train set — TODO confirm.
      epoch = int(global_step * batch_size / 118287)
      self.log(key=tag, value=epoch, stack_offset=stack_offset + 1)

  def log_eval_accuracy(self, accuracy, global_step, batch_size,
                        examples_per_epoch, stack_offset=2):
    """Logs eval accuracy."""
    epoch = int(global_step * batch_size / examples_per_epoch)
    eval_accuracy = {'epoch': epoch, 'value': accuracy}
    eval_iteration_accuracy = {'iteration': global_step, 'value': accuracy}
    self.log(key=tags.EVAL_ACCURACY, value=eval_accuracy,
             stack_offset=stack_offset + 1)
    self.log(key=tags.EVAL_ITERATION_ACCURACY, value=eval_iteration_accuracy,
             stack_offset=stack_offset + 1)
def
_empty_fn
(
*
args
,
**
kwargs
):
del
args
,
kwargs
class NullMlPerfLogger(object):
  """A version of `MlPerfLogger` that does not log anything.

  This class has the same interface as `MlPerfLogger`, but does not actually
  do anything. This is used when logging is disabled, which is the default
  behavior.
  """

  def __getattr__(self, item):
    # Any logging method name resolves to the shared do-nothing function.
    del item
    return _empty_fn

  def log_deferred_tensor_value(self, key, tensor_value, *args, **kwargs):
    """Passes `tensor_value` through unchanged, logging nothing."""
    del key, args, kwargs
    return tensor_value
# A global singleton logger. By default, it's the null logger but can be
# switched to an MlPerfLogger with `mlperf_logger()`.
logger = NullMlPerfLogger()
@contextlib.contextmanager
def mlperf_logger(use_mlperf_logger, model):
  """Optionally enable the mlperf logger.

  If `use_mlperf_logger` is True, sets the `logger` global variable to an
  instance of MlPerfLogger that will print logs for MLPerf compliance. If
  `use_mlperf_logger` is False, does nothing.

  Args:
    use_mlperf_logger: If True, enables the mlperf logger. If False, this
      function does nothing.
    model: The model that will be logged. Required, because different models
      must log different things for MLPerf compliance.

  Yields:
    Nothing.

  Raises:
    ImportError: If `use_mlperf_logger` is True but the MLPerf compliance
      library cannot be imported
  """
  global logger
  # Guard clause: with logging disabled, run the body with the logger
  # untouched.
  if not use_mlperf_logger:
    yield
    return
  if not import_successful:
    raise ImportError('Failed to import MLPerf compliance library, which is '
                      'required when --ml_perf_compliance_logging is '
                      'specified. Clone this repo and add this directory '
                      'https://github.com/mlperf/training/tree/master/'
                      'compliance to the PYTHONPATH environmental variable.')
  # Swap in a real compliance logger for the duration of the context, then
  # restore whatever logger was installed before, even on error.
  previous_logger = logger
  logger = MlPerfLogger(model)
  try:
    yield
  finally:
    logger = previous_logger
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/mlperf_test.py
0 → 100644
View file @
f0d87682
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains tests related to MLPerf.
Note this test only passes if the MLPerf compliance library is installed.
"""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
collections
import
Counter
import
logging
import
re
import
six
import
tensorflow.compat.v1
as
tf
import
benchmark_cnn
import
datasets
import
mlperf
import
test_util
from
models
import
model
from
mlperf_compliance
import
mlperf_log
class _MlPerfTestModel(model.CNNModel):
  """A minimal CNN used to exercise the MLPerf compliance logging."""

  def __init__(self):
    super(_MlPerfTestModel, self).__init__(
        'mlperf_test_model', image_size=224, batch_size=2, learning_rate=1)

  def add_inference(self, cnn):
    assert cnn.top_layer.shape[1:] == (3, 224, 224)
    cnn.conv(1, 1, 1, 1, 1, use_batch_norm=True)
    cnn.mpool(1, 1, 1, 1, num_channels_in=1)
    cnn.reshape([-1, 224 * 224])
    cnn.affine(1, activation=None)

    # The batch norm variables must be excluded from the L2 loss, so the
    # filtered variable list must be strictly smaller than the full one.
    all_vars = tf.global_variables() + tf.local_variables()
    assert len(all_vars) > len(self.filter_l2_loss_vars(all_vars))
class MlPerfComplianceTest(tf.test.TestCase):
  """Tests the MLPerf compliance logs.

  This serves as a quick check that we probably didn't break the compliance
  logging. It is not meant to be as comprehensive as the official MLPerf
  compliance checker will be.
  """

  def setUp(self):
    super(MlPerfComplianceTest, self).setUp()
    benchmark_cnn.setup(benchmark_cnn.make_params())

  # Map between regex and the number of times we expect to see that regex in
  # the logs. Entries commented out with the comment FIXME indicate that
  # tf_cnn_benchmarks currently fails compliance in that regard, and needs to
  # be fixed to be MLPerf compliant.
  EXPECTED_LOG_REGEXES = {
      # Preprocessing tags
      mlperf.tags.INPUT_ORDER: 2,  # 1 for training, 1 for eval
      # We pass --tf_random_seed=9876 in the test.
      r'%s: 9876' % mlperf.tags.RUN_SET_RANDOM_SEED: 2,
      # The Numpy random seed is hardcoded to 4321.
      r'%s: 4321' % mlperf.tags.RUN_SET_RANDOM_SEED: 2,
      r'%s: %d' % (mlperf.tags.PREPROC_NUM_TRAIN_EXAMPLES,
                   datasets.IMAGENET_NUM_TRAIN_IMAGES): 1,
      r'%s: %d' % (mlperf.tags.PREPROC_NUM_EVAL_EXAMPLES,
                   datasets.IMAGENET_NUM_VAL_IMAGES): 1,
      mlperf.tags.PREPROC_NUM_EVAL_EXAMPLES + '.*': 1,
      mlperf.tags.INPUT_DISTORTED_CROP_MIN_OBJ_COV + '.*': 1,
      mlperf.tags.INPUT_DISTORTED_CROP_RATIO_RANGE + '.*': 1,
      mlperf.tags.INPUT_DISTORTED_CROP_AREA_RANGE + '.*': 1,
      mlperf.tags.INPUT_DISTORTED_CROP_MAX_ATTEMPTS + '.*': 1,
      mlperf.tags.INPUT_RANDOM_FLIP + '.*': 1,
      r'%s: \[224, 224\].*' % mlperf.tags.INPUT_CENTRAL_CROP: 1,
      r'%s: \[123.68, 116.78, 103.94\].*' %
      mlperf.tags.INPUT_MEAN_SUBTRACTION: 2,
      r'%s: {"min": 256}.*' % mlperf.tags.INPUT_RESIZE_ASPECT_PRESERVING: 1,
      # 1 for training, 1 for eval
      r'%s: \[224, 224\].*' % mlperf.tags.INPUT_RESIZE: 2,

      # Resnet model tags
      mlperf.tags.MODEL_HP_BATCH_NORM + '.*': 2,
      # 2 for training, 2 for eval. Although there's only 1 conv2d, each conv2d
      # produces 2 logs.
      mlperf.tags.MODEL_HP_CONV2D_FIXED_PADDING + '.*': 4,
      mlperf.tags.MODEL_HP_RELU + '.*': 2,
      mlperf.tags.MODEL_HP_INITIAL_MAX_POOL + '.*': 2,
      # NOTE: this entry was listed twice in the original dict; duplicate
      # literal dict keys silently collapse to one, so only one is kept.
      mlperf.tags.MODEL_HP_DENSE + '.*': 4,
      # Note that tags our test model does not emit, like MODEL_HP_SHORTCUT_ADD,
      # are omitted here.
      r'%s: "categorical_cross_entropy".*' % mlperf.tags.MODEL_HP_LOSS_FN: 1,

      # 1 for training, 2 because the _MlPerfTestModel calls this when building
      # the model for both training and eval
      r'%s: true' % mlperf.tags.MODEL_EXCLUDE_BN_FROM_L2: 3,

      r'%s: 0.5.*' % mlperf.tags.MODEL_L2_REGULARIZATION: 1,

      # Note we do not handle OPT_LR, since that is printed to stderr using
      # tf.Print, which we cannot easily intercept.

      # Other tags
      '%s: "%s"' % (mlperf.tags.OPT_NAME, mlperf.tags.SGD_WITH_MOMENTUM): 1,
      '%s: 0.5' % mlperf.tags.OPT_MOMENTUM: 1,
      mlperf.tags.RUN_START: 1,
      '%s: 2' % mlperf.tags.INPUT_BATCH_SIZE: 1,
      mlperf.tags.TRAIN_LOOP: 1,
      mlperf.tags.TRAIN_EPOCH + '.*': 1,
      '%s: 2' % mlperf.tags.INPUT_SIZE: 2,
      mlperf.tags.EVAL_START: 2,
      mlperf.tags.EVAL_STOP: 2,
      '%s: 6' % mlperf.tags.EVAL_SIZE: 2,
      mlperf.tags.EVAL_ACCURACY + '.*': 2,
      '%s: 2.0' % mlperf.tags.EVAL_TARGET: 2,
      mlperf.tags.RUN_STOP + '.*': 1,
      mlperf.tags.RUN_FINAL: 1
  }
  # Compile the patterns once; the Counter form lets the test diff found
  # counts against expected counts directly.
  EXPECTED_LOG_REGEXES = Counter({re.compile(k): v
                                  for k, v in EXPECTED_LOG_REGEXES.items()})

  def testMlPerfCompliance(self):
    """Runs a tiny training+eval job and checks the emitted MLPerf logs."""
    string_io = six.StringIO()
    handler = logging.StreamHandler(string_io)
    data_dir = test_util.create_black_and_white_images()
    try:
      mlperf_log.LOGGER.addHandler(handler)
      params = benchmark_cnn.make_params(data_dir=data_dir,
                                         data_name='imagenet',
                                         batch_size=2,
                                         num_warmup_batches=0,
                                         num_batches=2,
                                         num_eval_batches=3,
                                         eval_during_training_every_n_steps=1,
                                         distortions=False,
                                         weight_decay=0.5,
                                         optimizer='momentum',
                                         momentum=0.5,
                                         stop_at_top_1_accuracy=2.0,
                                         tf_random_seed=9876,
                                         ml_perf=True)
      with mlperf.mlperf_logger(use_mlperf_logger=True, model='resnet50_v1.5'):
        bench_cnn = benchmark_cnn.BenchmarkCNN(params,
                                               model=_MlPerfTestModel())
        bench_cnn.run()
      logs = string_io.getvalue().splitlines()

      # Count how many log lines each expected regex matched.
      log_regexes = Counter()
      for log in logs:
        for regex in self.EXPECTED_LOG_REGEXES:
          if regex.search(log):
            log_regexes[regex] += 1
      if log_regexes != self.EXPECTED_LOG_REGEXES:
        diff_counter = Counter(log_regexes)
        diff_counter.subtract(self.EXPECTED_LOG_REGEXES)
        differences = []
        for regex in (k for k in diff_counter.keys() if diff_counter[k]):
          found_count = log_regexes[regex]
          expected_count = self.EXPECTED_LOG_REGEXES[regex]
          differences.append(' For regex %s: Found %d lines matching but '
                             'expected to find %d' %
                             (regex.pattern, found_count, expected_count))
        raise AssertionError('Logs did not match expected logs. Differences:\n'
                             '%s' % '\n'.join(differences))
    finally:
      mlperf_log.LOGGER.removeHandler(handler)
if __name__ == '__main__':
  # `tf` is tensorflow.compat.v1 here; disable TF2 behaviors so the
  # TF1-style benchmark code under test runs as intended.
  tf.disable_v2_behavior()
  tf.test.main()
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/models/__init__.py
0 → 100644
View file @
f0d87682
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/models/alexnet_model.py
0 → 100644
View file @
f0d87682
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Alexnet model configuration.
References:
Krizhevsky, Alex, Ilya Sutskever, and Geoffrey E. Hinton
ImageNet Classification with Deep Convolutional Neural Networks
Advances in Neural Information Processing Systems. 2012
"""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
tensorflow.compat.v1
as
tf
from
models
import
model
class AlexnetModel(model.CNNModel):
  """AlexNet CNN model (Krizhevsky et al., 2012)."""

  def __init__(self, params=None):
    # Image size is 224 + 3: the first 'VALID' conv below assumes the input
    # was padded by 3 pixels in width and height.
    super(AlexnetModel, self).__init__(
        'alexnet', 224 + 3, 512, 0.005, params=params)

  def add_inference(self, cnn):
    # Feature extractor: five convolutions interleaved with max pooling.
    # Note: VALID requires padding the images by 3 in width and height
    cnn.conv(64, 11, 11, 4, 4, 'VALID')
    cnn.mpool(3, 3, 2, 2)
    cnn.conv(192, 5, 5)
    cnn.mpool(3, 3, 2, 2)
    cnn.conv(384, 3, 3)
    cnn.conv(384, 3, 3)
    cnn.conv(256, 3, 3)
    cnn.mpool(3, 3, 2, 2)
    # Classifier head: flatten, then two dropout-regularized FC layers.
    cnn.reshape([-1, 256 * 6 * 6])
    for _ in range(2):
      cnn.affine(4096)
      cnn.dropout()
class AlexnetCifar10Model(model.CNNModel):
  """Alexnet cnn model for cifar datasets.

  The model architecture follows the one defined in the tensorflow tutorial
  model.
  Reference model: tensorflow/models/tutorials/image/cifar10/cifar10.py
  Paper: http://www.cs.toronto.edu/~kriz/learning-features-2009-TR.pdf
  """

  def __init__(self, params=None):
    super(AlexnetCifar10Model, self).__init__(
        'alexnet', 32, 128, 0.1, params=params)

  def add_inference(self, cnn):
    # First stage: conv -> pool -> local response normalization.
    cnn.conv(64, 5, 5, 1, 1, 'SAME', stddev=5e-2)
    cnn.mpool(3, 3, 2, 2, mode='SAME')
    cnn.lrn(depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
    # Second stage: conv -> local response normalization -> pool.
    cnn.conv(64, 5, 5, 1, 1, 'SAME', bias=0.1, stddev=5e-2)
    cnn.lrn(depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)
    cnn.mpool(3, 3, 2, 2, mode='SAME')
    # Flatten whatever spatial shape remains and attach two FC layers.
    dims = cnn.top_layer.get_shape().as_list()
    flattened = dims[1] * dims[2] * dims[3]
    cnn.reshape([-1, flattened])
    cnn.affine(384, stddev=0.04, bias=0.1)
    cnn.affine(192, stddev=0.04, bias=0.1)

  def get_learning_rate(self, global_step, batch_size):
    """Decays the learning rate by 10x every 100 epochs, stepwise."""
    examples_per_epoch = 50000  # CIFAR training-set size used by this model.
    epochs_per_decay = 100
    steps_per_decay = epochs_per_decay * examples_per_epoch // batch_size
    return tf.train.exponential_decay(
        self.learning_rate, global_step, steps_per_decay, 0.1,
        staircase=True)
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/models/densenet_model.py
0 → 100644
View file @
f0d87682
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Densenet model configuration.
References:
"Densely Connected Convolutional Networks": https://arxiv.org/pdf/1608.06993
"""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
numpy
as
np
from
six.moves
import
xrange
# pylint: disable=redefined-builtin
import
tensorflow.compat.v1
as
tf
from
models
import
model
as
model_lib
class DensenetCifar10Model(model_lib.CNNModel):
  """Densenet cnn network configuration."""

  def __init__(self, model, layer_counts, growth_rate, params=None):
    """Creates the model.

    Args:
      model: the model name, e.g. 'densenet40_k12'.
      layer_counts: a 3-tuple with the number of dense layers in each of the
        three dense blocks.
      growth_rate: the number of channels each dense layer adds.
      params: the params from BenchmarkCNN.
    """
    self.growth_rate = growth_rate
    super(DensenetCifar10Model, self).__init__(
        model, 32, 64, 0.1, layer_counts=layer_counts, params=params)
    self.batch_norm_config = {'decay': 0.9, 'epsilon': 1e-5, 'scale': True}

  def dense_block(self, cnn, growth_rate):
    """Adds one BN -> ReLU -> 3x3 conv layer, concatenated with its input."""
    input_layer = cnn.top_layer
    c = cnn.batch_norm(input_layer, **self.batch_norm_config)
    c = tf.nn.relu(c)
    c = cnn.conv(growth_rate, 3, 3, 1, 1,
                 stddev=np.sqrt(2.0 / 9 / growth_rate),
                 activation=None, input_layer=c)
    channel_index = 3 if cnn.channel_pos == 'channels_last' else 1
    # Dense connectivity: the new features are appended to the input along
    # the channel axis.
    cnn.top_layer = tf.concat([input_layer, c], channel_index)
    cnn.top_size += growth_rate

  def transition_layer(self, cnn):
    """Adds a BN -> ReLU -> 1x1 conv -> 2x2 average-pool transition."""
    in_size = cnn.top_size
    cnn.batch_norm(**self.batch_norm_config)
    cnn.top_layer = tf.nn.relu(cnn.top_layer)
    cnn.conv(in_size, 1, 1, 1, 1, stddev=np.sqrt(2.0 / 9 / in_size))
    cnn.apool(2, 2, 2, 2)

  def add_inference(self, cnn):
    if self.layer_counts is None:
      raise ValueError('Layer counts not specified for %s' % self.get_model())
    if self.growth_rate is None:
      raise ValueError('Growth rate not specified for %s' % self.get_model())
    cnn.conv(16, 3, 3, 1, 1, activation=None)
    # Block 1
    for _ in xrange(self.layer_counts[0]):
      self.dense_block(cnn, self.growth_rate)
    self.transition_layer(cnn)
    # Block 2
    for _ in xrange(self.layer_counts[1]):
      self.dense_block(cnn, self.growth_rate)
    self.transition_layer(cnn)
    # Block 3 (no transition layer afterwards).
    for _ in xrange(self.layer_counts[2]):
      self.dense_block(cnn, self.growth_rate)
    cnn.batch_norm(**self.batch_norm_config)
    cnn.top_layer = tf.nn.relu(cnn.top_layer)
    channel_index = 3 if cnn.channel_pos == 'channels_last' else 1
    cnn.top_size = cnn.top_layer.get_shape().as_list()[channel_index]
    cnn.spatial_mean()

  def get_learning_rate(self, global_step, batch_size):
    """Piecewise-constant LR schedule dropping 10x at epochs 150/225/300."""
    num_batches_per_epoch = 50000 // batch_size
    boundaries = num_batches_per_epoch * np.array([150, 225, 300],
                                                  dtype=np.int64)
    # tf.train.piecewise_constant wants a list of scalars, not an ndarray.
    # (This replaces the unidiomatic `[x for x in boundaries]` copy.)
    boundaries = list(boundaries)
    values = [0.1, 0.01, 0.001, 0.0001]
    return tf.train.piecewise_constant(global_step, boundaries, values)
def create_densenet40_k12_model():
  """Builds the DenseNet-40 (growth rate 12) CIFAR-10 model."""
  return DensenetCifar10Model('densenet40_k12', layer_counts=(12, 12, 12),
                              growth_rate=12)
def create_densenet100_k12_model():
  """Builds the DenseNet-100 (growth rate 12) CIFAR-10 model."""
  return DensenetCifar10Model('densenet100_k12', layer_counts=(32, 32, 32),
                              growth_rate=12)
def create_densenet100_k24_model():
  """Builds the DenseNet-100 (growth rate 24) CIFAR-10 model."""
  return DensenetCifar10Model('densenet100_k24', layer_counts=(32, 32, 32),
                              growth_rate=24)
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/models/experimental/__init__.py
0 → 100644
View file @
f0d87682
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/models/experimental/deepspeech.py
0 → 100644
View file @
f0d87682
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""DeepSpeech2 model configuration.
References:
https://arxiv.org/abs/1512.02595
Deep Speech 2: End-to-End Speech Recognition in English and Mandarin
"""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
itertools
import
numpy
as
np
from
six.moves
import
xrange
# pylint: disable=redefined-builtin
import
tensorflow.compat.v1
as
tf
import
constants
from
cnn_util
import
log_fn
from
models
import
model
as
model_lib
from
tensorflow.python.ops
import
variables
# pylint: disable=g-direct-tensorflow-import
class DeepSpeechDecoder(object):
  """Greedy decoder implementation for Deep Speech model."""

  def __init__(self, labels, blank_index=28):
    """Decoder initialization.

    Args:
      labels: a string specifying the speech labels for the decoder to use.
      blank_index: an integer specifying index for the blank character.
        Defaults to 28.
    """
    self.labels = labels
    self.blank_index = blank_index
    # Maps each label index to its character, e.g. {0: labels[0], ...}.
    self.int_to_char = dict(enumerate(labels))

  def convert_to_string(self, sequence):
    """Convert a sequence of indexes into corresponding string."""
    return ''.join(self.int_to_char[i] for i in sequence)

  def _nltk_distance(self):
    """Imports and returns `nltk.metrics.distance`.

    Raises:
      ImportError: with installation instructions if nltk is missing, or the
        original error if the failure was unrelated to nltk.
    """
    try:
      from nltk.metrics import distance  # pylint: disable=g-import-not-at-top
      return distance
    except ImportError as e:
      # Python 3 ImportError has no `.message` attribute (the original code
      # used `e.message` and would crash with AttributeError here); inspect
      # str(e) instead to decide whether nltk itself is missing.
      if 'nltk' not in str(e):
        raise
      raise ImportError('To use the experimental deepspeech model, you must '
                        'pip install -U nltk')

  def wer(self, decode, target):
    """Computes the Word Error Rate (WER).

    WER is defined as the edit distance between the two provided sentences
    after tokenizing to words.

    Args:
      decode: string of the decoded output.
      target: a string for the ground truth label.

    Returns:
      A float number for the WER of the current decode-target pair.
    """
    distance = self._nltk_distance()
    # Map each distinct word to a single character, so the word-level edit
    # distance can be computed by a character-level algorithm.
    words = set(decode.split() + target.split())
    word2char = dict(zip(words, range(len(words))))
    new_decode = [chr(word2char[w]) for w in decode.split()]
    new_target = [chr(word2char[w]) for w in target.split()]
    return distance.edit_distance(''.join(new_decode), ''.join(new_target))

  def cer(self, decode, target):
    """Computes the Character Error Rate (CER).

    CER is defined as the edit distance between the two given strings.

    Args:
      decode: a string of the decoded output.
      target: a string for the ground truth label.

    Returns:
      A float number denoting the CER for the current sentence pair.
    """
    distance = self._nltk_distance()
    return distance.edit_distance(decode, target)

  def decode(self, char_indexes):
    """Decode the best guess from a sequence of indexes, greedily."""
    # Merge repeated chars.
    merge = [k for k, _ in itertools.groupby(char_indexes)]
    # Remove the blank index in the decoded sequence.
    merge_remove_blank = [k for k in merge if k != self.blank_index]
    return self.convert_to_string(merge_remove_blank)

  def decode_logits(self, logits):
    """Decode the best guess from logits using greedy algorithm."""
    # Choose the class with maximum probability at each time step.
    best = list(np.argmax(logits, axis=1))
    return self.decode(best)
class DeepSpeech2Model(model_lib.Model):
  """Define DeepSpeech2 model."""

  # Supported rnn cells.
  SUPPORTED_RNNS = {
      'lstm': tf.nn.rnn_cell.BasicLSTMCell,
      'rnn': tf.nn.rnn_cell.RNNCell,
      'gru': tf.nn.rnn_cell.GRUCell,
  }

  # Parameters for batch normalization.
  BATCH_NORM_EPSILON = 1e-5
  BATCH_NORM_DECAY = 0.997

  # Filters of convolution layer
  CONV_FILTERS = 32

  def __init__(self,
               num_rnn_layers=5,
               rnn_type='lstm',
               is_bidirectional=True,
               rnn_hidden_size=800,
               use_bias=True,
               params=None):
    """Initialize DeepSpeech2 model.

    Args:
      num_rnn_layers: an integer, the number of rnn layers (default: 5).
      rnn_type: a string, one of the supported rnn cells: gru, rnn or lstm.
      is_bidirectional: a boolean to indicate if the rnn layer is
        bidirectional.
      rnn_hidden_size: an integer for the number of hidden units in the RNN
        cell.
      use_bias: a boolean specifying whether to use a bias in the last fc
        layer.
      params: the params from BenchmarkCNN.
    """
    super(DeepSpeech2Model, self).__init__(
        'deepspeech2',
        batch_size=128,
        learning_rate=0.0005,
        fp16_loss_scale=128,
        params=params)
    self.num_rnn_layers = num_rnn_layers
    self.rnn_type = rnn_type
    self.is_bidirectional = is_bidirectional
    self.rnn_hidden_size = rnn_hidden_size
    self.use_bias = use_bias
    # Fixed padded-input geometry; presumably derived from the LibriSpeech
    # preprocessing used with this model — confirm against the input pipeline.
    self.num_feature_bins = 161
    self.max_time_steps = 3494
    self.max_label_length = 576

  def _batch_norm(self, inputs, training):
    """Batch normalization layer.

    Note that the momentum to use will affect validation accuracy over time.
    Batch norm has different behaviors during training/evaluation. With a
    large momentum, the model takes longer to get a near-accurate estimation
    of the moving mean/variance over the entire training dataset, which means
    we need more iterations to see good evaluation results. If the training
    data is evenly distributed over the feature space, we can also try setting
    a smaller momentum (such as 0.1) to get good evaluation result sooner.

    Args:
      inputs: input data for batch norm layer.
      training: a boolean to indicate if it is in training stage.

    Returns:
      tensor output from batch norm layer.
    """
    return tf.layers.batch_normalization(
        inputs=inputs,
        momentum=DeepSpeech2Model.BATCH_NORM_DECAY,
        epsilon=DeepSpeech2Model.BATCH_NORM_EPSILON,
        fused=True,
        training=training)

  def _conv_bn_layer(self, inputs, padding, filters, kernel_size, strides,
                     layer_id, training):
    """Defines 2D convolutional + batch normalization layer.

    Args:
      inputs: input data for convolution layer.
      padding: padding to be applied before convolution layer.
      filters: an integer, number of output filters in the convolution.
      kernel_size: a tuple specifying the height and width of the 2D
        convolution window.
      strides: a tuple specifying the stride length of the convolution.
      layer_id: an integer specifying the layer index.
      training: a boolean to indicate which stage we are in (training/eval).

    Returns:
      tensor output from the current layer.
    """
    # Perform symmetric padding on the feature dimension of time_step
    # This step is required to avoid issues when RNN output sequence is
    # shorter than the label length.
    inputs = tf.pad(
        inputs,
        [[0, 0], [padding[0], padding[0]], [padding[1], padding[1]], [0, 0]])
    inputs = tf.layers.conv2d(
        inputs=inputs,
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        padding='valid',
        use_bias=False,
        activation=tf.nn.relu6,
        name='cnn_{}'.format(layer_id))
    return self._batch_norm(inputs, training)

  def _rnn_layer(self, inputs, rnn_cell, rnn_hidden_size, layer_id,
                 use_batch_norm, is_bidirectional, training):
    """Defines a batch normalization + rnn layer.

    Args:
      inputs: input tensors for the current layer.
      rnn_cell: RNN cell instance to use.
      rnn_hidden_size: an integer for the dimensionality of the rnn output
        space.
      layer_id: an integer for the index of current layer.
      use_batch_norm: a boolean specifying whether to perform batch
        normalization on input states.
      is_bidirectional: a boolean specifying whether the rnn layer is
        bi-directional.
      training: a boolean to indicate which stage we are in (training/eval).

    Returns:
      tensor output for the current layer.
    """
    if use_batch_norm:
      inputs = self._batch_norm(inputs, training)

    # Construct forward/backward RNN cells.
    fw_cell = rnn_cell(num_units=rnn_hidden_size,
                       name='rnn_fw_{}'.format(layer_id))
    if is_bidirectional:
      bw_cell = rnn_cell(num_units=rnn_hidden_size,
                         name='rnn_bw_{}'.format(layer_id))
      outputs, _ = tf.nn.bidirectional_dynamic_rnn(
          cell_fw=fw_cell,
          cell_bw=bw_cell,
          inputs=inputs,
          dtype=tf.float32,
          swap_memory=True)
      rnn_outputs = tf.concat(outputs, -1)
    else:
      # BUG FIX: tf.nn.dynamic_rnn returns an (outputs, state) pair. The
      # original code assigned the whole tuple to rnn_outputs, which breaks
      # the unidirectional (is_bidirectional=False) path downstream; keep
      # only the outputs, mirroring the bidirectional branch.
      rnn_outputs, _ = tf.nn.dynamic_rnn(
          fw_cell, inputs, dtype=tf.float32, swap_memory=True)
    return rnn_outputs

  def get_input_data_types(self, subset):
    """Returns the list of data types of the inputs."""
    del subset  # Same data types for both train and validation subsets.
    return [self.data_type, tf.int32, tf.int32, tf.int32]

  def get_input_shapes(self, subset):
    """Returns the list of shapes of the padded inputs."""
    del subset  # Same shapes for both train and validation subsets
    return [
        [self.batch_size, self.max_time_steps, self.num_feature_bins, 1],
        [self.batch_size, self.max_label_length],
        [self.batch_size, 1],
        [self.batch_size, 1],
    ]

  def get_synthetic_inputs(self, input_name, nclass):
    """Returns synthetic [spectrogram, labels, input/label lengths] inputs."""
    inputs = tf.random_uniform(self.get_input_shapes('train')[0],
                               dtype=self.get_input_data_types('train')[0])
    inputs = variables.VariableV1(inputs,
                                  trainable=False,
                                  collections=[tf.GraphKeys.LOCAL_VARIABLES],
                                  name=input_name)
    labels = tf.convert_to_tensor(
        np.random.randint(28, size=[self.batch_size, self.max_label_length]))
    input_lengths = tf.convert_to_tensor(
        [self.max_time_steps] * self.batch_size)
    label_lengths = tf.convert_to_tensor(
        [self.max_label_length] * self.batch_size)
    return [inputs, labels, input_lengths, label_lengths]

  # TODO(laigd): support fp16.
  # TODO(laigd): support multiple gpus.
  def build_network(self, inputs, phase_train=True, nclass=29):
    """Builds the forward pass of the deepspeech2 model.

    Args:
      inputs: The input list of the model.
      phase_train: True during training. False during evaluation.
      nclass: Number of classes that the input spectrogram can belong to.

    Returns:
      A BuildNetworkResult which contains the logits and model-specific extra
      information.
    """
    inputs = inputs[0]  # Get the spectrogram feature.

    # Two cnn layers.
    inputs = self._conv_bn_layer(
        inputs,
        padding=(20, 5),
        filters=DeepSpeech2Model.CONV_FILTERS,
        kernel_size=(41, 11),
        strides=(2, 2),
        layer_id=1,
        training=phase_train)
    inputs = self._conv_bn_layer(
        inputs,
        padding=(10, 5),
        filters=DeepSpeech2Model.CONV_FILTERS,
        kernel_size=(21, 11),
        strides=(2, 1),
        layer_id=2,
        training=phase_train)

    # output of conv_layer2 with the shape of
    # [batch_size (N), times (T), features (F), channels (C)].
    # Convert the conv output to rnn input.
    # batch_size = tf.shape(inputs)[0]
    feat_size = inputs.get_shape().as_list()[2]
    inputs = tf.reshape(
        inputs,
        [self.batch_size, -1, feat_size * DeepSpeech2Model.CONV_FILTERS])

    # RNN layers.
    rnn_cell = DeepSpeech2Model.SUPPORTED_RNNS[self.rnn_type]
    for layer_counter in xrange(self.num_rnn_layers):
      # No batch normalization on the first layer.
      use_batch_norm = (layer_counter != 0)
      inputs = self._rnn_layer(inputs, rnn_cell, self.rnn_hidden_size,
                               layer_counter + 1, use_batch_norm,
                               self.is_bidirectional, phase_train)

    # FC layer with batch norm.
    inputs = self._batch_norm(inputs, phase_train)
    logits = tf.layers.dense(inputs, nclass, use_bias=self.use_bias)

    return model_lib.BuildNetworkResult(logits=logits, extra_info=None)

  def loss_function(self, inputs, build_network_result):
    """Computes the ctc loss for the current batch of predictions.

    Args:
      inputs: the input list of the model.
      build_network_result: a BuildNetworkResult returned by build_network().

    Returns:
      The loss tensor of the model.
    """
    logits = build_network_result.logits
    actual_time_steps = inputs[2]
    probs = tf.nn.softmax(logits)
    ctc_time_steps = tf.shape(probs)[1]
    # Scale each example's true length to the (downsampled) CTC time axis.
    ctc_input_length = tf.to_float(
        tf.multiply(actual_time_steps, ctc_time_steps))
    ctc_input_length = tf.to_int32(
        tf.floordiv(ctc_input_length, tf.to_float(self.max_time_steps)))

    label_length = inputs[3]
    label_length = tf.to_int32(tf.squeeze(label_length))
    ctc_input_length = tf.to_int32(tf.squeeze(ctc_input_length))

    labels = inputs[1]
    sparse_labels = tf.to_int32(
        tf.keras.backend.ctc_label_dense_to_sparse(labels, label_length))
    # ctc_loss expects time-major log-probabilities.
    y_pred = tf.log(
        tf.transpose(probs, perm=[1, 0, 2]) + tf.keras.backend.epsilon())

    losses = tf.expand_dims(
        tf.nn.ctc_loss(
            labels=sparse_labels,
            inputs=y_pred,
            sequence_length=ctc_input_length,
            ignore_longer_outputs_than_inputs=True),
        axis=1)
    loss = tf.reduce_mean(losses)
    return loss

  # Names under which the eval ops are exported to postprocess().
  PROBABILITY_TENSOR = 'deepspeech2_prob'
  LABEL_TENSOR = 'deepspeech2_label'

  def accuracy_function(self, inputs, logits):
    """Returns the ops to evaluate the model performance."""
    # Get probabilities of each predicted class
    probs = tf.nn.softmax(logits)
    assert probs.shape.as_list()[0] == self.batch_size
    return {
        (constants.UNREDUCED_ACCURACY_OP_PREFIX + self.PROBABILITY_TENSOR):
            probs,
        (constants.UNREDUCED_ACCURACY_OP_PREFIX + self.LABEL_TENSOR):
            inputs[1],
    }

  def postprocess(self, results):
    """Postprocess results returned from model in Python."""
    probs = results[self.PROBABILITY_TENSOR]

    total_wer, total_cer = 0, 0
    speech_labels = " abcdefghijklmnopqrstuvwxyz'-"
    greedy_decoder = DeepSpeechDecoder(speech_labels)

    # Evaluate the performance using WER (Word Error Rate) and CER (Character
    # Error Rate) as metrics.
    targets = results[self.LABEL_TENSOR]  # The ground truth transcript
    for i in range(self.batch_size):
      # Decode string.
      predicted_str = greedy_decoder.decode_logits(probs[i])
      expected_str = greedy_decoder.decode(targets[i])
      # Compute CER.
      total_cer += (greedy_decoder.cer(predicted_str, expected_str) /
                    len(expected_str))
      # Compute WER.
      total_wer += (greedy_decoder.wer(predicted_str, expected_str) /
                    len(expected_str.split()))

    # Get mean value
    total_cer /= self.batch_size
    total_wer /= self.batch_size

    log_fn('total CER: {:f}; total WER: {:f}; total example: {:d}.'.format(
        total_cer, total_wer, self.batch_size))
    # TODO(laigd): get rid of top_N_accuracy bindings in benchmark_cnn.py
    return {'top_1_accuracy': 0., 'top_5_accuracy': 0.}
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/models/experimental/official_ncf_model.py
0 → 100644
View file @
f0d87682
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Wrap the official recommendation model in a tf_cnn_benchmarks Model.
This allows the recommendation NCF model to be used in tf_cnn_benchmarks.
Currently, the implementation is fairly hacky, because tf_cnn_benchmarks is
intended to be used only with CNNs.
Only synthetic data with 1 GPU is currently supported.
"""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
tensorflow.compat.v1
as
tf
from
models
import
model
# Obtained by running the official NCF model with the following command:
#     python ncf_main.py --dataset ml-20m
# and printing the number of users and items here:
# https://github.com/tensorflow/models/blob/d089975f630a8a01be63e45ef08a31be14bb96b4/official/recommendation/data_preprocessing.py#L68
# Distinct user and item counts for the MovieLens 20M dataset.
_NUM_USERS_20M = 138493
_NUM_ITEMS_20M = 26744
# TODO(reedwm): Support multi-GPU. Currently keras layers, which this model
# uses, ignore variable_scopes, which we rely on for multi-GPU support.
# TODO(reedwm): Support real data. This will require a significant refactor.
# TODO(reedwm): All-reduce IndexedSlices more effectively.
# TODO(reedwm): Support the 1M variant of this model.
class NcfModel(model.Model):
  r"""A model.Model wrapper around the official NCF recommendation model.

  To do an NCF run with synthetic data that roughly matches what the official
  model does, run:

  python tf_cnn_benchmarks.py --optimizer=adam --model=ncf --batch_size=65536 \
      --weight_decay=0 --sparse_to_dense_grads
  """

  def __init__(self, params=None):
    super(NcfModel, self).__init__(
        'official_ncf', batch_size=2048, learning_rate=0.0005,
        fp16_loss_scale=128, params=params)
    if self.fp16_vars:
      raise ValueError('NCF model only supports float32 variables for now.')

  def build_network(self, inputs, phase_train=True, nclass=1001):
    """Builds the NCF graph via the official neumf_model implementation.

    Args:
      inputs: A 3-element list of (users, items, labels) tensors; the labels
        element is not used here (it is consumed by loss_function).
      phase_train: Unused by this model.
      nclass: Unused; present for interface compatibility with other models.

    Returns:
      A model.BuildNetworkResult whose logits are always float32.

    Raises:
      ImportError: If the official models repo is not on the PYTHONPATH.
    """
    try:
      from official.recommendation import neumf_model  # pylint: disable=g-import-not-at-top
    except ImportError as e:
      # Fix: Python 3 exceptions have no `.message` attribute, so the original
      # `e.message` access raised AttributeError instead of the intended
      # friendly ImportError. str(e) works on both Python 2 and 3.
      if 'neumf_model' not in str(e):
        raise
      raise ImportError('To use the experimental NCF model, you must clone the '
                        'repo https://github.com/tensorflow/models and add '
                        'tensorflow/models to the PYTHONPATH.')
    del nclass
    users, items, _ = inputs
    params = {
        'num_users': _NUM_USERS_20M,
        'num_items': _NUM_ITEMS_20M,
        'model_layers': (256, 256, 128, 64),
        'mf_dim': 64,
        'mf_regularization': 0,
        'mlp_reg_layers': (0, 0, 0, 0),
        'use_tpu': False
    }
    user_input = tf.keras.layers.Input(tensor=users, name='user_input')
    item_input = tf.keras.layers.Input(tensor=items, name='item_input')
    if self.data_type == tf.float32:
      keras_model = neumf_model.construct_model(user_input, item_input, params)
      logits = keras_model.output
    else:
      assert self.data_type == tf.float16
      old_floatx = tf.keras.backend.floatx()
      try:
        tf.keras.backend.set_floatx('float16')
        # We cannot rely on the variable_scope's fp16 custom getter here,
        # because the NCF model uses keras layers, which ignore variable
        # scopes. So we use a variable_creator_scope instead.
        with tf.variable_creator_scope(_fp16_variable_creator):
          keras_model = neumf_model.construct_model(user_input, item_input,
                                                    params)
        logits = tf.cast(keras_model.output, tf.float32)
      finally:
        # Always restore the global floatx, even if model construction fails.
        tf.keras.backend.set_floatx(old_floatx)
    return model.BuildNetworkResult(logits=logits, extra_info=None)

  def loss_function(self, inputs, build_network_result):
    """Returns the cross-entropy loss; inputs[2] holds the 0/1 labels."""
    logits = build_network_result.logits

    # Softmax with the first column of ones is equivalent to sigmoid.
    # TODO(reedwm): Actually, the first column should be zeros to be equivalent
    # to sigmoid. But, we keep it at ones to match the official models.
    logits = tf.concat([tf.ones(logits.shape, dtype=logits.dtype), logits],
                       axis=1)
    return tf.losses.sparse_softmax_cross_entropy(
        labels=inputs[2], logits=logits)

  def get_synthetic_inputs(self, input_name, nclass):
    """Returns the ops to generate synthetic inputs and labels."""
    def users_init_val():
      return tf.random_uniform((self.batch_size, 1), minval=0,
                               maxval=_NUM_USERS_20M, dtype=tf.int32)
    users = tf.Variable(users_init_val, dtype=tf.int32, trainable=False,
                        collections=[tf.GraphKeys.LOCAL_VARIABLES],
                        name='synthetic_users')

    def items_init_val():
      return tf.random_uniform((self.batch_size, 1), minval=0,
                               maxval=_NUM_ITEMS_20M, dtype=tf.int32)
    items = tf.Variable(items_init_val, dtype=tf.int32, trainable=False,
                        collections=[tf.GraphKeys.LOCAL_VARIABLES],
                        name='synthetic_items')

    def labels_init_val():
      return tf.random_uniform((self.batch_size,), minval=0, maxval=2,
                               dtype=tf.int32)
    labels = tf.Variable(labels_init_val, dtype=tf.int32, trainable=False,
                         collections=[tf.GraphKeys.LOCAL_VARIABLES],
                         name='synthetic_labels')
    return [users, items, labels]

  def get_input_shapes(self, subset):
    """Returns the shapes of the (users, items, labels) inputs."""
    del subset
    return [[self.batch_size, 1], [self.batch_size, 1], [self.batch_size]]

  def get_input_data_types(self, subset):
    """Returns the dtypes of the (users, items, labels) inputs."""
    del subset
    # Fix: was `self.int32`, which is not an attribute of the model and would
    # raise AttributeError when called; all three inputs are tf.int32, matching
    # the dtypes used in get_synthetic_inputs.
    return [tf.int32, tf.int32, tf.int32]
def _fp16_variable_creator(next_creator, **kwargs):
  """Variable creator that backs fp16 variables with fp32 storage.

  When an fp16 variable is requested, the underlying variable is created in
  fp32 (with the initial value cast accordingly) and an fp16-cast view of it
  is returned. All other requests pass through unchanged.
  """
  requested_dtype = kwargs.get('dtype', None)
  init_value = kwargs.get('initial_value', None)
  # Infer the dtype from a concrete (non-callable) initial value when no
  # explicit dtype was requested.
  if requested_dtype is None and init_value is not None:
    if not callable(init_value):
      requested_dtype = init_value.dtype
  # Non-fp16 requests: delegate untouched.
  if requested_dtype != tf.float16:
    return next_creator(**kwargs)
  # fp16 request: create the variable in fp32, casting the initializer.
  if callable(init_value):
    fp32_init = lambda: tf.cast(init_value(), tf.float32)
  else:
    fp32_init = tf.cast(init_value, tf.float32)
  kwargs['dtype'] = tf.float32
  kwargs['initial_value'] = fp32_init
  fp32_var = next_creator(**kwargs)
  return tf.cast(fp32_var, dtype=tf.float16)
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/models/googlenet_model.py
0 → 100644
View file @
f0d87682
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Googlenet model configuration.
References:
Szegedy, Christian, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed,
Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, and Andrew Rabinovich
Going deeper with convolutions
arXiv preprint arXiv:1409.4842 (2014)
"""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
models
import
model
class GooglenetModel(model.CNNModel):
  """GoogLeNet (Inception v1), Szegedy et al., arXiv:1409.4842."""

  def __init__(self, params=None):
    super(GooglenetModel, self).__init__('googlenet', 224, 32, 0.005,
                                         params=params)

  def add_inference(self, cnn):
    def inception_block(net, num_1x1, num_3x3_reduce, num_3x3,
                        num_5x5_reduce, num_5x5, num_pool_proj):
      # Four parallel branches, merged by net.inception_module: a 1x1 conv,
      # a 1x1->3x3 stack, a 1x1->5x5 stack, and a max-pool with projection.
      branches = [
          [('conv', num_1x1, 1, 1)],
          [('conv', num_3x3_reduce, 1, 1), ('conv', num_3x3, 3, 3)],
          [('conv', num_5x5_reduce, 1, 1), ('conv', num_5x5, 5, 5)],
          [('mpool', 3, 3, 1, 1, 'SAME'), ('conv', num_pool_proj, 1, 1)],
      ]
      net.inception_module('incept_v1', branches)

    # Stem convolutions and pooling.
    cnn.conv(64, 7, 7, 2, 2)
    cnn.mpool(3, 3, 2, 2, mode='SAME')
    cnn.conv(64, 1, 1)
    cnn.conv(192, 3, 3)
    cnn.mpool(3, 3, 2, 2, mode='SAME')
    # First inception group.
    for spec in ((64, 96, 128, 16, 32, 32),
                 (128, 128, 192, 32, 96, 64)):
      inception_block(cnn, *spec)
    cnn.mpool(3, 3, 2, 2, mode='SAME')
    # Second inception group.
    for spec in ((192, 96, 208, 16, 48, 64),
                 (160, 112, 224, 24, 64, 64),
                 (128, 128, 256, 24, 64, 64),
                 (112, 144, 288, 32, 64, 64),
                 (256, 160, 320, 32, 128, 128)):
      inception_block(cnn, *spec)
    cnn.mpool(3, 3, 2, 2, mode='SAME')
    # Final inception group.
    for spec in ((256, 160, 320, 32, 128, 128),
                 (384, 192, 384, 48, 128, 128)):
      inception_block(cnn, *spec)
    # Global average pooling and flatten to a 1024-wide feature vector.
    cnn.apool(7, 7, 1, 1, mode='VALID')
    cnn.reshape([-1, 1024])
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/models/inception_model.py
0 → 100644
View file @
f0d87682
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Inception model configuration.
Includes multiple models: inception3, inception4, inception-resnet2.
References:
Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi
Inception-v4, Inception-ResNet and the Impact of Residual Connections on
Learning
Christian Szegedy, Wei Liu, Yangqing Jia, Pierre Sermanet, Scott Reed,
Dragomir Anguelov, Dumitru Erhan, Vincent Vanhoucke, Andrew Rabinovich
Going Deeper with Convolutions
http://arxiv.org/pdf/1409.4842v1.pdf
Christian Szegedy, Vincent Vanhoucke, Sergey Ioffe, Jonathon Shlens,
Zbigniew Wojna
Rethinking the Inception Architecture for Computer Vision
arXiv preprint arXiv:1512.00567 (2015)
Inception v3 model: http://arxiv.org/abs/1512.00567
Inception v4 and Resnet V2 architectures: http://arxiv.org/abs/1602.07261
"""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
six.moves
import
xrange
# pylint: disable=redefined-builtin
from
models
import
model
class Inceptionv3Model(model.CNNModel):
  """InceptionV3 (http://arxiv.org/abs/1512.00567).

  When constructed with auxiliary=True, an auxiliary classifier head is
  attached after the last 17x17 inception block.
  """

  def __init__(self, auxiliary=False, params=None):
    # Remembered here; consulted by add_inference to decide whether to build
    # the auxiliary head.
    self._auxiliary = auxiliary
    # Positional args: model name, image size (299), batch size (32),
    # learning rate (0.005) -- presumably matching model.CNNModel's
    # constructor ordering; confirm there.
    super(Inceptionv3Model, self).__init__('inception3', 299, 32, 0.005,
                                           params=params)

  def add_inference(self, cnn):
    """Appends the InceptionV3 tower to `cnn`'s current top layer.

    Each helper below appends one inception module; every entry of `cols` is
    one parallel branch of layer specs that cnn.inception_module merges.
    """
    def inception_v3_a(cnn, n):
      # Four-branch module; `n` is the width of the 1x1 conv following the
      # average-pool branch.
      cols = [[('conv', 64, 1, 1)], [('conv', 48, 1, 1), ('conv', 64, 5, 5)],
              [('conv', 64, 1, 1), ('conv', 96, 3, 3), ('conv', 96, 3, 3)],
              [('apool', 3, 3, 1, 1, 'SAME'), ('conv', n, 1, 1)]]
      cnn.inception_module('incept_v3_a', cols)

    def inception_v3_b(cnn):
      # Spatial-reduction module: every branch ends with stride (2, 2) and
      # 'VALID' padding.
      cols = [[('conv', 384, 3, 3, 2, 2, 'VALID')],
              [('conv', 64, 1, 1),
               ('conv', 96, 3, 3),
               ('conv', 96, 3, 3, 2, 2, 'VALID')],
              [('mpool', 3, 3, 2, 2, 'VALID')]]
      cnn.inception_module('incept_v3_b', cols)

    def inception_v3_c(cnn, n):
      # Module built from 1x7 / 7x1 factorized convolutions; `n` is the width
      # of the intermediate factorized convs.
      cols = [[('conv', 192, 1, 1)],
              [('conv', n, 1, 1), ('conv', n, 1, 7), ('conv', 192, 7, 1)],
              [('conv', n, 1, 1), ('conv', n, 7, 1), ('conv', n, 1, 7),
               ('conv', n, 7, 1), ('conv', 192, 1, 7)],
              [('apool', 3, 3, 1, 1, 'SAME'), ('conv', 192, 1, 1)]]
      cnn.inception_module('incept_v3_c', cols)

    def inception_v3_d(cnn):
      # Spatial-reduction module (stride-2 'VALID' endings on all branches).
      cols = [[('conv', 192, 1, 1), ('conv', 320, 3, 3, 2, 2, 'VALID')],
              [('conv', 192, 1, 1), ('conv', 192, 1, 7), ('conv', 192, 7, 1),
               ('conv', 192, 3, 3, 2, 2, 'VALID')],
              [('mpool', 3, 3, 2, 2, 'VALID')]]
      cnn.inception_module('incept_v3_d', cols)

    def inception_v3_e(cnn, pooltype):
      # Expanded-filter-bank module. NOTE(review): ('share',) entries
      # presumably reuse layers from the preceding branch -- confirm against
      # cnn.inception_module in convnet_builder. `pooltype` selects max vs
      # average pooling for the pooling branch.
      cols = [[('conv', 320, 1, 1)], [('conv', 384, 1, 1), ('conv', 384, 1, 3)],
              [('share',), ('conv', 384, 3, 1)],
              [('conv', 448, 1, 1), ('conv', 384, 3, 3), ('conv', 384, 1, 3)],
              [('share',), ('share',), ('conv', 384, 3, 1)],
              [('mpool' if pooltype == 'max' else 'apool', 3, 3, 1, 1, 'SAME'),
               ('conv', 192, 1, 1)]]
      cnn.inception_module('incept_v3_e', cols)

    def incept_v3_aux(cnn):
      # Auxiliary classifier head, branched off the current top layer. Only
      # one auxiliary head may exist (asserted below).
      assert cnn.aux_top_layer is None
      cnn.aux_top_layer = cnn.top_layer
      cnn.aux_top_size = cnn.top_size
      with cnn.switch_to_aux_top_layer():
        cnn.apool(5, 5, 3, 3, mode='VALID')
        cnn.conv(128, 1, 1, mode='SAME')
        cnn.conv(768, 5, 5, mode='VALID', stddev=0.01)
        cnn.reshape([-1, 768])

    cnn.use_batch_norm = True
    # Stem: the trailing comments give the input spatial size of each layer.
    cnn.conv(32, 3, 3, 2, 2, mode='VALID')  # 299 x 299 x 3
    cnn.conv(32, 3, 3, 1, 1, mode='VALID')  # 149 x 149 x 32
    cnn.conv(64, 3, 3, 1, 1, mode='SAME')  # 147 x 147 x 64
    cnn.mpool(3, 3, 2, 2, mode='VALID')  # 147 x 147 x 64
    cnn.conv(80, 1, 1, 1, 1, mode='VALID')  # 73 x 73 x 80
    cnn.conv(192, 3, 3, 1, 1, mode='VALID')  # 71 x 71 x 192
    cnn.mpool(3, 3, 2, 2, 'VALID')  # 35 x 35 x 192
    inception_v3_a(cnn, 32)  # 35 x 35 x 256 mixed.
    inception_v3_a(cnn, 64)  # 35 x 35 x 288 mixed_1.
    inception_v3_a(cnn, 64)  # 35 x 35 x 288 mixed_2
    inception_v3_b(cnn)  # 17 x 17 x 768 mixed_3
    inception_v3_c(cnn, 128)  # 17 x 17 x 768 mixed_4
    inception_v3_c(cnn, 160)  # 17 x 17 x 768 mixed_5
    inception_v3_c(cnn, 160)  # 17 x 17 x 768 mixed_6
    inception_v3_c(cnn, 192)  # 17 x 17 x 768 mixed_7
    if self._auxiliary:
      incept_v3_aux(cnn)  # Auxillary Head logits
    inception_v3_d(cnn)  # 17 x 17 x 1280 mixed_8
    inception_v3_e(cnn, 'avg')  # 8 x 8 x 2048 mixed_9
    inception_v3_e(cnn, 'max')  # 8 x 8 x 2048 mixed_10
    cnn.apool(8, 8, 1, 1, 'VALID')  # 8 x 8 x 2048
    cnn.reshape([-1, 2048])  # 1 x 1 x 2048
# Stem functions
def inception_v4_sa(cnn):
  """Inception v4 stem block A: parallel 3x3 max-pool and strided 3x3 conv."""
  pool_branch = [('mpool', 3, 3, 2, 2, 'VALID')]
  conv_branch = [('conv', 96, 3, 3, 2, 2, 'VALID')]
  cnn.inception_module('incept_v4_sa', [pool_branch, conv_branch])
def inception_v4_sb(cnn):
  """Inception v4 stem block B: short conv branch plus a 7x1/1x7 factorized branch."""
  short_branch = [('conv', 64, 1, 1), ('conv', 96, 3, 3, 1, 1, 'VALID')]
  factorized_branch = [('conv', 64, 1, 1), ('conv', 64, 7, 1),
                       ('conv', 64, 1, 7), ('conv', 96, 3, 3, 1, 1, 'VALID')]
  cnn.inception_module('incept_v4_sb', [short_branch, factorized_branch])
def inception_v4_sc(cnn):
  """Inception v4 stem block C: strided 3x3 conv alongside a 3x3 max-pool."""
  branches = [
      [('conv', 192, 3, 3, 2, 2, 'VALID')],
      [('mpool', 3, 3, 2, 2, 'VALID')],
  ]
  cnn.inception_module('incept_v4_sc', branches)
# Reduction functions
def inception_v4_ra(cnn, k, l, m, n):
  """Inception v4 reduction block A with branch widths k, l, m and n."""
  pool_branch = [('mpool', 3, 3, 2, 2, 'VALID')]
  single_conv_branch = [('conv', n, 3, 3, 2, 2, 'VALID')]
  stacked_conv_branch = [('conv', k, 1, 1), ('conv', l, 3, 3),
                         ('conv', m, 3, 3, 2, 2, 'VALID')]
  cnn.inception_module(
      'incept_v4_ra', [pool_branch, single_conv_branch, stacked_conv_branch])
def inception_v4_rb(cnn):
  """Inception v4 reduction block B."""
  branches = []
  branches.append([('mpool', 3, 3, 2, 2, 'VALID')])
  branches.append([('conv', 192, 1, 1), ('conv', 192, 3, 3, 2, 2, 'VALID')])
  branches.append([('conv', 256, 1, 1), ('conv', 256, 1, 7),
                   ('conv', 320, 7, 1), ('conv', 320, 3, 3, 2, 2, 'VALID')])
  cnn.inception_module('incept_v4_rb', branches)
class Inceptionv4Model(model.CNNModel):
  """Inceptionv4 (http://arxiv.org/abs/1602.07261)."""

  def __init__(self, params=None):
    # Positional args: model name, image size (299), batch size (32),
    # learning rate (0.005) -- presumably matching model.CNNModel's
    # constructor ordering; confirm there.
    super(Inceptionv4Model, self).__init__('inception4', 299, 32, 0.005,
                                           params=params)

  def add_inference(self, cnn):
    """Appends the Inception-v4 tower to `cnn`'s current top layer.

    Each helper below appends one inception module; every entry of `cols` is
    one parallel branch of layer specs that cnn.inception_module merges.
    """
    def inception_v4_a(cnn):
      # 4-branch "A" block.
      cols = [[('apool', 3, 3, 1, 1, 'SAME'), ('conv', 96, 1, 1)],
              [('conv', 96, 1, 1)], [('conv', 64, 1, 1), ('conv', 96, 3, 3)],
              [('conv', 64, 1, 1), ('conv', 96, 3, 3), ('conv', 96, 3, 3)]]
      cnn.inception_module('incept_v4_a', cols)

    def inception_v4_b(cnn):
      # 4-branch "B" block with 1x7 / 7x1 factorized convolutions.
      cols = [[('apool', 3, 3, 1, 1, 'SAME'), ('conv', 128, 1, 1)],
              [('conv', 384, 1, 1)],
              [('conv', 192, 1, 1), ('conv', 224, 1, 7), ('conv', 256, 7, 1)],
              [('conv', 192, 1, 1), ('conv', 192, 1, 7), ('conv', 224, 7, 1),
               ('conv', 224, 1, 7), ('conv', 256, 7, 1)]]
      cnn.inception_module('incept_v4_b', cols)

    def inception_v4_c(cnn):
      # "C" block. NOTE(review): ('share',) entries presumably reuse layers
      # from the preceding branch -- confirm against cnn.inception_module in
      # convnet_builder.
      cols = [[('apool', 3, 3, 1, 1, 'SAME'), ('conv', 256, 1, 1)],
              [('conv', 256, 1, 1)], [('conv', 384, 1, 1), ('conv', 256, 1, 3)],
              [('share',), ('conv', 256, 3, 1)],
              [('conv', 384, 1, 1), ('conv', 448, 1, 3), ('conv', 512, 3, 1),
               ('conv', 256, 3, 1)],
              [('share',), ('share',), ('share',), ('conv', 256, 1, 3)]]
      cnn.inception_module('incept_v4_c', cols)

    cnn.use_batch_norm = True
    # Stem convolutions, then the three module-level stem blocks.
    cnn.conv(32, 3, 3, 2, 2, mode='VALID')
    cnn.conv(32, 3, 3, 1, 1, mode='VALID')
    cnn.conv(64, 3, 3)
    inception_v4_sa(cnn)
    inception_v4_sb(cnn)
    inception_v4_sc(cnn)
    # 4 x A blocks, reduction, 7 x B blocks, reduction, 3 x C blocks.
    for _ in xrange(4):
      inception_v4_a(cnn)
    inception_v4_ra(cnn, 192, 224, 256, 384)
    for _ in xrange(7):
      inception_v4_b(cnn)
    inception_v4_rb(cnn)
    for _ in xrange(3):
      inception_v4_c(cnn)
    # Global spatial average, then dropout before the classifier.
    cnn.spatial_mean()
    cnn.dropout(0.8)
TensorFlow/Accuracy_Validation/benchmarks-master/scripts/tf_cnn_benchmarks/models/lenet_model.py
0 → 100644
View file @
f0d87682
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Lenet model configuration.
References:
LeCun, Yann, Leon Bottou, Yoshua Bengio, and Patrick Haffner
Gradient-based learning applied to document recognition
Proceedings of the IEEE (1998)
"""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
from
models
import
model
class Lenet5Model(model.CNNModel):
  """Lenet5 (LeCun et al., "Gradient-based learning applied to document
  recognition", Proceedings of the IEEE, 1998).
  """

  def __init__(self, params=None):
    # Positional args: model name, image size (28), batch size (32),
    # learning rate (0.005) -- presumably matching model.CNNModel's
    # constructor ordering; confirm there.
    super(Lenet5Model, self).__init__('lenet5', 28, 32, 0.005, params=params)

  def add_inference(self, cnn):
    """Appends the LeNet-5 layers to `cnn`'s current top layer."""
    # Note: This matches TF's MNIST tutorial model
    cnn.conv(32, 5, 5)
    cnn.mpool(2, 2)
    cnn.conv(64, 5, 5)
    cnn.mpool(2, 2)
    # Two rounds of 2x2 pooling take the 28x28 input down to 7x7 maps with
    # 64 channels, hence the 64 * 7 * 7 flatten below.
    cnn.reshape([-1, 64 * 7 * 7])
    cnn.affine(512)
Prev
1
2
3
4
5
6
7
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment