Internal change

PiperOrigin-RevId: 326286926

Internal change
PiperOrigin-RevId: 326286926
94561082 · Hongkun Yu · A. Unique TensorFlower · 4ad4a3cc · 94561082 · 94561082
Commit 94561082 authored Aug 12, 2020 by Hongkun Yu Committed by A. Unique TensorFlower Aug 12, 2020
20 changed files
--- a/official/benchmark/bert_benchmark.py
+++ b/official/benchmark/bert_benchmark.py
@@ -25,6 +25,7 @@ import os
 import time
 # pylint: disable=g-bad-import-order
 from absl import flags
 from absl.testing import flagsaver
 import tensorflow as tf

--- a/official/benchmark/bert_benchmark_utils.py
+++ b/official/benchmark/bert_benchmark_utils.py
@@ -21,6 +21,7 @@ from __future__ import print_function
 import time
 # pylint: disable=g-bad-import-order
 import numpy as np
 from absl import flags
 import tensorflow as tf

--- a/official/benchmark/bert_pretrain_benchmark.py
+++ b/official/benchmark/bert_pretrain_benchmark.py
@@ -141,8 +141,8 @@ class BertPretrainAccuracyBenchmark(bert_benchmark_utils.BertBenchmarkBase):
    # writing summary to gcs may fail and summaries are not needed for this
    # accuracy benchmark test.
    FLAGS.train_summary_interval = -1
-    self._run_and_report_benchmark(summary_path=summary_path,
+    self._run_and_report_benchmark(
-                                   report_accuracy=True)
+        summary_path=summary_path, report_accuracy=True)
  @owner_utils.Owner('tf-model-garden')
  def benchmark_perf_2x2_tpu_bf16_seq128_10k_steps(self):
@@ -220,8 +220,8 @@ class BertPretrainAccuracyBenchmark(bert_benchmark_utils.BertBenchmarkBase):
    summary_path = os.path.join(FLAGS.model_dir,
                                'summaries/training_summary.txt')
    # Disable accuracy check.
-    self._run_and_report_benchmark(summary_path=summary_path,
+    self._run_and_report_benchmark(
-                                   report_accuracy=False)
+        summary_path=summary_path, report_accuracy=False)
 if __name__ == '__main__':

--- a/official/benchmark/bert_squad_benchmark.py
+++ b/official/benchmark/bert_squad_benchmark.py
@@ -23,6 +23,7 @@ import os
 import time
 # pylint: disable=g-bad-import-order
 from absl import flags
 from absl import logging
 from absl.testing import flagsaver
@@ -75,7 +76,7 @@ class BertSquadBenchmarkBase(benchmark_utils.BertBenchmarkBase):
    Args:
      ds_type: String, the distribution strategy type to be used. Can be
-      'mirrored', 'multi_worker_mirrored', 'tpu' and 'off'.
+        'mirrored', 'multi_worker_mirrored', 'tpu' and 'off'.
    Returns:
      A `tf.distribute.DistributionStrategy` object.
@@ -123,8 +124,8 @@ class BertSquadBenchmarkBase(benchmark_utils.BertBenchmarkBase):
    if input_meta_data.get('version_2_with_negative', False):
      logging.error('In memory evaluation result for SQuAD v2 is not accurate')
-    eval_metrics = run_squad.eval_squad(strategy=strategy,
+    eval_metrics = run_squad.eval_squad(
-                                        input_meta_data=input_meta_data)
+        strategy=strategy, input_meta_data=input_meta_data)
    # Use F1 score as reported evaluation metric.
    self.eval_metrics = eval_metrics['final_f1']
@@ -152,9 +153,7 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
    FLAGS.steps_per_loop = 100
  @benchmark_wrappers.enable_runtime_flags
-  def _run_and_report_benchmark(self,
+  def _run_and_report_benchmark(self, run_eagerly=False, ds_type='mirrored'):
-                                run_eagerly=False,
-                                ds_type='mirrored'):
    """Runs the benchmark and reports various metrics."""
    if FLAGS.train_batch_size <= 4 or run_eagerly:
      FLAGS.input_meta_data_path = SQUAD_MEDIUM_INPUT_META_DATA_PATH
@@ -367,9 +366,7 @@ class BertSquadAccuracy(BertSquadBenchmarkBase):
    FLAGS.steps_per_loop = 100
  @benchmark_wrappers.enable_runtime_flags
-  def _run_and_report_benchmark(self,
+  def _run_and_report_benchmark(self, run_eagerly=False, ds_type='mirrored'):
-                                run_eagerly=False,
-                                ds_type='mirrored'):
    """Runs the benchmark and reports various metrics."""
    start_time_sec = time.time()
    self._train_squad(run_eagerly=run_eagerly, ds_type=ds_type)
@@ -464,13 +461,10 @@ class BertSquadMultiWorkerAccuracy(BertSquadBenchmarkBase):
    FLAGS.steps_per_loop = 100
  @benchmark_wrappers.enable_runtime_flags
-  def _run_and_report_benchmark(self,
+  def _run_and_report_benchmark(self, use_ds=True, run_eagerly=False):
-                                use_ds=True,
-                                run_eagerly=False):
    """Runs the benchmark and reports various metrics."""
    start_time_sec = time.time()
-    self._train_squad(run_eagerly=run_eagerly,
+    self._train_squad(run_eagerly=run_eagerly, ds_type='multi_worker_mirrored')
-                      ds_type='multi_worker_mirrored')
    self._evaluate_squad(ds_type='multi_worker_mirrored')
    wall_time_sec = time.time() - start_time_sec
@@ -538,17 +532,14 @@ class BertSquadMultiWorkerBenchmark(BertSquadBenchmarkBase):
    FLAGS.steps_per_loop = 100
  @benchmark_wrappers.enable_runtime_flags
-  def _run_and_report_benchmark(self,
+  def _run_and_report_benchmark(self, use_ds=True, run_eagerly=False):
-                                use_ds=True,
-                                run_eagerly=False):
    """Runs the benchmark and reports various metrics."""
    if FLAGS.train_batch_size <= 4 * 8:
      FLAGS.input_meta_data_path = SQUAD_LONG_INPUT_META_DATA_PATH
    else:
      FLAGS.input_meta_data_path = SQUAD_FULL_INPUT_META_DATA_PATH
    start_time_sec = time.time()
-    self._train_squad(run_eagerly=run_eagerly,
+    self._train_squad(run_eagerly=run_eagerly, ds_type='multi_worker_mirrored')
-                      ds_type='multi_worker_mirrored')
    wall_time_sec = time.time() - start_time_sec
    summary = self._read_training_summary_from_file()

--- a/official/benchmark/keras_benchmark.py
+++ b/official/benchmark/keras_benchmark.py
@@ -61,12 +61,16 @@ class KerasBenchmark(PerfZeroBenchmark):
    metrics = []
    if 'accuracy_top_1' in stats:
-      metrics.append({'name': 'accuracy_top_1',
+      metrics.append({
-                      'value': stats['accuracy_top_1'],
+          'name': 'accuracy_top_1',
-                      'min_value': top_1_min,
+          'value': stats['accuracy_top_1'],
-                      'max_value': top_1_max})
+          'min_value': top_1_min,
-      metrics.append({'name': 'top_1_train_accuracy',
+          'max_value': top_1_max
-                      'value': stats['training_accuracy_top_1']})
+      })
+      metrics.append({
+          'name': 'top_1_train_accuracy',
+          'value': stats['training_accuracy_top_1']
+      })
    if (warmup and 'step_timestamp_log' in stats and
        len(stats['step_timestamp_log']) > warmup):
@@ -77,12 +81,13 @@ class KerasBenchmark(PerfZeroBenchmark):
      num_examples = (
          total_batch_size * log_steps * (len(time_log) - warmup - 1))
      examples_per_sec = num_examples / elapsed
-      metrics.append({'name': 'exp_per_second',
+      metrics.append({'name': 'exp_per_second', 'value': examples_per_sec})
-                      'value': examples_per_sec})
    if 'avg_exp_per_second' in stats:
-      metrics.append({'name': 'avg_exp_per_second',
+      metrics.append({
-                      'value': stats['avg_exp_per_second']})
+          'name': 'avg_exp_per_second',
+          'value': stats['avg_exp_per_second']
+      })
    if start_time_sec and 'step_timestamp_log' in stats:
      time_log = stats['step_timestamp_log']

--- a/official/benchmark/keras_cifar_benchmark.py
+++ b/official/benchmark/keras_cifar_benchmark.py
@@ -19,6 +19,7 @@ from __future__ import print_function
 import os
 import time
 from absl import flags
 import tensorflow as tf  # pylint: disable=g-bad-import-order
@@ -30,7 +31,7 @@ MIN_TOP_1_ACCURACY = 0.929
 MAX_TOP_1_ACCURACY = 0.938
 FLAGS = flags.FLAGS
-CIFAR_DATA_DIR_NAME = 'cifar-10-batches-bin'
+CIFAR_DATA_DIR_NAME = '/placer/prod/home/distbelief/cifar10-orig'
 class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark):
@@ -43,8 +44,8 @@ class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark):
      output_dir: directory where to output e.g. log files
      root_data_dir: directory under which to look for dataset
      **kwargs: arbitrary named arguments. This is needed to make the
-                constructor forward compatible in case PerfZero provides more
+        constructor forward compatible in case PerfZero provides more named
-                named arguments before updating the constructor.
+        arguments before updating the constructor.
    """
    self.data_dir = os.path.join(root_data_dir, CIFAR_DATA_DIR_NAME)

--- a/official/benchmark/models/cifar_preprocessing.py
+++ b/official/benchmark/models/cifar_preprocessing.py
@@ -19,6 +19,7 @@ from __future__ import division
 from __future__ import print_function
 import os
 from absl import logging
 import tensorflow as tf
@@ -49,8 +50,8 @@ def parse_record(raw_record, is_training, dtype):
  This method converts the label to one hot to fit the loss function.
  Args:
-    raw_record: scalar Tensor tf.string containing a serialized
+    raw_record: scalar Tensor tf.string containing a serialized Example protocol
-      Example protocol buffer.
+      buffer.
    is_training: A boolean denoting whether the input is for training.
    dtype: Data type to use for input images.
@@ -83,8 +84,7 @@ def preprocess_image(image, is_training):
  """Preprocess a single image of layout [height, width, depth]."""
  if is_training:
    # Resize the image to add four extra pixels on each side.
-    image = tf.image.resize_with_crop_or_pad(
+    image = tf.image.resize_with_crop_or_pad(image, HEIGHT + 8, WIDTH + 8)
-        image, HEIGHT + 8, WIDTH + 8)
    # Randomly crop a [HEIGHT, WIDTH] section of the image.
    image = tf.image.random_crop(image, [HEIGHT, WIDTH, NUM_CHANNELS])
@@ -155,5 +155,4 @@ def input_fn(is_training,
      parse_record_fn=parse_record_fn,
      dtype=dtype,
      datasets_num_private_threads=datasets_num_private_threads,
-      drop_remainder=drop_remainder
+      drop_remainder=drop_remainder)
-  )
--- a/official/benchmark/models/resnet_cifar_main.py
+++ b/official/benchmark/models/resnet_cifar_main.py
@@ -18,6 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
+# Import libraries
 from absl import app
 from absl import flags
 from absl import logging

--- a/official/benchmark/models/resnet_cifar_model.py
+++ b/official/benchmark/models/resnet_cifar_model.py
@@ -24,13 +24,13 @@ from __future__ import division
 from __future__ import print_function
 import functools
 import tensorflow as tf
 from tensorflow.python.keras import backend
-from tensorflow.python.keras  import initializers
+from tensorflow.python.keras import initializers
 from tensorflow.python.keras import layers
 from tensorflow.python.keras import regularizers
 BATCH_NORM_DECAY = 0.997
 BATCH_NORM_EPSILON = 1e-5
 L2_WEIGHT_DECAY = 2e-4
@@ -46,8 +46,7 @@ def identity_building_block(input_tensor,
  Arguments:
    input_tensor: input tensor
-    kernel_size: default 3, the kernel size of
+    kernel_size: default 3, the kernel size of middle conv layer at main path
-        middle conv layer at main path
    filters: list of integers, the filters of 3 conv layer at main path
    stage: integer, current stage label, used for generating layer names
    block: current block label, used for generating layer names
@@ -65,24 +64,38 @@ def identity_building_block(input_tensor,
  conv_name_base = 'res' + str(stage) + block + '_branch'
  bn_name_base = 'bn' + str(stage) + block + '_branch'
-  x = layers.Conv2D(filters1, kernel_size,
+  x = layers.Conv2D(
-                    padding='same', use_bias=False,
+      filters1,
-                    kernel_initializer='he_normal',
+      kernel_size,
-                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+      padding='same',
-                    name=conv_name_base + '2a')(input_tensor)
+      use_bias=False,
+      kernel_initializer='he_normal',
+      kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+      name=conv_name_base + '2a')(
+          input_tensor)
  x = layers.BatchNormalization(
-      axis=bn_axis, momentum=BATCH_NORM_DECAY, epsilon=BATCH_NORM_EPSILON,
+      axis=bn_axis,
-      name=bn_name_base + '2a')(x, training=training)
+      momentum=BATCH_NORM_DECAY,
+      epsilon=BATCH_NORM_EPSILON,
+      name=bn_name_base + '2a')(
+          x, training=training)
  x = layers.Activation('relu')(x)
-  x = layers.Conv2D(filters2, kernel_size,
+  x = layers.Conv2D(
-                    padding='same', use_bias=False,
+      filters2,
-                    kernel_initializer='he_normal',
+      kernel_size,
-                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+      padding='same',
-                    name=conv_name_base + '2b')(x)
+      use_bias=False,
+      kernel_initializer='he_normal',
+      kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+      name=conv_name_base + '2b')(
+          x)
  x = layers.BatchNormalization(
-      axis=bn_axis, momentum=BATCH_NORM_DECAY, epsilon=BATCH_NORM_EPSILON,
+      axis=bn_axis,
-      name=bn_name_base + '2b')(x, training=training)
+      momentum=BATCH_NORM_DECAY,
+      epsilon=BATCH_NORM_EPSILON,
+      name=bn_name_base + '2b')(
+          x, training=training)
  x = layers.add([x, input_tensor])
  x = layers.Activation('relu')(x)
@@ -100,8 +113,7 @@ def conv_building_block(input_tensor,
  Arguments:
    input_tensor: input tensor
-    kernel_size: default 3, the kernel size of
+    kernel_size: default 3, the kernel size of middle conv layer at main path
-        middle conv layer at main path
    filters: list of integers, the filters of 3 conv layer at main path
    stage: integer, current stage label, used for generating layer names
    block: current block label, used for generating layer names
@@ -124,31 +136,54 @@ def conv_building_block(input_tensor,
  conv_name_base = 'res' + str(stage) + block + '_branch'
  bn_name_base = 'bn' + str(stage) + block + '_branch'
-  x = layers.Conv2D(filters1, kernel_size, strides=strides,
+  x = layers.Conv2D(
-                    padding='same', use_bias=False,
+      filters1,
-                    kernel_initializer='he_normal',
+      kernel_size,
-                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+      strides=strides,
-                    name=conv_name_base + '2a')(input_tensor)
+      padding='same',
+      use_bias=False,
+      kernel_initializer='he_normal',
+      kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+      name=conv_name_base + '2a')(
+          input_tensor)
  x = layers.BatchNormalization(
-      axis=bn_axis, momentum=BATCH_NORM_DECAY, epsilon=BATCH_NORM_EPSILON,
+      axis=bn_axis,
-      name=bn_name_base + '2a')(x, training=training)
+      momentum=BATCH_NORM_DECAY,
+      epsilon=BATCH_NORM_EPSILON,
+      name=bn_name_base + '2a')(
+          x, training=training)
  x = layers.Activation('relu')(x)
-  x = layers.Conv2D(filters2, kernel_size, padding='same', use_bias=False,
+  x = layers.Conv2D(
-                    kernel_initializer='he_normal',
+      filters2,
-                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+      kernel_size,
-                    name=conv_name_base + '2b')(x)
+      padding='same',
+      use_bias=False,
+      kernel_initializer='he_normal',
+      kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+      name=conv_name_base + '2b')(
+          x)
  x = layers.BatchNormalization(
-      axis=bn_axis, momentum=BATCH_NORM_DECAY, epsilon=BATCH_NORM_EPSILON,
+      axis=bn_axis,
-      name=bn_name_base + '2b')(x, training=training)
+      momentum=BATCH_NORM_DECAY,
+      epsilon=BATCH_NORM_EPSILON,
-  shortcut = layers.Conv2D(filters2, (1, 1), strides=strides, use_bias=False,
+      name=bn_name_base + '2b')(
-                           kernel_initializer='he_normal',
+          x, training=training)
-                           kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
-                           name=conv_name_base + '1')(input_tensor)
+  shortcut = layers.Conv2D(
+      filters2, (1, 1),
+      strides=strides,
+      use_bias=False,
+      kernel_initializer='he_normal',
+      kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+      name=conv_name_base + '1')(
+          input_tensor)
  shortcut = layers.BatchNormalization(
-      axis=bn_axis, momentum=BATCH_NORM_DECAY, epsilon=BATCH_NORM_EPSILON,
+      axis=bn_axis,
-      name=bn_name_base + '1')(shortcut, training=training)
+      momentum=BATCH_NORM_DECAY,
+      epsilon=BATCH_NORM_EPSILON,
+      name=bn_name_base + '1')(
+          shortcut, training=training)
  x = layers.add([x, shortcut])
  x = layers.Activation('relu')(x)
@@ -166,10 +201,9 @@ def resnet_block(input_tensor,
  Arguments:
    input_tensor: input tensor
-    size: integer, number of constituent conv/identity building blocks.
+    size: integer, number of constituent conv/identity building blocks. A conv
-    A conv block is applied once, followed by (size - 1) identity blocks.
+      block is applied once, followed by (size - 1) identity blocks.
-    kernel_size: default 3, the kernel size of
+    kernel_size: default 3, the kernel size of middle conv layer at main path
-        middle conv layer at main path
    filters: list of integers, the filters of 3 conv layer at main path
    stage: integer, current stage label, used for generating layer names
    conv_strides: Strides for the first conv layer in the block.
@@ -180,12 +214,22 @@ def resnet_block(input_tensor,
    Output tensor after applying conv and identity blocks.
  """
-  x = conv_building_block(input_tensor, kernel_size, filters, stage=stage,
+  x = conv_building_block(
-                          strides=conv_strides, block='block_0',
+      input_tensor,
-                          training=training)
+      kernel_size,
+      filters,
+      stage=stage,
+      strides=conv_strides,
+      block='block_0',
+      training=training)
  for i in range(size - 1):
-    x = identity_building_block(x, kernel_size, filters, stage=stage,
+    x = identity_building_block(
-                                block='block_%d' % (i + 1), training=training)
+        x,
+        kernel_size,
+        filters,
+        stage=stage,
+        block='block_%d' % (i + 1),
+        training=training)
  return x
@@ -193,15 +237,15 @@ def resnet(num_blocks, classes=10, training=None):
  """Instantiates the ResNet architecture.
  Arguments:
-    num_blocks: integer, the number of conv/identity blocks in each block.
+    num_blocks: integer, the number of conv/identity blocks in each block. The
-      The ResNet contains 3 blocks with each block containing one conv block
+      ResNet contains 3 blocks with each block containing one conv block
      followed by (layers_per_block - 1) number of identity blocks. Each
      conv/identity block has 2 convolutional layers. With the input
-      convolutional layer and the pooling layer towards the end, this brings
+      convolutional layer and the pooling layer towards the end, this brings the
-      the total size of the network to (6*num_blocks + 2)
+      total size of the network to (6*num_blocks + 2)
    classes: optional number of classes to classify images into
    training: Only used if training keras model with Estimator.  In other
-    scenarios it is handled automatically.
+      scenarios it is handled automatically.
  Returns:
    A Keras model instance.
@@ -211,43 +255,70 @@ def resnet(num_blocks, classes=10, training=None):
  img_input = layers.Input(shape=input_shape)
  if backend.image_data_format() == 'channels_first':
-    x = layers.Lambda(lambda x: backend.permute_dimensions(x, (0, 3, 1, 2)),
+    x = layers.Lambda(
-                      name='transpose')(img_input)
+        lambda x: backend.permute_dimensions(x, (0, 3, 1, 2)),
+        name='transpose')(
+            img_input)
    bn_axis = 1
  else:  # channel_last
    x = img_input
    bn_axis = 3
  x = layers.ZeroPadding2D(padding=(1, 1), name='conv1_pad')(x)
-  x = layers.Conv2D(16, (3, 3),
+  x = layers.Conv2D(
-                    strides=(1, 1),
+      16, (3, 3),
-                    padding='valid', use_bias=False,
+      strides=(1, 1),
-                    kernel_initializer='he_normal',
+      padding='valid',
-                    kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+      use_bias=False,
-                    name='conv1')(x)
+      kernel_initializer='he_normal',
-  x = layers.BatchNormalization(axis=bn_axis,
+      kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
-                                momentum=BATCH_NORM_DECAY,
+      name='conv1')(
-                                epsilon=BATCH_NORM_EPSILON,
+          x)
-                                name='bn_conv1',)(x, training=training)
+  x = layers.BatchNormalization(
+      axis=bn_axis,
+      momentum=BATCH_NORM_DECAY,
+      epsilon=BATCH_NORM_EPSILON,
+      name='bn_conv1',
+  )(x, training=training)
  x = layers.Activation('relu')(x)
-  x = resnet_block(x, size=num_blocks, kernel_size=3, filters=[16, 16],
+  x = resnet_block(
-                   stage=2, conv_strides=(1, 1), training=training)
+      x,
+      size=num_blocks,
-  x = resnet_block(x, size=num_blocks, kernel_size=3, filters=[32, 32],
+      kernel_size=3,
-                   stage=3, conv_strides=(2, 2), training=training)
+      filters=[16, 16],
+      stage=2,
-  x = resnet_block(x, size=num_blocks, kernel_size=3, filters=[64, 64],
+      conv_strides=(1, 1),
-                   stage=4, conv_strides=(2, 2), training=training)
+      training=training)
+  x = resnet_block(
+      x,
+      size=num_blocks,
+      kernel_size=3,
+      filters=[32, 32],
+      stage=3,
+      conv_strides=(2, 2),
+      training=training)
+  x = resnet_block(
+      x,
+      size=num_blocks,
+      kernel_size=3,
+      filters=[64, 64],
+      stage=4,
+      conv_strides=(2, 2),
+      training=training)
  rm_axes = [1, 2] if backend.image_data_format() == 'channels_last' else [2, 3]
  x = layers.Lambda(lambda x: backend.mean(x, rm_axes), name='reduce_mean')(x)
-  x = layers.Dense(classes,
+  x = layers.Dense(
-                   activation='softmax',
+      classes,
-                   kernel_initializer=initializers.RandomNormal(stddev=0.01),
+      activation='softmax',
-                   kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+      kernel_initializer=initializers.RandomNormal(stddev=0.01),
-                   bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+      kernel_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
-                   name='fc10')(x)
+      bias_regularizer=regularizers.l2(L2_WEIGHT_DECAY),
+      name='fc10')(
+          x)
  inputs = img_input
  # Create model.

--- a/official/benchmark/models/resnet_cifar_test.py
+++ b/official/benchmark/models/resnet_cifar_test.py
@@ -33,9 +33,7 @@ class KerasCifarTest(googletest.TestCase):
  """Unit tests for Keras ResNet with Cifar."""
  _extra_flags = [
-      "-batch_size", "4",
+      "-batch_size", "4", "-train_steps", "1", "-use_synthetic_data", "true"
-      "-train_steps", "1",
-      "-use_synthetic_data", "true"
  ]
  _tempdir = None
@@ -61,119 +59,135 @@ class KerasCifarTest(googletest.TestCase):
    """Test Keras model with 1 GPU, no distribution strategy."""
    extra_flags = [
-        "-distribution_strategy", "off",
+        "-distribution_strategy",
-        "-model_dir", "keras_cifar_no_dist_strat",
+        "off",
-        "-data_format", "channels_last",
+        "-model_dir",
+        "keras_cifar_no_dist_strat",
+        "-data_format",
+        "channels_last",
    ]
    extra_flags = extra_flags + self._extra_flags
    integration.run_synthetic(
        main=resnet_cifar_main.run,
        tmp_root=self.get_temp_dir(),
-        extra_flags=extra_flags
+        extra_flags=extra_flags)
-    )
  def test_end_to_end_graph_no_dist_strat(self):
    """Test Keras model in legacy graph mode with 1 GPU, no dist strat."""
    extra_flags = [
-        "-enable_eager", "false",
+        "-enable_eager",
-        "-distribution_strategy", "off",
+        "false",
-        "-model_dir", "keras_cifar_graph_no_dist_strat",
+        "-distribution_strategy",
-        "-data_format", "channels_last",
+        "off",
+        "-model_dir",
+        "keras_cifar_graph_no_dist_strat",
+        "-data_format",
+        "channels_last",
    ]
    extra_flags = extra_flags + self._extra_flags
    integration.run_synthetic(
        main=resnet_cifar_main.run,
        tmp_root=self.get_temp_dir(),
-        extra_flags=extra_flags
+        extra_flags=extra_flags)
-    )
  def test_end_to_end_1_gpu(self):
    """Test Keras model with 1 GPU."""
    if context.num_gpus() < 1:
      self.skipTest(
-          "{} GPUs are not available for this test. {} GPUs are available".
+          "{} GPUs are not available for this test. {} GPUs are available"
-          format(1, context.num_gpus()))
+          .format(1, context.num_gpus()))
    extra_flags = [
-        "-num_gpus", "1",
+        "-num_gpus",
-        "-distribution_strategy", "mirrored",
+        "1",
-        "-model_dir", "keras_cifar_1_gpu",
+        "-distribution_strategy",
-        "-data_format", "channels_last",
+        "mirrored",
+        "-model_dir",
+        "keras_cifar_1_gpu",
+        "-data_format",
+        "channels_last",
    ]
    extra_flags = extra_flags + self._extra_flags
    integration.run_synthetic(
        main=resnet_cifar_main.run,
        tmp_root=self.get_temp_dir(),
-        extra_flags=extra_flags
+        extra_flags=extra_flags)
-    )
  def test_end_to_end_graph_1_gpu(self):
    """Test Keras model in legacy graph mode with 1 GPU."""
    if context.num_gpus() < 1:
      self.skipTest(
-          "{} GPUs are not available for this test. {} GPUs are available".
+          "{} GPUs are not available for this test. {} GPUs are available"
-          format(1, context.num_gpus()))
+          .format(1, context.num_gpus()))
    extra_flags = [
-        "-num_gpus", "1",
+        "-num_gpus",
+        "1",
        "-noenable_eager",
-        "-distribution_strategy", "mirrored",
+        "-distribution_strategy",
-        "-model_dir", "keras_cifar_graph_1_gpu",
+        "mirrored",
-        "-data_format", "channels_last",
+        "-model_dir",
+        "keras_cifar_graph_1_gpu",
+        "-data_format",
+        "channels_last",
    ]
    extra_flags = extra_flags + self._extra_flags
    integration.run_synthetic(
        main=resnet_cifar_main.run,
        tmp_root=self.get_temp_dir(),
-        extra_flags=extra_flags
+        extra_flags=extra_flags)
-    )
  def test_end_to_end_2_gpu(self):
    """Test Keras model with 2 GPUs."""
    if context.num_gpus() < 2:
      self.skipTest(
-          "{} GPUs are not available for this test. {} GPUs are available".
+          "{} GPUs are not available for this test. {} GPUs are available"
-          format(2, context.num_gpus()))
+          .format(2, context.num_gpus()))
    extra_flags = [
-        "-num_gpus", "2",
+        "-num_gpus",
-        "-distribution_strategy", "mirrored",
+        "2",
-        "-model_dir", "keras_cifar_2_gpu",
+        "-distribution_strategy",
+        "mirrored",
+        "-model_dir",
+        "keras_cifar_2_gpu",
    ]
    extra_flags = extra_flags + self._extra_flags
    integration.run_synthetic(
        main=resnet_cifar_main.run,
        tmp_root=self.get_temp_dir(),
-        extra_flags=extra_flags
+        extra_flags=extra_flags)
-    )
  def test_end_to_end_graph_2_gpu(self):
    """Test Keras model in legacy graph mode with 2 GPUs."""
    if context.num_gpus() < 2:
      self.skipTest(
-          "{} GPUs are not available for this test. {} GPUs are available".
+          "{} GPUs are not available for this test. {} GPUs are available"
-          format(2, context.num_gpus()))
+          .format(2, context.num_gpus()))
    extra_flags = [
-        "-num_gpus", "2",
+        "-num_gpus",
-        "-enable_eager", "false",
+        "2",
-        "-distribution_strategy", "mirrored",
+        "-enable_eager",
-        "-model_dir", "keras_cifar_graph_2_gpu",
+        "false",
+        "-distribution_strategy",
+        "mirrored",
+        "-model_dir",
+        "keras_cifar_graph_2_gpu",
    ]
    extra_flags = extra_flags + self._extra_flags
    integration.run_synthetic(
        main=resnet_cifar_main.run,
        tmp_root=self.get_temp_dir(),
-        extra_flags=extra_flags
+        extra_flags=extra_flags)
-    )
 if __name__ == "__main__":

--- a/official/benchmark/models/resnet_imagenet_main.py
+++ b/official/benchmark/models/resnet_imagenet_main.py
@@ -20,6 +20,7 @@ from __future__ import print_function
 import os
+# Import libraries
 from absl import app
 from absl import flags
 from absl import logging

--- a/official/benchmark/models/resnet_imagenet_test.py
+++ b/official/benchmark/models/resnet_imagenet_test.py
@@ -36,29 +36,43 @@ from official.vision.image_classification.resnet import imagenet_preprocessing
 class KerasImagenetTest(tf.test.TestCase):
  """Unit tests for Keras Models with ImageNet."""
  _default_flags_dict = [
-      "-batch_size", "4",
+      "-batch_size",
-      "-train_steps", "1",
+      "4",
-      "-use_synthetic_data", "true",
+      "-train_steps",
-      "-data_format", "channels_last",
+      "1",
+      "-use_synthetic_data",
+      "true",
+      "-data_format",
+      "channels_last",
  ]
  _extra_flags_dict = {
      "resnet": [
-          "-model", "resnet50_v1.5",
+          "-model",
-          "-optimizer", "resnet50_default",
+          "resnet50_v1.5",
+          "-optimizer",
+          "resnet50_default",
      ],
      "resnet_polynomial_decay": [
-          "-model", "resnet50_v1.5",
+          "-model",
-          "-optimizer", "resnet50_default",
+          "resnet50_v1.5",
-          "-pruning_method", "polynomial_decay",
+          "-optimizer",
+          "resnet50_default",
+          "-pruning_method",
+          "polynomial_decay",
      ],
      "mobilenet": [
-          "-model", "mobilenet",
+          "-model",
-          "-optimizer", "mobilenet_default",
+          "mobilenet",
+          "-optimizer",
+          "mobilenet_default",
      ],
      "mobilenet_polynomial_decay": [
-          "-model", "mobilenet",
+          "-model",
-          "-optimizer", "mobilenet_default",
+          "mobilenet",
-          "-pruning_method", "polynomial_decay",
+          "-optimizer",
+          "mobilenet_default",
+          "-pruning_method",
+          "polynomial_decay",
      ],
  }
  _tempdir = None
@@ -86,50 +100,53 @@ class KerasImagenetTest(tf.test.TestCase):
    """Test Keras model with 1 GPU, no distribution strategy."""
    extra_flags = [
-        "-distribution_strategy", "off",
+        "-distribution_strategy",
+        "off",
    ]
    extra_flags = extra_flags + self.get_extra_flags_dict(flags_key)
    integration.run_synthetic(
        main=resnet_imagenet_main.run,
        tmp_root=self.get_temp_dir(),
-        extra_flags=extra_flags
+        extra_flags=extra_flags)
-    )
  def test_end_to_end_graph_no_dist_strat(self, flags_key):
    """Test Keras model in legacy graph mode with 1 GPU, no dist strat."""
    extra_flags = [
-        "-enable_eager", "false",
+        "-enable_eager",
-        "-distribution_strategy", "off",
+        "false",
+        "-distribution_strategy",
+        "off",
    ]
    extra_flags = extra_flags + self.get_extra_flags_dict(flags_key)
    integration.run_synthetic(
        main=resnet_imagenet_main.run,
        tmp_root=self.get_temp_dir(),
-        extra_flags=extra_flags
+        extra_flags=extra_flags)
-    )
  def test_end_to_end_1_gpu(self, flags_key):
    """Test Keras model with 1 GPU."""
    if context.num_gpus() < 1:
      self.skipTest(
-          "{} GPUs are not available for this test. {} GPUs are available".
+          "{} GPUs are not available for this test. {} GPUs are available"
-          format(1, context.num_gpus()))
+          .format(1, context.num_gpus()))
    extra_flags = [
-        "-num_gpus", "1",
+        "-num_gpus",
-        "-distribution_strategy", "mirrored",
+        "1",
-        "-enable_checkpoint_and_export", "1",
+        "-distribution_strategy",
+        "mirrored",
+        "-enable_checkpoint_and_export",
+        "1",
    ]
    extra_flags = extra_flags + self.get_extra_flags_dict(flags_key)
    integration.run_synthetic(
        main=resnet_imagenet_main.run,
        tmp_root=self.get_temp_dir(),
-        extra_flags=extra_flags
+        extra_flags=extra_flags)
-    )
  def test_end_to_end_1_gpu_fp16(self, flags_key):
    """Test Keras model with 1 GPU and fp16."""
@@ -140,9 +157,12 @@ class KerasImagenetTest(tf.test.TestCase):
          .format(1, context.num_gpus()))
    extra_flags = [
-        "-num_gpus", "1",
+        "-num_gpus",
-        "-dtype", "fp16",
+        "1",
-        "-distribution_strategy", "mirrored",
+        "-dtype",
+        "fp16",
+        "-distribution_strategy",
+        "mirrored",
    ]
    extra_flags = extra_flags + self.get_extra_flags_dict(flags_key)
@@ -152,62 +172,67 @@ class KerasImagenetTest(tf.test.TestCase):
    integration.run_synthetic(
        main=resnet_imagenet_main.run,
        tmp_root=self.get_temp_dir(),
-        extra_flags=extra_flags
+        extra_flags=extra_flags)
-    )
  def test_end_to_end_2_gpu(self, flags_key):
    """Test Keras model with 2 GPUs."""
    if context.num_gpus() < 2:
      self.skipTest(
-          "{} GPUs are not available for this test. {} GPUs are available".
+          "{} GPUs are not available for this test. {} GPUs are available"
-          format(2, context.num_gpus()))
+          .format(2, context.num_gpus()))
    extra_flags = [
-        "-num_gpus", "2",
+        "-num_gpus",
-        "-distribution_strategy", "mirrored",
+        "2",
+        "-distribution_strategy",
+        "mirrored",
    ]
    extra_flags = extra_flags + self.get_extra_flags_dict(flags_key)
    integration.run_synthetic(
        main=resnet_imagenet_main.run,
        tmp_root=self.get_temp_dir(),
-        extra_flags=extra_flags
+        extra_flags=extra_flags)
-    )
  def test_end_to_end_xla_2_gpu(self, flags_key):
    """Test Keras model with XLA and 2 GPUs."""
    if context.num_gpus() < 2:
      self.skipTest(
-          "{} GPUs are not available for this test. {} GPUs are available".
+          "{} GPUs are not available for this test. {} GPUs are available"
-          format(2, context.num_gpus()))
+          .format(2, context.num_gpus()))
    extra_flags = [
-        "-num_gpus", "2",
+        "-num_gpus",
-        "-enable_xla", "true",
+        "2",
-        "-distribution_strategy", "mirrored",
+        "-enable_xla",
+        "true",
+        "-distribution_strategy",
+        "mirrored",
    ]
    extra_flags = extra_flags + self.get_extra_flags_dict(flags_key)
    integration.run_synthetic(
        main=resnet_imagenet_main.run,
        tmp_root=self.get_temp_dir(),
-        extra_flags=extra_flags
+        extra_flags=extra_flags)
-    )
  def test_end_to_end_2_gpu_fp16(self, flags_key):
    """Test Keras model with 2 GPUs and fp16."""
    if context.num_gpus() < 2:
      self.skipTest(
-          "{} GPUs are not available for this test. {} GPUs are available".
+          "{} GPUs are not available for this test. {} GPUs are available"
-          format(2, context.num_gpus()))
+          .format(2, context.num_gpus()))
    extra_flags = [
-        "-num_gpus", "2",
+        "-num_gpus",
-        "-dtype", "fp16",
+        "2",
-        "-distribution_strategy", "mirrored",
+        "-dtype",
+        "fp16",
+        "-distribution_strategy",
+        "mirrored",
    ]
    extra_flags = extra_flags + self.get_extra_flags_dict(flags_key)
@@ -217,21 +242,24 @@ class KerasImagenetTest(tf.test.TestCase):
    integration.run_synthetic(
        main=resnet_imagenet_main.run,
        tmp_root=self.get_temp_dir(),
-        extra_flags=extra_flags
+        extra_flags=extra_flags)
-    )
  def test_end_to_end_xla_2_gpu_fp16(self, flags_key):
    """Test Keras model with XLA, 2 GPUs and fp16."""
    if context.num_gpus() < 2:
      self.skipTest(
-          "{} GPUs are not available for this test. {} GPUs are available".
+          "{} GPUs are not available for this test. {} GPUs are available"
-          format(2, context.num_gpus()))
+          .format(2, context.num_gpus()))
    extra_flags = [
-        "-num_gpus", "2",
+        "-num_gpus",
-        "-dtype", "fp16",
+        "2",
-        "-enable_xla", "true",
+        "-dtype",
-        "-distribution_strategy", "mirrored",
+        "fp16",
+        "-enable_xla",
+        "true",
+        "-distribution_strategy",
+        "mirrored",
    ]
    extra_flags = extra_flags + self.get_extra_flags_dict(flags_key)
@@ -241,8 +269,7 @@ class KerasImagenetTest(tf.test.TestCase):
    integration.run_synthetic(
        main=resnet_imagenet_main.run,
        tmp_root=self.get_temp_dir(),
-        extra_flags=extra_flags
+        extra_flags=extra_flags)
-    )
 if __name__ == "__main__":

--- a/official/benchmark/models/resnet_imagenet_test_tpu.py
+++ b/official/benchmark/models/resnet_imagenet_test_tpu.py
@@ -30,19 +30,30 @@ class KerasImagenetTest(tf.test.TestCase, parameterized.TestCase):
  _extra_flags_dict = {
      "resnet": [
-          "-batch_size", "4",
+          "-batch_size",
-          "-train_steps", "1",
+          "4",
-          "-use_synthetic_data", "true"
+          "-train_steps",
-          "-model", "resnet50_v1.5",
+          "1",
-          "-optimizer", "resnet50_default",
+          "-use_synthetic_data",
+          "true",
+          "-model",
+          "resnet50_v1.5",
+          "-optimizer",
+          "resnet50_default",
      ],
      "resnet_polynomial_decay": [
-          "-batch_size", "4",
+          "-batch_size",
-          "-train_steps", "1",
+          "4",
-          "-use_synthetic_data", "true",
+          "-train_steps",
-          "-model", "resnet50_v1.5",
+          "1",
-          "-optimizer", "resnet50_default",
+          "-use_synthetic_data",
-          "-pruning_method", "polynomial_decay",
+          "true",
+          "-model",
+          "resnet50_v1.5",
+          "-optimizer",
+          "resnet50_default",
+          "-pruning_method",
+          "polynomial_decay",
      ],
  }
  _tempdir = None
@@ -71,34 +82,38 @@ class KerasImagenetTest(tf.test.TestCase, parameterized.TestCase):
    """Test Keras model with TPU distribution strategy."""
    extra_flags = [
-        "-distribution_strategy", "tpu",
+        "-distribution_strategy",
-        "-data_format", "channels_last",
+        "tpu",
-        "-enable_checkpoint_and_export", "1",
+        "-data_format",
+        "channels_last",
+        "-enable_checkpoint_and_export",
+        "1",
    ]
    extra_flags = extra_flags + self._extra_flags_dict[flags_key]
    integration.run_synthetic(
        main=resnet_imagenet_main.run,
        tmp_root=self.get_temp_dir(),
-        extra_flags=extra_flags
+        extra_flags=extra_flags)
-    )
  @parameterized.parameters(["resnet"])
  def test_end_to_end_tpu_bf16(self, flags_key):
    """Test Keras model with TPU and bfloat16 activation."""
    extra_flags = [
-        "-distribution_strategy", "tpu",
+        "-distribution_strategy",
-        "-data_format", "channels_last",
+        "tpu",
-        "-dtype", "bf16",
+        "-data_format",
+        "channels_last",
+        "-dtype",
+        "bf16",
    ]
    extra_flags = extra_flags + self._extra_flags_dict[flags_key]
    integration.run_synthetic(
        main=resnet_imagenet_main.run,
        tmp_root=self.get_temp_dir(),
-        extra_flags=extra_flags
+        extra_flags=extra_flags)
-    )
 if __name__ == "__main__":

--- a/official/benchmark/models/synthetic_util.py
+++ b/official/benchmark/models/synthetic_util.py
@@ -42,8 +42,8 @@ class SyntheticDataset(object):
    for t in flat_tensor:
      rebatched_t = tf.split(t, num_or_size_splits=split_by, axis=0)[0]
      assert rebatched_t.shape.is_fully_defined(), rebatched_t.shape
-      v = tf.compat.v1.get_local_variable(self._random_name(),
+      v = tf.compat.v1.get_local_variable(
-                                          initializer=rebatched_t)
+          self._random_name(), initializer=rebatched_t)
      variable_data.append(v)
      initializers.append(v.initializer)
    input_data = tf.nest.pack_sequence_as(tensor, variable_data)
@@ -90,6 +90,7 @@ class SyntheticIterator(object):
 def _monkey_patch_dataset_method(strategy):
  """Monkey-patch `strategy`'s `make_dataset_iterator` method."""
  def make_dataset(self, dataset):
    logging.info('Using pure synthetic data.')
    with self.scope():

--- a/official/benchmark/ncf_keras_benchmark.py
+++ b/official/benchmark/ncf_keras_benchmark.py
@@ -66,17 +66,20 @@ class NCFKerasBenchmarkBase(PerfZeroBenchmark):
    wall_time_sec = time.time() - start_time_sec
    metrics = []
-    metrics.append({'name': 'exp_per_second',
+    metrics.append({
-                    'value': stats['avg_exp_per_second']})
+        'name': 'exp_per_second',
+        'value': stats['avg_exp_per_second']
+    })
    if hr_at_10_min > 0:
-      metrics.append({'name': 'hr_at_10',
+      metrics.append({
-                      'value': stats['eval_hit_rate'],
+          'name': 'hr_at_10',
-                      'min_value': hr_at_10_min,
+          'value': stats['eval_hit_rate'],
-                      'max_value': hr_at_10_max})
+          'min_value': hr_at_10_min,
+          'max_value': hr_at_10_max
+      })
-      metrics.append({'name': 'train_loss',
+      metrics.append({'name': 'train_loss', 'value': stats['loss']})
-                      'value': stats['loss']})
    self.report_benchmark(iters=-1, wall_time=wall_time_sec, metrics=metrics)
@@ -108,9 +111,7 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
    default_flags['data_dir'] = os.path.join(root_data_dir, NCF_DATA_DIR_NAME)
    super(NCFKerasAccuracy, self).__init__(
-        output_dir=output_dir,
+        output_dir=output_dir, default_flags=default_flags, **kwargs)
-        default_flags=default_flags,
-        **kwargs)
  def _run_and_report_benchmark_mlperf_like(self):
    """Run test and report results.
@@ -131,8 +132,7 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
      hr_at_10_max: Maximum acceptable hr@10 value.
    """
    super(NCFKerasAccuracy, self)._run_and_report_benchmark(
-        hr_at_10_min=hr_at_10_min,
+        hr_at_10_min=hr_at_10_min, hr_at_10_max=hr_at_10_max)
-        hr_at_10_max=hr_at_10_max)
  def _set_8_gpu_defaults(self):
    FLAGS.num_gpus = 8
@@ -211,6 +211,7 @@ class NCFKerasAccuracy(NCFKerasBenchmarkBase):
    FLAGS.eval_batch_size = 160000
    self._run_and_report_benchmark()
 #############################################
 # Tests below with mlperf in the test name are of two types:
 #  1) 1 GPU tests are based on MLPerf 0.5 and the TensorFlow pulled submission.
@@ -449,10 +450,7 @@ class NCFKerasBenchmarkReal(NCFKerasBenchmarkBase):
 class NCFKerasSynth(NCFKerasBenchmarkBase):
  """Benchmark NCF model using synthetic data."""
-  def __init__(self,
+  def __init__(self, output_dir=None, default_flags=None, **kwargs):
-               output_dir=None,
-               default_flags=None,
-               **kwargs):
    default_flags = {}
    default_flags['dataset'] = 'ml-20m'
@@ -470,9 +468,7 @@ class NCFKerasSynth(NCFKerasBenchmarkBase):
    default_flags['use_synthetic_data'] = True
    super(NCFKerasSynth, self).__init__(
-        output_dir=output_dir,
+        output_dir=output_dir, default_flags=default_flags, **kwargs)
-        default_flags=default_flags,
-        **kwargs)
  def benchmark_1_gpu(self):
    self._setup()

--- a/official/benchmark/resnet_ctl_imagenet_benchmark.py
+++ b/official/benchmark/resnet_ctl_imagenet_benchmark.py
@@ -459,7 +459,8 @@ class Resnet50CtlBenchmarkReal(Resnet50CtlBenchmarkBase):
  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    def_flags = {}
    def_flags['skip_eval'] = True
-    def_flags['data_dir'] = os.path.join(root_data_dir, 'imagenet')
+    def_flags[
+        'data_dir'] = os.path.join(root_data_dir, 'imagenet')
    def_flags['train_steps'] = 110
    def_flags['steps_per_loop'] = 10
    def_flags['log_steps'] = 10

--- a/official/benchmark/retinanet_benchmark.py
+++ b/official/benchmark/retinanet_benchmark.py
@@ -335,27 +335,21 @@ class RetinanetBenchmarkReal(DetectionBenchmarkReal):
  """Short benchmark performance tests for Retinanet model."""
  def __init__(self, **kwargs):
-    super(RetinanetBenchmarkReal, self).__init__(
+    super(RetinanetBenchmarkReal, self).__init__(model='retinanet', **kwargs)
-        model='retinanet',
-        **kwargs)
 class MaskRCNNBenchmarkReal(DetectionBenchmarkReal):
  """Short benchmark performance tests for Mask RCNN model."""
  def __init__(self, **kwargs):
-    super(MaskRCNNBenchmarkReal, self).__init__(
+    super(MaskRCNNBenchmarkReal, self).__init__(model='mask_rcnn', **kwargs)
-        model='mask_rcnn',
-        **kwargs)
 class ShapeMaskBenchmarkReal(DetectionBenchmarkReal):
  """Short benchmark performance tests for ShapeMask model."""
  def __init__(self, **kwargs):
-    super(ShapeMaskBenchmarkReal, self).__init__(
+    super(ShapeMaskBenchmarkReal, self).__init__(model='shapemask', **kwargs)
-        model='shapemask',
-        **kwargs)
 if __name__ == '__main__':

--- a/official/benchmark/shakespeare_benchmark.py
+++ b/official/benchmark/shakespeare_benchmark.py
@@ -68,10 +68,12 @@ class ShakespeareBenchmarkBase(PerfZeroBenchmark):
    wall_time_sec = time.time() - start_time_sec
    if top_1_train_min:
-      metrics.append({'name': 'accuracy_top_1_train',
+      metrics.append({
-                      'value': stats['history']['RecallAt1'][-1],
+          'name': 'accuracy_top_1_train',
-                      'min_value': top_1_train_min,
+          'value': stats['history']['RecallAt1'][-1],
-                      'max_value': top_1_train_max})
+          'min_value': top_1_train_min,
+          'max_value': top_1_train_max
+      })
    # Look for the time history callback which was used during keras.fit
    for callback in stats['callbacks']:
@@ -79,8 +81,7 @@ class ShakespeareBenchmarkBase(PerfZeroBenchmark):
        epoch_timings = callback.epoch_runtime_log
        if len(epoch_timings) > 1:
          average_time = sum(epoch_timings[1:]) / len(epoch_timings[1:])
-          metrics.append({'name': 'avg_epoch_time',
+          metrics.append({'name': 'avg_epoch_time', 'value': average_time})
-                          'value': average_time})
      # First entry in timestamp_log is the start of step 1. The rest of the
      # entries are the end of each step recorded.
@@ -90,13 +91,14 @@ class ShakespeareBenchmarkBase(PerfZeroBenchmark):
          total_batch_size * log_steps * (len(time_log) - warmup - 1))
      if elapsed > 0:
        examples_per_sec = num_examples / elapsed
-        metrics.append({'name': 'exp_per_second',
+        metrics.append({'name': 'exp_per_second', 'value': examples_per_sec})
-                        'value': examples_per_sec})
    flags_str = flags_core.get_nondefault_flags_as_str()
-    self.report_benchmark(iters=-1, wall_time=wall_time_sec,
+    self.report_benchmark(
-                          metrics=metrics,
+        iters=-1,
-                          extras={'flags': flags_str})
+        wall_time=wall_time_sec,
+        metrics=metrics,
+        extras={'flags': flags_str})
 class ShakespeareAccuracy(ShakespeareBenchmarkBase):
@@ -114,8 +116,8 @@ class ShakespeareAccuracy(ShakespeareBenchmarkBase):
      output_dir: directory where to output e.g. log files
      root_data_dir: directory under which to look for dataset
      **kwargs: arbitrary named arguments. This is needed to make the
-                constructor forward compatible in case PerfZero provides more
+        constructor forward compatible in case PerfZero provides more named
-                named arguments before updating the constructor.
+        arguments before updating the constructor.
    """
    self.train_data = os.path.join(root_data_dir, SHAKESPEARE_TRAIN_DATA)
    super(ShakespeareAccuracy, self).__init__(
@@ -212,8 +214,8 @@ class ShakespeareKerasBenchmarkReal(ShakespeareBenchmarkBase):
      output_dir: directory where to output e.g. log files
      root_data_dir: directory under which to look for dataset
      **kwargs: arbitrary named arguments. This is needed to make the
-                constructor forward compatible in case PerfZero provides more
+        constructor forward compatible in case PerfZero provides more named
-                named arguments before updating the constructor.
+        arguments before updating the constructor.
    """
    self.train_data = os.path.join(root_data_dir, SHAKESPEARE_TRAIN_DATA)

--- a/official/benchmark/tfhub_memory_usage_benchmark.py
+++ b/official/benchmark/tfhub_memory_usage_benchmark.py
@@ -45,8 +45,8 @@ class TfHubMemoryUsageBenchmark(PerfZeroBenchmark):
        # https://tfhub.dev/google/nnlm-en-dim128/1 to valid python method name
        # like google_nnlm_en_dim128_1.
        hub_model_method_name = hub_model_handle.replace(
-            'https://tfhub.dev',
+            'https://tfhub.dev', '').replace('/', '_').replace('-',
-            '').replace('/', '_').replace('-', '_').strip('_')
+                                                               '_').strip('_')
        setattr(
            self, 'benchmark_' + hub_model_method_name,
            functools.partial(self.benchmark_memory_usage, hub_model_handle))

--- a/official/benchmark/unet3d_benchmark.py
+++ b/official/benchmark/unet3d_benchmark.py
@@ -21,6 +21,7 @@ import functools
 import os
 import time
 from typing import Optional
 from absl import flags
 import tensorflow as tf  # pylint: disable=g-bad-import-order