Commit 472e2f80 authored by zhanggzh

Merge remote-tracking branch 'tf_model/main'

parents d91296eb f3a14f85
# Shakespeare character LSTM model
This is an implementation of a simple character LSTM used to generate text.
## Instructions
First download the source data:
```
wget https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt
```
Note that files other than shakespeare.txt can also be used to train the model to generate other text.
Then train the model:
```
python3 shakespeare_main.py --training_data shakespeare.txt \
--model_dir /tmp/shakespeare
```
This will place model checkpoints in `/tmp/shakespeare`, so that we can use them to make predictions.
Then generate predictions:
```
python3 shakespeare_main.py --training_data shakespeare.txt \
--model_dir /tmp/shakespeare --notrain --predict_context=ROMEO:
```
Change `--predict_context` and `--predict_length` to suit your needs.
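For example, to prime the model with a different speaker and generate a shorter sample (the context and length here are purely illustrative):
```
python3 shakespeare_main.py --training_data shakespeare.txt \
--model_dir /tmp/shakespeare --notrain --predict_context=JULIET: --predict_length=500
```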
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Runs a character LSTM model trained on Shakespeare."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import functools
import os
# pylint: disable=wrong-import-order
from absl import app
from absl import flags
import numpy as np
import tensorflow as tf
from official.common import distribute_utils
# pylint: enable=wrong-import-order
from official.utils.flags import core as flags_core
from official.utils.misc import keras_utils
EMBEDDING_DIM = 256
RNN_UNITS = 1024
SEQ_LENGTH = 100
# Calculated by running with batch_size=1.
BATCHES_PER_EPOCH = 11043
def define_flags():
"""Define the flags for the Shakespeare character LSTM."""
flags_core.define_base(data_dir=False,
clean=False,
train_epochs=True,
epochs_between_evals=False,
stop_threshold=False,
num_gpu=True,
export_dir=False,
run_eagerly=True,
distribution_strategy=True)
flags_core.define_performance(num_parallel_calls=False,
inter_op=False,
intra_op=False,
synthetic_data=False,
max_train_steps=False,
dtype=True,
enable_xla=True)
flags_core.set_defaults(train_epochs=43,
batch_size=64)
flags.DEFINE_boolean(name='enable_eager', default=True, help='Enable eager?')
flags.DEFINE_boolean(
name='train', default=True,
help='If true trains the model.')
flags.DEFINE_string(
name='predict_context', default=None,
help='If set, makes a prediction with the given context.')
flags.DEFINE_integer(
name='predict_length', default=1000,
help='Length of the predicted text including the context.')
flags.DEFINE_integer(name='train_steps', default=None,
help='Overrides train_steps per epoch if not None.')
flags.DEFINE_integer(
name='log_steps', default=100,
help='For every log_steps, we log the timing information such as '
'examples per second.')
flags.DEFINE_string(
name='training_data', default=None,
help='Path to file containing the training data.')
flags.DEFINE_boolean(name='cudnn', default=True, help='Use CuDNN LSTM.')
def get_dataset(path_to_file, batch_size=None, seq_length=SEQ_LENGTH):
"""Creates a dataset from a given text file.
Args:
path_to_file: The path to the training data.
batch_size: Batch size to use.
seq_length: The length of the LSTM sequence.
Returns:
A tuple, consisting of the Dataset and the class to character mapping
and character to class mapping.
"""
with tf.io.gfile.GFile(path_to_file, 'rb') as train_data:
text = train_data.read().decode(encoding='utf-8')
# Create vocab
vocab = sorted(set(text))
char2idx = {u: i for i, u in enumerate(vocab)}
idx2char = np.array(vocab)
  # Split text into chunks of seq_length + 1 characters to create examples
text_as_int = np.array([char2idx[c] for c in text])
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)
sequences = char_dataset.batch(
seq_length + 1, drop_remainder=True, num_parallel_calls=tf.data.AUTOTUNE)
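  # For example, with seq_length=4 the chunk 'Hello' yields the input 'Hell'
  # and the target 'ello' (one-hot encoded over the vocabulary) in
  # split_input_target below.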
def split_input_target(chunk):
input_text = chunk[:-1]
target_text = chunk[1:]
return input_text, tf.one_hot(target_text, len(vocab))
dataset = sequences.map(split_input_target)
dataset = dataset.shuffle(10000).repeat()
dataset = dataset.batch(
batch_size, drop_remainder=True, num_parallel_calls=tf.data.AUTOTUNE)
return dataset, idx2char, char2idx
def build_model(vocab_size,
embedding_dim=EMBEDDING_DIM,
rnn_units=RNN_UNITS,
batch_size=None,
stateful=False,
use_cudnn=True):
"""Builds the Shakespeare model.
Args:
vocab_size: The number of character classes in the input.
embedding_dim: The dimension of the embedding space for each class.
rnn_units: The number of RNN units in the layer.
batch_size: When predicting, the batch size of the predictions.
    stateful: If true, the LSTM is stateful.
    use_cudnn: Whether to allow the LSTM to dispatch to the CuDNN kernel.
Returns:
A Keras Model.
"""
LSTM = functools.partial(tf.keras.layers.LSTM, implementation=2)
# By indirecting the activation through a lambda layer, the logic to dispatch
# to CuDNN in V2 doesn't trigger and we force the LSTM to run in non-CuDNN
# mode.
lstm_activation = ('tanh' if use_cudnn else
lambda x: tf.math.tanh(x))
batch_shape = [batch_size if stateful else None, None]
return tf.keras.Sequential([
tf.keras.layers.Embedding(vocab_size, embedding_dim,
batch_input_shape=batch_shape),
LSTM(rnn_units,
activation=lstm_activation,
return_sequences=True,
stateful=stateful,
recurrent_initializer='glorot_uniform'),
tf.keras.layers.Dense(vocab_size),
tf.keras.layers.Softmax(dtype=tf.float32)])
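# Illustrative usage (the vocabulary size below is just an example):
#   training_model = build_model(vocab_size=65, batch_size=64)
#   prediction_model = build_model(vocab_size=65, batch_size=1, stateful=True)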
def train_model(flags_obj, dataset, vocab_size, strategy, checkpoint_dir=None):
"""Trains a Shakespeare model.
Args:
    flags_obj: An object containing parsed flag values.
dataset: the training data set.
vocab_size: the number of unique character classes.
strategy: distribution strategy to use.
checkpoint_dir: if not None, the directory in which to make checkpoints.
Returns:
The training history and callbacks.
"""
if flags_obj.train_steps:
train_steps = flags_obj.train_steps
else:
train_steps = BATCHES_PER_EPOCH // flags_obj.batch_size
strategy_scope = distribute_utils.get_strategy_scope(strategy)
with strategy_scope:
model = build_model(vocab_size=vocab_size, batch_size=flags_obj.batch_size,
use_cudnn=flags_obj.cudnn)
# Model.fit() automatically applies loss scaling so we don't need to create
# a LossScaleOptimizer.
model.compile(
optimizer=tf.keras.optimizers.Adam(),
loss=tf.keras.losses.CategoricalCrossentropy(),
metrics=[tf.keras.metrics.Recall(top_k=1, name='RecallAt1'),
tf.keras.metrics.Recall(top_k=5, name='RecallAt5')],
run_eagerly=flags_obj.run_eagerly)
callbacks = []
if checkpoint_dir:
checkpoint_prefix = os.path.join(checkpoint_dir, 'ckpt_{epoch}')
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
filepath=checkpoint_prefix,
save_weights_only=True)
callbacks.append(checkpoint_callback)
time_callback = keras_utils.TimeHistory(flags_obj.batch_size,
flags_obj.log_steps)
callbacks.append(time_callback)
history = model.fit(dataset,
epochs=flags_obj.train_epochs,
steps_per_epoch=train_steps,
callbacks=callbacks,
verbose=2)
return history, callbacks
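# Illustrative call (assuming flags have already been parsed into flags_obj):
#   history, callbacks = train_model(flags_obj, dataset, len(idx2char), strategy,
#                                    checkpoint_dir='/tmp/shakespeare')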
def make_prediction(checkpoint_dir, length, context, idx2char, char2idx):
"""Make predictions from a Shakespeare model.
Args:
checkpoint_dir: the directory from which to load checkpoints
length: the total length of the generated text (including the context).
context: the initial text with which the LSTM is primed.
idx2char: the character class to character mapping.
char2idx: the character to character class mapping.
Returns:
A generated string of text of the given length.
"""
prediction_model = build_model(
vocab_size=len(idx2char), batch_size=1, stateful=True)
prediction_model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))
prediction_model.build(tf.TensorShape([1, None]))
input_eval = [char2idx[s] for s in context]
input_eval = tf.expand_dims(input_eval, 0)
text_generated = []
prediction_model.reset_states()
for _ in range(length - len(context)):
predictions = prediction_model(input_eval)
predictions = tf.squeeze(predictions, 0)
# We applied a softmax to the output of the model so that
# tf.keras.metrics.Recall would work. We need logits for
# tf.random.categorical, so we convert the probabilities back to log odds
predictions = tf.math.log(predictions / (1 - predictions))
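    # (log(p / (1 - p)) is the per-class log-odds, i.e. log(p) - log(1 - p).)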
random_output = tf.random.categorical(predictions, num_samples=1)
selected_id = random_output[-1, 0].numpy()
input_eval = tf.expand_dims([selected_id], 0)
text_generated.append(idx2char[selected_id])
return context + ''.join(text_generated)
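# Illustrative usage (the checkpoint directory and context are examples only):
#   text = make_prediction('/tmp/shakespeare', 1000, u'ROMEO:', idx2char, char2idx)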
def run(flags_obj):
"""Run Shakespeare training and predict.
Args:
flags_obj: An object containing parsed flag values.
Returns:
Dictionary with status from the run.
"""
if not flags_obj.training_data:
raise ValueError(
        'Must set the path to a training data file, e.g. download the following '
'https://storage.googleapis.com/download.tensorflow.org/data/'
'shakespeare.txt')
if flags_obj.dtype == 'fp16':
tf.keras.mixed_precision.set_global_policy('mixed_float16')
keras_utils.set_session_config(
enable_xla=flags_obj.enable_xla)
strategy = distribute_utils.get_distribution_strategy(
distribution_strategy=flags_obj.distribution_strategy,
num_gpus=flags_obj.num_gpus)
dataset, idx2char, char2idx = get_dataset(flags_obj.training_data,
batch_size=flags_obj.batch_size)
stats = {}
if flags_obj.train:
history, callbacks = train_model(flags_obj, dataset,
len(idx2char), strategy,
checkpoint_dir=flags_obj.model_dir)
stats['history'] = history.history
stats['callbacks'] = callbacks
if flags_obj.predict_context:
if not flags_obj.model_dir:
raise ValueError('Must set model_dir to get predictions.')
print(make_prediction(flags_obj.model_dir,
flags_obj.predict_length,
flags_obj.predict_context,
idx2char,
char2idx))
return stats
def main(_):
flags_obj = flags.FLAGS
run(flags_obj)
if __name__ == '__main__':
define_flags()
app.run(main)
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Helper functions to generate data directly on devices."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import random
import string
from absl import logging
import tensorflow as tf
# The `SyntheticDataset` is a temporary solution for generating synthetic data
# directly on devices. It is only useful for Keras with Distribution
# Strategies. We will have better support in `tf.data` or Distribution Strategy
# later.
class SyntheticDataset(object):
"""A dataset that generates synthetic data on each device."""
def __init__(self, dataset, split_by=1):
# dataset.take(1) doesn't have GPU kernel.
with tf.device('device:CPU:0'):
tensor = tf.data.experimental.get_single_element(dataset.take(1))
flat_tensor = tf.nest.flatten(tensor)
variable_data = []
initializers = []
for t in flat_tensor:
rebatched_t = tf.split(t, num_or_size_splits=split_by, axis=0)[0]
assert rebatched_t.shape.is_fully_defined(), rebatched_t.shape
v = tf.compat.v1.get_local_variable(
self._random_name(), initializer=rebatched_t)
variable_data.append(v)
initializers.append(v.initializer)
input_data = tf.nest.pack_sequence_as(tensor, variable_data)
self._iterator = SyntheticIterator(input_data, initializers)
def _random_name(self, size=10, chars=string.ascii_uppercase + string.digits):
return ''.join(random.choice(chars) for _ in range(size))
def __iter__(self):
return self._iterator
def make_one_shot_iterator(self):
return self._iterator
def make_initializable_iterator(self):
return self._iterator
class SyntheticIterator(object):
"""A dataset that generates synthetic data on each device."""
def __init__(self, input_data, initializers):
self._input_data = input_data
self._initializers = initializers
def get_next(self):
return self._input_data
def next(self):
return self.__next__()
def __next__(self):
try:
return self.get_next()
except tf.errors.OutOfRangeError:
raise StopIteration
def initialize(self):
if tf.executing_eagerly():
return tf.no_op()
else:
return self._initializers
def _monkey_patch_dataset_method(strategy):
"""Monkey-patch `strategy`'s `make_dataset_iterator` method."""
def make_dataset(self, dataset):
logging.info('Using pure synthetic data.')
with self.scope():
if self.extended._global_batch_size: # pylint: disable=protected-access
return SyntheticDataset(dataset, self.num_replicas_in_sync)
else:
return SyntheticDataset(dataset)
def make_iterator(self, dataset):
dist_dataset = make_dataset(self, dataset)
return iter(dist_dataset)
strategy.orig_make_dataset_iterator = strategy.make_dataset_iterator
strategy.make_dataset_iterator = make_iterator
strategy.orig_distribute_dataset = strategy.experimental_distribute_dataset
strategy.experimental_distribute_dataset = make_dataset
def _undo_monkey_patch_dataset_method(strategy):
if hasattr(strategy, 'orig_make_dataset_iterator'):
strategy.make_dataset_iterator = strategy.orig_make_dataset_iterator
if hasattr(strategy, 'orig_distribute_dataset'):
    strategy.experimental_distribute_dataset = strategy.orig_distribute_dataset
def set_up_synthetic_data():
_monkey_patch_dataset_method(tf.distribute.OneDeviceStrategy)
_monkey_patch_dataset_method(tf.distribute.MirroredStrategy)
_monkey_patch_dataset_method(
tf.distribute.experimental.MultiWorkerMirroredStrategy)
def undo_set_up_synthetic_data():
_undo_monkey_patch_dataset_method(tf.distribute.OneDeviceStrategy)
_undo_monkey_patch_dataset_method(tf.distribute.MirroredStrategy)
_undo_monkey_patch_dataset_method(
tf.distribute.experimental.MultiWorkerMirroredStrategy)
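# Illustrative flow (assumed usage): call set_up_synthetic_data() to patch the
# strategy classes so experimental_distribute_dataset returns a
# SyntheticDataset, and call undo_set_up_synthetic_data() afterwards to restore
# the original methods.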
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes Keras benchmarks and accuracy tests."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import time
from absl import flags
from absl import logging
from absl.testing import flagsaver
import tensorflow as tf
from official.benchmark import benchmark_wrappers
from official.benchmark import owner_utils
from official.benchmark.perfzero_benchmark import PerfZeroBenchmark
from official.recommendation import ncf_common
from official.recommendation import ncf_keras_main
from official.utils.flags import core
FLAGS = flags.FLAGS
NCF_DATA_DIR_NAME = 'movielens_data'
NCF_TF_REGRESSION_DATA_DIR_NAME = 'gs://tf-regression/ncf/data'
class NCFKerasBenchmarkBase(PerfZeroBenchmark):
"""Base class for NCF model benchmark."""
def __init__(self, output_dir=None, default_flags=None, **kwargs):
super(NCFKerasBenchmarkBase, self).__init__(output_dir, default_flags,
**kwargs)
# Run all benchmarks with ml_perf flag.
self.default_flags['ml_perf'] = True
def _setup(self):
"""Sets up and resets flags before each test."""
logging.set_verbosity(logging.INFO)
if NCFKerasBenchmarkBase.local_flags is None:
ncf_common.define_ncf_flags()
      # Parse a dummy argv (the list cannot be empty) to load flag defaults,
      # which are then overridden below.
flags.FLAGS(['foo'])
core.set_defaults(**self.default_flags)
saved_flag_values = flagsaver.save_flag_values()
NCFKerasBenchmarkBase.local_flags = saved_flag_values
else:
flagsaver.restore_flag_values(NCFKerasBenchmarkBase.local_flags)
@benchmark_wrappers.enable_runtime_flags
def _run_and_report_benchmark(self, hr_at_10_min=0, hr_at_10_max=0):
start_time_sec = time.time()
stats = ncf_keras_main.run_ncf(FLAGS)
wall_time_sec = time.time() - start_time_sec
metrics = []
metrics.append({
'name': 'exp_per_second',
'value': stats['avg_exp_per_second']
})
if hr_at_10_min > 0:
metrics.append({
'name': 'hr_at_10',
'value': stats['eval_hit_rate'],
'min_value': hr_at_10_min,
'max_value': hr_at_10_max
})
metrics.append({'name': 'train_loss', 'value': stats['loss']})
self.report_benchmark(iters=-1, wall_time=wall_time_sec, metrics=metrics)
class NCFKerasAccuracy(NCFKerasBenchmarkBase):
"""Benchmark NCF model using real data."""
def __init__(self,
output_dir=None,
root_data_dir=None,
default_flags=None,
**kwargs):
root_data_dir = root_data_dir if root_data_dir else ''
default_flags = {}
default_flags['dataset'] = 'ml-20m'
default_flags['num_gpus'] = 1
default_flags['train_epochs'] = 10
default_flags['clean'] = True
default_flags['batch_size'] = 99000
default_flags['learning_rate'] = 0.00382059
default_flags['beta1'] = 0.783529
default_flags['beta2'] = 0.909003
default_flags['epsilon'] = 1.45439e-07
default_flags['layers'] = [256, 256, 128, 64]
default_flags['num_factors'] = 64
default_flags['hr_threshold'] = 0.635
default_flags['ml_perf'] = True
default_flags['use_synthetic_data'] = False
default_flags['data_dir'] = os.path.join(root_data_dir, NCF_DATA_DIR_NAME)
super(NCFKerasAccuracy, self).__init__(
output_dir=output_dir, default_flags=default_flags, **kwargs)
def _run_and_report_benchmark_mlperf_like(self):
"""Run test and report results.
    Note: MLPerf-like tests are not tuned to hit a specific hr@10 value, but
    we want it recorded.
"""
self._run_and_report_benchmark(hr_at_10_min=0.61)
def _run_and_report_benchmark(self, hr_at_10_min=0.630, hr_at_10_max=0.645):
"""Run test and report results.
Note: Target is 0.635, but some runs are below that level. Until we have
multi-run tests, we have to accept a lower target.
Args:
hr_at_10_min: Minimum acceptable hr@10 value.
hr_at_10_max: Maximum acceptable hr@10 value.
"""
super(NCFKerasAccuracy, self)._run_and_report_benchmark(
hr_at_10_min=hr_at_10_min, hr_at_10_max=hr_at_10_max)
def _set_8_gpu_defaults(self):
FLAGS.num_gpus = 8
FLAGS.learning_rate = 0.0045
FLAGS.beta1 = 0.25
FLAGS.beta2 = 0.5
FLAGS.epsilon = 1e-8
FLAGS.train_epochs = 14
FLAGS.batch_size = 99000
FLAGS.eval_batch_size = 160000
FLAGS.train_dataset_path = os.path.join(NCF_TF_REGRESSION_DATA_DIR_NAME,
'training_cycle_*/*')
FLAGS.eval_dataset_path = os.path.join(NCF_TF_REGRESSION_DATA_DIR_NAME,
'eval_data/*')
FLAGS.input_meta_data_path = os.path.join(NCF_TF_REGRESSION_DATA_DIR_NAME,
'metadata')
FLAGS.data_dir = NCF_TF_REGRESSION_DATA_DIR_NAME
def benchmark_1_gpu_early_stop(self):
self._setup()
FLAGS.early_stopping = True
self._run_and_report_benchmark()
def benchmark_1_gpu_no_dist_strat_early_stop(self):
self._setup()
FLAGS.distribution_strategy = 'off'
FLAGS.early_stopping = True
self._run_and_report_benchmark()
def benchmark_1_gpu_no_dist_strat_run_eagerly_early_stop(self):
self._setup()
FLAGS.distribution_strategy = 'off'
FLAGS.early_stopping = True
FLAGS.run_eagerly = True
self._run_and_report_benchmark()
def benchmark_xla_1_gpu_early_stop(self):
self._setup()
FLAGS.early_stopping = True
FLAGS.enable_xla = True
self._run_and_report_benchmark()
def benchmark_1_gpu_ctl_early_stop(self):
self._setup()
FLAGS.keras_use_ctl = True
FLAGS.early_stopping = True
self._run_and_report_benchmark()
def benchmark_1_gpu_ctl_run_eagerly_early_stop(self):
self._setup()
FLAGS.keras_use_ctl = True
FLAGS.early_stopping = True
FLAGS.run_eagerly = True
self._run_and_report_benchmark()
def benchmark_xla_1_gpu_ctl_early_stop(self):
self._setup()
FLAGS.keras_use_ctl = True
FLAGS.early_stopping = True
FLAGS.enable_xla = True
self._run_and_report_benchmark()
def benchmark_2_gpus_early_stop(self):
self._setup()
FLAGS.early_stopping = True
FLAGS.num_gpus = 2
FLAGS.eval_batch_size = 160000
self._run_and_report_benchmark()
def benchmark_2_gpus_ctl_early_stop(self):
"""NCF with custom training loop. Works only in TF 2.0."""
self._setup()
FLAGS.keras_use_ctl = True
FLAGS.early_stopping = True
FLAGS.num_gpus = 2
FLAGS.eval_batch_size = 160000
self._run_and_report_benchmark()
#############################################
# Tests below with mlperf in the test name are of two types:
# 1) 1 GPU tests are based on MLPerf 0.5 and the TensorFlow pulled submission.
  # 2) 8 GPU tests are based on MLPerf 0.5 and use NVIDIA's hyperparameters.
  #
  # The purpose of both is to get a number to compare to existing results. To do
  # this, the number of epochs is held constant rather than racing to a given
  # accuracy. The accuracy validation is done by the "early_stop" tests.
#############################################
def benchmark_1_gpu_mlperf_like(self):
"""1 GPU using keras fit/compile."""
self._setup()
FLAGS.train_epochs = 7
self._run_and_report_benchmark_mlperf_like()
def benchmark_1_gpu_no_dist_strat_mlperf_like(self):
"""1 GPU using compile/fit without dist_strat."""
self._setup()
FLAGS.train_epochs = 7
FLAGS.distribution_strategy = 'off'
self._run_and_report_benchmark_mlperf_like()
def benchmark_1_gpu_no_dist_strat_run_eagerly_mlperf_like(self):
self._setup()
FLAGS.train_epochs = 7
FLAGS.distribution_strategy = 'off'
FLAGS.run_eagerly = True
self._run_and_report_benchmark_mlperf_like()
def benchmark_xla_1_gpu_mlperf_like(self):
"""1 GPU using compile/fit with XLA."""
self._setup()
FLAGS.train_epochs = 7
FLAGS.enable_xla = True
self._run_and_report_benchmark_mlperf_like()
def benchmark_1_gpu_ctl_mlperf_like(self):
"""1 GPU using CTL."""
self._setup()
FLAGS.keras_use_ctl = True
FLAGS.train_epochs = 7
self._run_and_report_benchmark_mlperf_like()
def benchmark_1_gpu_ctl_fp16_mlperf_like(self):
"""1 GPU using CTL and FP16."""
self._setup()
FLAGS.keras_use_ctl = True
FLAGS.train_epochs = 7
FLAGS.dtype = 'fp16'
FLAGS.loss_scale = 8192
self._run_and_report_benchmark_mlperf_like()
def benchmark_1_gpu_fp16_mlperf_like(self):
"""1 GPU using FP16."""
self._setup()
FLAGS.train_epochs = 7
FLAGS.dtype = 'fp16'
FLAGS.loss_scale = 8192
self._run_and_report_benchmark_mlperf_like()
def benchmark_1_gpu_ctl_run_eagerly_mlperf_like(self):
"""1 GPU using CTL with eager and distribution strategy."""
self._setup()
FLAGS.keras_use_ctl = True
FLAGS.run_eagerly = True
FLAGS.train_epochs = 7
self._run_and_report_benchmark()
def benchmark_xla_1_gpu_ctl_mlperf_like(self):
"""1 GPU using CTL with XLA."""
self._setup()
FLAGS.keras_use_ctl = True
FLAGS.enable_xla = True
FLAGS.train_epochs = 7
self._run_and_report_benchmark_mlperf_like()
def benchmark_xla_1_gpu_fp16_mlperf_like(self):
"""1 GPU using with XLA and FP16."""
self._setup()
FLAGS.enable_xla = True
FLAGS.train_epochs = 7
FLAGS.dtype = 'fp16'
FLAGS.loss_scale = 8192
self._run_and_report_benchmark_mlperf_like()
def benchmark_xla_1_gpu_ctl_fp16_mlperf_like(self):
"""1 GPU using CTL with XLA and FP16."""
self._setup()
FLAGS.keras_use_ctl = True
FLAGS.enable_xla = True
FLAGS.train_epochs = 7
FLAGS.dtype = 'fp16'
FLAGS.loss_scale = 8192
self._run_and_report_benchmark_mlperf_like()
def benchmark_8_gpu_mlperf_like(self):
"""8 GPU using keras fit/compile."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.train_epochs = 17
FLAGS.batch_size = 1048576
FLAGS.eval_batch_size = 160000
FLAGS.learning_rate = 0.0045
FLAGS.beta1 = 0.25
FLAGS.beta2 = 0.5
FLAGS.epsilon = 1e-8
self._run_and_report_benchmark_mlperf_like()
def benchmark_8_gpu_ctl_mlperf_like(self):
"""8 GPU using CTL."""
self._setup()
FLAGS.keras_use_ctl = True
FLAGS.num_gpus = 8
FLAGS.train_epochs = 17
FLAGS.batch_size = 1048576
FLAGS.eval_batch_size = 160000
FLAGS.learning_rate = 0.0045
FLAGS.beta1 = 0.25
FLAGS.beta2 = 0.5
FLAGS.epsilon = 1e-8
self._run_and_report_benchmark_mlperf_like()
def benchmark_8_gpu_tf_data_ctl_mlperf_like(self):
"""8 GPU using CTL."""
self._setup()
self._set_8_gpu_defaults()
FLAGS.keras_use_ctl = True
self._run_and_report_benchmark_mlperf_like()
def benchmark_8_gpu_tf_data_fp16_mlperf_like(self):
"""8 GPU FP16."""
self._setup()
self._set_8_gpu_defaults()
FLAGS.dtype = 'fp16'
FLAGS.loss_scale = 8192
self._run_and_report_benchmark_mlperf_like()
def benchmark_8_gpu_tf_data_ctl_fp16_mlperf_like(self):
"""8 GPU FP16 using CTL."""
self._setup()
self._set_8_gpu_defaults()
FLAGS.keras_use_ctl = True
FLAGS.dtype = 'fp16'
FLAGS.loss_scale = 8192
self._run_and_report_benchmark_mlperf_like()
class NCFKerasBenchmarkReal(NCFKerasBenchmarkBase):
"""NCF Keras throughput benchmarks."""
def __init__(self,
output_dir=None,
root_data_dir=None,
default_flags=None,
**kwargs):
root_data_dir = root_data_dir if root_data_dir else ''
default_flags = {}
default_flags['dataset'] = 'ml-20m'
default_flags['num_gpus'] = 1
default_flags['train_epochs'] = 14
default_flags['clean'] = True
default_flags['batch_size'] = 99000
default_flags['eval_batch_size'] = 160000
default_flags['learning_rate'] = 0.00382059
default_flags['beta1'] = 0.783529
default_flags['beta2'] = 0.909003
default_flags['epsilon'] = 1.45439e-07
default_flags['layers'] = [256, 256, 128, 64]
default_flags['num_factors'] = 64
default_flags['hr_threshold'] = 0.635
default_flags['ml_perf'] = True
default_flags['use_synthetic_data'] = False
default_flags['train_dataset_path'] = os.path.join(
NCF_TF_REGRESSION_DATA_DIR_NAME, 'training_cycle_*/*')
default_flags['eval_dataset_path'] = os.path.join(
NCF_TF_REGRESSION_DATA_DIR_NAME, 'eval_data/*')
default_flags['input_meta_data_path'] = os.path.join(
NCF_TF_REGRESSION_DATA_DIR_NAME, 'metadata')
default_flags['data_dir'] = NCF_TF_REGRESSION_DATA_DIR_NAME
super(NCFKerasBenchmarkReal, self).__init__(
output_dir=output_dir, default_flags=default_flags, **kwargs)
def benchmark_2x2_tpu(self):
"""2x2 TPU using CTL with distribution strategy."""
self._setup()
FLAGS.distribution_strategy = 'tpu'
FLAGS.keras_use_ctl = True
FLAGS.num_gpus = 0
FLAGS.train_epochs = 1
self._run_and_report_benchmark()
@owner_utils.Owner('tf-graph-compiler')
def benchmark_2x2_tpu_mlir(self):
"""2x2 TPU using CTL with distribution strategy using the MLIR bridge."""
self._setup()
FLAGS.distribution_strategy = 'tpu'
FLAGS.keras_use_ctl = True
FLAGS.num_gpus = 0
FLAGS.train_epochs = 1
tf.config.experimental.enable_mlir_bridge()
self._run_and_report_benchmark()
class NCFKerasSynth(NCFKerasBenchmarkBase):
"""Benchmark NCF model using synthetic data."""
def __init__(self, output_dir=None, default_flags=None, **kwargs):
default_flags = {}
default_flags['dataset'] = 'ml-20m'
default_flags['num_gpus'] = 1
default_flags['train_epochs'] = 8
default_flags['batch_size'] = 99000
default_flags['eval_batch_size'] = 160000
default_flags['learning_rate'] = 0.00382059
default_flags['beta1'] = 0.783529
default_flags['beta2'] = 0.909003
default_flags['epsilon'] = 1.45439e-07
default_flags['layers'] = [256, 256, 128, 64]
default_flags['num_factors'] = 64
default_flags['hr_threshold'] = 0.635
default_flags['use_synthetic_data'] = True
super(NCFKerasSynth, self).__init__(
output_dir=output_dir, default_flags=default_flags, **kwargs)
def benchmark_1_gpu(self):
self._setup()
self._run_and_report_benchmark()
def benchmark_2_gpus(self):
self._setup()
FLAGS.num_gpus = 2
self._run_and_report_benchmark()
if __name__ == '__main__':
tf.test.main()
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes benchmark testing for bert pretraining."""
# pylint: disable=line-too-long
from __future__ import print_function
import time
from typing import Optional
from absl import flags
import tensorflow as tf
from official.benchmark import benchmark_wrappers
from official.benchmark import owner_utils
from official.benchmark import perfzero_benchmark
from official.projects.nhnet import trainer
from official.utils.flags import core as flags_core
MIN_LOSS = 0.40
MAX_LOSS = 0.55
NHNET_DATA = 'gs://tf-perfzero-data/nhnet/v1/processed/train.tfrecord*'
PRETRAINED_CHECKPOINT_PATH = 'gs://cloud-tpu-checkpoints/bert/keras_bert/uncased_L-12_H-768_A-12/bert_model.ckpt'
FLAGS = flags.FLAGS
class NHNetBenchmark(perfzero_benchmark.PerfZeroBenchmark):
"""Base benchmark class for NHNet."""
def __init__(self, output_dir=None, default_flags=None, tpu=None, **kwargs):
self.default_flags = default_flags or {}
flag_methods = trainer.define_flags()
super(NHNetBenchmark, self).__init__(
output_dir=output_dir,
default_flags=default_flags,
flag_methods=flag_methods,
tpu=tpu,
**kwargs)
def _report_benchmark(self,
stats,
wall_time_sec,
max_value=None,
min_value=None):
"""Report benchmark results by writing to local protobuf file.
Args:
stats: dict returned from keras models with known entries.
      wall_time_sec: the duration of the benchmark execution in seconds.
max_value: highest passing level.
min_value: lowest passing level.
"""
metrics = []
metrics.append({
'name': 'training_loss',
'value': stats['training_loss'],
'min_value': min_value,
'max_value': max_value
})
# These metrics are placeholders to avoid PerfZero failure.
metrics.append({
'name': 'exp_per_second',
'value': 0.0,
})
metrics.append({
'name': 'startup_time',
'value': 9999.,
})
flags_str = flags_core.get_nondefault_flags_as_str()
self.report_benchmark(
iters=-1,
wall_time=wall_time_sec,
metrics=metrics,
extras={'flags': flags_str})
class NHNetAccuracyBenchmark(NHNetBenchmark):
"""Benchmark accuracy tests for NHNet."""
def __init__(self,
output_dir: Optional[str] = None,
tpu: Optional[str] = None,
**kwargs):
default_flags = dict(
mode='train',
train_file_pattern=NHNET_DATA,
train_batch_size=1024,
model_type='nhnet',
len_title=15,
len_passage=200,
num_encoder_layers=12,
num_decoder_layers=12,
num_nhnet_articles=5,
steps_per_loop=1000,
params_override='init_from_bert2bert=false')
super(NHNetAccuracyBenchmark, self).__init__(
output_dir=output_dir, default_flags=default_flags, tpu=tpu, **kwargs)
@benchmark_wrappers.enable_runtime_flags
def _run_and_report_benchmark(self, max_value=MAX_LOSS, min_value=MIN_LOSS):
"""Runs and reports the benchmark given the provided configuration."""
start_time_sec = time.time()
stats = trainer.run()
wall_time_sec = time.time() - start_time_sec
self._report_benchmark(
stats, wall_time_sec, max_value=max_value, min_value=min_value)
@owner_utils.Owner('tf-model-garden')
def benchmark_accuracy_4x4_tpu_f32_50k_steps(self):
"""Test bert pretraining with 4x4 TPU for 50k steps."""
# This is used for accuracy test.
self._setup()
FLAGS.train_steps = 50000
FLAGS.checkpoint_interval = FLAGS.train_steps
FLAGS.distribution_strategy = 'tpu'
FLAGS.init_checkpoint = PRETRAINED_CHECKPOINT_PATH
FLAGS.model_dir = self._get_model_dir(
        'benchmark_accuracy_4x4_tpu_f32_50k_steps')
self._run_and_report_benchmark()
@owner_utils.Owner('tf-model-garden')
def benchmark_accuracy_4x4_tpu_f32_1k_steps(self):
"""Test bert pretraining with 4x4 TPU for 1k steps."""
self._setup()
FLAGS.train_steps = 1000
FLAGS.checkpoint_interval = FLAGS.train_steps
FLAGS.distribution_strategy = 'tpu'
FLAGS.model_dir = self._get_model_dir(
        'benchmark_accuracy_4x4_tpu_f32_1k_steps')
self._run_and_report_benchmark()
if __name__ == '__main__':
tf.test.main()
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utils to set Owner annotations on benchmarks.
@owner_utils.Owner('owner_team/user') can be set at the benchmark class level,
at the benchmark method level, or both.
Runner frameworks can use owner_utils.GetOwner(benchmark_method) to get the
actual owner. Python inheritance for the owner attribute is respected (e.g. a
method-level owner takes precedence over a class-level one).
See owner_utils_test for associated tests and more examples.
The decorator can be applied both at the method level and at the class level.
Simple example:
===============
class MLBenchmark:
@Owner('example_id')
def benchmark_method_1_gpu(self):
return True
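
  Retrieving the owner (illustrative):
    owner_utils.GetOwner(MLBenchmark.benchmark_method_1_gpu)  # -> 'example_id'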
"""
def Owner(owner_name):
"""Sets the owner attribute on a decorated method or class."""
def _Wrapper(func_or_class):
"""Sets the benchmark owner attribute."""
func_or_class.__benchmark__owner__ = owner_name
return func_or_class
return _Wrapper
def GetOwner(benchmark_method_or_class):
"""Gets the inherited owner attribute for this benchmark.
Checks for existence of __benchmark__owner__. If it's not present, looks for
it in the parent class's attribute list.
Args:
benchmark_method_or_class: A benchmark method or class.
Returns:
    The associated owner string if present, otherwise None.
"""
if hasattr(benchmark_method_or_class, '__benchmark__owner__'):
return benchmark_method_or_class.__benchmark__owner__
elif hasattr(benchmark_method_or_class, '__self__'):
if hasattr(benchmark_method_or_class.__self__, '__benchmark__owner__'):
return benchmark_method_or_class.__self__.__benchmark__owner__
return None
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for official.benchmark.owner_utils."""
from absl.testing import absltest
from official.benchmark import owner_utils
@owner_utils.Owner('static_owner')
def static_function(foo=5):
return foo
def static_function_without_owner(foo=5):
return foo
class BenchmarkClassWithoutOwner:
def method_without_owner(self):
return 100
@owner_utils.Owner('method_owner')
def method_with_owner(self):
return 200
@owner_utils.Owner('class_owner')
class SomeBenchmarkClass:
def method_inherited_owner(self):
return 123
@owner_utils.Owner('method_owner')
def method_override_owner(self):
return 345
@owner_utils.Owner('new_class_owner')
class InheritedClass(SomeBenchmarkClass):
def method_inherited_owner(self):
return 456
@owner_utils.Owner('new_method_owner')
def method_override_owner(self):
return 567
class OwnerUtilsTest(absltest.TestCase):
"""Tests to assert for owner decorator functionality."""
def test_owner_tag_missing(self):
self.assertEqual(None, owner_utils.GetOwner(static_function_without_owner))
benchmark_class = BenchmarkClassWithoutOwner()
self.assertEqual(None,
owner_utils.GetOwner(benchmark_class.method_without_owner))
self.assertEqual(100, benchmark_class.method_without_owner())
self.assertEqual('method_owner',
owner_utils.GetOwner(benchmark_class.method_with_owner))
self.assertEqual(200, benchmark_class.method_with_owner())
def test_owner_attributes_static(self):
self.assertEqual('static_owner', owner_utils.GetOwner(static_function))
self.assertEqual(5, static_function(5))
def test_owner_attributes_per_class(self):
level1 = SomeBenchmarkClass()
self.assertEqual('class_owner',
owner_utils.GetOwner(level1.method_inherited_owner))
self.assertEqual(123, level1.method_inherited_owner())
self.assertEqual('method_owner',
owner_utils.GetOwner(level1.method_override_owner))
self.assertEqual(345, level1.method_override_owner())
def test_owner_attributes_inherited_class(self):
level2 = InheritedClass()
self.assertEqual('new_class_owner',
owner_utils.GetOwner(level2.method_inherited_owner))
self.assertEqual(456, level2.method_inherited_owner())
self.assertEqual('new_method_owner',
owner_utils.GetOwner(level2.method_override_owner))
self.assertEqual(567, level2.method_override_owner())
if __name__ == '__main__':
absltest.main()
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utils for creating PerfZero benchmarks."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
from absl import flags
from absl import logging
from absl.testing import flagsaver
import tensorflow as tf
FLAGS = flags.FLAGS
class PerfZeroBenchmark(tf.test.Benchmark):
"""Common methods used in PerfZero Benchmarks.
Handles the resetting of flags between tests, loading of default_flags,
  and overriding of defaults. PerfZero (OSS) runs each test in a separate
  process, which reduces the need to reset flags between tests.
"""
local_flags = None
def __init__(self,
output_dir=None,
default_flags=None,
root_data_dir=None,
flag_methods=None,
tpu=None):
"""Initialize class.
Args:
output_dir: Base directory to store all output for the test.
default_flags: Set of flags to pass to model.
root_data_dir: Optional param used by child classes to look for the
dataset.
flag_methods: Set of flag methods to run during setup.
tpu: (optional) TPU name to use in a TPU benchmark.
"""
if os.getenv('BENCHMARK_OUTPUT_DIR'):
self.output_dir = os.getenv('BENCHMARK_OUTPUT_DIR')
elif output_dir:
self.output_dir = output_dir
else:
self.output_dir = '/tmp'
self.default_flags = default_flags or {}
    self.flag_methods = flag_methods or []
if os.getenv('BENCHMARK_TPU'):
resolved_tpu = os.getenv('BENCHMARK_TPU')
elif tpu:
resolved_tpu = tpu
else:
resolved_tpu = None
if resolved_tpu:
# TPU models are expected to accept a --tpu=name flag. PerfZero creates
# the TPU at runtime and passes the TPU's name to this flag.
self.default_flags['tpu'] = resolved_tpu
logging.info('root_data_dir: %s', root_data_dir)
@property
def tpu(self):
return self.default_flags.get('tpu', None)
def _get_model_dir(self, folder_name):
"""Returns directory to store info, e.g. saved model and event log."""
return os.path.join(self.output_dir, folder_name)
def _setup(self):
"""Sets up and resets flags before each test."""
logging.set_verbosity(logging.INFO)
if PerfZeroBenchmark.local_flags is None:
for flag_method in self.flag_methods:
flag_method()
      # Parse a dummy argv to load flag defaults; the argument list cannot
      # be empty.
flags.FLAGS(['foo'])
# Overrides flag values with defaults for the class of tests.
for k, v in self.default_flags.items():
setattr(FLAGS, k, v)
saved_flag_values = flagsaver.save_flag_values()
PerfZeroBenchmark.local_flags = saved_flag_values
else:
flagsaver.restore_flag_values(PerfZeroBenchmark.local_flags)
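# Illustrative subclass (names and flags are examples only):
#
#   class MyModelBenchmark(PerfZeroBenchmark):
#
#     def benchmark_1_gpu(self):
#       self._setup()
#       FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu')
#       # ... run the model, then call self.report_benchmark(...)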
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Resnet50 Keras core benchmark."""
import statistics
import tempfile
import time
import tensorflow as tf
import tensorflow_datasets as tfds
from official.benchmark import perfzero_benchmark
def _decode_and_center_crop(image_bytes):
"""Crops to center of image with padding then scales image_size."""
shape = tf.image.extract_jpeg_shape(image_bytes)
image_height, image_width, image_size = shape[0], shape[1], 224
padded_center_crop_size = tf.cast(
((image_size / (image_size + 32)) *
tf.cast(tf.minimum(image_height, image_width), tf.float32)),
tf.int32,
)
offset_height = ((image_height - padded_center_crop_size) + 1) // 2
offset_width = ((image_width - padded_center_crop_size) + 1) // 2
crop_window = tf.stack([
offset_height, offset_width, padded_center_crop_size,
padded_center_crop_size
])
image = tf.image.decode_and_crop_jpeg(image_bytes, crop_window, channels=3)
return tf.image.resize(image, [image_size, image_size], method="bicubic")
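# Worked example for _decode_and_center_crop (illustrative): for a 375x500
# image, padded_center_crop_size = int(224 / (224 + 32) * 375) = 328, so a
# 328x328 window offset by ((375 - 328 + 1) // 2, (500 - 328 + 1) // 2) =
# (24, 86) is cropped out and then resized to 224x224 with bicubic resampling.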
def _preprocessing(data):
return (
tf.cast(_decode_and_center_crop(data["image"]), tf.float32),
data["label"],
)
def _run_benchmark():
"""Runs a resnet50 compile/fit() call and returns the wall time."""
tmp_dir = tempfile.mkdtemp()
start_time = time.time()
batch_size = 64
dataset = tfds.load(
"imagenette",
decoders={"image": tfds.decode.SkipDecoding()},
split="train",
)
dataset = (
dataset.cache().repeat(
2
) # Artificially increase time per epoch to make it easier to measure
.map(_preprocessing,
num_parallel_calls=tf.data.experimental.AUTOTUNE).batch(
batch_size).prefetch(1))
with tf.distribute.MirroredStrategy().scope():
model = tf.keras.applications.ResNet50(weights=None)
model.compile(
optimizer=tf.compat.v1.mixed_precision
.enable_mixed_precision_graph_rewrite(
tf.keras.optimizers.Adam(), loss_scale="dynamic"),
loss="sparse_categorical_crossentropy",
)
tb_cbk = tf.keras.callbacks.TensorBoard(
f"{tmp_dir}/{tf.__version__}", profile_batch=300)
model.fit(dataset, verbose=2, epochs=3, callbacks=[tb_cbk])
end_time = time.time()
return end_time - start_time
class Resnet50KerasCoreBenchmark(perfzero_benchmark.PerfZeroBenchmark):
"""Resnet50 Keras core benchmarks."""
def benchmark_1_gpu(self):
wall_time = _run_benchmark()
self.report_benchmark(iters=-1, wall_time=wall_time)
def benchmark_1_gpu_avg_3(self):
num_trials = 3
wall_times = []
for _ in range(num_trials):
wall_times.append(_run_benchmark())
avg_wall_time = statistics.mean(wall_times)
self.report_benchmark(iters=-1, wall_time=avg_wall_time)
def benchmark_1_gpu_max_3(self):
num_trials = 3
wall_times = []
for _ in range(num_trials):
wall_times.append(_run_benchmark())
max_wall_time = max(wall_times)
self.report_benchmark(iters=-1, wall_time=max_wall_time)
def benchmark_1_gpu_min_3(self):
num_trials = 3
wall_times = []
for _ in range(num_trials):
wall_times.append(_run_benchmark())
min_wall_time = min(wall_times)
self.report_benchmark(iters=-1, wall_time=min_wall_time)
def benchmark_1_gpu_med_3(self):
num_trials = 3
wall_times = []
for _ in range(num_trials):
wall_times.append(_run_benchmark())
med_wall_time = statistics.median(wall_times)
self.report_benchmark(iters=-1, wall_time=med_wall_time)
if __name__ == "__main__":
tf.test.main()
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes CTL benchmarks and accuracy tests."""
# pylint: disable=line-too-long,g-bad-import-order
from __future__ import print_function
import os # pylint: disable=unused-import
import time
from absl import flags
import tensorflow as tf
from official.benchmark import owner_utils
from official.legacy.image_classification.resnet import common
from official.legacy.image_classification.resnet import resnet_ctl_imagenet_main
from official.benchmark.perfzero_benchmark import PerfZeroBenchmark
from official.benchmark import benchmark_wrappers
from official.utils.flags import core as flags_core
IMAGENET_DEFAULT_DATA_PATH = 'gs://mlcompass-data/imagenet/imagenet-2012-tfrecord'
# TODO(emizan) Remove comment once you make sure that dataset caching has similar or better
# performance than the uncached local SSD dataset below.
# IMAGENET_EXP_DATA_PATH = 'gs://mlcompass-data/imagenet/imagenet-2012-tfrecord'
MIN_TOP_1_ACCURACY = 0.76
MAX_TOP_1_ACCURACY = 0.77
FLAGS = flags.FLAGS
class CtlBenchmark(PerfZeroBenchmark):
"""Base benchmark class with methods to simplify testing."""
def __init__(self,
output_dir=None,
default_flags=None,
flag_methods=None,
**kwargs):
self.default_flags = default_flags or {}
self.flag_methods = flag_methods or {}
super(CtlBenchmark, self).__init__(
output_dir=output_dir,
default_flags=self.default_flags,
flag_methods=self.flag_methods,
**kwargs)
def _report_benchmark(self,
stats,
wall_time_sec,
top_1_max=None,
top_1_min=None,
total_batch_size=None,
log_steps=None,
warmup=1,
start_time_sec=None):
"""Report benchmark results by writing to local protobuf file.
Args:
stats: dict returned from keras models with known entries.
      wall_time_sec: the duration of the benchmark execution in seconds.
top_1_max: highest passing level for top_1 accuracy.
top_1_min: lowest passing level for top_1 accuracy.
total_batch_size: Global batch-size.
log_steps: How often the log was created for stats['step_timestamp_log'].
warmup: number of entries in stats['step_timestamp_log'] to ignore.
start_time_sec: the start time of the program in seconds since epoch.
"""
metrics = []
if 'eval_acc' in stats:
metrics.append({
'name': 'accuracy_top_1',
'value': stats['eval_acc'],
'min_value': top_1_min,
'max_value': top_1_max
})
metrics.append({'name': 'eval_loss', 'value': stats['eval_loss']})
metrics.append({
'name': 'top_1_train_accuracy',
'value': stats['train_acc']
})
metrics.append({'name': 'train_loss', 'value': stats['train_loss']})
if (warmup and 'step_timestamp_log' in stats and
len(stats['step_timestamp_log']) > warmup + 1):
      # The first entry in time_log marks the start of step 0; the remaining
      # entries mark the end of each recorded step.
time_log = stats['step_timestamp_log']
steps_elapsed = time_log[-1].batch_index - time_log[warmup].batch_index
time_elapsed = time_log[-1].timestamp - time_log[warmup].timestamp
examples_per_sec = total_batch_size * (steps_elapsed / time_elapsed)
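      # E.g. (illustrative): total_batch_size=1024 and 400 steps elapsed over
      # 100 seconds gives 1024 * 400 / 100 = 4096 examples per second.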
metrics.append({'name': 'exp_per_second', 'value': examples_per_sec})
if 'avg_exp_per_second' in stats:
metrics.append({
'name': 'avg_exp_per_second',
'value': stats['avg_exp_per_second']
})
if start_time_sec and 'step_timestamp_log' in stats:
time_log = stats['step_timestamp_log']
# time_log[0] is recorded at the beginning of the first step.
startup_time = time_log[0].timestamp - start_time_sec
metrics.append({'name': 'startup_time', 'value': startup_time})
flags_str = flags_core.get_nondefault_flags_as_str()
self.report_benchmark(
iters=-1,
wall_time=wall_time_sec,
metrics=metrics,
extras={'flags': flags_str})
class Resnet50CtlAccuracy(CtlBenchmark):
"""Benchmark accuracy tests for ResNet50 in CTL."""
def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
"""A benchmark class.
Args:
      output_dir: Directory in which to write output, e.g. log files.
      root_data_dir: Directory under which to look for the dataset.
**kwargs: arbitrary named arguments. This is needed to make the
constructor forward compatible in case PerfZero provides more named
arguments before updating the constructor.
"""
flag_methods = [common.define_keras_flags]
self.data_dir = os.path.join(root_data_dir, 'imagenet')
super(Resnet50CtlAccuracy, self).__init__(
output_dir=output_dir, flag_methods=flag_methods)
def benchmark_8_gpu(self):
"""Test Keras model with eager, dist_strat and 8 GPUs."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.data_dir = self.data_dir
FLAGS.batch_size = 128 * 8
FLAGS.train_epochs = 90
FLAGS.epochs_between_evals = 10
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu')
FLAGS.dtype = 'fp32'
self._run_and_report_benchmark()
def benchmark_8_gpu_fp16(self):
"""Test Keras model with eager, 8 GPUs with tf.keras mixed precision."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.data_dir = self.data_dir
FLAGS.batch_size = 256 * 8
FLAGS.train_epochs = 90
FLAGS.epochs_between_evals = 10
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16')
FLAGS.dtype = 'fp16'
self._run_and_report_benchmark()
@benchmark_wrappers.enable_runtime_flags
def _run_and_report_benchmark(self):
start_time_sec = time.time()
stats = resnet_ctl_imagenet_main.run(flags.FLAGS)
wall_time_sec = time.time() - start_time_sec
super(Resnet50CtlAccuracy, self)._report_benchmark(
stats,
wall_time_sec,
top_1_min=MIN_TOP_1_ACCURACY,
top_1_max=MAX_TOP_1_ACCURACY,
total_batch_size=FLAGS.batch_size,
log_steps=100,
start_time_sec=start_time_sec)
class Resnet50CtlBenchmarkBase(CtlBenchmark):
"""Resnet50 benchmarks."""
def __init__(self, output_dir=None, default_flags=None, **kwargs):
flag_methods = [common.define_keras_flags]
super(Resnet50CtlBenchmarkBase, self).__init__(
output_dir=output_dir,
flag_methods=flag_methods,
default_flags=default_flags,
**kwargs)
@benchmark_wrappers.enable_runtime_flags
def _run_and_report_benchmark(self):
start_time_sec = time.time()
stats = resnet_ctl_imagenet_main.run(FLAGS)
wall_time_sec = time.time() - start_time_sec
    # Warmup is the number of logged step-time entries excluded from the
    # performance report; by default the first FLAGS.log_steps interval is
    # excluded.
super(Resnet50CtlBenchmarkBase, self)._report_benchmark(
stats,
wall_time_sec,
total_batch_size=FLAGS.batch_size,
log_steps=FLAGS.log_steps,
warmup=1,
start_time_sec=start_time_sec)
def benchmark_1_gpu_no_dist_strat(self):
"""Test Keras model with 1 GPU, no distribution strategy."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.distribution_strategy = 'off'
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_no_dist_strat')
FLAGS.batch_size = 128
self._run_and_report_benchmark()
def benchmark_1_gpu(self):
"""Test Keras model with 1 GPU."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.distribution_strategy = 'one_device'
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu')
FLAGS.batch_size = 128
self._run_and_report_benchmark()
def benchmark_1_gpu_fp16(self):
"""Test Keras model with 1 GPU with tf.keras mixed precision."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.distribution_strategy = 'one_device'
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_fp16')
FLAGS.batch_size = 256
FLAGS.dtype = 'fp16'
self._run_and_report_benchmark()
def benchmark_1_gpu_eager(self):
"""Test Keras model with 1 GPU in pure eager mode."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.distribution_strategy = 'one_device'
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_eager')
FLAGS.batch_size = 120
FLAGS.use_tf_function = False
FLAGS.use_tf_while_loop = False
FLAGS.single_l2_loss_op = True
self._run_and_report_benchmark()
def benchmark_1_gpu_fp16_eager(self):
"""Test Keras model with 1 GPU with fp16 and pure eager mode."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.distribution_strategy = 'one_device'
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_fp16_eager')
FLAGS.batch_size = 232
FLAGS.dtype = 'fp16'
FLAGS.use_tf_function = False
FLAGS.use_tf_while_loop = False
FLAGS.single_l2_loss_op = True
self._run_and_report_benchmark()
def benchmark_8_gpu(self):
"""Test Keras model with 8 GPUs."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.distribution_strategy = 'mirrored'
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu')
FLAGS.batch_size = 128 * 8 # 8 GPUs
self._run_and_report_benchmark()
def benchmark_8_gpu_fp32_no_tf32(self):
"""Test Keras model with 8 GPUs.Runs in FP32 by disabling TF32 execution."""
self._setup()
tf.config.experimental.enable_tensor_float_32_execution(False)
FLAGS.num_gpus = 8
FLAGS.distribution_strategy = 'mirrored'
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp32_no_tf32')
FLAGS.batch_size = 128 * 8 # 8 GPUs
self._run_and_report_benchmark()
def benchmark_8_gpu_fp16(self):
"""Test Keras model with 8 GPUs with tf.keras mixed precision."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.distribution_strategy = 'mirrored'
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16')
FLAGS.batch_size = 256 * 8 # 8 GPUs
FLAGS.dtype = 'fp16'
self._run_and_report_benchmark()
def benchmark_xla_8_gpu_fp16(self):
"""Test Keras model with 8 GPUs with tf.keras mixed precision."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.distribution_strategy = 'mirrored'
FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_fp16')
FLAGS.batch_size = 256 * 8 # 8 GPUs
FLAGS.dtype = 'fp16'
FLAGS.enable_xla = True
self._run_and_report_benchmark()
def benchmark_8_gpu_eager(self):
"""Test Keras model with 8 GPUs, eager, fp32."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.use_tf_function = False
FLAGS.use_tf_while_loop = False
FLAGS.distribution_strategy = 'mirrored'
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_eager')
FLAGS.batch_size = 128
self._run_and_report_benchmark()
def benchmark_8_gpu_eager_fp16(self):
"""Test Keras model with 8 GPUs, eager, fp16."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.dtype = 'fp16'
FLAGS.use_tf_function = False
FLAGS.use_tf_while_loop = False
FLAGS.distribution_strategy = 'mirrored'
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_eager_fp16')
FLAGS.batch_size = 128
self._run_and_report_benchmark()
def _set_df_common(self):
FLAGS.steps_per_loop = 500
FLAGS.train_epochs = 2
FLAGS.train_steps = None
FLAGS.skip_eval = True
FLAGS.enable_eager = True
FLAGS.enable_tensorboard = False
FLAGS.distribution_strategy = 'tpu'
FLAGS.report_accuracy_metrics = False
FLAGS.log_steps = 50
FLAGS.single_l2_loss_op = True
FLAGS.use_tf_function = True
FLAGS.enable_checkpoint_and_export = False
FLAGS.data_dir = IMAGENET_DEFAULT_DATA_PATH
def benchmark_2x2_tpu_bf16(self):
self._setup()
self._set_df_common()
FLAGS.batch_size = 1024
FLAGS.dtype = 'bf16'
FLAGS.model_dir = self._get_model_dir('benchmark_2x2_tpu_bf16')
self._run_and_report_benchmark()
@owner_utils.Owner('tf-graph-compiler')
def benchmark_2x2_tpu_bf16_mlir(self):
self._setup()
self._set_df_common()
FLAGS.batch_size = 1024
FLAGS.dtype = 'bf16'
tf.config.experimental.enable_mlir_bridge()
FLAGS.model_dir = self._get_model_dir('benchmark_2x2_tpu_bf16_mlir')
self._run_and_report_benchmark()
def benchmark_4x4_tpu_bf16(self):
self._setup()
self._set_df_common()
FLAGS.batch_size = 8192
FLAGS.train_epochs = 4
FLAGS.dtype = 'bf16'
FLAGS.model_dir = self._get_model_dir('benchmark_4x4_tpu_bf16')
FLAGS.data_dir = IMAGENET_DEFAULT_DATA_PATH
FLAGS.training_dataset_cache = True
self._run_and_report_benchmark()
@owner_utils.Owner('tf-graph-compiler')
def benchmark_4x4_tpu_bf16_mlir(self):
"""Run resnet model on 4x4 with the MLIR Bridge enabled."""
self._setup()
self._set_df_common()
FLAGS.batch_size = 4096
FLAGS.dtype = 'bf16'
FLAGS.model_dir = self._get_model_dir('benchmark_4x4_tpu_bf16_mlir')
tf.config.experimental.enable_mlir_bridge()
self._run_and_report_benchmark()
def benchmark_8x8_tpu_bf16(self):
self._setup()
self._set_df_common()
FLAGS.batch_size = 8192
FLAGS.dtype = 'bf16'
FLAGS.model_dir = self._get_model_dir('benchmark_8x8_tpu_bf16')
self._run_and_report_benchmark()
@owner_utils.Owner('tf-graph-compiler')
def benchmark_8x8_tpu_bf16_mlir(self):
self._setup()
self._set_df_common()
FLAGS.batch_size = 8192
FLAGS.dtype = 'bf16'
FLAGS.model_dir = self._get_model_dir('benchmark_8x8_tpu_bf16_mlir')
tf.config.experimental.enable_mlir_bridge()
self._run_and_report_benchmark()
def benchmark_8x8_tpu(self):
self._setup()
self._set_df_common()
FLAGS.batch_size = 8192
FLAGS.model_dir = self._get_model_dir('benchmark_8x8_tpu')
self._run_and_report_benchmark()
@owner_utils.Owner('tf-graph-compiler')
def benchmark_8x8_tpu_mlir(self):
self._setup()
self._set_df_common()
FLAGS.batch_size = 8192
FLAGS.model_dir = self._get_model_dir('benchmark_8x8_tpu_mlir')
tf.config.experimental.enable_mlir_bridge()
self._run_and_report_benchmark()
def benchmark_8x16_tpu_bf16(self):
self._setup()
self._set_df_common()
FLAGS.batch_size = 8192
FLAGS.dtype = 'bf16'
FLAGS.model_dir = self._get_model_dir('benchmark_8x16_tpu_bf16')
self._run_and_report_benchmark()
def fill_report_object(self, stats):
super(Resnet50CtlBenchmarkBase, self).fill_report_object(
stats, total_batch_size=FLAGS.batch_size, log_steps=FLAGS.log_steps)
class Resnet50CtlBenchmarkSynth(Resnet50CtlBenchmarkBase):
"""Resnet50 synthetic benchmark tests."""
def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
def_flags = {}
def_flags['skip_eval'] = True
def_flags['use_synthetic_data'] = True
def_flags['train_steps'] = 110
def_flags['steps_per_loop'] = 10
def_flags['log_steps'] = 10
super(Resnet50CtlBenchmarkSynth, self).__init__(
output_dir=output_dir, default_flags=def_flags, **kwargs)
class Resnet50CtlBenchmarkReal(Resnet50CtlBenchmarkBase):
"""Resnet50 real data benchmark tests."""
def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
def_flags = {}
def_flags['skip_eval'] = True
def_flags[
'data_dir'] = os.path.join(root_data_dir, 'imagenet')
def_flags['train_steps'] = 110
def_flags['steps_per_loop'] = 10
def_flags['log_steps'] = 10
super(Resnet50CtlBenchmarkReal, self).__init__(
output_dir=output_dir, default_flags=def_flags, **kwargs)
if __name__ == '__main__':
tf.test.main()
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes RetinaNet benchmarks and accuracy tests."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
# pylint: disable=g-bad-import-order
import json
import time
from absl import flags
from absl.testing import flagsaver
import tensorflow as tf
# pylint: enable=g-bad-import-order
from official.benchmark import benchmark_wrappers
from official.benchmark import perfzero_benchmark
from official.legacy.detection import main as detection
from official.legacy.detection.configs import base_config
from official.utils.flags import core as flags_core
from official.utils.misc import keras_utils
FLAGS = flags.FLAGS
# pylint: disable=line-too-long
COCO_TRAIN_DATA = 'gs://tf-perfzero-data/coco/train*'
COCO_EVAL_DATA = 'gs://tf-perfzero-data/coco/val*'
COCO_EVAL_JSON = 'gs://tf-perfzero-data/coco/instances_val2017.json'
RESNET_CHECKPOINT_PATH = 'gs://cloud-tpu-checkpoints/retinanet/resnet50-checkpoint-2018-02-07'
# pylint: enable=line-too-long
class BenchmarkBase(perfzero_benchmark.PerfZeroBenchmark):
"""Base class to hold methods common to test classes."""
def __init__(self, **kwargs):
super(BenchmarkBase, self).__init__(**kwargs)
self.timer_callback = None
def _report_benchmark(self, stats, start_time_sec, wall_time_sec, min_ap,
max_ap, warmup):
"""Report benchmark results by writing to local protobuf file.
Args:
stats: dict returned from Detection models with known entries.
start_time_sec: the start of the benchmark execution in seconds
wall_time_sec: the duration of the benchmark execution in seconds
min_ap: Minimum detection AP constraint to verify correctness of the
model.
max_ap: Maximum detection AP accuracy constraint to verify correctness of
the model.
warmup: Number of time log entries to ignore when computing examples/sec.
"""
metrics = [{
'name': 'total_loss',
'value': stats['total_loss'],
}]
if self.timer_callback:
metrics.append({
'name': 'exp_per_second',
'value': self.timer_callback.get_examples_per_sec(warmup)
})
metrics.append({
'name': 'startup_time',
'value': self.timer_callback.get_startup_time(start_time_sec)
})
else:
metrics.append({
'name': 'exp_per_second',
'value': 0.0,
})
if 'eval_metrics' in stats:
metrics.append({
'name': 'AP',
'value': stats['AP'],
'min_value': min_ap,
'max_value': max_ap,
})
flags_str = flags_core.get_nondefault_flags_as_str()
self.report_benchmark(
iters=stats['total_steps'],
wall_time=wall_time_sec,
metrics=metrics,
extras={'flags': flags_str})
class DetectionBenchmarkBase(BenchmarkBase):
"""Base class to hold methods common to test classes in the module."""
def __init__(self, **kwargs):
self.train_data_path = COCO_TRAIN_DATA
self.eval_data_path = COCO_EVAL_DATA
self.eval_json_path = COCO_EVAL_JSON
self.resnet_checkpoint_path = RESNET_CHECKPOINT_PATH
super(DetectionBenchmarkBase, self).__init__(**kwargs)
def _run_detection_main(self):
"""Starts detection job."""
if self.timer_callback:
FLAGS.log_steps = 0 # prevent detection.run from adding the same callback
return detection.run(callbacks=[self.timer_callback])
else:
return detection.run()
class DetectionAccuracy(DetectionBenchmarkBase):
"""Accuracy test for RetinaNet model.
Tests RetinaNet detection task model accuracy. The naming convention of
the test cases below follows the
`benchmark_(number of gpus)_gpu_(dataset type)` format.
"""
def __init__(self, model, per_gpu_batch_size=8, **kwargs):
self.model = model
self.per_gpu_batch_size = per_gpu_batch_size
super(DetectionAccuracy, self).__init__(**kwargs)
@benchmark_wrappers.enable_runtime_flags
def _run_and_report_benchmark(self,
params,
min_ap=0.325,
max_ap=0.35,
do_eval=True,
warmup=1):
"""Starts Detection accuracy benchmark test."""
FLAGS.params_override = json.dumps(params)
# Need timer callback to measure performance
self.timer_callback = keras_utils.TimeHistory(
batch_size=params['train']['batch_size'],
log_steps=FLAGS.log_steps,
)
start_time_sec = time.time()
FLAGS.mode = 'train'
summary, _ = self._run_detection_main()
wall_time_sec = time.time() - start_time_sec
if do_eval:
FLAGS.mode = 'eval'
eval_metrics = self._run_detection_main()
summary.update(eval_metrics)
summary['total_steps'] = params['train']['total_steps']
self._report_benchmark(summary, start_time_sec, wall_time_sec, min_ap,
max_ap, warmup)
def _setup(self):
super(DetectionAccuracy, self)._setup()
FLAGS.model = self.model
def _params(self):
return {
'architecture': {
'use_bfloat16': True,
},
'train': {
'batch_size': 64,
'iterations_per_loop': 100,
'total_steps': 22500,
'train_file_pattern': self.train_data_path,
'checkpoint': {
'path': self.resnet_checkpoint_path,
'prefix': 'resnet50/'
},
# Speed up ResNet training when loading from the checkpoint.
'frozen_variable_prefix': base_config.RESNET_FROZEN_VAR_PREFIX,
},
'eval': {
'batch_size': 8,
'eval_samples': 5000,
'val_json_file': self.eval_json_path,
'eval_file_pattern': self.eval_data_path,
},
}
@flagsaver.flagsaver
def benchmark_8_gpu_coco(self):
"""Run RetinaNet model accuracy test with 8 GPUs."""
self._setup()
params = self._params()
FLAGS.num_gpus = 8
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_coco')
FLAGS.strategy_type = 'mirrored'
self._run_and_report_benchmark(params)
class DetectionBenchmarkReal(DetectionAccuracy):
"""Short benchmark performance tests for a detection model.
Tests detection performance in different accelerator configurations.
The naming convention of the test cases below follows the
`benchmark_(number of gpus)_gpu` format.
"""
def _setup(self):
super(DetectionBenchmarkReal, self)._setup()
# Use negative value to avoid saving checkpoints.
FLAGS.save_checkpoint_freq = -1
@flagsaver.flagsaver
def benchmark_8_gpu_coco(self):
"""Run detection model accuracy test with 8 GPUs."""
self._setup()
params = self._params()
params['architecture']['use_bfloat16'] = False
params['train']['total_steps'] = 1875 # One epoch.
params['train']['batch_size'] = 8 * self.per_gpu_batch_size
# The iterations_per_loop must be one, otherwise the examples-per-second
# number would be wrong. Calling the callback per batch is currently only
# supported when each loop runs a single batch, i.e. the host loop runs one
# step. Performance in this configuration may be lower than with
# iterations_per_loop > 1.
# Related bug: b/135933080
params['train']['iterations_per_loop'] = 1
params['eval']['eval_samples'] = 8
FLAGS.num_gpus = 8
FLAGS.model_dir = self._get_model_dir('real_benchmark_8_gpu_coco')
FLAGS.strategy_type = 'mirrored'
self._run_and_report_benchmark(params)
@flagsaver.flagsaver
def benchmark_1_gpu_coco(self):
"""Run detection model accuracy test with 1 GPU."""
self._setup()
params = self._params()
params['architecture']['use_bfloat16'] = False
params['train']['batch_size'] = 1 * self.per_gpu_batch_size
params['train']['total_steps'] = 200
params['train']['iterations_per_loop'] = 1
params['eval']['eval_samples'] = 8
FLAGS.num_gpus = 1
FLAGS.model_dir = self._get_model_dir('real_benchmark_1_gpu_coco')
FLAGS.strategy_type = 'one_device'
self._run_and_report_benchmark(params)
@flagsaver.flagsaver
def benchmark_xla_1_gpu_coco(self):
"""Run detection model accuracy test with 1 GPU and XLA enabled."""
self._setup()
params = self._params()
params['architecture']['use_bfloat16'] = False
params['train']['batch_size'] = 1 * self.per_gpu_batch_size
params['train']['total_steps'] = 200
params['train']['iterations_per_loop'] = 1
params['eval']['eval_samples'] = 8
FLAGS.num_gpus = 1
FLAGS.model_dir = self._get_model_dir('real_benchmark_xla_1_gpu_coco')
FLAGS.strategy_type = 'one_device'
FLAGS.enable_xla = True
self._run_and_report_benchmark(params)
@flagsaver.flagsaver
def benchmark_2x2_tpu_coco(self):
"""Run detection model accuracy test with 4 TPUs."""
self._setup()
params = self._params()
params['train']['batch_size'] = 64
params['train']['total_steps'] = 1875 # One epoch.
params['train']['iterations_per_loop'] = 500
FLAGS.model_dir = self._get_model_dir('real_benchmark_2x2_tpu_coco')
FLAGS.strategy_type = 'tpu'
self._run_and_report_benchmark(params, do_eval=False, warmup=0)
@flagsaver.flagsaver
def benchmark_4x4_tpu_coco(self):
"""Run detection model accuracy test with 4x4 TPU."""
self._setup()
params = self._params()
params['train']['batch_size'] = 256
params['train']['total_steps'] = 10 * 469 # 10 epochs.
params['train']['iterations_per_loop'] = 500
FLAGS.model_dir = self._get_model_dir('real_benchmark_4x4_tpu_coco')
FLAGS.strategy_type = 'tpu'
self._run_and_report_benchmark(params, do_eval=False, warmup=1)
@flagsaver.flagsaver
def benchmark_2x2_tpu_coco_mlir(self):
"""Run detection model accuracy test with 4 TPUs."""
self._setup()
params = self._params()
params['train']['batch_size'] = 64
params['train']['total_steps'] = 1875 # One epoch.
params['train']['iterations_per_loop'] = 500
FLAGS.model_dir = self._get_model_dir('real_benchmark_2x2_tpu_coco_mlir')
FLAGS.strategy_type = 'tpu'
tf.config.experimental.enable_mlir_bridge()
self._run_and_report_benchmark(params, do_eval=False, warmup=0)
@flagsaver.flagsaver
def benchmark_4x4_tpu_coco_mlir(self):
"""Run RetinaNet model accuracy test with 4 TPUs."""
self._setup()
params = self._params()
params['train']['batch_size'] = 256
params['train']['total_steps'] = 469 # One epoch.
params['train']['iterations_per_loop'] = 500
FLAGS.model_dir = self._get_model_dir('real_benchmark_4x4_tpu_coco_mlir')
FLAGS.strategy_type = 'tpu'
tf.config.experimental.enable_mlir_bridge()
self._run_and_report_benchmark(params, do_eval=False, warmup=0)
@flagsaver.flagsaver
def benchmark_2x2_tpu_spinenet_coco(self):
"""Run detection model with SpineNet backbone accuracy test with 4 TPUs."""
self._setup()
params = self._params()
params['architecture']['backbone'] = 'spinenet'
params['architecture']['multilevel_features'] = 'identity'
params['architecture']['use_bfloat16'] = False
params['train']['batch_size'] = 64
params['train']['total_steps'] = 1875 # One epoch.
params['train']['iterations_per_loop'] = 500
params['train']['checkpoint']['path'] = ''
FLAGS.model_dir = self._get_model_dir(
'real_benchmark_2x2_tpu_spinenet_coco')
FLAGS.strategy_type = 'tpu'
self._run_and_report_benchmark(params, do_eval=False, warmup=0)
class RetinanetBenchmarkReal(DetectionBenchmarkReal):
"""Short benchmark performance tests for Retinanet model."""
def __init__(self, **kwargs):
super(RetinanetBenchmarkReal, self).__init__(model='retinanet',
per_gpu_batch_size=8,
**kwargs)
class MaskRCNNBenchmarkReal(DetectionBenchmarkReal):
"""Short benchmark performance tests for Mask RCNN model."""
def __init__(self, **kwargs):
super(MaskRCNNBenchmarkReal, self).__init__(model='mask_rcnn',
per_gpu_batch_size=4,
**kwargs)
class ShapeMaskBenchmarkReal(DetectionBenchmarkReal):
"""Short benchmark performance tests for ShapeMask model."""
def __init__(self, **kwargs):
super(ShapeMaskBenchmarkReal, self).__init__(model='shapemask',
per_gpu_batch_size=4,
**kwargs)
if __name__ == '__main__':
tf.test.main()
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes Shakespeare (LSTM) benchmark and accuracy tests."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import time
from absl import flags
import tensorflow as tf # pylint: disable=g-bad-import-order
from official.benchmark.models.shakespeare import shakespeare_main
from official.utils.flags import core as flags_core
from official.utils.misc import keras_utils
from official.benchmark import benchmark_wrappers
from official.benchmark.perfzero_benchmark import PerfZeroBenchmark
SHAKESPEARE_TRAIN_DATA = 'shakespeare/shakespeare.txt'
TMP_DIR = os.getenv('TMPDIR')
FLAGS = flags.FLAGS
class ShakespeareBenchmarkBase(PerfZeroBenchmark):
"""Base class for Shakespeare (LSTM) benchmark and accuracy tests."""
def __init__(self, output_dir=None, default_flags=None, root_data_dir=None):
super(ShakespeareBenchmarkBase, self).__init__(
output_dir=output_dir,
default_flags=default_flags,
flag_methods=[shakespeare_main.define_flags])
@benchmark_wrappers.enable_runtime_flags
def _run_and_report_benchmark(self,
top_1_train_min=0.91,
top_1_train_max=0.94,
warmup=1,
log_steps=100):
"""Report benchmark results by writing to local protobuf file.
Average epoch time is calculated by skipping the first epoch. This average
ignores time spent between epochs, since it is computed from the recorded
begin and end times of each epoch. To skip the accuracy check, set
`top_1_train_min=None`.
Args:
top_1_train_min: lowest passing value.
top_1_train_max: highest passing value.
warmup: number of entries in `timestamp_log` to ignore.
log_steps: How often the log was created for `timestamp_log`.
"""
total_batch_size = FLAGS.batch_size
metrics = []
start_time_sec = time.time()
stats = shakespeare_main.run(FLAGS)
wall_time_sec = time.time() - start_time_sec
if top_1_train_min:
metrics.append({
'name': 'accuracy_top_1_train',
'value': stats['history']['RecallAt1'][-1],
'min_value': top_1_train_min,
'max_value': top_1_train_max
})
# Look for the time history callback which was used during keras.fit
for callback in stats['callbacks']:
if isinstance(callback, keras_utils.TimeHistory):
epoch_timings = callback.epoch_runtime_log
if len(epoch_timings) > 1:
average_time = sum(epoch_timings[1:]) / len(epoch_timings[1:])
metrics.append({'name': 'avg_epoch_time', 'value': average_time})
# First entry in timestamp_log is the start of step 1. The rest of the
# entries are the end of each step recorded.
time_log = callback.timestamp_log
elapsed = time_log[-1].timestamp - time_log[warmup].timestamp
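# (len(time_log) - warmup - 1) logged intervals are measured, each covering
# `log_steps` steps at the global batch size.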
num_examples = (
total_batch_size * log_steps * (len(time_log) - warmup - 1))
if elapsed > 0:
examples_per_sec = num_examples / elapsed
metrics.append({'name': 'exp_per_second', 'value': examples_per_sec})
flags_str = flags_core.get_nondefault_flags_as_str()
self.report_benchmark(
iters=-1,
wall_time=wall_time_sec,
metrics=metrics,
extras={'flags': flags_str})
class ShakespeareAccuracy(ShakespeareBenchmarkBase):
"""Shakespeare accuracy tests.
This is not an ideal test. The best accuracy check available is to validate
top-1 accuracy on the training set. At batch size 64, training top-1
stabilizes at ~0.92 around 40-45 epochs.
"""
def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
"""Shakespeare accuracy tests.
Args:
output_dir: directory where to output e.g. log files
root_data_dir: directory under which to look for dataset
**kwargs: arbitrary named arguments. This is needed to make the
constructor forward compatible in case PerfZero provides more named
arguments before updating the constructor.
"""
self.train_data = os.path.join(root_data_dir, SHAKESPEARE_TRAIN_DATA)
super(ShakespeareAccuracy, self).__init__(
output_dir=output_dir, root_data_dir=root_data_dir)
def benchmark_cpu(self):
"""Benchmark cpu."""
self._setup()
FLAGS.num_gpus = 0
FLAGS.training_data = self.train_data
FLAGS.batch_size = 64
FLAGS.train_epochs = 43
FLAGS.model_dir = ''
self._run_and_report_benchmark()
def benchmark_cpu_no_ds_run_eagerly(self):
"""Benchmark cpu without distribution strategies and run eagerly."""
self._setup()
FLAGS.num_gpus = 0
FLAGS.training_data = self.train_data
FLAGS.batch_size = 64
FLAGS.train_epochs = 43
FLAGS.model_dir = ''
FLAGS.run_eagerly = True
FLAGS.distribution_strategy = 'off'
self._run_and_report_benchmark()
def benchmark_1_gpu(self):
"""Benchmark 1 gpu."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.training_data = self.train_data
FLAGS.batch_size = 64
FLAGS.train_epochs = 43
FLAGS.model_dir = ''
self._run_and_report_benchmark()
def benchmark_1_gpu_no_ds(self):
"""Benchmark 1 gpu without distribution strategies."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.training_data = self.train_data
FLAGS.batch_size = 64
FLAGS.train_epochs = 43
FLAGS.model_dir = ''
FLAGS.distribution_strategy = 'off'
self._run_and_report_benchmark()
def benchmark_1_gpu_no_ds_run_eagerly(self):
"""Benchmark 1 gpu without distribution strategies and run eagerly."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.training_data = self.train_data
FLAGS.batch_size = 64
FLAGS.train_epochs = 43
FLAGS.model_dir = ''
FLAGS.run_eagerly = True
FLAGS.distribution_strategy = 'off'
self._run_and_report_benchmark()
def benchmark_xla_1_gpu(self):
"""Benchmark 1 gpu w/xla."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.training_data = self.train_data
FLAGS.batch_size = 64
FLAGS.train_epochs = 43
FLAGS.model_dir = ''
FLAGS.enable_xla = True
self._run_and_report_benchmark()
def benchmark_8_gpu(self):
"""Benchmark 8 gpu.
This test is for accuracy, not scaling. The batch size is not scaled to
the number of GPUs.
"""
self._setup()
FLAGS.num_gpus = 8
FLAGS.training_data = self.train_data
FLAGS.batch_size = 64
FLAGS.train_epochs = 43
FLAGS.model_dir = ''
self._run_and_report_benchmark()
class ShakespeareKerasBenchmarkReal(ShakespeareBenchmarkBase):
"""Benchmark accuracy tests."""
def __init__(self, output_dir=None, root_data_dir=TMP_DIR, **kwargs):
"""Benchmark tests w/Keras.
Args:
output_dir: directory where to output e.g. log files
root_data_dir: directory under which to look for dataset
**kwargs: arbitrary named arguments. This is needed to make the
constructor forward compatible in case PerfZero provides more named
arguments before updating the constructor.
"""
self.train_data = os.path.join(root_data_dir, SHAKESPEARE_TRAIN_DATA)
def_flags = {}
def_flags['training_data'] = self.train_data
def_flags['model_dir'] = ''
def_flags['train_epochs'] = 4
def_flags['log_steps'] = 50
super(ShakespeareKerasBenchmarkReal, self).__init__(
output_dir=output_dir,
root_data_dir=root_data_dir,
default_flags=def_flags)
def benchmark_cpu(self):
"""Benchmark cpu."""
self._setup()
FLAGS.num_gpus = 0
FLAGS.batch_size = 64
self._run_and_report_benchmark()
def benchmark_cpu_no_ds_run_eagerly(self):
"""Benchmark cpu without distribution strategy and run eagerly."""
self._setup()
FLAGS.num_gpus = 0
FLAGS.batch_size = 64
FLAGS.distribution_strategy = 'off'
FLAGS.run_eagerly = True
self._run_and_report_benchmark()
def benchmark_cpu_no_ds(self):
"""Benchmark cpu without distribution strategy."""
self._setup()
FLAGS.num_gpus = 0
FLAGS.batch_size = 64
FLAGS.distribution_strategy = 'off'
self._run_and_report_benchmark()
def benchmark_cpu_no_ds_force_v2(self):
"""Benchmark cpu no ds, and force v2."""
self._setup()
FLAGS.num_gpus = 0
FLAGS.batch_size = 64
FLAGS.distribution_strategy = 'off'
self._run_and_report_benchmark()
def benchmark_1_gpu(self):
"""Benchmark 1 gpu."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.batch_size = 64
self._run_and_report_benchmark()
def benchmark_1_gpu_no_cudnn(self):
"""Benchmark 1 gpu with CuDNN disabled."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.batch_size = 64
FLAGS.cudnn = False
self._run_and_report_benchmark()
def benchmark_1_gpu_no_ds(self):
"""Benchmark 1 gpu without distribution strategies."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.batch_size = 64
FLAGS.distribution_strategy = 'off'
self._run_and_report_benchmark()
def benchmark_1_gpu_no_ds_run_eagerly(self):
"""Benchmark 1 gpu."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.batch_size = 64
FLAGS.run_eagerly = True
FLAGS.distribution_strategy = 'off'
self._run_and_report_benchmark()
def benchmark_xla_1_gpu(self):
"""Benchmark 1 gpu."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.batch_size = 64
FLAGS.enable_xla = True
self._run_and_report_benchmark()
def benchmark_xla_1_gpu_no_cudnn(self):
"""Benchmark 1 gpu w/xla and CuDNN disabled."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.batch_size = 64
FLAGS.cudnn = False
FLAGS.enable_xla = True
self._run_and_report_benchmark()
def benchmark_8_gpu(self):
"""Benchmark 8 gpu."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.batch_size = 64 * 8
FLAGS.log_steps = 10
self._run_and_report_benchmark()
def benchmark_8_gpu_no_cudnn(self):
"""Benchmark 8 gpu with CuDNN disabled."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.batch_size = 64 * 8
FLAGS.log_steps = 10
FLAGS.cudnn = False
self._run_and_report_benchmark()
def benchmark_xla_8_gpu(self):
"""Benchmark 8 gpu w/xla."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.batch_size = 64 * 8
FLAGS.log_steps = 10
FLAGS.enable_xla = True
self._run_and_report_benchmark()
def benchmark_xla_8_gpu_no_cudnn(self):
"""Benchmark 8 gpu w/xla and CuDNN disabled."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.batch_size = 64 * 8
FLAGS.log_steps = 10
FLAGS.cudnn = False
FLAGS.enable_xla = True
self._run_and_report_benchmark()
def _run_and_report_benchmark(self):
"""Run and report benchmark."""
super(ShakespeareKerasBenchmarkReal, self)._run_and_report_benchmark(
top_1_train_min=None, log_steps=FLAGS.log_steps)
if __name__ == '__main__':
tf.test.main()
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Script to setup a tf scan e2e benchmark."""
import time
import numpy as np
import tensorflow as tf
from tqdm import tqdm
from official.benchmark import perfzero_benchmark
# pylint: disable=invalid-name
# pylint: disable=no-value-for-parameter
# pylint: disable=unused-variable
def gen_batches(num_batches, batch_size, units):
for _ in range(num_batches):
x = np.random.random((batch_size, 20, units))
y = np.random.randint(1, units, size=(batch_size, 20))
yield x, y
class MyModel(tf.keras.models.Model):
"""Test model."""
def __init__(self, units):
super().__init__()
self._tf_layers = {}
self.units = units
self.transition_param = self.add_weight(
name="transition_param", shape=(units, units))
self.optimizer = tf.keras.optimizers.Adam()
self._training = False
def _loss_fn_with_scan(self, inputs, transition_params):
first_input = tf.slice(inputs, [0, 0, 0], [-1, 1, -1])
first_input = tf.squeeze(first_input, [1])
rest_of_input = tf.slice(inputs, [0, 1, 0], [-1, -1, -1])
rest_of_input = tf.transpose(rest_of_input, [1, 0, 2])
transition_params = tf.expand_dims(transition_params, 0)
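# tf.scan accumulates forward scores over time steps: each step adds the
# log-sum-exp of the previous alphas plus the transition parameters, as in
# the CRF forward algorithm.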
def _scan_fn(_state, _inputs):
_state = tf.expand_dims(_state, 2)
transition_scores = _state + transition_params
new_alphas = _inputs + tf.reduce_logsumexp(transition_scores, [1])
return new_alphas
all_alphas = tf.transpose(
tf.scan(_scan_fn, rest_of_input, first_input), [1, 0, 2])
# add first state for sequences of length 1
all_alphas = tf.concat([tf.expand_dims(first_input, 1), all_alphas], 1)
return all_alphas
def _loss(self, x, y):
logits = tf.cast(x, dtype=tf.float32)
loss = self._loss_fn_with_scan(logits, self.transition_param)
return tf.reduce_mean(loss)
@tf.function
def train_on_batch(self, *args):
with tf.GradientTape(persistent=True) as tape:
loss = self._loss(*args)
grads = tape.gradient(loss, self.trainable_weights)
self.optimizer.apply_gradients(zip(grads, self.trainable_variables))
return loss
def train(self, epochs, batch_size, num_batches):
data_generator_iter = gen_batches(num_batches, batch_size, self.units)
sample_x, sample_y = next(data_generator_iter)
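# Run one batch up front so the tf.function is traced before the epoch loop.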
self.train_on_batch(sample_x, sample_y)
self._training = True
progress_bar = tqdm(range(epochs), desc="Epochs")
for epoch in progress_bar:
for batch_x, batch_y in data_generator_iter:
loss = self.train_on_batch(batch_x, batch_y)
progress_bar.update(1)
progress_bar.set_postfix({"loss": f"{loss.numpy():.3f}"})
def _run_benchmark(model):
"""Runs the benchmark."""
np.random.seed(123)
num_batches = 5000
batch_size = 32
epochs = 100
start_time = time.time()
model.train(epochs, batch_size, num_batches)
end_time = time.time()
wall_time = end_time - start_time
return wall_time
class TfScanE2EBenchmark(perfzero_benchmark.PerfZeroBenchmark):
"""Scan E2E benchmark."""
def benchmark_cpu(self):
units = 64
model = MyModel(units)
wall_time = _run_benchmark(model)
self.report_benchmark(iters=-1, wall_time=wall_time)
def benchmark_cpu_avg_4(self):
units = 64
model = MyModel(units)
num_trials = 4
wall_times = []
for _ in range(num_trials):
wall_times.append(_run_benchmark(model))
avg_wall_time = sum(wall_times) / float(len(wall_times))
self.report_benchmark(iters=-1, wall_time=avg_wall_time)
if __name__ == "__main__":
tf.test.main()
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Benchmark TF-vision saved models on a TFRecord dataset."""
import time
from absl import app
from absl import flags
from absl import logging
import tensorflow as tf
FLAGS = flags.FLAGS
flags.DEFINE_string('saved_model_path', None, 'Path to saved model.')
flags.DEFINE_string('tf_examples_path', None, 'Path to TF examples.')
flags.DEFINE_integer('num_samples', 100, 'Number of samples.')
flags.DEFINE_integer('num_ignore_samples', 5,
('Number of initial samples to ignore. '
'The first few samples (usually 1) are used by '
'TensorFlow to optimize the tf.function call.'))
flags.mark_flag_as_required('saved_model_path')
flags.mark_flag_as_required('tf_examples_path')
flags.mark_flag_as_required('num_samples')
def main(_) -> None:
files = tf.data.Dataset.list_files(FLAGS.tf_examples_path)
logging.info('Found %d files.', len(files))
dataset = tf.data.TFRecordDataset(files)
model = tf.saved_model.load(FLAGS.saved_model_path)
detect_fn = model.signatures['serving_default']
time_taken = 0.0
for (i, sample) in enumerate(dataset.take(FLAGS.num_samples)):
example = tf.train.Example()
example.ParseFromString(sample.numpy())
image_encoded = example.features.feature['image/encoded']
image = tf.io.decode_image(image_encoded.bytes_list.value[0])
image = image[tf.newaxis]
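# Time only the serving signature call; image decoding above is excluded.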
start_time = time.time()
_ = detect_fn(image)
sample_time = time.time() - start_time
if (i % 10) == 0:
logging.info('Finished sample %d %.2f ms', i, sample_time * 1000.0)
if i < FLAGS.num_ignore_samples:
continue
time_taken += sample_time
num_benchmark_samples = FLAGS.num_samples - FLAGS.num_ignore_samples
logging.info('Per-sample time for {} samples = {:.2f}ms'.format(
num_benchmark_samples, 1000.0 * time_taken / num_benchmark_samples))
if __name__ == '__main__':
app.run(main)
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Runs a memory usage benchmark for a Tensorflow Hub model.
Loads a SavedModel and records memory usage.
"""
import functools
import time
from absl import flags
import tensorflow as tf
import tensorflow_hub as hub
from official.benchmark.perfzero_benchmark import PerfZeroBenchmark
FLAGS = flags.FLAGS
class TfHubMemoryUsageBenchmark(PerfZeroBenchmark):
"""A benchmark measuring memory usage for a given TF Hub SavedModel."""
def __init__(self,
hub_model_handle_list=None,
output_dir=None,
default_flags=None,
root_data_dir=None,
**kwargs):
super(TfHubMemoryUsageBenchmark, self).__init__(
output_dir=output_dir, default_flags=default_flags, **kwargs)
if hub_model_handle_list:
for hub_model_handle in hub_model_handle_list.split(';'):
# Converts a model handle of the form
# https://tfhub.dev/google/nnlm-en-dim128/1 to valid python method name
# like google_nnlm_en_dim128_1.
hub_model_method_name = hub_model_handle.replace(
'https://tfhub.dev', '').replace('/', '_').replace('-',
'_').strip('_')
setattr(
self, 'benchmark_' + hub_model_method_name,
functools.partial(self.benchmark_memory_usage, hub_model_handle))
def benchmark_memory_usage(
self, hub_model_handle='https://tfhub.dev/google/nnlm-en-dim128/1'):
start_time_sec = time.time()
self.load_model(hub_model_handle)
wall_time_sec = time.time() - start_time_sec
metrics = []
self.report_benchmark(iters=-1, wall_time=wall_time_sec, metrics=metrics)
def load_model(self, hub_model_handle):
"""Loads a TF Hub module."""
hub.load(hub_model_handle)
if __name__ == '__main__':
tf.test.main()
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""TFLite utils."""
import orbit
from official.core import base_task
from official.core import base_trainer
from official.core import config_definitions
def train_and_evaluate(
params: config_definitions.ExperimentConfig,
task: base_task.Task,
trainer: base_trainer.Trainer,
controller: orbit.Controller):
"""Train and evaluate on TFLite."""
raise NotImplementedError('train_and_evaluate on tflite_utils is not '
'implemented yet.')
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes Transformer w/Keras benchmark and accuracy tests."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import time
from absl import flags
import tensorflow as tf
from official.benchmark import benchmark_wrappers
from official.benchmark import owner_utils
from official.benchmark.perfzero_benchmark import PerfZeroBenchmark
from official.legacy.transformer import misc
from official.legacy.transformer import transformer_main
from official.utils.flags import core as flags_core
TPU_DATA_DIR = 'gs://mlcompass-data/transformer'
GPU_DATA_DIR = os.getenv('TMPDIR')
TRANSFORMER_EN2DE_DATA_DIR_NAME = 'wmt32k-en2de-official'
EN2DE_2014_BLEU_DATA_DIR_NAME = 'newstest2014'
FLAGS = flags.FLAGS
TMP_DIR = os.getenv('TMPDIR')
class TransformerBenchmark(PerfZeroBenchmark):
"""Methods common to executing transformer w/keras tests.
Code under test for the Transformer Keras models report the same data and
require the same FLAG setup.
"""
def __init__(self, output_dir=None, default_flags=None, root_data_dir=None,
flag_methods=None, tpu=None):
self._set_data_files(root_data_dir=root_data_dir)
if default_flags is None:
default_flags = {}
default_flags['data_dir'] = self.train_data_dir
default_flags['vocab_file'] = self.vocab_file
super(TransformerBenchmark, self).__init__(
output_dir=output_dir,
default_flags=default_flags,
flag_methods=flag_methods,
tpu=tpu)
def _set_data_files(self, root_data_dir=None, tpu_run=False):
"""Sets train_data_dir, vocab_file, bleu_source and bleu_ref."""
# Use remote storage for TPU runs, GPU_DATA_DIR (from TMPDIR) for GPU runs
# if set, otherwise the provided root_data_dir.
if tpu_run:
root_data_dir = TPU_DATA_DIR
elif GPU_DATA_DIR is not None:
root_data_dir = GPU_DATA_DIR
root_data_dir = root_data_dir if root_data_dir else ''
self.train_data_dir = os.path.join(root_data_dir,
TRANSFORMER_EN2DE_DATA_DIR_NAME)
self.vocab_file = os.path.join(root_data_dir,
TRANSFORMER_EN2DE_DATA_DIR_NAME,
'vocab.ende.32768')
self.bleu_source = os.path.join(root_data_dir,
EN2DE_2014_BLEU_DATA_DIR_NAME,
'newstest2014.en')
self.bleu_ref = os.path.join(root_data_dir,
EN2DE_2014_BLEU_DATA_DIR_NAME,
'newstest2014.de')
def _set_data_file_flags(self):
"""Sets the FLAGS for the data files."""
FLAGS.data_dir = self.train_data_dir
FLAGS.vocab_file = self.vocab_file
# Sets values directly to avoid validation check.
FLAGS['bleu_source'].value = self.bleu_source
FLAGS['bleu_ref'].value = self.bleu_ref
@benchmark_wrappers.enable_runtime_flags
def _run_and_report_benchmark(self,
bleu_max=None,
bleu_min=None,
log_steps=None,
total_batch_size=None,
warmup=1):
"""Report benchmark results by writing to local protobuf file.
Args:
bleu_max: highest passing level for bleu score.
bleu_min: lowest passing level for bleu score.
log_steps: How often the log was created for stats['step_timestamp_log'].
total_batch_size: Global batch-size.
warmup: number of entries in stats['step_timestamp_log'] to ignore.
"""
start_time_sec = time.time()
task = transformer_main.TransformerTask(FLAGS)
stats = task.train()
wall_time_sec = time.time() - start_time_sec
metrics = []
if 'bleu_uncased' in stats:
if 'bleu_uncased_history' in stats:
bleu_uncased_best = max(stats['bleu_uncased_history'],
key=lambda x: x[1])
metrics.append({'name': 'bleu_uncased',
'value': bleu_uncased_best[1],
'min_value': bleu_min,
'max_value': bleu_max})
metrics.append({'name': 'bleu_best_score_iteration',
'value': bleu_uncased_best[0]})
metrics.append({'name': 'bleu_uncased_last',
'value': stats['bleu_uncased']})
else:
metrics.append({'name': 'bleu_uncased',
'value': stats['bleu_uncased'],
'min_value': bleu_min,
'max_value': bleu_max})
if (warmup and 'step_timestamp_log' in stats and
len(stats['step_timestamp_log']) > warmup + 1):
# first entry in the time_log is start of step 1. The rest of the
# entries are the end of each step recorded
time_log = stats['step_timestamp_log']
elapsed = time_log[-1].timestamp - time_log[warmup].timestamp
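# (len(time_log) - warmup - 1) logged intervals are measured, each covering
# log_steps steps at the global batch size.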
num_examples = (
total_batch_size * log_steps * (len(time_log) - warmup - 1))
examples_per_sec = num_examples / elapsed
metrics.append({'name': 'exp_per_second',
'value': examples_per_sec})
if 'avg_exp_per_second' in stats:
metrics.append({'name': 'avg_exp_per_second',
'value': stats['avg_exp_per_second']})
if 'step_timestamp_log' in stats:
time_log = stats['step_timestamp_log']
metrics.append({'name': 'startup_time',
'value': time_log[0].timestamp - start_time_sec})
flags_str = flags_core.get_nondefault_flags_as_str()
self.report_benchmark(iters=-1, wall_time=wall_time_sec, metrics=metrics,
extras={'flags': flags_str})
class TransformerBaseKerasAccuracy(TransformerBenchmark):
"""Benchmark accuracy tests for Transformer Base model w/ Keras."""
def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
"""Benchmark accuracy tests for Transformer Base model w/ Keras.
Args:
output_dir: directory where to output e.g. log files
root_data_dir: directory under which to look for dataset
**kwargs: arbitrary named arguments. This is needed to make the
constructor forward compatible in case PerfZero provides more
named arguments before updating the constructor.
"""
flag_methods = [misc.define_transformer_flags]
super(TransformerBaseKerasAccuracy, self).__init__(
output_dir=output_dir, root_data_dir=root_data_dir,
flag_methods=flag_methods)
def benchmark_1_gpu(self):
"""Benchmark 1 gpu.
The paper uses 8 GPUs and a much larger effective batch size; this will
not converge to the 27.3 BLEU (uncased) SOTA.
"""
self._setup()
self._set_data_file_flags()
FLAGS.num_gpus = 1
FLAGS.param_set = 'base'
FLAGS.batch_size = 2048
FLAGS.train_steps = 1000
FLAGS.steps_between_evals = 500
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu')
# These BLEU scores are based on test runs at this limited number of
# steps and batch size, after verifying SOTA at 8x V100s.
self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
log_steps=FLAGS.log_steps,
bleu_min=25.3,
bleu_max=26)
def benchmark_1_gpu_static_batch(self):
"""Benchmark 1 gpu with static_batch.
The paper uses 8 GPUs and a much larger effective batch size; this will
not converge to the 27.3 BLEU (uncased) SOTA.
"""
self._setup()
self._set_data_file_flags()
FLAGS.num_gpus = 1
FLAGS.param_set = 'base'
FLAGS.batch_size = 4096
FLAGS.train_steps = 100000
FLAGS.steps_between_evals = 5000
FLAGS.static_batch = True
FLAGS.max_length = 64
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_static_batch')
# These BLEU scores are based on test runs at this limited number of
# steps and batch size, after verifying SOTA at 8x V100s.
self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
log_steps=FLAGS.log_steps,
bleu_min=25.3,
bleu_max=26)
def benchmark_8_gpu(self):
"""Benchmark 8 gpu.
Should converge to 27.3 BLEU (uncased). This has not been confirmed yet.
"""
self._setup()
self._set_data_file_flags()
FLAGS.num_gpus = 8
FLAGS.param_set = 'base'
FLAGS.batch_size = 4096*8
FLAGS.train_steps = 100000
FLAGS.steps_between_evals = 20000
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu')
self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
log_steps=FLAGS.log_steps,
bleu_min=27,
bleu_max=28)
def benchmark_8_gpu_static_batch(self):
"""Benchmark 8 gpu.
Should converge to 27.3 BLEU (uncased). This has not been confirmed yet.
"""
self._setup()
self._set_data_file_flags()
FLAGS.num_gpus = 8
FLAGS.param_set = 'base'
FLAGS.batch_size = 4096*8
FLAGS.train_steps = 100000
FLAGS.static_batch = True
FLAGS.max_length = 64
FLAGS.steps_between_evals = 5000
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_static_batch')
self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
log_steps=FLAGS.log_steps,
bleu_min=27,
bleu_max=28)
class TransformerBigKerasAccuracy(TransformerBenchmark):
"""Benchmark accuracy tests for Transformer Big model w/ Keras."""
def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
"""Benchmark accuracy tests for Transformer Big model w/ Keras.
Args:
output_dir: directory where to output e.g. log files
root_data_dir: directory under which to look for dataset
**kwargs: arbitrary named arguments. This is needed to make the
constructor forward compatible in case PerfZero provides more
named arguments before updating the constructor.
"""
flag_methods = [misc.define_transformer_flags]
super(TransformerBigKerasAccuracy, self).__init__(
output_dir=output_dir, root_data_dir=root_data_dir,
flag_methods=flag_methods)
def benchmark_8_gpu(self):
"""Benchmark 8 gpu.
Over 6 runs with eval every 20K steps, the average best value was 28.195
BLEU (uncased); 28.424 was the highest and 27.96 the lowest. Each value is
the highest score seen during a run and occurred at a median of iteration 9.
Iterations are not epochs; an iteration is the number of steps between evals.
"""
self._setup()
self._set_data_file_flags()
FLAGS.num_gpus = 8
FLAGS.param_set = 'big'
FLAGS.batch_size = 3072*8
FLAGS.train_steps = 20000 * 12
FLAGS.steps_between_evals = 20000
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu')
self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
log_steps=FLAGS.log_steps,
bleu_min=27.9,
bleu_max=29.2)
def benchmark_8_gpu_static_batch(self):
"""Benchmark 8 gpu.
Should converge to 28.4 BLEU (uncased). This has not been verified yet.
"""
self._setup()
self._set_data_file_flags()
FLAGS.num_gpus = 8
FLAGS.param_set = 'big'
FLAGS.batch_size = 3072*8
FLAGS.static_batch = True
FLAGS.max_length = 64
FLAGS.train_steps = 20000 * 12
FLAGS.steps_between_evals = 20000
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_static_batch')
self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
log_steps=FLAGS.log_steps,
bleu_min=28,
bleu_max=29.2)
def benchmark_8_gpu_fp16(self):
"""Benchmark 8 gpu with dynamic batch and fp16.
Over 6 runs with eval every 20K steps, the average best value was 28.247
BLEU (uncased); 28.424 was the highest and 28.09 the lowest. Each value is
the highest score seen during a run and occurred at a median of iteration
11. While this could be read as worse than FP32, when comparing the first
iteration at which 28 is passed, FP16 performs equally well and possibly
better. Although not part of the initial test runs, the highest value
recorded with the arguments below was 28.9 at iteration 12. Iterations are
not epochs; an iteration is the number of steps between evals.
"""
self._setup()
self._set_data_file_flags()
FLAGS.num_gpus = 8
FLAGS.dtype = 'fp16'
FLAGS.param_set = 'big'
FLAGS.batch_size = 3072*8
FLAGS.train_steps = 20000 * 12
FLAGS.steps_between_evals = 20000
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16')
self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
log_steps=FLAGS.log_steps,
bleu_min=28,
bleu_max=29.2)
def benchmark_8_gpu_static_batch_fp16(self):
"""Benchmark 8 gpu with static batch and fp16.
Should converge to 28.4 BLEU (uncased). This has not been verified yet.
"""
self._setup()
self._set_data_file_flags()
FLAGS.num_gpus = 8
FLAGS.dtype = 'fp16'
FLAGS.param_set = 'big'
FLAGS.batch_size = 3072*8
FLAGS.static_batch = True
FLAGS.max_length = 64
FLAGS.train_steps = 400000
FLAGS.steps_between_evals = 20000
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_static_batch_fp16')
self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
log_steps=FLAGS.log_steps,
bleu_min=28,
bleu_max=29.2)
def benchmark_xla_8_gpu_static_batch_fp16(self):
"""Benchmark 8 gpu with static batch, XLA, and FP16.
Should converge to 28.4 BLEU (uncased). This has not been verified yet.
"""
self._setup()
self._set_data_file_flags()
FLAGS.num_gpus = 8
FLAGS.dtype = 'fp16'
FLAGS.enable_xla = True
FLAGS.param_set = 'big'
FLAGS.batch_size = 3072*8
FLAGS.static_batch = True
FLAGS.max_length = 64
FLAGS.train_steps = 400000
FLAGS.steps_between_evals = 20000
FLAGS.model_dir = self._get_model_dir(
'benchmark_xla_8_gpu_static_batch_fp16')
self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
log_steps=FLAGS.log_steps,
bleu_min=28,
bleu_max=29.2)
class TransformerKerasBenchmark(TransformerBenchmark):
"""Benchmarks for Transformer (Base and Big) using Keras."""
def __init__(self, output_dir=None, default_flags=None,
root_data_dir=None, batch_per_gpu=4096, tpu=None):
"""Initialize.
Args:
output_dir: Base directory for saving artifacts, e.g. checkpoints.
default_flags: default flags to use for all tests.
root_data_dir: root directory for data, e.g. training.
batch_per_gpu: batch size to use per gpu.
tpu: Target TPU to use.
"""
flag_methods = [misc.define_transformer_flags]
self.batch_per_gpu = batch_per_gpu
super(TransformerKerasBenchmark, self).__init__(
output_dir=output_dir,
default_flags=default_flags,
root_data_dir=root_data_dir,
flag_methods=flag_methods,
tpu=tpu)
def benchmark_1_gpu_no_dist_strat(self):
"""Benchmark 1 gpu without distribution strategy."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.distribution_strategy = 'off'
FLAGS.batch_size = self.batch_per_gpu
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_no_dist_strat')
self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
log_steps=FLAGS.log_steps)
def benchmark_1_gpu_no_dist_strat_static_batch(self):
"""Benchmark 1 gpu without distribution strategy with static batch."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.distribution_strategy = 'off'
FLAGS.batch_size = self.batch_per_gpu
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_no_ds_sb')
FLAGS.static_batch = True
FLAGS.max_length = 64
self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
log_steps=FLAGS.log_steps)
def benchmark_1_gpu(self):
"""Benchmark 1 gpu."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.batch_size = self.batch_per_gpu
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu')
self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
log_steps=FLAGS.log_steps)
def benchmark_1_gpu_fp16(self):
"""Benchmark 1 gpu FP16."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.batch_size = self.batch_per_gpu
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_fp16')
FLAGS.dtype = 'fp16'
self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
log_steps=FLAGS.log_steps)
def benchmark_xla_1_gpu(self):
"""Benchmark 1 gpu w/xla."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.batch_size = self.batch_per_gpu
FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu')
FLAGS.enable_xla = True
self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
log_steps=FLAGS.log_steps)
def benchmark_xla_1_gpu_fp16(self):
"""Benchmark 1 gpu w/xla and FP16."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.batch_size = self.batch_per_gpu
FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_fp16')
FLAGS.enable_xla = True
FLAGS.dtype = 'fp16'
self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
log_steps=FLAGS.log_steps)
def benchmark_1_gpu_static_batch(self):
"""Benchmark 1 gpu with static batch."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.batch_size = self.batch_per_gpu
FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_static_batch')
FLAGS.static_batch = True
FLAGS.max_length = 64
self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
log_steps=FLAGS.log_steps)
def benchmark_xla_1_gpu_static_batch(self):
"""Benchmark 1 gpu with static batch w/xla."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.batch_size = self.batch_per_gpu
FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_static_batch')
FLAGS.static_batch = True
FLAGS.max_length = 64
FLAGS.enable_xla = True
self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
log_steps=FLAGS.log_steps)
def benchmark_1_gpu_static_batch_fp16(self):
"""Benchmark 1 gpu with static batch FP16."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.batch_size = self.batch_per_gpu
FLAGS.model_dir = self._get_model_dir(
'benchmark_1_gpu_static_batch_fp16')
FLAGS.static_batch = True
FLAGS.max_length = 64
FLAGS.dtype = 'fp16'
self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
log_steps=FLAGS.log_steps)
def benchmark_xla_1_gpu_static_batch_fp16(self):
"""Benchmark 1 gpu with static batch w/xla and FP16."""
self._setup()
FLAGS.num_gpus = 1
FLAGS.batch_size = self.batch_per_gpu
FLAGS.model_dir = self._get_model_dir(
'benchmark_xla_1_gpu_static_batch_fp16')
FLAGS.static_batch = True
FLAGS.max_length = 64
FLAGS.enable_xla = True
FLAGS.dtype = 'fp16'
self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
log_steps=FLAGS.log_steps)
def benchmark_8_gpu(self):
"""Benchmark 8 gpu. This defaults to using TF32."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.batch_size = self.batch_per_gpu * 8
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu')
self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
log_steps=FLAGS.log_steps)
def benchmark_8_gpu_fp16(self):
"""Benchmark 8 gpu FP16."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.dtype = 'fp16'
FLAGS.batch_size = self.batch_per_gpu * 8
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16')
self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
log_steps=FLAGS.log_steps)
def benchmark_xla_8_gpu(self):
"""Benchmark 8 gpu w/xla. This defaults to using TF32."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.enable_xla = True
FLAGS.batch_size = self.batch_per_gpu * 8
FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu')
self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
log_steps=FLAGS.log_steps)
def benchmark_xla_8_gpu_fp16(self):
"""Benchmark 8 gpu w/xla and FP16."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.enable_xla = True
FLAGS.dtype = 'fp16'
FLAGS.batch_size = self.batch_per_gpu * 8
FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_fp16')
self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
log_steps=FLAGS.log_steps)
def benchmark_8_gpu_static_batch(self):
"""Benchmark 8 gpu with static batch."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.batch_size = self.batch_per_gpu * 8
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_static_batch')
FLAGS.static_batch = True
FLAGS.max_length = 64
self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
log_steps=FLAGS.log_steps)
def benchmark_8_gpu_static_batch_fp16(self):
"""Benchmark 8 gpu with static batch FP16."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.dtype = 'fp16'
FLAGS.batch_size = self.batch_per_gpu * 8
FLAGS.model_dir = self._get_model_dir(
'benchmark_8_gpu_static_batch_fp16')
FLAGS.static_batch = True
FLAGS.max_length = 64
self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
log_steps=FLAGS.log_steps)
def benchmark_xla_8_gpu_static_batch(self):
"""Benchmark 8 gpu with static batch w/xla."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.enable_xla = True
FLAGS.batch_size = self.batch_per_gpu * 8
FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_static_batch')
FLAGS.static_batch = True
FLAGS.max_length = 64
self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
log_steps=FLAGS.log_steps)
def benchmark_xla_8_gpu_static_batch_fp16(self):
"""Benchmark 8 gpu with static batch w/xla and FP16."""
self._setup()
FLAGS.num_gpus = 8
FLAGS.enable_xla = True
FLAGS.dtype = 'fp16'
FLAGS.batch_size = self.batch_per_gpu * 8
FLAGS.model_dir = self._get_model_dir(
'benchmark_xla_8_gpu_static_batch_fp16')
FLAGS.static_batch = True
FLAGS.max_length = 64
self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
log_steps=FLAGS.log_steps)
def benchmark_xla_8_gpu_static_batch_fp32_no_tf32(self):
"""Benchmark 8 gpu with static batch w/xla and FP16."""
self._setup()
tf.config.experimental.enable_tensor_float_32_execution(False)
FLAGS.num_gpus = 8
FLAGS.enable_xla = True
FLAGS.batch_size = self.batch_per_gpu * 8
FLAGS.model_dir = self._get_model_dir(
'benchmark_xla_8_gpu_static_batch_fp32_no_tf32')
FLAGS.static_batch = True
FLAGS.max_length = 64
self._run_and_report_benchmark(total_batch_size=FLAGS.batch_size,
log_steps=FLAGS.log_steps)
class TransformerBaseKerasBenchmarkReal(TransformerKerasBenchmark):
"""Transformer based version real data benchmark tests."""
def __init__(self, output_dir=TMP_DIR, root_data_dir=TMP_DIR, **kwargs):
def_flags = {}
def_flags['param_set'] = 'base'
def_flags['train_steps'] = 50
def_flags['log_steps'] = 10
super(TransformerBaseKerasBenchmarkReal, self).__init__(
output_dir=output_dir, default_flags=def_flags,
root_data_dir=root_data_dir, batch_per_gpu=4096)
class TransformerBigKerasBenchmarkReal(TransformerKerasBenchmark):
"""Transformer based version real data benchmark tests."""
def __init__(self, output_dir=TMP_DIR, root_data_dir=TMP_DIR,
tpu=None, **kwargs):
def_flags = {}
def_flags['param_set'] = 'big'
def_flags['train_steps'] = 50
def_flags['log_steps'] = 10
super(TransformerBigKerasBenchmarkReal, self).__init__(
output_dir=output_dir, default_flags=def_flags,
root_data_dir=root_data_dir, batch_per_gpu=3072,
tpu=tpu)
def _set_df_common(self):
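"""Sets flags shared by the TPU benchmark cases below."""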
self._set_data_files(tpu_run=True)
FLAGS.data_dir = self.train_data_dir
FLAGS.vocab_file = self.vocab_file
FLAGS.distribution_strategy = 'tpu'
FLAGS.padded_decode = True
FLAGS.train_steps = 300
FLAGS.log_steps = 150
FLAGS.steps_between_evals = 150
FLAGS.static_batch = True
FLAGS.use_ctl = True
FLAGS.enable_checkpointing = False
FLAGS.max_length = 64
FLAGS.decode_batch_size = 32
FLAGS.decode_max_length = 97
def benchmark_2x2_tpu(self):
"""Port of former snaggletooth transformer_big model on 2x2."""
self._setup()
self._set_df_common()
FLAGS.model_dir = self._get_model_dir('benchmark_2x2_tpu')
FLAGS.batch_size = 6144
self._run_and_report_benchmark(
total_batch_size=FLAGS.batch_size,
log_steps=FLAGS.log_steps)
@owner_utils.Owner('tf-graph-compiler')
def benchmark_2x2_tpu_mlir(self):
"""Run transformer_big model on 2x2 with the MLIR Bridge enabled."""
self._setup()
self._set_df_common()
FLAGS.model_dir = self._get_model_dir('benchmark_2x2_tpu_mlir')
FLAGS.batch_size = 6144
tf.config.experimental.enable_mlir_bridge()
self._run_and_report_benchmark(
total_batch_size=FLAGS.batch_size,
log_steps=FLAGS.log_steps)
def benchmark_4x4_tpu(self):
"""Port of former GCP transformer_big model on 4x4."""
self._setup()
self._set_df_common()
FLAGS.model_dir = self._get_model_dir('benchmark_4x4_tpu')
FLAGS.batch_size = 24576
self._run_and_report_benchmark(
total_batch_size=FLAGS.batch_size,
log_steps=FLAGS.log_steps)
@owner_utils.Owner('tf-graph-compiler')
def benchmark_4x4_tpu_mlir(self):
"""Run transformer_big model on 4x4 with the MLIR Bridge enabled."""
self._setup()
self._set_df_common()
FLAGS.model_dir = self._get_model_dir('benchmark_4x4_tpu_mlir')
FLAGS.batch_size = 24576
tf.config.experimental.enable_mlir_bridge()
self._run_and_report_benchmark(
total_batch_size=FLAGS.batch_size,
log_steps=FLAGS.log_steps)
if __name__ == '__main__':
tf.test.main()
# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright 2023 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utility library for picking an appropriate dataset function."""
import functools
from typing import Any, Callable, Type, Union
import tensorflow as tf
PossibleDatasetType = Union[Type[tf.data.Dataset], Callable[[tf.Tensor], Any]]
def pick_dataset_fn(file_type: str) -> PossibleDatasetType:
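"""Returns a dataset constructor for the given file type.

Supported values are 'tfrecord' and 'tfrecord_compressed' (GZIP-compressed
TFRecord files).
"""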
if file_type == 'tfrecord':
return tf.data.TFRecordDataset
if file_type == 'tfrecord_compressed':
return functools.partial(tf.data.TFRecordDataset, compression_type='GZIP')
raise ValueError('Unrecognized file_type: {}'.format(file_type))