Commit ef7b1dac authored by Jing Li, committed by A. Unique TensorFlower

Internal change

PiperOrigin-RevId: 309586836
parent c627506f
@@ -31,6 +31,7 @@ import tensorflow as tf
 # pylint: enable=g-bad-import-order
 from official.benchmark import bert_benchmark_utils as benchmark_utils
+from official.benchmark import owner_utils
 from official.nlp.bert import configs
 from official.nlp.bert import run_classifier
 from official.utils.misc import distribution_utils
@@ -55,7 +56,6 @@ class BertClassifyBenchmarkBase(benchmark_utils.BertBenchmarkBase):
     super(BertClassifyBenchmarkBase, self).__init__(output_dir)
     self.num_epochs = None
     self.num_steps_per_epoch = None
-    self.tpu = tpu
     FLAGS.steps_per_loop = 50

   @flagsaver.flagsaver
@@ -74,9 +74,9 @@ class BertClassifyBenchmarkBase(benchmark_utils.BertBenchmarkBase):
     warmup_steps = int(epochs * steps_per_epoch * 0.1)
     eval_steps = int(
         math.ceil(input_meta_data['eval_data_size'] / FLAGS.eval_batch_size))
-    if self.tpu:
+    if self.default_flags['tpu']:
       strategy = distribution_utils.get_distribution_strategy(
-          distribution_strategy='tpu', tpu_address=self.tpu)
+          distribution_strategy='tpu', tpu_address=self.default_flags['tpu'])
     else:
       strategy = distribution_utils.get_distribution_strategy(
           distribution_strategy='mirrored' if use_ds else 'off',
@@ -211,6 +211,7 @@ class BertClassifyBenchmarkReal(BertClassifyBenchmarkBase):
                                 'summaries/training_summary.txt')
     self._run_and_report_benchmark(summary_path, use_ds=False)

+  @owner_utils.Owner('tf-model-garden')
   def benchmark_8_gpu_mrpc(self):
     """Test BERT model performance with 8 GPUs."""
@@ -264,6 +265,7 @@ class BertClassifyBenchmarkReal(BertClassifyBenchmarkBase):
                                 'summaries/training_summary.txt')
     self._run_and_report_benchmark(summary_path, use_ds=False)

+  @owner_utils.Owner('tf-model-garden')
   def benchmark_2x2_tpu_mrpc(self):
     """Test BERT model performance with 2x2 TPU."""
@@ -289,14 +291,14 @@ class BertClassifyAccuracy(BertClassifyBenchmarkBase):
     `benchmark_(number of gpus)_gpu_(dataset type)` format.
   """

-  def __init__(self, output_dir=TMP_DIR, **kwargs):
+  def __init__(self, output_dir=TMP_DIR, tpu=None, **kwargs):
     self.train_data_path = CLASSIFIER_TRAIN_DATA_PATH
     self.eval_data_path = CLASSIFIER_EVAL_DATA_PATH
     self.bert_config_file = MODEL_CONFIG_FILE_PATH
     self.input_meta_data_path = CLASSIFIER_INPUT_META_DATA_PATH
     self.pretrained_checkpoint_path = PRETRAINED_CHECKPOINT_PATH

-    super(BertClassifyAccuracy, self).__init__(output_dir=output_dir)
+    super(BertClassifyAccuracy, self).__init__(output_dir=output_dir, tpu=tpu)

   @benchmark_wrappers.enable_runtime_flags
   def _run_and_report_benchmark(self,
@@ -326,6 +328,7 @@ class BertClassifyAccuracy(BertClassifyBenchmarkBase):
     FLAGS.bert_config_file = self.bert_config_file
     FLAGS.init_checkpoint = self.pretrained_checkpoint_path

+  @owner_utils.Owner('tf-model-garden')
   def benchmark_8_gpu_mrpc(self):
     """Run BERT model accuracy test with 8 GPUs.
@@ -349,6 +352,16 @@ class BertClassifyAccuracy(BertClassifyBenchmarkBase):
                                 'summaries/training_summary.txt')
     self._run_and_report_benchmark(summary_path)

+  @owner_utils.Owner('tf-model-garden')
+  def benchmark_2x2_tpu_mrpc(self):
+    """Run BERT model accuracy test on 2x2 TPU."""
+    self._setup()
+    FLAGS.model_dir = self._get_model_dir('benchmark_2x2_tpu_mrpc')
+
+    summary_path = os.path.join(FLAGS.model_dir,
+                                'summaries/training_summary.txt')
+    self._run_and_report_benchmark(summary_path)
+

 if __name__ == '__main__':
   tf.test.main()
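
Note: every benchmark touched by this change gains an @owner_utils.Owner('tf-model-garden') decorator. As a rough sketch of how such an ownership decorator can work (the attribute name and mechanics below are assumptions for illustration, not taken from official/benchmark/owner_utils.py):

    def Owner(owner_name):
      """Returns a decorator that tags a benchmark method with an owner name."""
      def _decorator(benchmark_method):
        # Hypothetical marker attribute; reporting tooling could read this to
        # group benchmark results by owning team.
        benchmark_method.__benchmark__owner__ = owner_name
        return benchmark_method
      return _decorator

Because the decorator only attaches metadata and returns the method unchanged, it does not affect how the benchmark itself runs.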
@@ -28,6 +28,7 @@ import tensorflow as tf  # pylint: disable=g-bad-import-order
 from official.benchmark import benchmark_wrappers
 from official.benchmark import bert_benchmark_utils
+from official.benchmark import owner_utils
 from official.nlp.bert import run_pretraining
 from official.utils.flags import core as flags_core
 from official.utils.misc import distribution_utils
@@ -64,10 +65,10 @@ class BertPretrainAccuracyBenchmark(bert_benchmark_utils.BertBenchmarkBase):
         output_dir=output_dir, tpu=tpu, **kwargs)

   @benchmark_wrappers.enable_runtime_flags
-  def _run_and_report_benchmark(self, summary_path: str):
+  def _run_and_report_benchmark(self, summary_path: str, report_accuracy: bool):
     """Runs and reports the benchmark given the provided configuration."""
     distribution = distribution_utils.get_distribution_strategy(
-        distribution_strategy='tpu', tpu_address=FLAGS.tpu)
+        distribution_strategy='tpu', tpu_address=self.default_flags['tpu'])
     logging.info('Flags: %s', flags_core.get_nondefault_flags_as_str())
     start_time_sec = time.time()
     run_pretraining.run_bert_pretrain(
@@ -76,32 +77,36 @@ class BertPretrainAccuracyBenchmark(bert_benchmark_utils.BertBenchmarkBase):
     with tf.io.gfile.GFile(summary_path, 'rb') as reader:
       summary = json.loads(reader.read().decode('utf-8'))
-    self._report_benchmark(summary, start_time_sec, wall_time_sec)
+    self._report_benchmark(summary, start_time_sec, wall_time_sec,
+                           report_accuracy)

-  def _report_benchmark(self, summary, start_time_sec, wall_time_sec):
+  def _report_benchmark(self, summary, start_time_sec, wall_time_sec,
+                        report_accuracy):
     metrics = [{
         'name': 'train_loss',
         'value': summary['train_loss'],
     }, {
         'name':
-            'example_per_second',
+            'exp_per_second',
         'value':
             self.timer_callback.get_examples_per_sec(FLAGS.train_batch_size *
                                                      FLAGS.steps_per_loop)
     }, {
         'name': 'startup_time',
         'value': self.timer_callback.get_startup_time(start_time_sec)
-    }, {
-        'name': 'masked_lm_accuracy',
-        'value': summary['masked_lm_accuracy'],
-        'min_value': MIN_MLM_ACCURACY,
-        'max_value': MAX_MLM_ACCURACY,
-    }, {
-        'name': 'next_sentence_accuracy',
-        'value': summary['next_sentence_accuracy'],
-        'min_value': MIN_NSP_ACCURACY,
-        'max_value': MAX_NSP_ACCURACY,
     }]
+    if report_accuracy:
+      metrics.extend([{
+          'name': 'masked_lm_accuracy',
+          'value': summary['masked_lm_accuracy'],
+          'min_value': MIN_MLM_ACCURACY,
+          'max_value': MAX_MLM_ACCURACY,
+      }, {
+          'name': 'next_sentence_accuracy',
+          'value': summary['next_sentence_accuracy'],
+          'min_value': MIN_NSP_ACCURACY,
+          'max_value': MAX_NSP_ACCURACY,
+      }])
     self.report_benchmark(
         iters=summary['total_training_steps'],
         wall_time=wall_time_sec,
@@ -120,7 +125,8 @@ class BertPretrainAccuracyBenchmark(bert_benchmark_utils.BertBenchmarkBase):
     FLAGS.max_predictions_per_seq = 20
     FLAGS.dtype = 'bf16'

-  def benchmark_8x8_tpu_bf16_seq128_1m_steps(self):
+  @owner_utils.Owner('tf-model-garden')
+  def benchmark_accuracy_8x8_tpu_bf16_seq128_1m_steps(self):
     """Test bert pretraining with 8x8 TPU for 1 million steps."""
     # This is used for accuracy test.
     self._setup()
@@ -128,23 +134,26 @@ class BertPretrainAccuracyBenchmark(bert_benchmark_utils.BertBenchmarkBase):
     FLAGS.num_steps_per_epoch = 250000
     FLAGS.num_train_epochs = 4
     FLAGS.model_dir = self._get_model_dir(
-        'benchmark_8x8_tpu_bf16_seq128_1m_steps')
+        'benchmark_accuracy_8x8_tpu_bf16_seq128_1m_steps')
     summary_path = os.path.join(FLAGS.model_dir,
                                 'summaries/training_summary.txt')
-    self._run_and_report_benchmark(summary_path=summary_path)
+    self._run_and_report_benchmark(summary_path=summary_path,
+                                   report_accuracy=True)

-  def benchmark_4x4_tpu_bf16_seq128_1k_steps(self):
-    """Test bert pretraining with 4x4 TPU for 1000 steps."""
-    # This is used for througput test.
+  @owner_utils.Owner('tf-model-garden')
+  def benchmark_perf_8x8_tpu_bf16_seq128_10k_steps(self):
+    """Test bert pretraining with 8x8 TPU for 10000 steps."""
     self._setup()
     self._specify_common_flags()
-    FLAGS.num_steps_per_epoch = 1000
-    FLAGS.num_train_epochs = 1
+    FLAGS.num_steps_per_epoch = 5000
+    FLAGS.num_train_epochs = 2
     FLAGS.model_dir = self._get_model_dir(
-        'benchmark_4x4_tpu_bf16_seq128_1k_steps')
+        'benchmark_perf_8x8_tpu_bf16_seq128_10k_steps')
     summary_path = os.path.join(FLAGS.model_dir,
                                 'summaries/training_summary.txt')
-    self._run_and_report_benchmark(summary_path=summary_path)
+    # Disable accuracy check.
+    self._run_and_report_benchmark(summary_path=summary_path,
+                                   report_accuracy=False)

 if __name__ == '__main__':
...
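
Note: the recurring edit in this commit swaps FLAGS.tpu (and self.tpu) for self.default_flags['tpu'], with the TPU address now threaded through each benchmark constructor. A minimal sketch of how the shared base class in bert_benchmark_utils might record it; everything below beyond the names visible in this diff is an assumption:

    import tensorflow as tf

    class BertBenchmarkBase(tf.test.Benchmark):
      """Illustrative sketch only; the real BertBenchmarkBase holds more state."""

      def __init__(self, output_dir=None, tpu=None, **kwargs):
        self.output_dir = output_dir
        # Keeping the TPU address in default_flags lets each benchmark resolve
        # its distribution strategy per run without reading global FLAGS.tpu.
        self.default_flags = {'tpu': tpu}

Reading the address from instance state rather than a global flag also keeps benchmarks independent when several run in one process.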
@@ -30,6 +30,7 @@ import tensorflow as tf
 # pylint: enable=g-bad-import-order
 from official.benchmark import bert_benchmark_utils as benchmark_utils
+from official.benchmark import owner_utils
 from official.nlp.bert import run_squad
 from official.utils.misc import distribution_utils
 from official.utils.misc import keras_utils
@@ -79,9 +80,9 @@ class BertSquadBenchmarkBase(benchmark_utils.BertBenchmarkBase):
     Returns:
       A `tf.distribute.DistibutionStrategy` object.
     """
-    if FLAGS.tpu or ds_type == 'tpu':
+    if self.default_flags['tpu'] or ds_type == 'tpu':
       return distribution_utils.get_distribution_strategy(
-          distribution_strategy='tpu', tpu_address=FLAGS.tpu)
+          distribution_strategy='tpu', tpu_address=self.default_flags['tpu'])
     elif ds_type == 'multi_worker_mirrored':
       # Configures cluster spec for multi-worker distribution strategy.
       _ = distribution_utils.configure_cluster(FLAGS.worker_hosts,
@@ -225,26 +226,7 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
     self._run_and_report_benchmark(ds_type='off', run_eagerly=True)

-  def benchmark_2_gpu(self):
-    """Tests BERT SQuAD model performance with 2 GPUs."""
-    self._setup()
-    self.num_gpus = 2
-    FLAGS.model_dir = self._get_model_dir('benchmark_2_gpu_squad')
-    FLAGS.train_batch_size = 8
-    self._run_and_report_benchmark()
-
-  def benchmark_4_gpu(self):
-    """Tests BERT SQuAD model performance with 4 GPUs."""
-    self._setup()
-    self.num_gpus = 4
-    FLAGS.model_dir = self._get_model_dir('benchmark_4_gpu_squad')
-    FLAGS.train_batch_size = 16
-    self._run_and_report_benchmark()
-
+  @owner_utils.Owner('tf-model-garden')
   def benchmark_8_gpu(self):
     """Tests BERT SQuAD model performance with 8 GPUs."""
@@ -293,30 +275,6 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
     self._run_and_report_benchmark()

-  def benchmark_2_gpu_fp16(self):
-    """Tests BERT SQuAD model performance with 2 GPUs and FP16."""
-    self._setup()
-    self.num_gpus = 2
-    FLAGS.model_dir = self._get_model_dir('benchmark_2_gpu_squad_fp16')
-    FLAGS.train_batch_size = 8
-    FLAGS.dtype = 'fp16'
-    FLAGS.loss_scale = 'dynamic'
-    self._run_and_report_benchmark()
-
-  def benchmark_4_gpu_fp16(self):
-    """Tests BERT SQuAD model performance with 4 GPUs and FP16."""
-    self._setup()
-    self.num_gpus = 4
-    FLAGS.model_dir = self._get_model_dir('benchmark_4_gpu_squad_fp16')
-    FLAGS.train_batch_size = 16
-    FLAGS.dtype = 'fp16'
-    FLAGS.loss_scale = 'dynamic'
-    self._run_and_report_benchmark()
-
   def benchmark_8_gpu_fp16(self):
     """Tests BERT SQuAD model performance with 8 GPUs."""
@@ -355,18 +313,6 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
     self._run_and_report_benchmark()

-  def benchmark_4_gpu_amp(self):
-    """Tests BERT SQuAD model performance with 1 GPU with automatic mixed precision."""
-    self._setup()
-    self.num_gpus = 4
-    FLAGS.model_dir = self._get_model_dir('benchmark_4_gpu_amp_squad')
-    FLAGS.train_batch_size = 16
-    FLAGS.dtype = 'fp16'
-    FLAGS.fp16_implementation = 'graph_rewrite'
-    self._run_and_report_benchmark()
-
   def benchmark_8_gpu_amp(self):
     """Tests BERT SQuAD model performance with 1 GPU with automatic mixed precision."""
@@ -380,6 +326,7 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
     self._run_and_report_benchmark()

+  @owner_utils.Owner('tf-model-garden')
   def benchmark_2x2_tpu(self):
     """Tests BERT SQuAD model performance with 2x2 TPU."""
@@ -449,6 +396,7 @@ class BertSquadAccuracy(BertSquadBenchmarkBase):
     self._run_and_report_benchmark(ds_type='off', run_eagerly=True)

+  @owner_utils.Owner('tf-model-garden')
   def benchmark_8_gpu(self):
     """Tests BERT SQuAD model accuracy with 8 GPUs."""
@@ -485,6 +433,7 @@ class BertSquadAccuracy(BertSquadBenchmarkBase):
     self._run_and_report_benchmark()

+  @owner_utils.Owner('tf-model-garden')
   def benchmark_2x2_tpu(self):
     """Tests BERT SQuAD model accuracy with 2x2 TPU."""
...
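
Note: distribution_utils.get_distribution_strategy is the entry point all of these benchmarks share for strategy selection; the hunks above only change where the TPU address comes from. A hedged usage sketch, with a placeholder TPU address:

    from official.utils.misc import distribution_utils

    # 'grpc://10.0.0.2:8470' is a placeholder; a real run passes the address
    # of a reachable TPU worker, or None/'' to fall back per library defaults.
    strategy = distribution_utils.get_distribution_strategy(
        distribution_strategy='tpu', tpu_address='grpc://10.0.0.2:8470')

    with strategy.scope():
      pass  # Build the Keras model here so its variables are replicated.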
@@ -26,6 +26,7 @@ import tensorflow as tf  # pylint: disable=g-bad-import-order
 from official.benchmark import benchmark_wrappers
 from official.benchmark import keras_benchmark
+from official.benchmark import owner_utils
 from official.vision.segmentation import unet_main as unet_training_lib
 from official.vision.segmentation import unet_model as unet_model_lib
@@ -119,6 +120,7 @@ class Unet3DAccuracyBenchmark(keras_benchmark.KerasBenchmark):
   def _get_model_dir(self, folder_name):
     return os.path.join(self.output_dir, folder_name)

+  @owner_utils.Owner('tf-model-garden')
   def benchmark_4x4_tpu_bf16(self):
     """Test Keras model with 4x4 TPU, fp16."""
     experiment_name = 'benchmark_4x4_tpu_fp16'
...
@@ -29,6 +29,7 @@ import tensorflow as tf
 # pylint: enable=g-bad-import-order
 from official.benchmark import bert_benchmark_utils as benchmark_utils
+from official.benchmark import owner_utils
 from official.nlp.xlnet import run_classifier
 from official.nlp.xlnet import run_squad
 from official.benchmark import benchmark_wrappers
@@ -47,8 +48,8 @@ FLAGS = flags.FLAGS
 class XLNetBenchmarkBase(benchmark_utils.BertBenchmarkBase):
   """Base class to hold methods common to test classes in the module."""

-  def __init__(self, output_dir=None):
-    super(XLNetBenchmarkBase, self).__init__(output_dir)
+  def __init__(self, output_dir=None, tpu=None):
+    super(XLNetBenchmarkBase, self).__init__(output_dir=output_dir, tpu=tpu)
     self.num_epochs = None
     self.num_steps_per_epoch = None
@@ -71,12 +72,12 @@ class XLNetClassifyAccuracy(XLNetBenchmarkBase):
     `benchmark_(number of gpus)_gpu_(dataset type)` format.
   """

-  def __init__(self, output_dir=None, **kwargs):
+  def __init__(self, output_dir=None, tpu=None, **kwargs):
     self.train_data_path = CLASSIFIER_TRAIN_DATA_PATH
     self.eval_data_path = CLASSIFIER_EVAL_DATA_PATH
     self.pretrained_checkpoint_path = PRETRAINED_CHECKPOINT_PATH

-    super(XLNetClassifyAccuracy, self).__init__(output_dir=output_dir)
+    super(XLNetClassifyAccuracy, self).__init__(output_dir=output_dir, tpu=tpu)

   @benchmark_wrappers.enable_runtime_flags
   def _run_and_report_benchmark(self,
@@ -123,6 +124,7 @@ class XLNetClassifyAccuracy(XLNetBenchmarkBase):
     FLAGS.train_tfrecord_path = self.train_data_path
     FLAGS.test_tfrecord_path = self.eval_data_path

+  @owner_utils.Owner('tf-model-garden')
   def benchmark_8_gpu_imdb(self):
     """Run XLNet model accuracy test with 8 GPUs."""
     self._setup()
@@ -134,6 +136,19 @@ class XLNetClassifyAccuracy(XLNetBenchmarkBase):
                                 'summaries/training_summary.txt')
     self._run_and_report_benchmark(summary_path)

+  @owner_utils.Owner('tf-model-garden')
+  def benchmark_2x2_tpu_imdb(self):
+    """Run XLNet model accuracy test on 2x2 TPU."""
+    self._setup()
+    FLAGS.strategy_type = 'tpu'
+    FLAGS.model_dir = self._get_model_dir('benchmark_2x2_tpu_imdb')
+    # Sets timer_callback to None as we do not use it now.
+    self.timer_callback = None
+
+    summary_path = os.path.join(FLAGS.model_dir,
+                                'summaries/training_summary.txt')
+    self._run_and_report_benchmark(summary_path)
+

 class XLNetSquadAccuracy(XLNetBenchmarkBase):
   """Short accuracy test for XLNet squad model.
@@ -143,14 +158,14 @@ class XLNetSquadAccuracy(XLNetBenchmarkBase):
     `benchmark_(number of gpus)_gpu_(dataset type)` format.
   """

-  def __init__(self, output_dir=None, **kwargs):
+  def __init__(self, output_dir=None, tpu=None, **kwargs):
     self.train_data_path = SQUAD_DATA_PATH
     self.predict_file = os.path.join(SQUAD_DATA_PATH, "dev-v2.0.json")
     self.test_data_path = os.path.join(SQUAD_DATA_PATH, "12048.eval.tf_record")
     self.spiece_model_file = os.path.join(SQUAD_DATA_PATH, "spiece.cased.model")
     self.pretrained_checkpoint_path = PRETRAINED_CHECKPOINT_PATH

-    super(XLNetSquadAccuracy, self).__init__(output_dir=output_dir)
+    super(XLNetSquadAccuracy, self).__init__(output_dir=output_dir, tpu=tpu)

   @benchmark_wrappers.enable_runtime_flags
   def _run_and_report_benchmark(self,
@@ -196,9 +211,10 @@ class XLNetSquadAccuracy(XLNetBenchmarkBase):
     FLAGS.test_tfrecord_path = self.test_data_path
     FLAGS.spiece_model_file = self.spiece_model_file
     FLAGS.predict_file = self.predict_file
-    FLAGS.adam_epsilon=1e-6
-    FLAGS.lr_layer_decay_rate=0.75
+    FLAGS.adam_epsilon = 1e-6
+    FLAGS.lr_layer_decay_rate = 0.75

+  @owner_utils.Owner('tf-model-garden')
   def benchmark_8_gpu_squadv2(self):
     """Run XLNet model squad v2 accuracy test with 8 GPUs."""
     self._setup()
@@ -211,6 +227,20 @@ class XLNetSquadAccuracy(XLNetBenchmarkBase):
                                 'summaries/training_summary.txt')
     self._run_and_report_benchmark(summary_path)

+  @owner_utils.Owner('tf-model-garden')
+  def benchmark_2x2_tpu_squadv2(self):
+    """Run XLNet model squad v2 accuracy test on 2x2 TPU."""
+    self._setup()
+    FLAGS.strategy_type = 'tpu'
+    FLAGS.model_dir = self._get_model_dir('benchmark_2x2_tpu_squadv2')
+    FLAGS.predict_dir = FLAGS.model_dir
+    # Sets timer_callback to None as we do not use it now.
+    self.timer_callback = None
+
+    summary_path = os.path.join(FLAGS.model_dir,
+                                'summaries/training_summary.txt')
+    self._run_and_report_benchmark(summary_path)
+

 if __name__ == '__main__':
   tf.test.main()
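
Note: these benchmark classes are normally discovered and executed by the benchmark harness through tf.test.main(). As a hedged sketch of the new constructor signature in use, a direct invocation might look like this (the output directory and TPU address are placeholders, and manual instantiation bypasses the harness's reporting setup):

    # Hypothetical manual run; real runs go through the benchmark runner.
    benchmark = XLNetClassifyAccuracy(
        output_dir='/tmp/xlnet_benchmark', tpu='grpc://10.0.0.2:8470')
    benchmark.benchmark_2x2_tpu_imdb()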