Commit ef7b1dac authored by Jing Li's avatar Jing Li Committed by A. Unique TensorFlower

Internal change

PiperOrigin-RevId: 309586836
parent c627506f
......@@ -31,6 +31,7 @@ import tensorflow as tf
# pylint: enable=g-bad-import-order
from official.benchmark import bert_benchmark_utils as benchmark_utils
from official.benchmark import owner_utils
from official.nlp.bert import configs
from official.nlp.bert import run_classifier
from official.utils.misc import distribution_utils
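The newly imported `owner_utils` supplies the `@owner_utils.Owner(...)` annotations added throughout this change. A minimal sketch of what such an ownership decorator could look like, assuming it simply tags the decorated method (the attribute name is hypothetical, not the actual implementation):

# Hypothetical sketch of an ownership decorator; the real owner_utils
# implementation may differ. It tags a benchmark method with a team name
# so result-reporting tools can attribute each benchmark to an owner.
def Owner(owner_name):
  """Returns a decorator that records `owner_name` on the function."""
  def decorator(benchmark_method):
    # The attribute name below is an assumption for illustration only.
    benchmark_method.__benchmark_owner__ = owner_name
    return benchmark_method
  return decorator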
......@@ -55,7 +56,6 @@ class BertClassifyBenchmarkBase(benchmark_utils.BertBenchmarkBase):
super(BertClassifyBenchmarkBase, self).__init__(output_dir)
self.num_epochs = None
self.num_steps_per_epoch = None
self.tpu = tpu
FLAGS.steps_per_loop = 50
@flagsaver.flagsaver
......@@ -74,9 +74,9 @@ class BertClassifyBenchmarkBase(benchmark_utils.BertBenchmarkBase):
warmup_steps = int(epochs * steps_per_epoch * 0.1)
eval_steps = int(
math.ceil(input_meta_data['eval_data_size'] / FLAGS.eval_batch_size))
if self.tpu:
if self.default_flags['tpu']:
strategy = distribution_utils.get_distribution_strategy(
distribution_strategy='tpu', tpu_address=self.tpu)
distribution_strategy='tpu', tpu_address=self.default_flags['tpu'])
else:
strategy = distribution_utils.get_distribution_strategy(
distribution_strategy='mirrored' if use_ds else 'off',
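For context, this hunk routes strategy selection through the TPU address stored on the instance instead of the module-level flag. A self-contained sketch of the same dispatch, assuming `default_flags` is populated by the benchmark base class:

# Sketch of the strategy dispatch above; helper names follow the source.
from official.utils.misc import distribution_utils

def pick_strategy(default_flags, use_ds, num_gpus):
  tpu_address = default_flags.get('tpu')  # set via the constructor's `tpu` arg
  if tpu_address:
    return distribution_utils.get_distribution_strategy(
        distribution_strategy='tpu', tpu_address=tpu_address)
  # Fall back to MirroredStrategy across GPUs, or to no strategy at all.
  return distribution_utils.get_distribution_strategy(
      distribution_strategy='mirrored' if use_ds else 'off',
      num_gpus=num_gpus)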
......@@ -211,6 +211,7 @@ class BertClassifyBenchmarkReal(BertClassifyBenchmarkBase):
'summaries/training_summary.txt')
self._run_and_report_benchmark(summary_path, use_ds=False)
@owner_utils.Owner('tf-model-garden')
def benchmark_8_gpu_mrpc(self):
"""Test BERT model performance with 8 GPUs."""
......@@ -264,6 +265,7 @@ class BertClassifyBenchmarkReal(BertClassifyBenchmarkBase):
'summaries/training_summary.txt')
self._run_and_report_benchmark(summary_path, use_ds=False)
@owner_utils.Owner('tf-model-garden')
def benchmark_2x2_tpu_mrpc(self):
"""Test BERT model performance with 2x2 TPU."""
......@@ -289,14 +291,14 @@ class BertClassifyAccuracy(BertClassifyBenchmarkBase):
`benchmark_(number of gpus)_gpu_(dataset type)` format.
"""
def __init__(self, output_dir=TMP_DIR, **kwargs):
def __init__(self, output_dir=TMP_DIR, tpu=None, **kwargs):
self.train_data_path = CLASSIFIER_TRAIN_DATA_PATH
self.eval_data_path = CLASSIFIER_EVAL_DATA_PATH
self.bert_config_file = MODEL_CONFIG_FILE_PATH
self.input_meta_data_path = CLASSIFIER_INPUT_META_DATA_PATH
self.pretrained_checkpoint_path = PRETRAINED_CHECKPOINT_PATH
super(BertClassifyAccuracy, self).__init__(output_dir=output_dir)
super(BertClassifyAccuracy, self).__init__(output_dir=output_dir, tpu=tpu)
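Each constructor in this change grows a `tpu` keyword that is forwarded to the base class; later hunks read it back as `self.default_flags['tpu']`. A hedged sketch of the base-class plumbing this implies (`BertBenchmarkBase` itself is not part of this diff, so the exact shape is an assumption):

# Assumed shape of the base-class plumbing; illustration only.
class BenchmarkBaseSketch(object):

  def __init__(self, output_dir=None, tpu=None, **kwargs):
    self.output_dir = output_dir
    # Keeping the TPU address in default_flags lets subclasses avoid the
    # global FLAGS.tpu, which may be unset depending on the test runner.
    self.default_flags = {'tpu': tpu}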
@benchmark_wrappers.enable_runtime_flags
def _run_and_report_benchmark(self,
......@@ -326,6 +328,7 @@ class BertClassifyAccuracy(BertClassifyBenchmarkBase):
FLAGS.bert_config_file = self.bert_config_file
FLAGS.init_checkpoint = self.pretrained_checkpoint_path
@owner_utils.Owner('tf-model-garden')
def benchmark_8_gpu_mrpc(self):
"""Run BERT model accuracy test with 8 GPUs.
......@@ -349,6 +352,16 @@ class BertClassifyAccuracy(BertClassifyBenchmarkBase):
'summaries/training_summary.txt')
self._run_and_report_benchmark(summary_path)
@owner_utils.Owner('tf-model-garden')
def benchmark_2x2_tpu_mrpc(self):
"""Run BERT model accuracy test on 2x2 TPU."""
self._setup()
FLAGS.model_dir = self._get_model_dir('benchmark_2x2_tpu_mrpc')
summary_path = os.path.join(FLAGS.model_dir,
'summaries/training_summary.txt')
self._run_and_report_benchmark(summary_path)
if __name__ == '__main__':
tf.test.main()
......@@ -28,6 +28,7 @@ import tensorflow as tf # pylint: disable=g-bad-import-order
from official.benchmark import benchmark_wrappers
from official.benchmark import bert_benchmark_utils
from official.benchmark import owner_utils
from official.nlp.bert import run_pretraining
from official.utils.flags import core as flags_core
from official.utils.misc import distribution_utils
......@@ -64,10 +65,10 @@ class BertPretrainAccuracyBenchmark(bert_benchmark_utils.BertBenchmarkBase):
output_dir=output_dir, tpu=tpu, **kwargs)
@benchmark_wrappers.enable_runtime_flags
def _run_and_report_benchmark(self, summary_path: str):
def _run_and_report_benchmark(self, summary_path: str, report_accuracy: bool):
"""Runs and reports the benchmark given the provided configuration."""
distribution = distribution_utils.get_distribution_strategy(
distribution_strategy='tpu', tpu_address=FLAGS.tpu)
distribution_strategy='tpu', tpu_address=self.default_flags['tpu'])
logging.info('Flags: %s', flags_core.get_nondefault_flags_as_str())
start_time_sec = time.time()
run_pretraining.run_bert_pretrain(
......@@ -76,22 +77,26 @@ class BertPretrainAccuracyBenchmark(bert_benchmark_utils.BertBenchmarkBase):
with tf.io.gfile.GFile(summary_path, 'rb') as reader:
summary = json.loads(reader.read().decode('utf-8'))
self._report_benchmark(summary, start_time_sec, wall_time_sec)
self._report_benchmark(summary, start_time_sec, wall_time_sec,
report_accuracy)
def _report_benchmark(self, summary, start_time_sec, wall_time_sec):
def _report_benchmark(self, summary, start_time_sec, wall_time_sec,
report_accuracy):
metrics = [{
'name': 'train_loss',
'value': summary['train_loss'],
}, {
'name':
'example_per_second',
'exp_per_second',
'value':
self.timer_callback.get_examples_per_sec(FLAGS.train_batch_size *
FLAGS.steps_per_loop)
}, {
'name': 'startup_time',
'value': self.timer_callback.get_startup_time(start_time_sec)
}, {
}]
if report_accuracy:
metrics.extend([{
'name': 'masked_lm_accuracy',
'value': summary['masked_lm_accuracy'],
'min_value': MIN_MLM_ACCURACY,
......@@ -101,7 +106,7 @@ class BertPretrainAccuracyBenchmark(bert_benchmark_utils.BertBenchmarkBase):
'value': summary['next_sentence_accuracy'],
'min_value': MIN_NSP_ACCURACY,
'max_value': MAX_NSP_ACCURACY,
}]
}])
self.report_benchmark(
iters=summary['total_training_steps'],
wall_time=wall_time_sec,
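Pulled out of the diff context: throughput (`exp_per_second`) and startup time are now always reported, while the masked-LM/next-sentence accuracy entries, with their min/max bounds, are appended only when `report_accuracy` is true. A compact sketch of the assembled metrics (constant names follow the source; `MAX_MLM_ACCURACY` and the placeholder values are assumptions):

# Placeholder thresholds; the real values are module-level constants in the
# benchmark file (MAX_MLM_ACCURACY is assumed by symmetry with the NSP bounds).
MIN_MLM_ACCURACY, MAX_MLM_ACCURACY = 0.0, 1.0
MIN_NSP_ACCURACY, MAX_NSP_ACCURACY = 0.0, 1.0

# Sketch of the conditional metric assembly introduced above.
def build_metrics(summary, examples_per_sec, startup_time, report_accuracy):
  metrics = [
      {'name': 'train_loss', 'value': summary['train_loss']},
      {'name': 'exp_per_second', 'value': examples_per_sec},
      {'name': 'startup_time', 'value': startup_time},
  ]
  if report_accuracy:
    # Accuracy bounds only make sense for converged runs; the 10k-step
    # perf variant below passes report_accuracy=False to skip them.
    metrics.extend([
        {'name': 'masked_lm_accuracy',
         'value': summary['masked_lm_accuracy'],
         'min_value': MIN_MLM_ACCURACY, 'max_value': MAX_MLM_ACCURACY},
        {'name': 'next_sentence_accuracy',
         'value': summary['next_sentence_accuracy'],
         'min_value': MIN_NSP_ACCURACY, 'max_value': MAX_NSP_ACCURACY},
    ])
  return metrics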
......@@ -120,7 +125,8 @@ class BertPretrainAccuracyBenchmark(bert_benchmark_utils.BertBenchmarkBase):
FLAGS.max_predictions_per_seq = 20
FLAGS.dtype = 'bf16'
def benchmark_8x8_tpu_bf16_seq128_1m_steps(self):
@owner_utils.Owner('tf-model-garden')
def benchmark_accuracy_8x8_tpu_bf16_seq128_1m_steps(self):
"""Test bert pretraining with 8x8 TPU for 1 million steps."""
# This is used for accuracy test.
self._setup()
......@@ -128,23 +134,26 @@ class BertPretrainAccuracyBenchmark(bert_benchmark_utils.BertBenchmarkBase):
FLAGS.num_steps_per_epoch = 250000
FLAGS.num_train_epochs = 4
FLAGS.model_dir = self._get_model_dir(
'benchmark_8x8_tpu_bf16_seq128_1m_steps')
'benchmark_accuracy_8x8_tpu_bf16_seq128_1m_steps')
summary_path = os.path.join(FLAGS.model_dir,
'summaries/training_summary.txt')
self._run_and_report_benchmark(summary_path=summary_path)
self._run_and_report_benchmark(summary_path=summary_path,
report_accuracy=True)
def benchmark_4x4_tpu_bf16_seq128_1k_steps(self):
"""Test bert pretraining with 4x4 TPU for 1000 steps."""
# This is used for througput test.
@owner_utils.Owner('tf-model-garden')
def benchmark_perf_8x8_tpu_bf16_seq128_10k_steps(self):
"""Test bert pretraining with 8x8 TPU for 10000 steps."""
self._setup()
self._specify_common_flags()
FLAGS.num_steps_per_epoch = 1000
FLAGS.num_train_epochs = 1
FLAGS.num_steps_per_epoch = 5000
FLAGS.num_train_epochs = 2
FLAGS.model_dir = self._get_model_dir(
'benchmark_4x4_tpu_bf16_seq128_1k_steps')
'benchmark_perf_8x8_tpu_bf16_seq128_10k_steps')
summary_path = os.path.join(FLAGS.model_dir,
'summaries/training_summary.txt')
self._run_and_report_benchmark(summary_path=summary_path)
# Disable accuracy check.
self._run_and_report_benchmark(summary_path=summary_path,
report_accuracy=False)
if __name__ == '__main__':
......
......@@ -30,6 +30,7 @@ import tensorflow as tf
# pylint: enable=g-bad-import-order
from official.benchmark import bert_benchmark_utils as benchmark_utils
from official.benchmark import owner_utils
from official.nlp.bert import run_squad
from official.utils.misc import distribution_utils
from official.utils.misc import keras_utils
......@@ -79,9 +80,9 @@ class BertSquadBenchmarkBase(benchmark_utils.BertBenchmarkBase):
Returns:
A `tf.distribute.DistributionStrategy` object.
"""
if FLAGS.tpu or ds_type == 'tpu':
if self.default_flags['tpu'] or ds_type == 'tpu':
return distribution_utils.get_distribution_strategy(
distribution_strategy='tpu', tpu_address=FLAGS.tpu)
distribution_strategy='tpu', tpu_address=self.default_flags['tpu'])
elif ds_type == 'multi_worker_mirrored':
# Configures cluster spec for multi-worker distribution strategy.
_ = distribution_utils.configure_cluster(FLAGS.worker_hosts,
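The SQuAD base class gets the same treatment, keyed additionally on a `ds_type` string. A sketch of the full dispatch, assuming the unchanged tail of the method continues as in the surrounding code:

# Sketch of the ds_type dispatch; the TPU and multi-worker branches mirror
# the hunk above, the final fallback is an assumption.
from official.utils.misc import distribution_utils

def get_strategy(default_flags, ds_type, num_gpus, worker_hosts, task_index):
  if default_flags.get('tpu') or ds_type == 'tpu':
    return distribution_utils.get_distribution_strategy(
        distribution_strategy='tpu', tpu_address=default_flags.get('tpu'))
  if ds_type == 'multi_worker_mirrored':
    # Configures the cluster spec before building the strategy.
    distribution_utils.configure_cluster(worker_hosts, task_index)
  return distribution_utils.get_distribution_strategy(
      distribution_strategy=ds_type, num_gpus=num_gpus)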
......@@ -225,26 +226,7 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
self._run_and_report_benchmark(ds_type='off', run_eagerly=True)
def benchmark_2_gpu(self):
"""Tests BERT SQuAD model performance with 2 GPUs."""
self._setup()
self.num_gpus = 2
FLAGS.model_dir = self._get_model_dir('benchmark_2_gpu_squad')
FLAGS.train_batch_size = 8
self._run_and_report_benchmark()
def benchmark_4_gpu(self):
"""Tests BERT SQuAD model performance with 4 GPUs."""
self._setup()
self.num_gpus = 4
FLAGS.model_dir = self._get_model_dir('benchmark_4_gpu_squad')
FLAGS.train_batch_size = 16
self._run_and_report_benchmark()
@owner_utils.Owner('tf-model-garden')
def benchmark_8_gpu(self):
"""Tests BERT SQuAD model performance with 8 GPUs."""
......@@ -293,30 +275,6 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
self._run_and_report_benchmark()
def benchmark_2_gpu_fp16(self):
"""Tests BERT SQuAD model performance with 2 GPUs and FP16."""
self._setup()
self.num_gpus = 2
FLAGS.model_dir = self._get_model_dir('benchmark_2_gpu_squad_fp16')
FLAGS.train_batch_size = 8
FLAGS.dtype = 'fp16'
FLAGS.loss_scale = 'dynamic'
self._run_and_report_benchmark()
def benchmark_4_gpu_fp16(self):
"""Tests BERT SQuAD model performance with 4 GPUs and FP16."""
self._setup()
self.num_gpus = 4
FLAGS.model_dir = self._get_model_dir('benchmark_4_gpu_squad_fp16')
FLAGS.train_batch_size = 16
FLAGS.dtype = 'fp16'
FLAGS.loss_scale = 'dynamic'
self._run_and_report_benchmark()
def benchmark_8_gpu_fp16(self):
"""Tests BERT SQuAD model performance with 8 GPUs."""
......@@ -355,18 +313,6 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
self._run_and_report_benchmark()
def benchmark_4_gpu_amp(self):
"""Tests BERT SQuAD model performance with 1 GPU with automatic mixed precision."""
self._setup()
self.num_gpus = 4
FLAGS.model_dir = self._get_model_dir('benchmark_4_gpu_amp_squad')
FLAGS.train_batch_size = 16
FLAGS.dtype = 'fp16'
FLAGS.fp16_implementation = 'graph_rewrite'
self._run_and_report_benchmark()
def benchmark_8_gpu_amp(self):
"""Tests BERT SQuAD model performance with 1 GPU with automatic mixed precision."""
......@@ -380,6 +326,7 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
self._run_and_report_benchmark()
@owner_utils.Owner('tf-model-garden')
def benchmark_2x2_tpu(self):
"""Tests BERT SQuAD model performance with 2x2 TPU."""
......@@ -449,6 +396,7 @@ class BertSquadAccuracy(BertSquadBenchmarkBase):
self._run_and_report_benchmark(ds_type='off', run_eagerly=True)
@owner_utils.Owner('tf-model-garden')
def benchmark_8_gpu(self):
"""Tests BERT SQuAD model accuracy with 8 GPUs."""
......@@ -485,6 +433,7 @@ class BertSquadAccuracy(BertSquadBenchmarkBase):
self._run_and_report_benchmark()
@owner_utils.Owner('tf-model-garden')
def benchmark_2x2_tpu(self):
"""Tests BERT SQuAD model accuracy with 2x2 TPU."""
......
......@@ -26,6 +26,7 @@ import tensorflow as tf # pylint: disable=g-bad-import-order
from official.benchmark import benchmark_wrappers
from official.benchmark import keras_benchmark
from official.benchmark import owner_utils
from official.vision.segmentation import unet_main as unet_training_lib
from official.vision.segmentation import unet_model as unet_model_lib
......@@ -119,6 +120,7 @@ class Unet3DAccuracyBenchmark(keras_benchmark.KerasBenchmark):
def _get_model_dir(self, folder_name):
return os.path.join(self.output_dir, folder_name)
@owner_utils.Owner('tf-model-garden')
def benchmark_4x4_tpu_bf16(self):
"""Test Keras model with 4x4 TPU, fp16."""
experiment_name = 'benchmark_4x4_tpu_fp16'
......
......@@ -29,6 +29,7 @@ import tensorflow as tf
# pylint: enable=g-bad-import-order
from official.benchmark import bert_benchmark_utils as benchmark_utils
from official.benchmark import owner_utils
from official.nlp.xlnet import run_classifier
from official.nlp.xlnet import run_squad
from official.benchmark import benchmark_wrappers
......@@ -47,8 +48,8 @@ FLAGS = flags.FLAGS
class XLNetBenchmarkBase(benchmark_utils.BertBenchmarkBase):
"""Base class to hold methods common to test classes in the module."""
def __init__(self, output_dir=None):
super(XLNetBenchmarkBase, self).__init__(output_dir)
def __init__(self, output_dir=None, tpu=None):
super(XLNetBenchmarkBase, self).__init__(output_dir=output_dir, tpu=tpu)
self.num_epochs = None
self.num_steps_per_epoch = None
......@@ -71,12 +72,12 @@ class XLNetClassifyAccuracy(XLNetBenchmarkBase):
`benchmark_(number of gpus)_gpu_(dataset type)` format.
"""
def __init__(self, output_dir=None, **kwargs):
def __init__(self, output_dir=None, tpu=None, **kwargs):
self.train_data_path = CLASSIFIER_TRAIN_DATA_PATH
self.eval_data_path = CLASSIFIER_EVAL_DATA_PATH
self.pretrained_checkpoint_path = PRETRAINED_CHECKPOINT_PATH
super(XLNetClassifyAccuracy, self).__init__(output_dir=output_dir)
super(XLNetClassifyAccuracy, self).__init__(output_dir=output_dir, tpu=tpu)
@benchmark_wrappers.enable_runtime_flags
def _run_and_report_benchmark(self,
......@@ -123,6 +124,7 @@ class XLNetClassifyAccuracy(XLNetBenchmarkBase):
FLAGS.train_tfrecord_path = self.train_data_path
FLAGS.test_tfrecord_path = self.eval_data_path
@owner_utils.Owner('tf-model-garden')
def benchmark_8_gpu_imdb(self):
"""Run XLNet model accuracy test with 8 GPUs."""
self._setup()
......@@ -134,6 +136,19 @@ class XLNetClassifyAccuracy(XLNetBenchmarkBase):
'summaries/training_summary.txt')
self._run_and_report_benchmark(summary_path)
@owner_utils.Owner('tf-model-garden')
def benchmark_2x2_tpu_imdb(self):
"""Run XLNet model accuracy test on 2x2 tpu."""
self._setup()
FLAGS.strategy_type = 'tpu'
FLAGS.model_dir = self._get_model_dir('benchmark_2x2_tpu_imdb')
# Sets timer_callback to None as we do not use it now.
self.timer_callback = None
summary_path = os.path.join(FLAGS.model_dir,
'summaries/training_summary.txt')
self._run_and_report_benchmark(summary_path)
class XLNetSquadAccuracy(XLNetBenchmarkBase):
"""Short accuracy test for XLNet squad model.
......@@ -143,14 +158,14 @@ class XLNetSquadAccuracy(XLNetBenchmarkBase):
`benchmark_(number of gpus)_gpu_(dataset type)` format.
"""
def __init__(self, output_dir=None, **kwargs):
def __init__(self, output_dir=None, tpu=None, **kwargs):
self.train_data_path = SQUAD_DATA_PATH
self.predict_file = os.path.join(SQUAD_DATA_PATH, 'dev-v2.0.json')
self.test_data_path = os.path.join(SQUAD_DATA_PATH, '12048.eval.tf_record')
self.spiece_model_file = os.path.join(SQUAD_DATA_PATH, 'spiece.cased.model')
self.pretrained_checkpoint_path = PRETRAINED_CHECKPOINT_PATH
super(XLNetSquadAccuracy, self).__init__(output_dir=output_dir)
super(XLNetSquadAccuracy, self).__init__(output_dir=output_dir, tpu=tpu)
@benchmark_wrappers.enable_runtime_flags
def _run_and_report_benchmark(self,
......@@ -196,9 +211,10 @@ class XLNetSquadAccuracy(XLNetBenchmarkBase):
FLAGS.test_tfrecord_path = self.test_data_path
FLAGS.spiece_model_file = self.spiece_model_file
FLAGS.predict_file = self.predict_file
FLAGS.adam_epsilon=1e-6
FLAGS.lr_layer_decay_rate=0.75
FLAGS.adam_epsilon = 1e-6
FLAGS.lr_layer_decay_rate = 0.75
@owner_utils.Owner('tf-model-garden')
def benchmark_8_gpu_squadv2(self):
"""Run XLNet model squad v2 accuracy test with 8 GPUs."""
self._setup()
......@@ -211,6 +227,20 @@ class XLNetSquadAccuracy(XLNetBenchmarkBase):
'summaries/training_summary.txt')
self._run_and_report_benchmark(summary_path)
@owner_utils.Owner('tf-model-garden')
def benchmark_2x2_tpu_squadv2(self):
"""Run XLNet model squad v2 accuracy test on 2x2 tpu."""
self._setup()
FLAGS.strategy_type = 'tpu'
FLAGS.model_dir = self._get_model_dir('benchmark_2x2_tpu_squadv2')
FLAGS.predict_dir = FLAGS.model_dir
# Sets timer_callback to None as we do not use it now.
self.timer_callback = None
summary_path = os.path.join(FLAGS.model_dir,
'summaries/training_summary.txt')
self._run_and_report_benchmark(summary_path)
if __name__ == '__main__':
tf.test.main()
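As a usage note, these classes follow the `tf.test.Benchmark` convention, so a single method is typically selected with the `--benchmarks` regex flag when the module is run directly; how the `tpu` constructor argument is supplied depends on the PerfZero runner, which is not shown in this diff:

# Hypothetical invocation; the module path and flag plumbing are assumptions.
#
#   python3 xlnet_benchmark.py \
#     --benchmarks=XLNetSquadAccuracy.benchmark_2x2_tpu_squadv2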