Commit ef7b1dac authored by Jing Li, committed by A. Unique TensorFlower

Internal change

PiperOrigin-RevId: 309586836
parent c627506f
@@ -31,6 +31,7 @@ import tensorflow as tf
 # pylint: enable=g-bad-import-order
 from official.benchmark import bert_benchmark_utils as benchmark_utils
+from official.benchmark import owner_utils
 from official.nlp.bert import configs
 from official.nlp.bert import run_classifier
 from official.utils.misc import distribution_utils
@@ -55,7 +56,6 @@ class BertClassifyBenchmarkBase(benchmark_utils.BertBenchmarkBase):
     super(BertClassifyBenchmarkBase, self).__init__(output_dir)
     self.num_epochs = None
     self.num_steps_per_epoch = None
-    self.tpu = tpu
     FLAGS.steps_per_loop = 50

   @flagsaver.flagsaver
@@ -74,9 +74,9 @@ class BertClassifyBenchmarkBase(benchmark_utils.BertBenchmarkBase):
     warmup_steps = int(epochs * steps_per_epoch * 0.1)
     eval_steps = int(
         math.ceil(input_meta_data['eval_data_size'] / FLAGS.eval_batch_size))
-    if self.tpu:
+    if self.default_flags['tpu']:
       strategy = distribution_utils.get_distribution_strategy(
-          distribution_strategy='tpu', tpu_address=self.tpu)
+          distribution_strategy='tpu', tpu_address=self.default_flags['tpu'])
     else:
       strategy = distribution_utils.get_distribution_strategy(
           distribution_strategy='mirrored' if use_ds else 'off',
@@ -211,6 +211,7 @@ class BertClassifyBenchmarkReal(BertClassifyBenchmarkBase):
                                 'summaries/training_summary.txt')
     self._run_and_report_benchmark(summary_path, use_ds=False)

+  @owner_utils.Owner('tf-model-garden')
   def benchmark_8_gpu_mrpc(self):
     """Test BERT model performance with 8 GPUs."""
@@ -264,6 +265,7 @@ class BertClassifyBenchmarkReal(BertClassifyBenchmarkBase):
                                 'summaries/training_summary.txt')
     self._run_and_report_benchmark(summary_path, use_ds=False)

+  @owner_utils.Owner('tf-model-garden')
   def benchmark_2x2_tpu_mrpc(self):
     """Test BERT model performance with 2x2 TPU."""
@@ -289,14 +291,14 @@ class BertClassifyAccuracy(BertClassifyBenchmarkBase):
     `benchmark_(number of gpus)_gpu_(dataset type)` format.
   """

-  def __init__(self, output_dir=TMP_DIR, **kwargs):
+  def __init__(self, output_dir=TMP_DIR, tpu=None, **kwargs):
     self.train_data_path = CLASSIFIER_TRAIN_DATA_PATH
     self.eval_data_path = CLASSIFIER_EVAL_DATA_PATH
     self.bert_config_file = MODEL_CONFIG_FILE_PATH
     self.input_meta_data_path = CLASSIFIER_INPUT_META_DATA_PATH
     self.pretrained_checkpoint_path = PRETRAINED_CHECKPOINT_PATH

-    super(BertClassifyAccuracy, self).__init__(output_dir=output_dir)
+    super(BertClassifyAccuracy, self).__init__(output_dir=output_dir, tpu=tpu)

   @benchmark_wrappers.enable_runtime_flags
   def _run_and_report_benchmark(self,
@@ -326,6 +328,7 @@ class BertClassifyAccuracy(BertClassifyBenchmarkBase):
     FLAGS.bert_config_file = self.bert_config_file
     FLAGS.init_checkpoint = self.pretrained_checkpoint_path

+  @owner_utils.Owner('tf-model-garden')
   def benchmark_8_gpu_mrpc(self):
     """Run BERT model accuracy test with 8 GPUs.
@@ -349,6 +352,16 @@ class BertClassifyAccuracy(BertClassifyBenchmarkBase):
                                 'summaries/training_summary.txt')
     self._run_and_report_benchmark(summary_path)

+  @owner_utils.Owner('tf-model-garden')
+  def benchmark_2x2_tpu_mrpc(self):
+    """Run BERT model accuracy test on 2x2 TPU."""
+    self._setup()
+    FLAGS.model_dir = self._get_model_dir('benchmark_2x2_tpu_mrpc')
+
+    summary_path = os.path.join(FLAGS.model_dir,
+                                'summaries/training_summary.txt')
+    self._run_and_report_benchmark(summary_path)
+

 if __name__ == '__main__':
   tf.test.main()
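
Note: every benchmark touched by this change gains an @owner_utils.Owner('tf-model-garden') decorator. As a rough sketch of how such an ownership decorator can work (the attribute name and mechanics below are assumptions for illustration, not taken from official/benchmark/owner_utils.py):

    def Owner(owner_name):
      """Returns a decorator that tags a benchmark method with an owner name."""
      def _decorator(benchmark_method):
        # Hypothetical marker attribute; reporting tooling could read this to
        # group benchmark results by owning team.
        benchmark_method.__benchmark__owner__ = owner_name
        return benchmark_method
      return _decorator

Because the decorator only attaches metadata and returns the method unchanged, it does not affect how the benchmark itself runs.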
@@ -28,6 +28,7 @@ import tensorflow as tf  # pylint: disable=g-bad-import-order
 from official.benchmark import benchmark_wrappers
 from official.benchmark import bert_benchmark_utils
+from official.benchmark import owner_utils
 from official.nlp.bert import run_pretraining
 from official.utils.flags import core as flags_core
 from official.utils.misc import distribution_utils
@@ -64,10 +65,10 @@ class BertPretrainAccuracyBenchmark(bert_benchmark_utils.BertBenchmarkBase):
         output_dir=output_dir, tpu=tpu, **kwargs)

   @benchmark_wrappers.enable_runtime_flags
-  def _run_and_report_benchmark(self, summary_path: str):
+  def _run_and_report_benchmark(self, summary_path: str, report_accuracy: bool):
     """Runs and reports the benchmark given the provided configuration."""
     distribution = distribution_utils.get_distribution_strategy(
-        distribution_strategy='tpu', tpu_address=FLAGS.tpu)
+        distribution_strategy='tpu', tpu_address=self.default_flags['tpu'])
     logging.info('Flags: %s', flags_core.get_nondefault_flags_as_str())
     start_time_sec = time.time()
     run_pretraining.run_bert_pretrain(
@@ -76,32 +77,36 @@ class BertPretrainAccuracyBenchmark(bert_benchmark_utils.BertBenchmarkBase):
     with tf.io.gfile.GFile(summary_path, 'rb') as reader:
       summary = json.loads(reader.read().decode('utf-8'))
-    self._report_benchmark(summary, start_time_sec, wall_time_sec)
+    self._report_benchmark(summary, start_time_sec, wall_time_sec,
+                           report_accuracy)

-  def _report_benchmark(self, summary, start_time_sec, wall_time_sec):
+  def _report_benchmark(self, summary, start_time_sec, wall_time_sec,
+                        report_accuracy):
     metrics = [{
         'name': 'train_loss',
         'value': summary['train_loss'],
     }, {
         'name':
-            'example_per_second',
+            'exp_per_second',
         'value':
             self.timer_callback.get_examples_per_sec(FLAGS.train_batch_size *
                                                      FLAGS.steps_per_loop)
     }, {
         'name': 'startup_time',
         'value': self.timer_callback.get_startup_time(start_time_sec)
-    }, {
-        'name': 'masked_lm_accuracy',
-        'value': summary['masked_lm_accuracy'],
-        'min_value': MIN_MLM_ACCURACY,
-        'max_value': MAX_MLM_ACCURACY,
-    }, {
-        'name': 'next_sentence_accuracy',
-        'value': summary['next_sentence_accuracy'],
-        'min_value': MIN_NSP_ACCURACY,
-        'max_value': MAX_NSP_ACCURACY,
     }]
+    if report_accuracy:
+      metrics.extend([{
+          'name': 'masked_lm_accuracy',
+          'value': summary['masked_lm_accuracy'],
+          'min_value': MIN_MLM_ACCURACY,
+          'max_value': MAX_MLM_ACCURACY,
+      }, {
+          'name': 'next_sentence_accuracy',
+          'value': summary['next_sentence_accuracy'],
+          'min_value': MIN_NSP_ACCURACY,
+          'max_value': MAX_NSP_ACCURACY,
+      }])
     self.report_benchmark(
         iters=summary['total_training_steps'],
         wall_time=wall_time_sec,
@@ -120,7 +125,8 @@ class BertPretrainAccuracyBenchmark(bert_benchmark_utils.BertBenchmarkBase):
     FLAGS.max_predictions_per_seq = 20
     FLAGS.dtype = 'bf16'

-  def benchmark_8x8_tpu_bf16_seq128_1m_steps(self):
+  @owner_utils.Owner('tf-model-garden')
+  def benchmark_accuracy_8x8_tpu_bf16_seq128_1m_steps(self):
     """Test bert pretraining with 8x8 TPU for 1 million steps."""
     # This is used for accuracy test.
     self._setup()
@@ -128,23 +134,26 @@ class BertPretrainAccuracyBenchmark(bert_benchmark_utils.BertBenchmarkBase):
     FLAGS.num_steps_per_epoch = 250000
     FLAGS.num_train_epochs = 4
     FLAGS.model_dir = self._get_model_dir(
-        'benchmark_8x8_tpu_bf16_seq128_1m_steps')
+        'benchmark_accuracy_8x8_tpu_bf16_seq128_1m_steps')
     summary_path = os.path.join(FLAGS.model_dir,
                                 'summaries/training_summary.txt')
-    self._run_and_report_benchmark(summary_path=summary_path)
+    self._run_and_report_benchmark(summary_path=summary_path,
+                                   report_accuracy=True)

-  def benchmark_4x4_tpu_bf16_seq128_1k_steps(self):
-    """Test bert pretraining with 4x4 TPU for 1000 steps."""
-    # This is used for througput test.
+  @owner_utils.Owner('tf-model-garden')
+  def benchmark_perf_8x8_tpu_bf16_seq128_10k_steps(self):
+    """Test bert pretraining with 8x8 TPU for 10000 steps."""
     self._setup()
     self._specify_common_flags()
-    FLAGS.num_steps_per_epoch = 1000
-    FLAGS.num_train_epochs = 1
+    FLAGS.num_steps_per_epoch = 5000
+    FLAGS.num_train_epochs = 2
     FLAGS.model_dir = self._get_model_dir(
-        'benchmark_4x4_tpu_bf16_seq128_1k_steps')
+        'benchmark_perf_8x8_tpu_bf16_seq128_10k_steps')
     summary_path = os.path.join(FLAGS.model_dir,
                                 'summaries/training_summary.txt')
-    self._run_and_report_benchmark(summary_path=summary_path)
+    # Disable accuracy check.
+    self._run_and_report_benchmark(summary_path=summary_path,
+                                   report_accuracy=False)

 if __name__ == '__main__':
...
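
Note: the recurring edit in this commit swaps FLAGS.tpu (and self.tpu) for self.default_flags['tpu'], with the TPU address now threaded through each benchmark constructor. A minimal sketch of how the shared base class in bert_benchmark_utils might record it; everything below beyond the names visible in this diff is an assumption:

    import tensorflow as tf

    class BertBenchmarkBase(tf.test.Benchmark):
      """Illustrative sketch only; the real BertBenchmarkBase holds more state."""

      def __init__(self, output_dir=None, tpu=None, **kwargs):
        self.output_dir = output_dir
        # Keeping the TPU address in default_flags lets each benchmark resolve
        # its distribution strategy per run without reading global FLAGS.tpu.
        self.default_flags = {'tpu': tpu}

Reading the address from instance state rather than a global flag also keeps benchmarks independent when several run in one process.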
@@ -30,6 +30,7 @@ import tensorflow as tf
 # pylint: enable=g-bad-import-order
 from official.benchmark import bert_benchmark_utils as benchmark_utils
+from official.benchmark import owner_utils
 from official.nlp.bert import run_squad
 from official.utils.misc import distribution_utils
 from official.utils.misc import keras_utils
@@ -79,9 +80,9 @@ class BertSquadBenchmarkBase(benchmark_utils.BertBenchmarkBase):
     Returns:
       A `tf.distribute.DistibutionStrategy` object.
     """
-    if FLAGS.tpu or ds_type == 'tpu':
+    if self.default_flags['tpu'] or ds_type == 'tpu':
       return distribution_utils.get_distribution_strategy(
-          distribution_strategy='tpu', tpu_address=FLAGS.tpu)
+          distribution_strategy='tpu', tpu_address=self.default_flags['tpu'])
     elif ds_type == 'multi_worker_mirrored':
       # Configures cluster spec for multi-worker distribution strategy.
       _ = distribution_utils.configure_cluster(FLAGS.worker_hosts,
@@ -225,26 +226,7 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
     self._run_and_report_benchmark(ds_type='off', run_eagerly=True)

-  def benchmark_2_gpu(self):
-    """Tests BERT SQuAD model performance with 2 GPUs."""
-    self._setup()
-    self.num_gpus = 2
-    FLAGS.model_dir = self._get_model_dir('benchmark_2_gpu_squad')
-    FLAGS.train_batch_size = 8
-    self._run_and_report_benchmark()
-
-  def benchmark_4_gpu(self):
-    """Tests BERT SQuAD model performance with 4 GPUs."""
-    self._setup()
-    self.num_gpus = 4
-    FLAGS.model_dir = self._get_model_dir('benchmark_4_gpu_squad')
-    FLAGS.train_batch_size = 16
-    self._run_and_report_benchmark()
-
+  @owner_utils.Owner('tf-model-garden')
   def benchmark_8_gpu(self):
     """Tests BERT SQuAD model performance with 8 GPUs."""
@@ -293,30 +275,6 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
     self._run_and_report_benchmark()

-  def benchmark_2_gpu_fp16(self):
-    """Tests BERT SQuAD model performance with 2 GPUs and FP16."""
-    self._setup()
-    self.num_gpus = 2
-    FLAGS.model_dir = self._get_model_dir('benchmark_2_gpu_squad_fp16')
-    FLAGS.train_batch_size = 8
-    FLAGS.dtype = 'fp16'
-    FLAGS.loss_scale = 'dynamic'
-    self._run_and_report_benchmark()
-
-  def benchmark_4_gpu_fp16(self):
-    """Tests BERT SQuAD model performance with 4 GPUs and FP16."""
-    self._setup()
-    self.num_gpus = 4
-    FLAGS.model_dir = self._get_model_dir('benchmark_4_gpu_squad_fp16')
-    FLAGS.train_batch_size = 16
-    FLAGS.dtype = 'fp16'
-    FLAGS.loss_scale = 'dynamic'
-    self._run_and_report_benchmark()
-
   def benchmark_8_gpu_fp16(self):
     """Tests BERT SQuAD model performance with 8 GPUs."""
@@ -355,18 +313,6 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
     self._run_and_report_benchmark()

-  def benchmark_4_gpu_amp(self):
-    """Tests BERT SQuAD model performance with 1 GPU with automatic mixed precision."""
-    self._setup()
-    self.num_gpus = 4
-    FLAGS.model_dir = self._get_model_dir('benchmark_4_gpu_amp_squad')
-    FLAGS.train_batch_size = 16
-    FLAGS.dtype = 'fp16'
-    FLAGS.fp16_implementation = 'graph_rewrite'
-    self._run_and_report_benchmark()
-
   def benchmark_8_gpu_amp(self):
     """Tests BERT SQuAD model performance with 1 GPU with automatic mixed precision."""
@@ -380,6 +326,7 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
     self._run_and_report_benchmark()

+  @owner_utils.Owner('tf-model-garden')
   def benchmark_2x2_tpu(self):
     """Tests BERT SQuAD model performance with 2x2 TPU."""
@@ -449,6 +396,7 @@ class BertSquadAccuracy(BertSquadBenchmarkBase):
     self._run_and_report_benchmark(ds_type='off', run_eagerly=True)

+  @owner_utils.Owner('tf-model-garden')
   def benchmark_8_gpu(self):
     """Tests BERT SQuAD model accuracy with 8 GPUs."""
@@ -485,6 +433,7 @@ class BertSquadAccuracy(BertSquadBenchmarkBase):
     self._run_and_report_benchmark()

+  @owner_utils.Owner('tf-model-garden')
   def benchmark_2x2_tpu(self):
     """Tests BERT SQuAD model accuracy with 2x2 TPU."""
...
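
Note: distribution_utils.get_distribution_strategy is the entry point all of these benchmarks share for strategy selection; the hunks above only change where the TPU address comes from. A hedged usage sketch, with a placeholder TPU address:

    from official.utils.misc import distribution_utils

    # 'grpc://10.0.0.2:8470' is a placeholder; a real run passes the address
    # of a reachable TPU worker, or None/'' to fall back per library defaults.
    strategy = distribution_utils.get_distribution_strategy(
        distribution_strategy='tpu', tpu_address='grpc://10.0.0.2:8470')

    with strategy.scope():
      pass  # Build the Keras model here so its variables are replicated.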
@@ -26,6 +26,7 @@ import tensorflow as tf  # pylint: disable=g-bad-import-order
 from official.benchmark import benchmark_wrappers
 from official.benchmark import keras_benchmark
+from official.benchmark import owner_utils
 from official.vision.segmentation import unet_main as unet_training_lib
 from official.vision.segmentation import unet_model as unet_model_lib
@@ -119,6 +120,7 @@ class Unet3DAccuracyBenchmark(keras_benchmark.KerasBenchmark):
   def _get_model_dir(self, folder_name):
     return os.path.join(self.output_dir, folder_name)

+  @owner_utils.Owner('tf-model-garden')
   def benchmark_4x4_tpu_bf16(self):
     """Test Keras model with 4x4 TPU, fp16."""
     experiment_name = 'benchmark_4x4_tpu_fp16'
...
@@ -29,6 +29,7 @@ import tensorflow as tf
 # pylint: enable=g-bad-import-order
 from official.benchmark import bert_benchmark_utils as benchmark_utils
+from official.benchmark import owner_utils
 from official.nlp.xlnet import run_classifier
 from official.nlp.xlnet import run_squad
 from official.benchmark import benchmark_wrappers
@@ -47,8 +48,8 @@ FLAGS = flags.FLAGS
 class XLNetBenchmarkBase(benchmark_utils.BertBenchmarkBase):
   """Base class to hold methods common to test classes in the module."""

-  def __init__(self, output_dir=None):
-    super(XLNetBenchmarkBase, self).__init__(output_dir)
+  def __init__(self, output_dir=None, tpu=None):
+    super(XLNetBenchmarkBase, self).__init__(output_dir=output_dir, tpu=tpu)
     self.num_epochs = None
     self.num_steps_per_epoch = None
@@ -71,12 +72,12 @@ class XLNetClassifyAccuracy(XLNetBenchmarkBase):
     `benchmark_(number of gpus)_gpu_(dataset type)` format.
   """

-  def __init__(self, output_dir=None, **kwargs):
+  def __init__(self, output_dir=None, tpu=None, **kwargs):
     self.train_data_path = CLASSIFIER_TRAIN_DATA_PATH
     self.eval_data_path = CLASSIFIER_EVAL_DATA_PATH
     self.pretrained_checkpoint_path = PRETRAINED_CHECKPOINT_PATH

-    super(XLNetClassifyAccuracy, self).__init__(output_dir=output_dir)
+    super(XLNetClassifyAccuracy, self).__init__(output_dir=output_dir, tpu=tpu)

   @benchmark_wrappers.enable_runtime_flags
   def _run_and_report_benchmark(self,
@@ -123,6 +124,7 @@ class XLNetClassifyAccuracy(XLNetBenchmarkBase):
     FLAGS.train_tfrecord_path = self.train_data_path
     FLAGS.test_tfrecord_path = self.eval_data_path

+  @owner_utils.Owner('tf-model-garden')
   def benchmark_8_gpu_imdb(self):
     """Run XLNet model accuracy test with 8 GPUs."""
     self._setup()
@@ -134,6 +136,19 @@ class XLNetClassifyAccuracy(XLNetBenchmarkBase):
                                 'summaries/training_summary.txt')
     self._run_and_report_benchmark(summary_path)

+  @owner_utils.Owner('tf-model-garden')
+  def benchmark_2x2_tpu_imdb(self):
+    """Run XLNet model accuracy test on 2x2 TPU."""
+    self._setup()
+    FLAGS.strategy_type = 'tpu'
+    FLAGS.model_dir = self._get_model_dir('benchmark_2x2_tpu_imdb')
+    # Sets timer_callback to None as we do not use it now.
+    self.timer_callback = None
+
+    summary_path = os.path.join(FLAGS.model_dir,
+                                'summaries/training_summary.txt')
+    self._run_and_report_benchmark(summary_path)
+

 class XLNetSquadAccuracy(XLNetBenchmarkBase):
   """Short accuracy test for XLNet squad model.
@@ -143,14 +158,14 @@ class XLNetSquadAccuracy(XLNetBenchmarkBase):
     `benchmark_(number of gpus)_gpu_(dataset type)` format.
   """

-  def __init__(self, output_dir=None, **kwargs):
+  def __init__(self, output_dir=None, tpu=None, **kwargs):
     self.train_data_path = SQUAD_DATA_PATH
     self.predict_file = os.path.join(SQUAD_DATA_PATH, "dev-v2.0.json")
     self.test_data_path = os.path.join(SQUAD_DATA_PATH, "12048.eval.tf_record")
     self.spiece_model_file = os.path.join(SQUAD_DATA_PATH, "spiece.cased.model")
     self.pretrained_checkpoint_path = PRETRAINED_CHECKPOINT_PATH

-    super(XLNetSquadAccuracy, self).__init__(output_dir=output_dir)
+    super(XLNetSquadAccuracy, self).__init__(output_dir=output_dir, tpu=tpu)

   @benchmark_wrappers.enable_runtime_flags
   def _run_and_report_benchmark(self,
@@ -196,9 +211,10 @@ class XLNetSquadAccuracy(XLNetBenchmarkBase):
     FLAGS.test_tfrecord_path = self.test_data_path
     FLAGS.spiece_model_file = self.spiece_model_file
     FLAGS.predict_file = self.predict_file
-    FLAGS.adam_epsilon=1e-6
-    FLAGS.lr_layer_decay_rate=0.75
+    FLAGS.adam_epsilon = 1e-6
+    FLAGS.lr_layer_decay_rate = 0.75

+  @owner_utils.Owner('tf-model-garden')
   def benchmark_8_gpu_squadv2(self):
     """Run XLNet model squad v2 accuracy test with 8 GPUs."""
     self._setup()
@@ -211,6 +227,20 @@ class XLNetSquadAccuracy(XLNetBenchmarkBase):
                                 'summaries/training_summary.txt')
     self._run_and_report_benchmark(summary_path)

+  @owner_utils.Owner('tf-model-garden')
+  def benchmark_2x2_tpu_squadv2(self):
+    """Run XLNet model squad v2 accuracy test on 2x2 TPU."""
+    self._setup()
+    FLAGS.strategy_type = 'tpu'
+    FLAGS.model_dir = self._get_model_dir('benchmark_2x2_tpu_squadv2')
+    FLAGS.predict_dir = FLAGS.model_dir
+    # Sets timer_callback to None as we do not use it now.
+    self.timer_callback = None
+
+    summary_path = os.path.join(FLAGS.model_dir,
+                                'summaries/training_summary.txt')
+    self._run_and_report_benchmark(summary_path)
+

 if __name__ == '__main__':
   tf.test.main()
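
Note: these benchmark classes are normally discovered and executed by the benchmark harness through tf.test.main(). As a hedged sketch of the new constructor signature in use, a direct invocation might look like this (the output directory and TPU address are placeholders, and manual instantiation bypasses the harness's reporting setup):

    # Hypothetical manual run; real runs go through the benchmark runner.
    benchmark = XLNetClassifyAccuracy(
        output_dir='/tmp/xlnet_benchmark', tpu='grpc://10.0.0.2:8470')
    benchmark.benchmark_2x2_tpu_imdb()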