Unverified Commit 53e3adb8 authored by Hongkun Yu's avatar Hongkun Yu Committed by GitHub
Browse files

Merged commit includes the following changes: (#7301)

259889221  by hongkuny<hongkuny@google.com>:

    Add no ds / xla / eager perfzero tests

--

PiperOrigin-RevId: 259889221
parent 3c5330d8
...@@ -33,6 +33,7 @@ from official.bert import modeling ...@@ -33,6 +33,7 @@ from official.bert import modeling
from official.bert import run_classifier from official.bert import run_classifier
from official.bert.benchmark import benchmark_utils from official.bert.benchmark import benchmark_utils
from official.utils.misc import distribution_utils from official.utils.misc import distribution_utils
from official.utils.misc import keras_utils
# pylint: disable=line-too-long # pylint: disable=line-too-long
PRETRAINED_CHECKPOINT_PATH = 'gs://cloud-tpu-checkpoints/bert/tf_20/uncased_L-24_H-1024_A-16/bert_model.ckpt' PRETRAINED_CHECKPOINT_PATH = 'gs://cloud-tpu-checkpoints/bert/tf_20/uncased_L-24_H-1024_A-16/bert_model.ckpt'
...@@ -54,7 +55,7 @@ class BertClassifyBenchmarkBase(benchmark_utils.BertBenchmarkBase): ...@@ -54,7 +55,7 @@ class BertClassifyBenchmarkBase(benchmark_utils.BertBenchmarkBase):
self.num_steps_per_epoch = None self.num_steps_per_epoch = None
@flagsaver.flagsaver @flagsaver.flagsaver
def _run_bert_classifier(self, callbacks=None): def _run_bert_classifier(self, callbacks=None, use_ds=True, enable_xla=False):
"""Starts BERT classification task.""" """Starts BERT classification task."""
with tf.io.gfile.GFile(FLAGS.input_meta_data_path, 'rb') as reader: with tf.io.gfile.GFile(FLAGS.input_meta_data_path, 'rb') as reader:
input_meta_data = json.loads(reader.read().decode('utf-8')) input_meta_data = json.loads(reader.read().decode('utf-8'))
...@@ -70,7 +71,11 @@ class BertClassifyBenchmarkBase(benchmark_utils.BertBenchmarkBase): ...@@ -70,7 +71,11 @@ class BertClassifyBenchmarkBase(benchmark_utils.BertBenchmarkBase):
eval_steps = int( eval_steps = int(
math.ceil(input_meta_data['eval_data_size'] / FLAGS.eval_batch_size)) math.ceil(input_meta_data['eval_data_size'] / FLAGS.eval_batch_size))
strategy = distribution_utils.get_distribution_strategy( strategy = distribution_utils.get_distribution_strategy(
distribution_strategy='mirrored', num_gpus=self.num_gpus) distribution_strategy='mirrored' if use_ds else 'off',
num_gpus=self.num_gpus)
# TODO(hongkuny): Enable XLA once we are confident with its performance.
keras_utils.set_config_v2(enable_xla)
steps_per_loop = 1 steps_per_loop = 1
run_classifier.run_customized_training( run_classifier.run_customized_training(
...@@ -113,11 +118,14 @@ class BertClassifyBenchmarkReal(BertClassifyBenchmarkBase): ...@@ -113,11 +118,14 @@ class BertClassifyBenchmarkReal(BertClassifyBenchmarkBase):
def _run_and_report_benchmark(self, def _run_and_report_benchmark(self,
training_summary_path, training_summary_path,
min_accuracy=0, min_accuracy=0,
max_accuracy=1): max_accuracy=1,
use_ds=True,
enable_xla=False):
"""Starts BERT performance benchmark test.""" """Starts BERT performance benchmark test."""
start_time_sec = time.time() start_time_sec = time.time()
self._run_bert_classifier(callbacks=[self.timer_callback]) self._run_bert_classifier(
callbacks=[self.timer_callback], use_ds=use_ds, enable_xla=enable_xla)
wall_time_sec = time.time() - start_time_sec wall_time_sec = time.time() - start_time_sec
with tf.io.gfile.GFile(training_summary_path, 'rb') as reader: with tf.io.gfile.GFile(training_summary_path, 'rb') as reader:
...@@ -148,6 +156,38 @@ class BertClassifyBenchmarkReal(BertClassifyBenchmarkBase): ...@@ -148,6 +156,38 @@ class BertClassifyBenchmarkReal(BertClassifyBenchmarkBase):
summary_path = os.path.join(FLAGS.model_dir, 'training_summary.txt') summary_path = os.path.join(FLAGS.model_dir, 'training_summary.txt')
self._run_and_report_benchmark(summary_path) self._run_and_report_benchmark(summary_path)
def benchmark_1_gpu_mrpc_xla(self):
  """Benchmarks BERT MRPC classification on a single GPU with XLA enabled."""
  self._setup()
  self.num_gpus = 1
  # Each benchmark writes into its own model dir so summaries don't collide.
  FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_mrpc_xla')
  # Point the run at the MRPC inputs staged by _setup().
  FLAGS.train_data_path = self.train_data_path
  FLAGS.eval_data_path = self.eval_data_path
  FLAGS.input_meta_data_path = self.input_meta_data_path
  FLAGS.bert_config_file = self.bert_config_file
  FLAGS.train_batch_size = 4
  FLAGS.eval_batch_size = 4
  summary_file = os.path.join(FLAGS.model_dir, 'training_summary.txt')
  self._run_and_report_benchmark(summary_file, enable_xla=True)
def benchmark_1_gpu_mrpc_no_dist_strat(self):
  """Benchmarks BERT MRPC classification on 1 GPU without distribution strategy."""
  self._setup()
  self.num_gpus = 1
  # Dedicated model dir keeps this run's training summary separate.
  FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_mrpc_no_dist_strat')
  # MRPC inputs staged by _setup().
  FLAGS.train_data_path = self.train_data_path
  FLAGS.eval_data_path = self.eval_data_path
  FLAGS.input_meta_data_path = self.input_meta_data_path
  FLAGS.bert_config_file = self.bert_config_file
  FLAGS.train_batch_size = 4
  FLAGS.eval_batch_size = 4
  summary_file = os.path.join(FLAGS.model_dir, 'training_summary.txt')
  # use_ds=False turns the distribution strategy 'off' in the runner.
  self._run_and_report_benchmark(summary_file, use_ds=False)
def benchmark_2_gpu_mrpc(self): def benchmark_2_gpu_mrpc(self):
"""Test BERT model performance with 2 GPUs.""" """Test BERT model performance with 2 GPUs."""
...@@ -213,11 +253,13 @@ class BertClassifyAccuracy(BertClassifyBenchmarkBase): ...@@ -213,11 +253,13 @@ class BertClassifyAccuracy(BertClassifyBenchmarkBase):
def _run_and_report_benchmark(self, def _run_and_report_benchmark(self,
training_summary_path, training_summary_path,
min_accuracy=0.84, min_accuracy=0.84,
max_accuracy=0.88): max_accuracy=0.88,
enable_xla=False):
"""Starts BERT accuracy benchmark test.""" """Starts BERT accuracy benchmark test."""
start_time_sec = time.time() start_time_sec = time.time()
self._run_bert_classifier(callbacks=[self.timer_callback]) self._run_bert_classifier(
callbacks=[self.timer_callback], enable_xla=enable_xla)
wall_time_sec = time.time() - start_time_sec wall_time_sec = time.time() - start_time_sec
with tf.io.gfile.GFile(training_summary_path, 'rb') as reader: with tf.io.gfile.GFile(training_summary_path, 'rb') as reader:
...@@ -229,6 +271,14 @@ class BertClassifyAccuracy(BertClassifyBenchmarkBase): ...@@ -229,6 +271,14 @@ class BertClassifyAccuracy(BertClassifyBenchmarkBase):
min_accuracy=min_accuracy, min_accuracy=min_accuracy,
max_accuracy=max_accuracy) max_accuracy=max_accuracy)
def _setup(self):
  """Sets the input/config/checkpoint flags shared by all accuracy benchmarks."""
  super(BertClassifyAccuracy, self)._setup()
  # Every accuracy benchmark in this class fine-tunes from the same
  # pretrained checkpoint over the same data, so set those flags once here.
  FLAGS.init_checkpoint = self.pretrained_checkpoint_path
  FLAGS.bert_config_file = self.bert_config_file
  FLAGS.train_data_path = self.train_data_path
  FLAGS.eval_data_path = self.eval_data_path
  FLAGS.input_meta_data_path = self.input_meta_data_path
def benchmark_8_gpu_mrpc(self): def benchmark_8_gpu_mrpc(self):
"""Run BERT model accuracy test with 8 GPUs. """Run BERT model accuracy test with 8 GPUs.
...@@ -236,18 +286,20 @@ class BertClassifyAccuracy(BertClassifyBenchmarkBase): ...@@ -236,18 +286,20 @@ class BertClassifyAccuracy(BertClassifyBenchmarkBase):
accuracy metric has high variance between trainings. As so, we accuracy metric has high variance between trainings. As so, we
set the wide range of allowed accuracy (84% to 88%). set the wide range of allowed accuracy (84% to 88%).
""" """
self._setup() self._setup()
FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_mrpc') FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_mrpc')
FLAGS.train_data_path = self.train_data_path
FLAGS.eval_data_path = self.eval_data_path
FLAGS.input_meta_data_path = self.input_meta_data_path
FLAGS.bert_config_file = self.bert_config_file
FLAGS.init_checkpoint = self.pretrained_checkpoint_path
summary_path = os.path.join(FLAGS.model_dir, 'training_summary.txt') summary_path = os.path.join(FLAGS.model_dir, 'training_summary.txt')
self._run_and_report_benchmark(summary_path) self._run_and_report_benchmark(summary_path)
def benchmark_8_gpu_mrpc_xla(self):
  """Runs the 8-GPU BERT MRPC accuracy benchmark with XLA enabled."""
  self._setup()
  FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_mrpc_xla')
  summary_file = os.path.join(FLAGS.model_dir, 'training_summary.txt')
  self._run_and_report_benchmark(summary_file, enable_xla=True)
if __name__ == '__main__': if __name__ == '__main__':
tf.test.main() tf.test.main()
...@@ -32,6 +32,7 @@ from official.bert import run_squad ...@@ -32,6 +32,7 @@ from official.bert import run_squad
from official.bert.benchmark import benchmark_utils from official.bert.benchmark import benchmark_utils
from official.bert.benchmark import squad_evaluate_v1_1 from official.bert.benchmark import squad_evaluate_v1_1
from official.utils.misc import distribution_utils from official.utils.misc import distribution_utils
from official.utils.misc import keras_utils
# pylint: disable=line-too-long # pylint: disable=line-too-long
PRETRAINED_CHECKPOINT_PATH = 'gs://cloud-tpu-checkpoints/bert/tf_20/uncased_L-24_H-1024_A-16/bert_model.ckpt' PRETRAINED_CHECKPOINT_PATH = 'gs://cloud-tpu-checkpoints/bert/tf_20/uncased_L-24_H-1024_A-16/bert_model.ckpt'
...@@ -72,27 +73,29 @@ class BertSquadBenchmarkBase(benchmark_utils.BertBenchmarkBase): ...@@ -72,27 +73,29 @@ class BertSquadBenchmarkBase(benchmark_utils.BertBenchmarkBase):
with tf.io.gfile.GFile(predictions_file, 'r') as reader: with tf.io.gfile.GFile(predictions_file, 'r') as reader:
return json.load(reader) return json.load(reader)
def _get_distribution_strategy(self): def _get_distribution_strategy(self, use_ds=True):
"""Gets the distribution strategy.""" """Gets the distribution strategy."""
return distribution_utils.get_distribution_strategy( return distribution_utils.get_distribution_strategy(
distribution_strategy='mirrored', num_gpus=self.num_gpus) distribution_strategy='mirrored' if use_ds else 'off',
num_gpus=self.num_gpus)
@flagsaver.flagsaver @flagsaver.flagsaver
def _train_squad(self): def _train_squad(self, use_ds=True, run_eagerly=False):
"""Runs BERT SQuAD training.""" """Runs BERT SQuAD training."""
input_meta_data = self._read_input_meta_data_from_file() input_meta_data = self._read_input_meta_data_from_file()
strategy = self._get_distribution_strategy() strategy = self._get_distribution_strategy(use_ds)
run_squad.train_squad( run_squad.train_squad(
strategy=strategy, strategy=strategy,
input_meta_data=input_meta_data, input_meta_data=input_meta_data,
run_eagerly=run_eagerly,
custom_callbacks=[self.timer_callback]) custom_callbacks=[self.timer_callback])
@flagsaver.flagsaver @flagsaver.flagsaver
def _evaluate_squad(self): def _evaluate_squad(self, use_ds=True):
"""Runs BERT SQuAD evaluation.""" """Runs BERT SQuAD evaluation."""
input_meta_data = self._read_input_meta_data_from_file() input_meta_data = self._read_input_meta_data_from_file()
strategy = self._get_distribution_strategy() strategy = self._get_distribution_strategy(use_ds)
run_squad.predict_squad(strategy=strategy, input_meta_data=input_meta_data) run_squad.predict_squad(strategy=strategy, input_meta_data=input_meta_data)
...@@ -126,10 +129,14 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase): ...@@ -126,10 +129,14 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
FLAGS.num_train_epochs = 1 FLAGS.num_train_epochs = 1
FLAGS.steps_per_loop = 1 FLAGS.steps_per_loop = 1
def _run_and_report_benchmark(self): def _run_and_report_benchmark(self,
use_ds=True,
enable_xla=False,
run_eagerly=False):
"""Runs the benchmark and reports various metrics.""" """Runs the benchmark and reports various metrics."""
keras_utils.set_config_v2(enable_xla)
start_time_sec = time.time() start_time_sec = time.time()
self._train_squad() self._train_squad(use_ds=use_ds, run_eagerly=run_eagerly)
wall_time_sec = time.time() - start_time_sec wall_time_sec = time.time() - start_time_sec
summary = self._read_training_summary_from_file() summary = self._read_training_summary_from_file()
...@@ -150,6 +157,37 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase): ...@@ -150,6 +157,37 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_1_gpu_xla(self):
  """Benchmarks BERT SQuAD training on a single GPU with XLA enabled."""
  self._setup()
  self.num_gpus = 1
  run_name = 'benchmark_1_gpu_xla_squad'
  FLAGS.model_dir = self._get_model_dir(run_name)
  FLAGS.train_batch_size = 4
  self._run_and_report_benchmark(enable_xla=True)
def benchmark_1_gpu_no_dist_strat(self):
  """Benchmarks BERT SQuAD training on 1 GPU without distribution strategy."""
  self._setup()
  self.num_gpus = 1
  run_name = 'benchmark_1_gpu_no_dist_strat_squad'
  FLAGS.model_dir = self._get_model_dir(run_name)
  FLAGS.train_batch_size = 4
  # use_ds=False makes the runner request the 'off' strategy.
  self._run_and_report_benchmark(use_ds=False)
def benchmark_1_gpu_eager_no_dist_strat(self):
  """Benchmarks BERT SQuAD on 1 GPU, eager execution, no distribution strategy."""
  self._setup()
  self.num_gpus = 1
  run_name = 'benchmark_1_gpu_eager_no_dist_strat_squad'
  FLAGS.model_dir = self._get_model_dir(run_name)
  FLAGS.train_batch_size = 4
  self._run_and_report_benchmark(use_ds=False, run_eagerly=True)
def benchmark_2_gpu(self): def benchmark_2_gpu(self):
"""Tests BERT SQuAD model performance with 2 GPUs.""" """Tests BERT SQuAD model performance with 2 GPUs."""
...@@ -203,10 +241,14 @@ class BertSquadAccuracy(BertSquadBenchmarkBase): ...@@ -203,10 +241,14 @@ class BertSquadAccuracy(BertSquadBenchmarkBase):
FLAGS.num_train_epochs = 2 FLAGS.num_train_epochs = 2
FLAGS.steps_per_loop = 1 FLAGS.steps_per_loop = 1
def _run_and_report_benchmark(self): def _run_and_report_benchmark(self,
use_ds=True,
enable_xla=False,
run_eagerly=False):
"""Runs the benchmark and reports various metrics.""" """Runs the benchmark and reports various metrics."""
keras_utils.set_config_v2(enable_xla)
start_time_sec = time.time() start_time_sec = time.time()
self._train_squad() self._train_squad(use_ds=use_ds, run_eagerly=run_eagerly)
self._evaluate_squad() self._evaluate_squad()
wall_time_sec = time.time() - start_time_sec wall_time_sec = time.time() - start_time_sec
...@@ -219,6 +261,16 @@ class BertSquadAccuracy(BertSquadBenchmarkBase): ...@@ -219,6 +261,16 @@ class BertSquadAccuracy(BertSquadBenchmarkBase):
min_accuracy=0.900, min_accuracy=0.900,
max_accuracy=0.908) max_accuracy=0.908)
def benchmark_1_gpu_eager(self):
  """Measures BERT SQuAD accuracy on 1 GPU under eager execution (no DS)."""
  self._setup()
  self.num_gpus = 1
  run_name = 'benchmark_1_gpu_squad_eager'
  FLAGS.model_dir = self._get_model_dir(run_name)
  FLAGS.train_batch_size = 4
  self._run_and_report_benchmark(use_ds=False, run_eagerly=True)
def benchmark_8_gpu(self): def benchmark_8_gpu(self):
"""Tests BERT SQuAD model accuracy with 8 GPUs.""" """Tests BERT SQuAD model accuracy with 8 GPUs."""
...@@ -229,6 +281,16 @@ class BertSquadAccuracy(BertSquadBenchmarkBase): ...@@ -229,6 +281,16 @@ class BertSquadAccuracy(BertSquadBenchmarkBase):
self._run_and_report_benchmark() self._run_and_report_benchmark()
def benchmark_8_gpu_xla(self):
  """Measures BERT SQuAD accuracy on 8 GPUs with XLA enabled."""
  self._setup()
  self.num_gpus = 8
  run_name = 'benchmark_8_gpu_squad_xla'
  FLAGS.model_dir = self._get_model_dir(run_name)
  FLAGS.train_batch_size = 32
  self._run_and_report_benchmark(enable_xla=True)
if __name__ == '__main__': if __name__ == '__main__':
tf.test.main() tf.test.main()
...@@ -23,6 +23,7 @@ import os ...@@ -23,6 +23,7 @@ import os
from absl import logging from absl import logging
import tensorflow as tf import tensorflow as tf
from official.utils.misc import distribution_utils
_SUMMARY_TXT = 'training_summary.txt' _SUMMARY_TXT = 'training_summary.txt'
_MIN_SUMMARY_STEPS = 10 _MIN_SUMMARY_STEPS = 10
...@@ -196,7 +197,7 @@ def run_customized_training_loop( ...@@ -196,7 +197,7 @@ def run_customized_training_loop(
with tf.device(get_primary_cpu_task(use_remote_tpu)): with tf.device(get_primary_cpu_task(use_remote_tpu)):
train_iterator = _get_input_iterator(train_input_fn, strategy) train_iterator = _get_input_iterator(train_input_fn, strategy)
with strategy.scope(): with distribution_utils.get_strategy_scope(strategy):
# To correctly place the model weights on accelerators, # To correctly place the model weights on accelerators,
# model and optimizer should be created in scope. # model and optimizer should be created in scope.
model, sub_model = model_fn() model, sub_model = model_fn()
......
...@@ -173,11 +173,12 @@ def predict_squad_customized(strategy, input_meta_data, bert_config, ...@@ -173,11 +173,12 @@ def predict_squad_customized(strategy, input_meta_data, bert_config,
return all_results return all_results
def train_squad(strategy, input_meta_data, custom_callbacks=None): def train_squad(strategy,
input_meta_data,
custom_callbacks=None,
run_eagerly=False):
"""Run bert squad training.""" """Run bert squad training."""
if not strategy: if strategy:
raise ValueError('Distribution strategy cannot be None.')
logging.info('Training using customized training loop with distribution' logging.info('Training using customized training loop with distribution'
' strategy.') ' strategy.')
...@@ -219,6 +220,7 @@ def train_squad(strategy, input_meta_data, custom_callbacks=None): ...@@ -219,6 +220,7 @@ def train_squad(strategy, input_meta_data, custom_callbacks=None):
train_input_fn=train_input_fn, train_input_fn=train_input_fn,
init_checkpoint=FLAGS.init_checkpoint, init_checkpoint=FLAGS.init_checkpoint,
use_remote_tpu=use_remote_tpu, use_remote_tpu=use_remote_tpu,
run_eagerly=run_eagerly,
custom_callbacks=custom_callbacks) custom_callbacks=custom_callbacks)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment