Unverified Commit 53e3adb8 authored by Hongkun Yu, committed by GitHub

Merged commit includes the following changes: (#7301)

259889221  by hongkuny<hongkuny@google.com>:

    Add no ds / xla / eager perfzero tests

--

PiperOrigin-RevId: 259889221
parent 3c5330d8
@@ -33,6 +33,7 @@ from official.bert import modeling
 from official.bert import run_classifier
 from official.bert.benchmark import benchmark_utils
 from official.utils.misc import distribution_utils
+from official.utils.misc import keras_utils

 # pylint: disable=line-too-long
 PRETRAINED_CHECKPOINT_PATH = 'gs://cloud-tpu-checkpoints/bert/tf_20/uncased_L-24_H-1024_A-16/bert_model.ckpt'
@@ -54,7 +55,7 @@ class BertClassifyBenchmarkBase(benchmark_utils.BertBenchmarkBase):
     self.num_steps_per_epoch = None

   @flagsaver.flagsaver
-  def _run_bert_classifier(self, callbacks=None):
+  def _run_bert_classifier(self, callbacks=None, use_ds=True, enable_xla=False):
     """Starts BERT classification task."""
     with tf.io.gfile.GFile(FLAGS.input_meta_data_path, 'rb') as reader:
       input_meta_data = json.loads(reader.read().decode('utf-8'))
@@ -70,7 +71,11 @@ class BertClassifyBenchmarkBase(benchmark_utils.BertBenchmarkBase):
     eval_steps = int(
         math.ceil(input_meta_data['eval_data_size'] / FLAGS.eval_batch_size))

     strategy = distribution_utils.get_distribution_strategy(
-        distribution_strategy='mirrored', num_gpus=self.num_gpus)
+        distribution_strategy='mirrored' if use_ds else 'off',
+        num_gpus=self.num_gpus)
+    # TODO(hongkuny): Enable XLA once we are confident with its performance.
+    keras_utils.set_config_v2(enable_xla)

     steps_per_loop = 1
     run_classifier.run_customized_training(
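Two things happen in the added lines above: passing 'off' instead of 'mirrored' asks distribution_utils.get_distribution_strategy for no strategy at all, and keras_utils.set_config_v2(enable_xla) switches XLA compilation on or off before the model is built. Neither helper's body is part of this diff; the following is only a hedged sketch of what they presumably amount to (the real implementations live under official/utils/misc/ and may differ):

    import tensorflow as tf

    def get_distribution_strategy(distribution_strategy='mirrored', num_gpus=0):
      """Sketch of the 'mirrored'/'off' dispatch assumed by the benchmarks."""
      distribution_strategy = distribution_strategy.lower()
      if distribution_strategy == 'off':
        # No tf.distribute strategy: callers receive None and must handle it
        # (see the get_strategy_scope change further down in this commit).
        return None
      if distribution_strategy == 'mirrored':
        devices = ['/gpu:%d' % i for i in range(num_gpus)] if num_gpus else None
        return tf.distribute.MirroredStrategy(devices=devices)
      raise ValueError('Unknown distribution strategy: %s' % distribution_strategy)

    def set_config_v2(enable_xla=False):
      """Sketch: assumed to toggle XLA JIT compilation for the TF 2.x runtime."""
      tf.config.optimizer.set_jit(enable_xla)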
@@ -113,11 +118,14 @@ class BertClassifyBenchmarkReal(BertClassifyBenchmarkBase):
   def _run_and_report_benchmark(self,
                                 training_summary_path,
                                 min_accuracy=0,
-                                max_accuracy=1):
+                                max_accuracy=1,
+                                use_ds=True,
+                                enable_xla=False):
     """Starts BERT performance benchmark test."""
     start_time_sec = time.time()
-    self._run_bert_classifier(callbacks=[self.timer_callback])
+    self._run_bert_classifier(
+        callbacks=[self.timer_callback], use_ds=use_ds, enable_xla=enable_xla)
     wall_time_sec = time.time() - start_time_sec

     with tf.io.gfile.GFile(training_summary_path, 'rb') as reader:
@@ -148,6 +156,38 @@ class BertClassifyBenchmarkReal(BertClassifyBenchmarkBase):
     summary_path = os.path.join(FLAGS.model_dir, 'training_summary.txt')
     self._run_and_report_benchmark(summary_path)

+  def benchmark_1_gpu_mrpc_xla(self):
+    """Test BERT model performance with 1 GPU with XLA."""
+    self._setup()
+    self.num_gpus = 1
+    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_mrpc_xla')
+    FLAGS.train_data_path = self.train_data_path
+    FLAGS.eval_data_path = self.eval_data_path
+    FLAGS.input_meta_data_path = self.input_meta_data_path
+    FLAGS.bert_config_file = self.bert_config_file
+    FLAGS.train_batch_size = 4
+    FLAGS.eval_batch_size = 4
+
+    summary_path = os.path.join(FLAGS.model_dir, 'training_summary.txt')
+    self._run_and_report_benchmark(summary_path, enable_xla=True)
+
+  def benchmark_1_gpu_mrpc_no_dist_strat(self):
+    """Test BERT model performance with 1 GPU, no distribution strategy."""
+    self._setup()
+    self.num_gpus = 1
+    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_mrpc_no_dist_strat')
+    FLAGS.train_data_path = self.train_data_path
+    FLAGS.eval_data_path = self.eval_data_path
+    FLAGS.input_meta_data_path = self.input_meta_data_path
+    FLAGS.bert_config_file = self.bert_config_file
+    FLAGS.train_batch_size = 4
+    FLAGS.eval_batch_size = 4
+
+    summary_path = os.path.join(FLAGS.model_dir, 'training_summary.txt')
+    self._run_and_report_benchmark(summary_path, use_ds=False)
+
   def benchmark_2_gpu_mrpc(self):
     """Test BERT model performance with 2 GPUs."""
@@ -213,11 +253,13 @@ class BertClassifyAccuracy(BertClassifyBenchmarkBase):
   def _run_and_report_benchmark(self,
                                 training_summary_path,
                                 min_accuracy=0.84,
-                                max_accuracy=0.88):
+                                max_accuracy=0.88,
+                                enable_xla=False):
     """Starts BERT accuracy benchmark test."""
     start_time_sec = time.time()
-    self._run_bert_classifier(callbacks=[self.timer_callback])
+    self._run_bert_classifier(
+        callbacks=[self.timer_callback], enable_xla=enable_xla)
     wall_time_sec = time.time() - start_time_sec

     with tf.io.gfile.GFile(training_summary_path, 'rb') as reader:
@@ -229,6 +271,14 @@ class BertClassifyAccuracy(BertClassifyBenchmarkBase):
         min_accuracy=min_accuracy,
         max_accuracy=max_accuracy)

+  def _setup(self):
+    super(BertClassifyAccuracy, self)._setup()
+    FLAGS.train_data_path = self.train_data_path
+    FLAGS.eval_data_path = self.eval_data_path
+    FLAGS.input_meta_data_path = self.input_meta_data_path
+    FLAGS.bert_config_file = self.bert_config_file
+    FLAGS.init_checkpoint = self.pretrained_checkpoint_path
+
   def benchmark_8_gpu_mrpc(self):
     """Run BERT model accuracy test with 8 GPUs.
@@ -236,18 +286,20 @@ class BertClassifyAccuracy(BertClassifyBenchmarkBase):
     accuracy metric has high variance between trainings. As such, we
     set the wide range of allowed accuracy (84% to 88%).
     """
     self._setup()
     FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_mrpc')
-    FLAGS.train_data_path = self.train_data_path
-    FLAGS.eval_data_path = self.eval_data_path
-    FLAGS.input_meta_data_path = self.input_meta_data_path
-    FLAGS.bert_config_file = self.bert_config_file
-    FLAGS.init_checkpoint = self.pretrained_checkpoint_path
     summary_path = os.path.join(FLAGS.model_dir, 'training_summary.txt')
     self._run_and_report_benchmark(summary_path)

+  def benchmark_8_gpu_mrpc_xla(self):
+    """Run BERT model accuracy test with 8 GPUs with XLA."""
+    self._setup()
+    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_mrpc_xla')
+
+    summary_path = os.path.join(FLAGS.model_dir, 'training_summary.txt')
+    self._run_and_report_benchmark(summary_path, enable_xla=True)
+
 if __name__ == '__main__':
   tf.test.main()
@@ -32,6 +32,7 @@ from official.bert import run_squad
 from official.bert.benchmark import benchmark_utils
 from official.bert.benchmark import squad_evaluate_v1_1
 from official.utils.misc import distribution_utils
+from official.utils.misc import keras_utils

 # pylint: disable=line-too-long
 PRETRAINED_CHECKPOINT_PATH = 'gs://cloud-tpu-checkpoints/bert/tf_20/uncased_L-24_H-1024_A-16/bert_model.ckpt'
@@ -72,27 +73,29 @@ class BertSquadBenchmarkBase(benchmark_utils.BertBenchmarkBase):
     with tf.io.gfile.GFile(predictions_file, 'r') as reader:
       return json.load(reader)

-  def _get_distribution_strategy(self):
+  def _get_distribution_strategy(self, use_ds=True):
     """Gets the distribution strategy."""
     return distribution_utils.get_distribution_strategy(
-        distribution_strategy='mirrored', num_gpus=self.num_gpus)
+        distribution_strategy='mirrored' if use_ds else 'off',
+        num_gpus=self.num_gpus)

   @flagsaver.flagsaver
-  def _train_squad(self):
+  def _train_squad(self, use_ds=True, run_eagerly=False):
     """Runs BERT SQuAD training."""
     input_meta_data = self._read_input_meta_data_from_file()
-    strategy = self._get_distribution_strategy()
+    strategy = self._get_distribution_strategy(use_ds)

     run_squad.train_squad(
         strategy=strategy,
         input_meta_data=input_meta_data,
+        run_eagerly=run_eagerly,
         custom_callbacks=[self.timer_callback])

   @flagsaver.flagsaver
-  def _evaluate_squad(self):
+  def _evaluate_squad(self, use_ds=True):
     """Runs BERT SQuAD evaluation."""
     input_meta_data = self._read_input_meta_data_from_file()
-    strategy = self._get_distribution_strategy()
+    strategy = self._get_distribution_strategy(use_ds)

     run_squad.predict_squad(strategy=strategy, input_meta_data=input_meta_data)
@@ -126,10 +129,14 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
     FLAGS.num_train_epochs = 1
     FLAGS.steps_per_loop = 1

-  def _run_and_report_benchmark(self):
+  def _run_and_report_benchmark(self,
+                                use_ds=True,
+                                enable_xla=False,
+                                run_eagerly=False):
     """Runs the benchmark and reports various metrics."""
+    keras_utils.set_config_v2(enable_xla)
     start_time_sec = time.time()
-    self._train_squad()
+    self._train_squad(use_ds=use_ds, run_eagerly=run_eagerly)
     wall_time_sec = time.time() - start_time_sec

     summary = self._read_training_summary_from_file()
@@ -150,6 +157,37 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
     self._run_and_report_benchmark()

+  def benchmark_1_gpu_xla(self):
+    """Tests BERT SQuAD model performance with 1 GPU with XLA."""
+    self._setup()
+    self.num_gpus = 1
+    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_xla_squad')
+    FLAGS.train_batch_size = 4
+
+    self._run_and_report_benchmark(enable_xla=True)
+
+  def benchmark_1_gpu_no_dist_strat(self):
+    """Tests BERT SQuAD model performance with 1 GPU without DS."""
+    self._setup()
+    self.num_gpus = 1
+    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_no_dist_strat_squad')
+    FLAGS.train_batch_size = 4
+
+    self._run_and_report_benchmark(use_ds=False)
+
+  def benchmark_1_gpu_eager_no_dist_strat(self):
+    """Tests BERT SQuAD model performance with 1 GPU with eager execution."""
+    self._setup()
+    self.num_gpus = 1
+    FLAGS.model_dir = self._get_model_dir(
+        'benchmark_1_gpu_eager_no_dist_strat_squad')
+    FLAGS.train_batch_size = 4
+
+    self._run_and_report_benchmark(use_ds=False, run_eagerly=True)
+
   def benchmark_2_gpu(self):
     """Tests BERT SQuAD model performance with 2 GPUs."""
@@ -203,10 +241,14 @@ class BertSquadAccuracy(BertSquadBenchmarkBase):
     FLAGS.num_train_epochs = 2
     FLAGS.steps_per_loop = 1

-  def _run_and_report_benchmark(self):
+  def _run_and_report_benchmark(self,
+                                use_ds=True,
+                                enable_xla=False,
+                                run_eagerly=False):
     """Runs the benchmark and reports various metrics."""
+    keras_utils.set_config_v2(enable_xla)
     start_time_sec = time.time()
-    self._train_squad()
+    self._train_squad(use_ds=use_ds, run_eagerly=run_eagerly)
     self._evaluate_squad()
     wall_time_sec = time.time() - start_time_sec
@@ -219,6 +261,16 @@ class BertSquadAccuracy(BertSquadBenchmarkBase):
         min_accuracy=0.900,
         max_accuracy=0.908)

+  def benchmark_1_gpu_eager(self):
+    """Tests BERT SQuAD model accuracy with 1 GPU with eager execution."""
+    self._setup()
+    self.num_gpus = 1
+    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_squad_eager')
+    FLAGS.train_batch_size = 4
+
+    self._run_and_report_benchmark(use_ds=False, run_eagerly=True)
+
   def benchmark_8_gpu(self):
     """Tests BERT SQuAD model accuracy with 8 GPUs."""
@@ -229,6 +281,16 @@ class BertSquadAccuracy(BertSquadBenchmarkBase):
     self._run_and_report_benchmark()

+  def benchmark_8_gpu_xla(self):
+    """Tests BERT SQuAD model accuracy with 8 GPUs with XLA."""
+    self._setup()
+    self.num_gpus = 8
+    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_squad_xla')
+    FLAGS.train_batch_size = 32
+
+    self._run_and_report_benchmark(enable_xla=True)
+
 if __name__ == '__main__':
   tf.test.main()
@@ -23,6 +23,7 @@ import os
 from absl import logging
 import tensorflow as tf

+from official.utils.misc import distribution_utils

 _SUMMARY_TXT = 'training_summary.txt'
 _MIN_SUMMARY_STEPS = 10
@@ -196,7 +197,7 @@ def run_customized_training_loop(
   with tf.device(get_primary_cpu_task(use_remote_tpu)):
     train_iterator = _get_input_iterator(train_input_fn, strategy)

-    with strategy.scope():
+    with distribution_utils.get_strategy_scope(strategy):
       # To correctly place the model weights on accelerators,
       # model and optimizer should be created in scope.
       model, sub_model = model_fn()
......
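Replacing strategy.scope() with distribution_utils.get_strategy_scope(strategy) is what lets the training loop run when no distribution strategy is configured: the new no-DS benchmarks end up passing strategy=None. The helper itself is not shown in this diff; a minimal sketch of the behavior it presumably provides (the real code in official/utils/misc/distribution_utils.py may differ):

    import contextlib

    def get_strategy_scope(strategy):
      """Returns strategy.scope() if a strategy is given, else a no-op scope.

      Sketch only: the contract assumed here is that
      `with get_strategy_scope(s):` works whether or not `s` is None.
      """
      if strategy:
        return strategy.scope()
      # Fall back to a context manager that does nothing, so callers can use a
      # single `with` statement regardless of whether a strategy is in play.
      return contextlib.nullcontext()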
@@ -173,11 +173,12 @@ def predict_squad_customized(strategy, input_meta_data, bert_config,
   return all_results


-def train_squad(strategy, input_meta_data, custom_callbacks=None):
+def train_squad(strategy,
+                input_meta_data,
+                custom_callbacks=None,
+                run_eagerly=False):
   """Run bert squad training."""
-  if not strategy:
-    raise ValueError('Distribution strategy cannot be None.')
+  if strategy:
+    logging.info('Training using customized training loop with distribution'
+                 ' strategy.')
@@ -219,6 +220,7 @@ def train_squad(strategy, input_meta_data, custom_callbacks=None):
       train_input_fn=train_input_fn,
       init_checkpoint=FLAGS.init_checkpoint,
       use_remote_tpu=use_remote_tpu,
+      run_eagerly=run_eagerly,
      custom_callbacks=custom_callbacks)
......
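run_eagerly is threaded from the benchmark through run_squad.train_squad into run_customized_training_loop, but this diff does not show how the loop consumes the flag. In TF 2.x custom training loops the usual pattern is to skip the tf.function wrapping of the train step when running eagerly. A hedged sketch of that pattern (the names below are illustrative, not the repository's actual code):

    import tensorflow as tf

    def make_train_step(model, optimizer, loss_fn, run_eagerly=False):
      """Builds a per-batch train step, compiled with tf.function unless eager."""

      def train_step(inputs, labels):
        with tf.GradientTape() as tape:
          predictions = model(inputs, training=True)
          loss = loss_fn(labels, predictions)
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        return loss

      # Eager mode keeps the plain Python function, which is slower but much
      # easier to debug; graph mode wraps it in tf.function for performance.
      return train_step if run_eagerly else tf.function(train_step)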