Commit dfcaed77 authored by David M. Chen, committed by saberkun

Merged commit includes the following changes: (#7039)

253636854  by dmchen<dmchen@google.com>:

    Run only training in BERT SQuAD performance test

--
253118910  by hongkuny<hongkuny@google.com>:

    Internal change

PiperOrigin-RevId: 253636854
parent d44b7283
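
For orientation before the diff: the change splits the old combined train-and-evaluate run so that the performance benchmarks time training only, while the accuracy benchmark still trains, evaluates, and reports the SQuAD F1 score. Below is a minimal, self-contained sketch of the two flows; the helper names mirror the diff, but the stub bodies and values are illustrative only.

import time


class _FlowSketch(object):
  """Illustrative stand-ins for the BertSquadBenchmarkBase helpers."""

  def _train_squad(self):
    print('train: fine-tune BERT on SQuAD')

  def _evaluate_squad(self):
    print('evaluate: run prediction and SQuAD v1.1 scoring')
    self.eval_metrics = 0.904  # stand-in F1 value

  def _read_training_summary_from_file(self):
    return {'train_loss': 1.0}  # stand-in for training_summary.txt contents

  def _report_benchmark(self, stats, wall_time_sec, min_accuracy, max_accuracy):
    print(stats, wall_time_sec, min_accuracy, max_accuracy)


class PerformanceFlow(_FlowSketch):
  """Performance test: training only after this change."""

  def run(self):
    start_time_sec = time.time()
    self._train_squad()
    wall_time_sec = time.time() - start_time_sec
    summary = self._read_training_summary_from_file()
    self._report_benchmark(summary, wall_time_sec,
                           min_accuracy=0, max_accuracy=1)


class AccuracyFlow(_FlowSketch):
  """Accuracy test: training plus SQuAD evaluation, F1 is checked."""

  def run(self):
    start_time_sec = time.time()
    self._train_squad()
    self._evaluate_squad()
    wall_time_sec = time.time() - start_time_sec
    summary = self._read_training_summary_from_file()
    summary['eval_metrics'] = self.eval_metrics
    self._report_benchmark(summary, wall_time_sec,
                           min_accuracy=0.902, max_accuracy=0.906)


if __name__ == '__main__':
  PerformanceFlow().run()
  AccuracyFlow().run()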
@@ -49,50 +49,57 @@ FLAGS = flags.FLAGS
 class BertSquadBenchmarkBase(benchmark_utils.BertBenchmarkBase):
   """Base class to hold methods common to test classes in the module."""
-  def _run_and_report_benchmark(self, training_summary_path, min_accuracy,
-                                max_accuracy):
-    """Runs the benchmark and reports various metrics."""
-    start_time_sec = time.time()
-    self._run_bert_squad()
-    wall_time_sec = time.time() - start_time_sec
-    with tf.io.gfile.GFile(training_summary_path, 'rb') as reader:
-      summary = json.loads(reader.read().decode('utf-8'))
-    summary['eval_metrics'] = self.eval_metrics
+  def _read_training_summary_from_file(self):
+    """Reads the training summary from a file."""
+    summary_path = os.path.join(FLAGS.model_dir, 'training_summary.txt')
+    with tf.io.gfile.GFile(summary_path, 'rb') as reader:
+      return json.loads(reader.read().decode('utf-8'))
-    super(BertSquadBenchmarkBase, self)._report_benchmark(
-        stats=summary,
-        wall_time_sec=wall_time_sec,
-        min_accuracy=min_accuracy,
-        max_accuracy=max_accuracy)
+  def _read_input_meta_data_from_file(self):
+    """Reads the input metadata from a file."""
+    with tf.io.gfile.GFile(FLAGS.input_meta_data_path, 'rb') as reader:
+      return json.loads(reader.read().decode('utf-8'))
-  def _evaluate_squad(self, predictions_file):
-    """Evaluates a predictions file."""
+  def _read_predictions_dataset_from_file(self):
+    """Reads the predictions dataset from a file."""
     with tf.io.gfile.GFile(SQUAD_PREDICT_FILE, 'r') as reader:
       dataset_json = json.load(reader)
-      dataset = dataset_json['data']
+      return dataset_json['data']
+  def _read_predictions_from_file(self):
+    """Reads the predictions from a file."""
+    predictions_file = os.path.join(FLAGS.model_dir, 'predictions.json')
     with tf.io.gfile.GFile(predictions_file, 'r') as reader:
-      predictions = json.load(reader)
+      return json.load(reader)
-    return squad_evaluate_v1_1.evaluate(dataset, predictions)
+  def _get_distribution_strategy(self):
+    """Gets the distribution strategy."""
+    return distribution_utils.get_distribution_strategy(
+        distribution_strategy='mirrored', num_gpus=self.num_gpus)
   @flagsaver.flagsaver
-  def _run_bert_squad(self):
-    """Starts BERT SQuAD training and evaluation tasks."""
-    with tf.io.gfile.GFile(FLAGS.input_meta_data_path, 'rb') as reader:
-      input_meta_data = json.loads(reader.read().decode('utf-8'))
-    strategy = distribution_utils.get_distribution_strategy(
-        distribution_strategy='mirrored', num_gpus=self.num_gpus)
+  def _train_squad(self):
+    """Runs BERT SQuAD training."""
+    input_meta_data = self._read_input_meta_data_from_file()
+    strategy = self._get_distribution_strategy()
     run_squad.train_squad(
         strategy=strategy,
         input_meta_data=input_meta_data,
         custom_callbacks=[self.timer_callback])
+  @flagsaver.flagsaver
+  def _evaluate_squad(self):
+    """Runs BERT SQuAD evaluation."""
+    input_meta_data = self._read_input_meta_data_from_file()
+    strategy = self._get_distribution_strategy()
     run_squad.predict_squad(strategy=strategy, input_meta_data=input_meta_data)
-    predictions_file = os.path.join(FLAGS.model_dir, 'predictions.json')
-    eval_metrics = self._evaluate_squad(predictions_file)
+    dataset = self._read_predictions_dataset_from_file()
+    predictions = self._read_predictions_from_file()
+    eval_metrics = squad_evaluate_v1_1.evaluate(dataset, predictions)
     # Use F1 score as reported evaluation metric.
     self.eval_metrics = eval_metrics['f1']
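
A side note on the @flagsaver.flagsaver decorator used on _train_squad and _evaluate_squad above: it comes from absl and restores every flag to the value it had when the decorated function was entered, so flag mutations made during one run do not leak into the next. A minimal sketch of that behavior follows, with a flag defined purely for illustration.

from absl import flags
from absl.testing import flagsaver

flags.DEFINE_integer('demo_batch_size', 4, 'Defined only for this example.')
FLAGS = flags.FLAGS


@flagsaver.flagsaver
def mutate_flags():
  # Any flag changed inside this call is rolled back when it returns.
  FLAGS.demo_batch_size = 32
  return FLAGS.demo_batch_size


if __name__ == '__main__':
  FLAGS(['demo'])               # parse flags with a dummy argv
  print(mutate_flags())         # -> 32
  print(FLAGS.demo_batch_size)  # -> 4, restored by flagsaver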
@@ -118,10 +125,19 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
     FLAGS.bert_config_file = MODEL_CONFIG_FILE_PATH
     FLAGS.num_train_epochs = 1
-  def _run_and_report_benchmark(self, training_summary_path):
+  def _run_and_report_benchmark(self):
     """Runs the benchmark and reports various metrics."""
-    super(BertSquadBenchmarkReal, self)._run_and_report_benchmark(
-        training_summary_path, min_accuracy=0, max_accuracy=1)
+    start_time_sec = time.time()
+    self._train_squad()
+    wall_time_sec = time.time() - start_time_sec
+    summary = self._read_training_summary_from_file()
+    super(BertSquadBenchmarkReal, self)._report_benchmark(
+        stats=summary,
+        wall_time_sec=wall_time_sec,
+        min_accuracy=0,
+        max_accuracy=1)
   def benchmark_1_gpu(self):
     """Tests BERT SQuAD model performance with 1 GPU."""
@@ -131,8 +147,7 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
     FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_squad')
     FLAGS.train_batch_size = 4
-    summary_path = os.path.join(FLAGS.model_dir, 'training_summary.txt')
-    self._run_and_report_benchmark(summary_path)
+    self._run_and_report_benchmark()
   def benchmark_2_gpu(self):
     """Tests BERT SQuAD model performance with 2 GPUs."""
@@ -142,8 +157,7 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
     FLAGS.model_dir = self._get_model_dir('benchmark_2_gpu_squad')
     FLAGS.train_batch_size = 8
-    summary_path = os.path.join(FLAGS.model_dir, 'training_summary.txt')
-    self._run_and_report_benchmark(summary_path)
+    self._run_and_report_benchmark()
   def benchmark_4_gpu(self):
     """Tests BERT SQuAD model performance with 4 GPUs."""
@@ -153,8 +167,7 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
     FLAGS.model_dir = self._get_model_dir('benchmark_4_gpu_squad')
     FLAGS.train_batch_size = 16
-    summary_path = os.path.join(FLAGS.model_dir, 'training_summary.txt')
-    self._run_and_report_benchmark(summary_path)
+    self._run_and_report_benchmark()
   def benchmark_8_gpu(self):
     """Tests BERT SQuAD model performance with 8 GPUs."""
@@ -164,8 +177,7 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
     FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_squad')
     FLAGS.train_batch_size = 32
-    summary_path = os.path.join(FLAGS.model_dir, 'training_summary.txt')
-    self._run_and_report_benchmark(summary_path)
+    self._run_and_report_benchmark()
 class BertSquadAccuracy(BertSquadBenchmarkBase):
@@ -189,10 +201,21 @@ class BertSquadAccuracy(BertSquadBenchmarkBase):
     FLAGS.init_checkpoint = PRETRAINED_CHECKPOINT_PATH
     FLAGS.num_train_epochs = 2
-  def _run_and_report_benchmark(self, training_summary_path):
+  def _run_and_report_benchmark(self):
     """Runs the benchmark and reports various metrics."""
-    super(BertSquadAccuracy, self)._run_and_report_benchmark(
-        training_summary_path, min_accuracy=0.902, max_accuracy=0.909)
+    start_time_sec = time.time()
+    self._train_squad()
+    self._evaluate_squad()
+    wall_time_sec = time.time() - start_time_sec
+    summary = self._read_training_summary_from_file()
+    summary['eval_metrics'] = self.eval_metrics
+    super(BertSquadAccuracy, self)._report_benchmark(
+        stats=summary,
+        wall_time_sec=wall_time_sec,
+        min_accuracy=0.902,
+        max_accuracy=0.906)
   def benchmark_8_gpu(self):
     """Tests BERT SQuAD model accuracy with 8 GPUs."""
@@ -202,8 +225,7 @@ class BertSquadAccuracy(BertSquadBenchmarkBase):
     FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_squad')
     FLAGS.train_batch_size = 32
-    summary_path = os.path.join(FLAGS.model_dir, 'training_summary.txt')
-    self._run_and_report_benchmark(summary_path)
+    self._run_and_report_benchmark()
 if __name__ == '__main__':