# Copyright 2019 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Executes BERT benchmarks and accuracy tests.""" import functools import json import math import os import time from absl import flags from absl.testing import flagsaver import tensorflow as tf from official.benchmark import benchmark_wrappers from official.benchmark import bert_benchmark_utils as benchmark_utils from official.benchmark import owner_utils from official.common import distribute_utils from official.nlp.bert import configs from official.nlp.bert import run_classifier # pylint: disable=line-too-long PRETRAINED_CHECKPOINT_PATH = 'gs://cloud-tpu-checkpoints/bert/keras_bert/uncased_L-24_H-1024_A-16/bert_model.ckpt' CLASSIFIER_TRAIN_DATA_PATH = 'gs://tf-perfzero-data/bert/classification/mrpc_train.tf_record' CLASSIFIER_EVAL_DATA_PATH = 'gs://tf-perfzero-data/bert/classification/mrpc_eval.tf_record' CLASSIFIER_INPUT_META_DATA_PATH = 'gs://tf-perfzero-data/bert/classification/mrpc_meta_data' MODEL_CONFIG_FILE_PATH = 'gs://cloud-tpu-checkpoints/bert/keras_bert/uncased_L-24_H-1024_A-16/bert_config.json' # pylint: enable=line-too-long TMP_DIR = os.getenv('TMPDIR') FLAGS = flags.FLAGS class BertClassifyBenchmarkBase(benchmark_utils.BertBenchmarkBase): """Base class to hold methods common to test classes in the module.""" def __init__(self, output_dir=None, tpu=None): super(BertClassifyBenchmarkBase, self).__init__(output_dir, tpu=tpu) self.num_epochs = None self.num_steps_per_epoch = None FLAGS.steps_per_loop = 1 @flagsaver.flagsaver def _run_bert_classifier(self, callbacks=None, use_ds=True): """Starts BERT classification task.""" with tf.io.gfile.GFile(FLAGS.input_meta_data_path, 'rb') as reader: input_meta_data = json.loads(reader.read().decode('utf-8')) bert_config = configs.BertConfig.from_json_file(FLAGS.bert_config_file) epochs = self.num_epochs if self.num_epochs else FLAGS.num_train_epochs if self.num_steps_per_epoch: steps_per_epoch = self.num_steps_per_epoch else: train_data_size = input_meta_data['train_data_size'] steps_per_epoch = int(train_data_size / FLAGS.train_batch_size) warmup_steps = int(epochs * steps_per_epoch * 0.1) eval_steps = int( math.ceil(input_meta_data['eval_data_size'] / FLAGS.eval_batch_size)) if self.tpu: strategy = distribute_utils.get_distribution_strategy( distribution_strategy='tpu', tpu_address=self.tpu) else: strategy = distribute_utils.get_distribution_strategy( distribution_strategy='mirrored' if use_ds else 'off', num_gpus=self.num_gpus) max_seq_length = input_meta_data['max_seq_length'] train_input_fn = run_classifier.get_dataset_fn( FLAGS.train_data_path, max_seq_length, FLAGS.train_batch_size, is_training=True) eval_input_fn = run_classifier.get_dataset_fn( FLAGS.eval_data_path, max_seq_length, FLAGS.eval_batch_size, is_training=False) _, summary = run_classifier.run_bert_classifier( strategy, bert_config, input_meta_data, FLAGS.model_dir, epochs, steps_per_epoch, FLAGS.steps_per_loop, eval_steps, warmup_steps, FLAGS.learning_rate, FLAGS.init_checkpoint, train_input_fn, eval_input_fn, training_callbacks=False, custom_callbacks=callbacks) return summary class BertClassifyBenchmarkReal(BertClassifyBenchmarkBase): """Short benchmark performance tests for BERT model. Tests BERT classification performance in different GPU, TPU configurations. The naming convention of below test cases follow `benchmark_(number of gpus)_gpu_(dataset type)` for GPUs and `benchmark_(topology)_tpu_(dataset type)` for TPUs. """ def __init__(self, output_dir=TMP_DIR, tpu=None, **kwargs): super(BertClassifyBenchmarkReal, self).__init__( output_dir=output_dir, tpu=tpu) self.train_data_path = CLASSIFIER_TRAIN_DATA_PATH self.eval_data_path = CLASSIFIER_EVAL_DATA_PATH self.bert_config_file = MODEL_CONFIG_FILE_PATH self.input_meta_data_path = CLASSIFIER_INPUT_META_DATA_PATH # Since we only care about performance metrics, we limit # the number of training steps and epochs to prevent unnecessarily # long tests. self.num_steps_per_epoch = 100 self.num_epochs = 1 @benchmark_wrappers.enable_runtime_flags def _run_and_report_benchmark(self, training_summary_path, min_accuracy=0, max_accuracy=1, use_ds=True): """Starts BERT performance benchmark test.""" start_time_sec = time.time() summary = self._run_bert_classifier( callbacks=[self.timer_callback], use_ds=use_ds) wall_time_sec = time.time() - start_time_sec # Since we do not load from any pretrained checkpoints, we ignore all # accuracy metrics. summary.pop('eval_metrics', None) summary['start_time_sec'] = start_time_sec super(BertClassifyBenchmarkReal, self)._report_benchmark( stats=summary, wall_time_sec=wall_time_sec, min_accuracy=min_accuracy, max_accuracy=max_accuracy) def benchmark_1_gpu_mrpc(self): """Test BERT model performance with 1 GPU.""" self._setup() self.num_gpus = 1 FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_mrpc') FLAGS.train_data_path = self.train_data_path FLAGS.eval_data_path = self.eval_data_path FLAGS.input_meta_data_path = self.input_meta_data_path FLAGS.bert_config_file = self.bert_config_file FLAGS.train_batch_size = 4 FLAGS.eval_batch_size = 4 summary_path = os.path.join(FLAGS.model_dir, 'summaries/training_summary.txt') self._run_and_report_benchmark(summary_path) def benchmark_1_gpu_mrpc_xla(self): """Test BERT model performance with 1 GPU.""" self._setup() self.num_gpus = 1 FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_mrpc_xla') FLAGS.train_data_path = self.train_data_path FLAGS.eval_data_path = self.eval_data_path FLAGS.input_meta_data_path = self.input_meta_data_path FLAGS.bert_config_file = self.bert_config_file FLAGS.train_batch_size = 4 FLAGS.eval_batch_size = 4 FLAGS.enable_xla = True summary_path = os.path.join(FLAGS.model_dir, 'summaries/training_summary.txt') self._run_and_report_benchmark(summary_path) def benchmark_1_gpu_mrpc_no_dist_strat(self): """Test BERT model performance with 1 GPU, no distribution strategy.""" self._setup() self.num_gpus = 1 FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_mrpc_no_dist_strat') FLAGS.train_data_path = self.train_data_path FLAGS.eval_data_path = self.eval_data_path FLAGS.input_meta_data_path = self.input_meta_data_path FLAGS.bert_config_file = self.bert_config_file FLAGS.train_batch_size = 4 FLAGS.eval_batch_size = 4 summary_path = os.path.join(FLAGS.model_dir, 'summaries/training_summary.txt') self._run_and_report_benchmark(summary_path, use_ds=False) @owner_utils.Owner('tf-model-garden') def benchmark_8_gpu_mrpc(self): """Test BERT model performance with 8 GPUs.""" self._setup() FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_mrpc') FLAGS.train_data_path = self.train_data_path FLAGS.eval_data_path = self.eval_data_path FLAGS.input_meta_data_path = self.input_meta_data_path FLAGS.bert_config_file = self.bert_config_file summary_path = os.path.join(FLAGS.model_dir, 'summaries/training_summary.txt') self._run_and_report_benchmark(summary_path) @owner_utils.Owner('tf-model-garden') def benchmark_2x2_tpu_mrpc(self): """Test BERT model performance with 2x2 TPU.""" self._setup() FLAGS.steps_per_loop = 50 FLAGS.model_dir = self._get_model_dir('benchmark_2x2_tpu_mrpc') FLAGS.train_data_path = self.train_data_path FLAGS.eval_data_path = self.eval_data_path FLAGS.input_meta_data_path = self.input_meta_data_path FLAGS.bert_config_file = self.bert_config_file FLAGS.train_batch_size = 32 FLAGS.eval_batch_size = 32 summary_path = os.path.join(FLAGS.model_dir, 'summaries/training_summary.txt') self._run_and_report_benchmark(summary_path, use_ds=False) class BertClassifyAccuracy(BertClassifyBenchmarkBase): """Short accuracy test for BERT model. Tests BERT classification task model accuracy. The naming convention of below test cases follow `benchmark_(number of gpus)_gpu_(dataset type)` format. """ def __init__(self, output_dir=TMP_DIR, tpu=None, **kwargs): self.train_data_path = CLASSIFIER_TRAIN_DATA_PATH self.eval_data_path = CLASSIFIER_EVAL_DATA_PATH self.bert_config_file = MODEL_CONFIG_FILE_PATH self.input_meta_data_path = CLASSIFIER_INPUT_META_DATA_PATH self.pretrained_checkpoint_path = PRETRAINED_CHECKPOINT_PATH super(BertClassifyAccuracy, self).__init__(output_dir=output_dir, tpu=tpu) @benchmark_wrappers.enable_runtime_flags def _run_and_report_benchmark(self, training_summary_path, min_accuracy=0.84, max_accuracy=0.88): """Starts BERT accuracy benchmark test.""" start_time_sec = time.time() summary = self._run_bert_classifier(callbacks=[self.timer_callback]) wall_time_sec = time.time() - start_time_sec super(BertClassifyAccuracy, self)._report_benchmark( stats=summary, wall_time_sec=wall_time_sec, min_accuracy=min_accuracy, max_accuracy=max_accuracy) def _setup(self): super(BertClassifyAccuracy, self)._setup() FLAGS.train_data_path = self.train_data_path FLAGS.eval_data_path = self.eval_data_path FLAGS.input_meta_data_path = self.input_meta_data_path FLAGS.bert_config_file = self.bert_config_file FLAGS.init_checkpoint = self.pretrained_checkpoint_path @owner_utils.Owner('tf-model-garden') def benchmark_8_gpu_mrpc(self): """Run BERT model accuracy test with 8 GPUs. Due to comparatively small cardinality of MRPC dataset, training accuracy metric has high variance between trainings. As so, we set the wide range of allowed accuracy (84% to 88%). """ self._setup() FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_mrpc') summary_path = os.path.join(FLAGS.model_dir, 'summaries/training_summary.txt') self._run_and_report_benchmark(summary_path) def benchmark_8_gpu_mrpc_xla(self): """Run BERT model accuracy test with 8 GPUs with XLA.""" self._setup() FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_mrpc_xla') FLAGS.enable_xla = True summary_path = os.path.join(FLAGS.model_dir, 'summaries/training_summary.txt') self._run_and_report_benchmark(summary_path) @owner_utils.Owner('tf-model-garden') def benchmark_2x2_tpu_mrpc(self): """Run BERT model accuracy test on 2x2 TPU.""" self._setup() FLAGS.steps_per_loop = 50 FLAGS.model_dir = self._get_model_dir('benchmark_2x2_tpu_mrpc') summary_path = os.path.join(FLAGS.model_dir, 'summaries/training_summary.txt') self._run_and_report_benchmark(summary_path) if __name__ == '__main__': tf.test.main()