bert_benchmark.py 12 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes BERT benchmarks and accuracy tests."""

Rajagopal Ananthanarayanan's avatar
Rajagopal Ananthanarayanan committed
17
import functools
18
import json
19
import math
20
21
22
23
24
import os
import time

from absl import flags
from absl.testing import flagsaver
25
import tensorflow as tf
26

27
from official.benchmark import benchmark_wrappers
28
from official.benchmark import bert_benchmark_utils as benchmark_utils
Jing Li's avatar
Jing Li committed
29
from official.benchmark import owner_utils
30
from official.common import distribute_utils
31
from official.nlp.bert import configs
32
from official.nlp.bert import run_classifier
33
34

# pylint: disable=line-too-long
35
# Checkpoint that BertClassifyAccuracy assigns to FLAGS.init_checkpoint.
PRETRAINED_CHECKPOINT_PATH = 'gs://cloud-tpu-checkpoints/bert/keras_bert/uncased_L-24_H-1024_A-16/bert_model.ckpt'
36
37
38
# MRPC classification inputs: training records, evaluation records, and the
# metadata file that _run_bert_classifier parses as JSON.
CLASSIFIER_TRAIN_DATA_PATH = 'gs://tf-perfzero-data/bert/classification/mrpc_train.tf_record'
CLASSIFIER_EVAL_DATA_PATH = 'gs://tf-perfzero-data/bert/classification/mrpc_eval.tf_record'
CLASSIFIER_INPUT_META_DATA_PATH = 'gs://tf-perfzero-data/bert/classification/mrpc_meta_data'
David Chen's avatar
David Chen committed
39
# Model configuration; loaded with configs.BertConfig.from_json_file.
MODEL_CONFIG_FILE_PATH = 'gs://cloud-tpu-checkpoints/bert/keras_bert/uncased_L-24_H-1024_A-16/bert_config.json'
40
41
# pylint: enable=line-too-long

David Chen's avatar
David Chen committed
42
# Default `output_dir` of the benchmark classes below. os.getenv returns None
# when the TMPDIR environment variable is not set.
TMP_DIR = os.getenv('TMPDIR')
43
44
45
# absl's global flag container; the benchmarks read and overwrite flag values
# through it.
FLAGS = flags.FLAGS


davidmochen's avatar
davidmochen committed
46
class BertClassifyBenchmarkBase(benchmark_utils.BertBenchmarkBase):
  """Shared plumbing for the BERT classifier benchmark classes below."""

  def __init__(self, output_dir=None, tpu=None):
    """Initializes the base benchmark and the per-run overrides.

    Args:
      output_dir: Forwarded positionally to the parent constructor.
      tpu: Forwarded to the parent constructor as `tpu`.
    """
    super(BertClassifyBenchmarkBase, self).__init__(output_dir, tpu=tpu)
    # Subclasses may set these to cap a run. While they are falsy,
    # _run_bert_classifier derives the values from flags and the input
    # metadata instead.
    self.num_steps_per_epoch = None
    self.num_epochs = None
    FLAGS.steps_per_loop = 1

  @flagsaver.flagsaver
  def _run_bert_classifier(self, callbacks=None, use_ds=True):
    """Runs the BERT classification task described by the current FLAGS.

    Args:
      callbacks: Passed to `run_classifier.run_bert_classifier` as
        `custom_callbacks`.
      use_ds: When `self.tpu` is falsy, picks the 'mirrored' distribution
        strategy if true and 'off' otherwise. Not consulted when `self.tpu`
        is set.

    Returns:
      The second element of the pair returned by
      `run_classifier.run_bert_classifier`.
    """
    with tf.io.gfile.GFile(FLAGS.input_meta_data_path, 'rb') as meta_file:
      raw_meta = meta_file.read()
    input_meta_data = json.loads(raw_meta.decode('utf-8'))

    bert_config = configs.BertConfig.from_json_file(FLAGS.bert_config_file)

    # Instance-level overrides win; otherwise fall back to the flag value and
    # to the dataset size recorded in the metadata.
    epochs = self.num_epochs or FLAGS.num_train_epochs
    steps_per_epoch = self.num_steps_per_epoch or int(
        input_meta_data['train_data_size'] / FLAGS.train_batch_size)
    # A tenth of the total number of training steps.
    warmup_steps = int(epochs * steps_per_epoch * 0.1)
    eval_batches = input_meta_data['eval_data_size'] / FLAGS.eval_batch_size
    eval_steps = int(math.ceil(eval_batches))

    if self.tpu:
      strategy_kwargs = dict(
          distribution_strategy='tpu', tpu_address=self.tpu)
    else:
      strategy_kwargs = dict(
          distribution_strategy='mirrored' if use_ds else 'off',
          num_gpus=self.num_gpus)
    strategy = distribute_utils.get_distribution_strategy(**strategy_kwargs)

    max_seq_length = input_meta_data['max_seq_length']
    train_input_fn = run_classifier.get_dataset_fn(
        FLAGS.train_data_path,
        max_seq_length,
        FLAGS.train_batch_size,
        is_training=True)
    eval_input_fn = run_classifier.get_dataset_fn(
        FLAGS.eval_data_path,
        max_seq_length,
        FLAGS.eval_batch_size,
        is_training=False)

    _unused_first, training_summary = run_classifier.run_bert_classifier(
        strategy,
        bert_config,
        input_meta_data,
        FLAGS.model_dir,
        epochs,
        steps_per_epoch,
        FLAGS.steps_per_loop,
        eval_steps,
        warmup_steps,
        FLAGS.learning_rate,
        FLAGS.init_checkpoint,
        train_input_fn,
        eval_input_fn,
        training_callbacks=False,
        custom_callbacks=callbacks)
    return training_summary
107
108


davidmochen's avatar
davidmochen committed
109
class BertClassifyBenchmarkReal(BertClassifyBenchmarkBase):
  """Short benchmark performance tests for BERT model.

  Tests BERT classification performance in different GPU, TPU configurations.
  The naming convention of the test cases below follows
  `benchmark_(number of gpus)_gpu_(dataset type)` for GPUs and
  `benchmark_(topology)_tpu_(dataset type)` for TPUs.
  """

  def __init__(self, output_dir=TMP_DIR, tpu=None, **kwargs):
    """Initializes the benchmark with the MRPC inputs and a short schedule.

    Args:
      output_dir: Forwarded to the base class constructor.
      tpu: Forwarded to the base class constructor.
      **kwargs: Accepted for call compatibility; not used here.
    """
    super(BertClassifyBenchmarkReal, self).__init__(
        output_dir=output_dir, tpu=tpu)

    self.train_data_path = CLASSIFIER_TRAIN_DATA_PATH
    self.eval_data_path = CLASSIFIER_EVAL_DATA_PATH
    self.bert_config_file = MODEL_CONFIG_FILE_PATH
    self.input_meta_data_path = CLASSIFIER_INPUT_META_DATA_PATH

    # Since we only care about performance metrics, we limit
    # the number of training steps and epochs to prevent unnecessarily
    # long tests.
    self.num_steps_per_epoch = 100
    self.num_epochs = 1

  @benchmark_wrappers.enable_runtime_flags
  def _run_and_report_benchmark(self,
                                training_summary_path,
                                min_accuracy=0,
                                max_accuracy=1,
                                use_ds=True):
    """Starts BERT performance benchmark test.

    Args:
      training_summary_path: Not read by this method; kept so existing call
        sites remain valid.
      min_accuracy: Forwarded to `_report_benchmark`.
      max_accuracy: Forwarded to `_report_benchmark`.
      use_ds: Forwarded to `_run_bert_classifier`.
    """
    start_time_sec = time.time()
    summary = self._run_bert_classifier(
        callbacks=[self.timer_callback], use_ds=use_ds)
    wall_time_sec = time.time() - start_time_sec

    # Since we do not load from any pretrained checkpoints, we ignore all
    # accuracy metrics.
    summary.pop('eval_metrics', None)
    summary['start_time_sec'] = start_time_sec

    super(BertClassifyBenchmarkReal, self)._report_benchmark(
        stats=summary,
        wall_time_sec=wall_time_sec,
        min_accuracy=min_accuracy,
        max_accuracy=max_accuracy)

  def _configure_run(self, test_name, batch_size=None):
    """Points FLAGS at the MRPC inputs and returns the summary path.

    Centralizes the flag assignments that every benchmark method needs, so the
    individual tests only spell out what is specific to them.

    Args:
      test_name: Name handed to `self._get_model_dir` to obtain
        `FLAGS.model_dir`.
      batch_size: When not None, assigned to both `FLAGS.train_batch_size` and
        `FLAGS.eval_batch_size`. When None, those flags are left untouched.

    Returns:
      The path 'summaries/training_summary.txt' joined onto `FLAGS.model_dir`.
    """
    FLAGS.model_dir = self._get_model_dir(test_name)
    FLAGS.train_data_path = self.train_data_path
    FLAGS.eval_data_path = self.eval_data_path
    FLAGS.input_meta_data_path = self.input_meta_data_path
    FLAGS.bert_config_file = self.bert_config_file
    if batch_size is not None:
      FLAGS.train_batch_size = batch_size
      FLAGS.eval_batch_size = batch_size
    return os.path.join(FLAGS.model_dir, 'summaries/training_summary.txt')

  def benchmark_1_gpu_mrpc(self):
    """Test BERT model performance with 1 GPU."""
    self._setup()
    self.num_gpus = 1
    summary_path = self._configure_run('benchmark_1_gpu_mrpc', batch_size=4)
    self._run_and_report_benchmark(summary_path)

  def benchmark_1_gpu_mrpc_xla(self):
    """Test BERT model performance with 1 GPU and XLA enabled."""
    self._setup()
    self.num_gpus = 1
    summary_path = self._configure_run(
        'benchmark_1_gpu_mrpc_xla', batch_size=4)
    FLAGS.enable_xla = True
    self._run_and_report_benchmark(summary_path)

  def benchmark_1_gpu_mrpc_no_dist_strat(self):
    """Test BERT model performance with 1 GPU, no distribution strategy."""
    self._setup()
    self.num_gpus = 1
    summary_path = self._configure_run(
        'benchmark_1_gpu_mrpc_no_dist_strat', batch_size=4)
    self._run_and_report_benchmark(summary_path, use_ds=False)

  @owner_utils.Owner('tf-model-garden')
  def benchmark_8_gpu_mrpc(self):
    """Test BERT model performance with 8 GPUs."""
    self._setup()
    # Batch-size flags are intentionally not overridden for this test.
    summary_path = self._configure_run('benchmark_8_gpu_mrpc')
    self._run_and_report_benchmark(summary_path)

  @owner_utils.Owner('tf-model-garden')
  def benchmark_2x2_tpu_mrpc(self):
    """Test BERT model performance with 2x2 TPU."""
    self._setup()
    FLAGS.steps_per_loop = 50
    summary_path = self._configure_run(
        'benchmark_2x2_tpu_mrpc', batch_size=32)
    self._run_and_report_benchmark(summary_path, use_ds=False)

241

davidmochen's avatar
davidmochen committed
242
class BertClassifyAccuracy(BertClassifyBenchmarkBase):
  """Short accuracy checks for the BERT classification task.

  Each test fine-tunes from the pretrained checkpoint and reports the result
  together with an accepted accuracy range. Test names follow the
  `benchmark_(number of gpus)_gpu_(dataset type)` format.
  """

  def __init__(self, output_dir=TMP_DIR, tpu=None, **kwargs):
    """Records the MRPC inputs, then initializes the base class.

    Args:
      output_dir: Forwarded to the base class constructor.
      tpu: Forwarded to the base class constructor.
      **kwargs: Accepted but not used here.
    """
    self.pretrained_checkpoint_path = PRETRAINED_CHECKPOINT_PATH
    self.bert_config_file = MODEL_CONFIG_FILE_PATH
    self.input_meta_data_path = CLASSIFIER_INPUT_META_DATA_PATH
    self.train_data_path = CLASSIFIER_TRAIN_DATA_PATH
    self.eval_data_path = CLASSIFIER_EVAL_DATA_PATH

    super(BertClassifyAccuracy, self).__init__(output_dir=output_dir, tpu=tpu)

  @benchmark_wrappers.enable_runtime_flags
  def _run_and_report_benchmark(self,
                                training_summary_path,
                                min_accuracy=0.84,
                                max_accuracy=0.88):
    """Runs the classifier once and reports timing plus accuracy bounds.

    Args:
      training_summary_path: Not read inside this method.
      min_accuracy: Forwarded to `_report_benchmark`.
      max_accuracy: Forwarded to `_report_benchmark`.
    """
    began_at = time.time()
    run_summary = self._run_bert_classifier(callbacks=[self.timer_callback])
    elapsed_sec = time.time() - began_at

    report_kwargs = dict(
        stats=run_summary,
        wall_time_sec=elapsed_sec,
        min_accuracy=min_accuracy,
        max_accuracy=max_accuracy)
    super(BertClassifyAccuracy, self)._report_benchmark(**report_kwargs)

  def _setup(self):
    """Runs the base setup, then points FLAGS at this test's inputs."""
    super(BertClassifyAccuracy, self)._setup()
    FLAGS.init_checkpoint = self.pretrained_checkpoint_path
    FLAGS.bert_config_file = self.bert_config_file
    FLAGS.input_meta_data_path = self.input_meta_data_path
    FLAGS.train_data_path = self.train_data_path
    FLAGS.eval_data_path = self.eval_data_path

  @owner_utils.Owner('tf-model-garden')
  def benchmark_8_gpu_mrpc(self):
    """Run BERT model accuracy test with 8 GPUs.

    MRPC is a comparatively small dataset, so the accuracy metric varies
    noticeably from one training run to the next. The accepted range is
    therefore kept wide (84% to 88%).
    """
    self._setup()
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_mrpc')
    self._run_and_report_benchmark(
        os.path.join(FLAGS.model_dir, 'summaries/training_summary.txt'))

  def benchmark_8_gpu_mrpc_xla(self):
    """Run BERT model accuracy test with 8 GPUs with XLA."""
    self._setup()
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_mrpc_xla')
    FLAGS.enable_xla = True
    self._run_and_report_benchmark(
        os.path.join(FLAGS.model_dir, 'summaries/training_summary.txt'))

  @owner_utils.Owner('tf-model-garden')
  def benchmark_2x2_tpu_mrpc(self):
    """Run BERT model accuracy test on 2x2 TPU."""
    self._setup()
    FLAGS.steps_per_loop = 50
    FLAGS.model_dir = self._get_model_dir('benchmark_2x2_tpu_mrpc')
    self._run_and_report_benchmark(
        os.path.join(FLAGS.model_dir, 'summaries/training_summary.txt'))

319
320
321

# When executed as a script, hand control to TensorFlow's test entry point.
if __name__ == '__main__':
  tf.test.main()