# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes BERT benchmarks and accuracy tests."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import functools
import json
import math
import os
import time

# pylint: disable=g-bad-import-order
from absl import flags
from absl.testing import flagsaver
import tensorflow as tf
# pylint: enable=g-bad-import-order

from official.benchmark import benchmark_wrappers
from official.benchmark import bert_benchmark_utils as benchmark_utils
from official.benchmark import owner_utils
from official.nlp.bert import configs
from official.nlp.bert import run_classifier
from official.utils.misc import distribution_utils

# Remote locations of the checkpoint, datasets and model config used by the
# benchmarks in this module.
# pylint: disable=line-too-long
PRETRAINED_CHECKPOINT_PATH = 'gs://cloud-tpu-checkpoints/bert/keras_bert/uncased_L-24_H-1024_A-16/bert_model.ckpt'
CLASSIFIER_TRAIN_DATA_PATH = 'gs://tf-perfzero-data/bert/classification/mrpc_train.tf_record'
CLASSIFIER_EVAL_DATA_PATH = 'gs://tf-perfzero-data/bert/classification/mrpc_eval.tf_record'
CLASSIFIER_INPUT_META_DATA_PATH = 'gs://tf-perfzero-data/bert/classification/mrpc_meta_data'
MODEL_CONFIG_FILE_PATH = 'gs://cloud-tpu-checkpoints/bert/keras_bert/uncased_L-24_H-1024_A-16/bert_config.json'
# pylint: enable=line-too-long

# Default `output_dir` for the benchmark classes below. `os.getenv` returns
# None when TMPDIR is not set in the environment.
TMP_DIR = os.getenv('TMPDIR')

FLAGS = flags.FLAGS


class BertClassifyBenchmarkBase(benchmark_utils.BertBenchmarkBase):
  """Base class to hold methods common to test classes in the module."""

  def __init__(self, output_dir=None, tpu=None):
    """Initializes shared benchmark state.

    Args:
      output_dir: Directory handed to the parent benchmark class.
      tpu: Address of the TPU to run on, or None to run on GPUs/CPU.
    """
    super(BertClassifyBenchmarkBase, self).__init__(output_dir)
    # `_run_bert_classifier` reads `self.tpu`, but the parent constructor is
    # only given `output_dir`. Record an explicitly supplied address so it is
    # not silently dropped; when none is supplied, keep whatever the parent
    # may have set and only fall back to None if the attribute is missing.
    # NOTE(review): confirm how the parent class populates `tpu`.
    if tpu is not None or not hasattr(self, 'tpu'):
      self.tpu = tpu
    # Optional overrides; when left as None the values are derived from
    # FLAGS and the dataset meta data in `_run_bert_classifier`.
    self.num_epochs = None
    self.num_steps_per_epoch = None
    FLAGS.steps_per_loop = 50

  @flagsaver.flagsaver
  def _run_bert_classifier(self, callbacks=None, use_ds=True):
    """Starts BERT classification task.

    Args:
      callbacks: Optional list of callbacks forwarded to the training loop as
        `custom_callbacks`.
      use_ds: When no TPU is configured, selects the 'mirrored' distribution
        strategy if True and 'off' if False. Ignored when `self.tpu` is set.
    """
    with tf.io.gfile.GFile(FLAGS.input_meta_data_path, 'rb') as reader:
      input_meta_data = json.loads(reader.read().decode('utf-8'))

    bert_config = configs.BertConfig.from_json_file(FLAGS.bert_config_file)

    # Instance-level overrides win over the flag / dataset derived values.
    epochs = self.num_epochs if self.num_epochs else FLAGS.num_train_epochs
    if self.num_steps_per_epoch:
      steps_per_epoch = self.num_steps_per_epoch
    else:
      train_data_size = input_meta_data['train_data_size']
      steps_per_epoch = int(train_data_size / FLAGS.train_batch_size)
    # Warm up over the first 10% of all training steps.
    warmup_steps = int(epochs * steps_per_epoch * 0.1)
    # Round up so a final partial batch of eval data is still evaluated.
    eval_steps = int(
        math.ceil(input_meta_data['eval_data_size'] / FLAGS.eval_batch_size))

    if self.tpu:
      strategy = distribution_utils.get_distribution_strategy(
          distribution_strategy='tpu', tpu_address=self.tpu)
    else:
      strategy = distribution_utils.get_distribution_strategy(
          distribution_strategy='mirrored' if use_ds else 'off',
          num_gpus=self.num_gpus)

    max_seq_length = input_meta_data['max_seq_length']
    train_input_fn = run_classifier.get_dataset_fn(
        FLAGS.train_data_path,
        max_seq_length,
        FLAGS.train_batch_size,
        is_training=True)
    eval_input_fn = run_classifier.get_dataset_fn(
        FLAGS.eval_data_path,
        max_seq_length,
        FLAGS.eval_batch_size,
        is_training=False)

    run_classifier.run_bert_classifier(
        strategy,
        bert_config,
        input_meta_data,
        FLAGS.model_dir,
        epochs,
        steps_per_epoch,
        FLAGS.steps_per_loop,
        eval_steps,
        warmup_steps,
        FLAGS.learning_rate,
        FLAGS.init_checkpoint,
        train_input_fn,
        eval_input_fn,
        custom_callbacks=callbacks)


class BertClassifyBenchmarkReal(BertClassifyBenchmarkBase):
  """Short benchmark performance tests for BERT model.

  Tests BERT classification performance in different GPU, TPU configurations.
  The naming convention of below test cases follow
  `benchmark_(number of gpus)_gpu_(dataset type)` for GPUs and
  `benchmark_(topology)_tpu_(dataset type)` for TPUs.
  """

  def __init__(self, output_dir=TMP_DIR, tpu=None, **kwargs):
    """Initializes the performance benchmark.

    Args:
      output_dir: Directory handed to the base class.
      tpu: TPU address handed to the base class.
      **kwargs: Accepted for call compatibility; not used by this class.
    """
    super(BertClassifyBenchmarkReal, self).__init__(
        output_dir=output_dir, tpu=tpu)

    self.train_data_path = CLASSIFIER_TRAIN_DATA_PATH
    self.eval_data_path = CLASSIFIER_EVAL_DATA_PATH
    self.bert_config_file = MODEL_CONFIG_FILE_PATH
    self.input_meta_data_path = CLASSIFIER_INPUT_META_DATA_PATH

    # Since we only care about performance metrics, we limit
    # the number of training steps and epochs to prevent unnecessarily
    # long tests.
    self.num_steps_per_epoch = 100
    self.num_epochs = 1

  def _configure_mrpc_flags(self, test_name, batch_size=None):
    """Points FLAGS at the MRPC inputs and a per-test model directory.

    Args:
      test_name: Name passed to `_get_model_dir` to obtain the model dir.
      batch_size: When given, used for both the train and the eval batch size.
        When None, the batch size flags are left untouched.

    Returns:
      Path of the training summary file inside the model directory.
    """
    FLAGS.model_dir = self._get_model_dir(test_name)
    FLAGS.train_data_path = self.train_data_path
    FLAGS.eval_data_path = self.eval_data_path
    FLAGS.input_meta_data_path = self.input_meta_data_path
    FLAGS.bert_config_file = self.bert_config_file
    if batch_size is not None:
      FLAGS.train_batch_size = batch_size
      FLAGS.eval_batch_size = batch_size
    return os.path.join(FLAGS.model_dir, 'summaries/training_summary.txt')

  @benchmark_wrappers.enable_runtime_flags
  def _run_and_report_benchmark(self,
                                training_summary_path,
                                min_accuracy=0,
                                max_accuracy=1,
                                use_ds=True):
    """Starts BERT performance benchmark test.

    Args:
      training_summary_path: File the training summary JSON is read from once
        the run has finished.
      min_accuracy: Lower accuracy bound forwarded to `_report_benchmark`.
      max_accuracy: Upper accuracy bound forwarded to `_report_benchmark`.
      use_ds: Forwarded to `_run_bert_classifier`; False turns the
        distribution strategy off on the non-TPU path.
    """
    start_time_sec = time.time()
    self._run_bert_classifier(callbacks=[self.timer_callback], use_ds=use_ds)
    wall_time_sec = time.time() - start_time_sec

    with tf.io.gfile.GFile(training_summary_path, 'rb') as reader:
      summary = json.loads(reader.read().decode('utf-8'))

    # Since we do not load from any pretrained checkpoints, we ignore all
    # accuracy metrics.
    summary.pop('eval_metrics', None)
    summary['start_time_sec'] = start_time_sec

    super(BertClassifyBenchmarkReal, self)._report_benchmark(
        stats=summary,
        wall_time_sec=wall_time_sec,
        min_accuracy=min_accuracy,
        max_accuracy=max_accuracy)

  def benchmark_1_gpu_mrpc(self):
    """Test BERT model performance with 1 GPU."""
    self._setup()
    self.num_gpus = 1
    summary_path = self._configure_mrpc_flags(
        'benchmark_1_gpu_mrpc', batch_size=4)
    self._run_and_report_benchmark(summary_path)

  def benchmark_1_gpu_mrpc_xla(self):
    """Test BERT model performance with 1 GPU and XLA enabled."""
    self._setup()
    self.num_gpus = 1
    summary_path = self._configure_mrpc_flags(
        'benchmark_1_gpu_mrpc_xla', batch_size=4)
    FLAGS.enable_xla = True
    self._run_and_report_benchmark(summary_path)

  def benchmark_1_gpu_mrpc_no_dist_strat(self):
    """Test BERT model performance with 1 GPU, no distribution strategy."""
    self._setup()
    self.num_gpus = 1
    summary_path = self._configure_mrpc_flags(
        'benchmark_1_gpu_mrpc_no_dist_strat', batch_size=4)
    self._run_and_report_benchmark(summary_path, use_ds=False)

  @owner_utils.Owner('tf-model-garden')
  def benchmark_8_gpu_mrpc(self):
    """Test BERT model performance with 8 GPUs."""
    self._setup()
    # GPU count and batch sizes are deliberately not set here; they keep the
    # values left in place by `_setup` and the flag defaults.
    summary_path = self._configure_mrpc_flags('benchmark_8_gpu_mrpc')
    self._run_and_report_benchmark(summary_path)

  def benchmark_1_gpu_amp_mrpc_no_dist_strat(self):
    """Performance for 1 GPU no DS with automatic mixed precision."""
    self._setup()
    self.num_gpus = 1
    summary_path = self._configure_mrpc_flags(
        'benchmark_1_gpu_amp_mrpc_no_dist_strat', batch_size=4)
    FLAGS.dtype = 'fp16'
    FLAGS.fp16_implementation = 'graph_rewrite'
    self._run_and_report_benchmark(summary_path, use_ds=False)

  def benchmark_8_gpu_amp_mrpc(self):
    """Test BERT model performance with 8 GPUs with automatic mixed precision.
    """
    self._setup()
    self.num_gpus = 8
    summary_path = self._configure_mrpc_flags(
        'benchmark_8_gpu_amp_mrpc', batch_size=32)
    FLAGS.dtype = 'fp16'
    FLAGS.fp16_implementation = 'graph_rewrite'
    # Run with the default (mirrored) strategy: `use_ds=False` would request
    # distribution strategy 'off', which cannot spread work over the eight
    # GPUs this benchmark is meant to measure.
    self._run_and_report_benchmark(summary_path)

  @owner_utils.Owner('tf-model-garden')
  def benchmark_2x2_tpu_mrpc(self):
    """Test BERT model performance with 2x2 TPU."""
    self._setup()
    summary_path = self._configure_mrpc_flags(
        'benchmark_2x2_tpu_mrpc', batch_size=32)
    # `use_ds` only matters on the non-TPU path of `_run_bert_classifier`.
    self._run_and_report_benchmark(summary_path, use_ds=False)

class BertClassifyAccuracy(BertClassifyBenchmarkBase):
  """Short accuracy test for BERT model.

  Tests BERT classification task model accuracy. The naming
  convention of below test cases follow
  `benchmark_(number of gpus)_gpu_(dataset type)` for GPUs and
  `benchmark_(topology)_tpu_(dataset type)` for TPUs.
  """

  def __init__(self, output_dir=TMP_DIR, tpu=None, **kwargs):
    """Initializes the accuracy benchmark.

    Args:
      output_dir: Directory handed to the base class.
      tpu: TPU address handed to the base class.
      **kwargs: Accepted for call compatibility; not used by this class.
    """
    self.train_data_path = CLASSIFIER_TRAIN_DATA_PATH
    self.eval_data_path = CLASSIFIER_EVAL_DATA_PATH
    self.bert_config_file = MODEL_CONFIG_FILE_PATH
    self.input_meta_data_path = CLASSIFIER_INPUT_META_DATA_PATH
    self.pretrained_checkpoint_path = PRETRAINED_CHECKPOINT_PATH

    super(BertClassifyAccuracy, self).__init__(output_dir=output_dir, tpu=tpu)

  @benchmark_wrappers.enable_runtime_flags
  def _run_and_report_benchmark(self,
                                training_summary_path,
                                min_accuracy=0.84,
                                max_accuracy=0.88):
    """Starts BERT accuracy benchmark test.

    Args:
      training_summary_path: File the training summary JSON is read from once
        the run has finished.
      min_accuracy: Lower accuracy bound forwarded to `_report_benchmark`.
      max_accuracy: Upper accuracy bound forwarded to `_report_benchmark`.
    """
    start_time_sec = time.time()
    self._run_bert_classifier(callbacks=[self.timer_callback])
    wall_time_sec = time.time() - start_time_sec

    with tf.io.gfile.GFile(training_summary_path, 'rb') as reader:
      summary = json.loads(reader.read().decode('utf-8'))

    super(BertClassifyAccuracy, self)._report_benchmark(
        stats=summary,
        wall_time_sec=wall_time_sec,
        min_accuracy=min_accuracy,
        max_accuracy=max_accuracy)

  def _setup(self):
    """Runs the parent setup, then points FLAGS at the MRPC inputs.

    Unlike the performance benchmarks, training starts from the pretrained
    checkpoint so that the resulting accuracy is meaningful.
    """
    super(BertClassifyAccuracy, self)._setup()
    FLAGS.train_data_path = self.train_data_path
    FLAGS.eval_data_path = self.eval_data_path
    FLAGS.input_meta_data_path = self.input_meta_data_path
    FLAGS.bert_config_file = self.bert_config_file
    FLAGS.init_checkpoint = self.pretrained_checkpoint_path

  @owner_utils.Owner('tf-model-garden')
  def benchmark_8_gpu_mrpc(self):
    """Run BERT model accuracy test with 8 GPUs.

    Due to comparatively small cardinality of MRPC dataset, training
    accuracy metric has high variance between trainings. As so, we
    set the wide range of allowed accuracy (84% to 88%).
    """
    self._setup()
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_mrpc')

    summary_path = os.path.join(FLAGS.model_dir,
                                'summaries/training_summary.txt')
    self._run_and_report_benchmark(summary_path)

  def benchmark_8_gpu_mrpc_xla(self):
    """Run BERT model accuracy test with 8 GPUs with XLA."""
    self._setup()
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_mrpc_xla')
    FLAGS.enable_xla = True

    summary_path = os.path.join(FLAGS.model_dir,
                                'summaries/training_summary.txt')
    self._run_and_report_benchmark(summary_path)

  @owner_utils.Owner('tf-model-garden')
  def benchmark_2x2_tpu_mrpc(self):
    """Run BERT model accuracy test on 2x2 TPU."""
    self._setup()
    FLAGS.model_dir = self._get_model_dir('benchmark_2x2_tpu_mrpc')

    summary_path = os.path.join(FLAGS.model_dir,
                                'summaries/training_summary.txt')
    self._run_and_report_benchmark(summary_path)

# When executed as a script, hand control to TensorFlow's test entry point.
if __name__ == '__main__':
  tf.test.main()