# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes BERT benchmarks and accuracy tests."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

Rajagopal Ananthanarayanan's avatar
Rajagopal Ananthanarayanan committed
21
import functools
22
import json
23
import math
24
25
26
import os
import time

27
# pylint: disable=g-bad-import-order
Hongkun Yu's avatar
Hongkun Yu committed
28

29
30
from absl import flags
from absl.testing import flagsaver
31
import tensorflow as tf
32
# pylint: enable=g-bad-import-order
33

34
from official.benchmark import bert_benchmark_utils as benchmark_utils
Jing Li's avatar
Jing Li committed
35
from official.benchmark import owner_utils
36
from official.nlp.bert import configs
37
from official.nlp.bert import run_classifier
38
from official.utils.misc import distribution_utils
39
from official.benchmark import benchmark_wrappers
40
41

# Remote locations of the checkpoint, MRPC classification data and model
# config used by the benchmarks below.
# pylint: disable=line-too-long
PRETRAINED_CHECKPOINT_PATH = 'gs://cloud-tpu-checkpoints/bert/keras_bert/uncased_L-24_H-1024_A-16/bert_model.ckpt'
CLASSIFIER_TRAIN_DATA_PATH = 'gs://tf-perfzero-data/bert/classification/mrpc_train.tf_record'
CLASSIFIER_EVAL_DATA_PATH = 'gs://tf-perfzero-data/bert/classification/mrpc_eval.tf_record'
CLASSIFIER_INPUT_META_DATA_PATH = 'gs://tf-perfzero-data/bert/classification/mrpc_meta_data'
MODEL_CONFIG_FILE_PATH = 'gs://cloud-tpu-checkpoints/bert/keras_bert/uncased_L-24_H-1024_A-16/bert_config.json'
# pylint: enable=line-too-long

# Default `output_dir` for the benchmark classes; None when the TMPDIR
# environment variable is not set.
TMP_DIR = os.getenv('TMPDIR')
FLAGS = flags.FLAGS


davidmochen's avatar
davidmochen committed
53
class BertClassifyBenchmarkBase(benchmark_utils.BertBenchmarkBase):
  """Base class to hold methods common to test classes in the module."""

  def __init__(self, output_dir=None, tpu=None):
    """Initializes the state shared by the classifier benchmarks.

    Args:
      output_dir: Forwarded unchanged to the parent constructor.
      tpu: Forwarded to the parent constructor. `_run_bert_classifier` reads
        `self.tpu` to pick a TPU strategy (presumably stored by the parent
        class -- confirm against `BertBenchmarkBase`).
    """
    super(BertClassifyBenchmarkBase, self).__init__(output_dir, tpu=tpu)
    # Subclasses may set these to cap the training length. A falsy value makes
    # `_run_bert_classifier` fall back to flags / the input meta data.
    self.num_epochs = None
    self.num_steps_per_epoch = None
    FLAGS.steps_per_loop = 1

  @flagsaver.flagsaver
  def _run_bert_classifier(self, callbacks=None, use_ds=True):
    """Starts BERT classification task.

    Reads the input meta data and BERT config named by the current flags,
    derives the step counts, builds the distribution strategy and input
    functions, and runs `run_classifier.run_bert_classifier`.

    Args:
      callbacks: Passed through as `custom_callbacks` to
        `run_classifier.run_bert_classifier`.
      use_ds: Only consulted when `self.tpu` is falsy. Selects the 'mirrored'
        distribution strategy when true and 'off' otherwise.

    Returns:
      The second element of the tuple returned by
      `run_classifier.run_bert_classifier` (bound to `summary` here).
    """
    with tf.io.gfile.GFile(FLAGS.input_meta_data_path, 'rb') as reader:
      input_meta_data = json.loads(reader.read().decode('utf-8'))

    bert_config = configs.BertConfig.from_json_file(FLAGS.bert_config_file)

    # Instance-level overrides win over the flag / meta-data derived values.
    epochs = self.num_epochs or FLAGS.num_train_epochs
    if self.num_steps_per_epoch:
      steps_per_epoch = self.num_steps_per_epoch
    else:
      train_data_size = input_meta_data['train_data_size']
      steps_per_epoch = int(train_data_size / FLAGS.train_batch_size)
    # Warm up over the first 10% of all training steps.
    warmup_steps = int(epochs * steps_per_epoch * 0.1)
    # Round up so a final partial eval batch is still counted.
    eval_steps = int(
        math.ceil(input_meta_data['eval_data_size'] / FLAGS.eval_batch_size))

    if self.tpu:
      strategy = distribution_utils.get_distribution_strategy(
          distribution_strategy='tpu', tpu_address=self.tpu)
    else:
      strategy = distribution_utils.get_distribution_strategy(
          distribution_strategy='mirrored' if use_ds else 'off',
          num_gpus=self.num_gpus)

    max_seq_length = input_meta_data['max_seq_length']
    train_input_fn = run_classifier.get_dataset_fn(
        FLAGS.train_data_path,
        max_seq_length,
        FLAGS.train_batch_size,
        is_training=True)
    eval_input_fn = run_classifier.get_dataset_fn(
        FLAGS.eval_data_path,
        max_seq_length,
        FLAGS.eval_batch_size,
        is_training=False)

    _, summary = run_classifier.run_bert_classifier(
        strategy,
        bert_config,
        input_meta_data,
        FLAGS.model_dir,
        epochs,
        steps_per_epoch,
        FLAGS.steps_per_loop,
        eval_steps,
        warmup_steps,
        FLAGS.learning_rate,
        FLAGS.init_checkpoint,
        train_input_fn,
        eval_input_fn,
        training_callbacks=False,
        custom_callbacks=callbacks)
    return summary
114
115


davidmochen's avatar
davidmochen committed
116
class BertClassifyBenchmarkReal(BertClassifyBenchmarkBase):
  """Short benchmark performance tests for BERT model.

  Tests BERT classification performance in different GPU, TPU configurations.
  The naming convention of below test cases follow
  `benchmark_(number of gpus)_gpu_(dataset type)` for GPUs and
  `benchmark_(topology)_tpu_(dataset type)` for TPUs.
  """

  def __init__(self, output_dir=TMP_DIR, tpu=None, **kwargs):
    """Initializes the performance benchmark.

    Args:
      output_dir: Forwarded to the base class constructor.
      tpu: Forwarded to the base class constructor.
      **kwargs: Accepted for call-site compatibility; not used here.
    """
    super(BertClassifyBenchmarkReal, self).__init__(
        output_dir=output_dir, tpu=tpu)

    self.train_data_path = CLASSIFIER_TRAIN_DATA_PATH
    self.eval_data_path = CLASSIFIER_EVAL_DATA_PATH
    self.bert_config_file = MODEL_CONFIG_FILE_PATH
    self.input_meta_data_path = CLASSIFIER_INPUT_META_DATA_PATH

    # Since we only care about performance metrics, we limit
    # the number of training steps and epochs to prevent unnecessarily
    # long tests.
    self.num_steps_per_epoch = 100
    self.num_epochs = 1

  @benchmark_wrappers.enable_runtime_flags
  def _run_and_report_benchmark(self,
                                training_summary_path,
                                min_accuracy=0,
                                max_accuracy=1,
                                use_ds=True):
    """Starts BERT performance benchmark test.

    Args:
      training_summary_path: Not read by this method; kept so existing callers
        keep working.
      min_accuracy: Forwarded to `_report_benchmark`.
      max_accuracy: Forwarded to `_report_benchmark`.
      use_ds: Forwarded to `_run_bert_classifier`.
    """
    start_time_sec = time.time()
    summary = self._run_bert_classifier(
        callbacks=[self.timer_callback], use_ds=use_ds)
    wall_time_sec = time.time() - start_time_sec

    # Since we do not load from any pretrained checkpoints, we ignore all
    # accuracy metrics.
    summary.pop('eval_metrics', None)
    summary['start_time_sec'] = start_time_sec

    super(BertClassifyBenchmarkReal, self)._report_benchmark(
        stats=summary,
        wall_time_sec=wall_time_sec,
        min_accuracy=min_accuracy,
        max_accuracy=max_accuracy)

  def _prepare_run(self, test_name, num_gpus=None, batch_size=None):
    """Applies the flag setup shared by every benchmark in this class.

    Args:
      test_name: Name passed to `_get_model_dir` to derive `FLAGS.model_dir`.
      num_gpus: When not None, stored on `self.num_gpus`.
      batch_size: When not None, used for both the train and eval batch size
        flags; otherwise those flags are left untouched.

    Returns:
      Path of the training summary file under `FLAGS.model_dir`.
    """
    self._setup()
    if num_gpus is not None:
      self.num_gpus = num_gpus
    FLAGS.model_dir = self._get_model_dir(test_name)
    FLAGS.train_data_path = self.train_data_path
    FLAGS.eval_data_path = self.eval_data_path
    FLAGS.input_meta_data_path = self.input_meta_data_path
    FLAGS.bert_config_file = self.bert_config_file
    if batch_size is not None:
      FLAGS.train_batch_size = batch_size
      FLAGS.eval_batch_size = batch_size
    return os.path.join(FLAGS.model_dir, 'summaries/training_summary.txt')

  def benchmark_1_gpu_mrpc(self):
    """Test BERT model performance with 1 GPU."""
    summary_path = self._prepare_run(
        'benchmark_1_gpu_mrpc', num_gpus=1, batch_size=4)
    self._run_and_report_benchmark(summary_path)

  def benchmark_1_gpu_mrpc_xla(self):
    """Test BERT model performance with 1 GPU and XLA enabled."""
    summary_path = self._prepare_run(
        'benchmark_1_gpu_mrpc_xla', num_gpus=1, batch_size=4)
    FLAGS.enable_xla = True
    self._run_and_report_benchmark(summary_path)

  def benchmark_1_gpu_mrpc_no_dist_strat(self):
    """Test BERT model performance with 1 GPU, no distribution strategy."""
    summary_path = self._prepare_run(
        'benchmark_1_gpu_mrpc_no_dist_strat', num_gpus=1, batch_size=4)
    self._run_and_report_benchmark(summary_path, use_ds=False)

  @owner_utils.Owner('tf-model-garden')
  def benchmark_8_gpu_mrpc(self):
    """Test BERT model performance with 8 GPUs."""
    # NOTE(review): `self.num_gpus` and the batch-size flags are not set here;
    # this relies on defaults provided elsewhere -- confirm they match 8 GPUs.
    summary_path = self._prepare_run('benchmark_8_gpu_mrpc')
    self._run_and_report_benchmark(summary_path)

  def benchmark_1_gpu_amp_mrpc_no_dist_strat(self):
    """Performance for 1 GPU no DS with automatic mixed precision."""
    summary_path = self._prepare_run(
        'benchmark_1_gpu_amp_mrpc_no_dist_strat', num_gpus=1, batch_size=4)
    FLAGS.dtype = 'fp16'
    FLAGS.fp16_implementation = 'graph_rewrite'
    self._run_and_report_benchmark(summary_path, use_ds=False)

  def benchmark_8_gpu_amp_mrpc(self):
    """Test BERT performance with 8 GPUs and automatic mixed precision."""
    summary_path = self._prepare_run(
        'benchmark_8_gpu_amp_mrpc', num_gpus=8, batch_size=32)
    FLAGS.dtype = 'fp16'
    FLAGS.fp16_implementation = 'graph_rewrite'
    # Keep the default use_ds=True: passing use_ds=False would request the
    # 'off' distribution strategy while num_gpus is 8.
    self._run_and_report_benchmark(summary_path)

  @owner_utils.Owner('tf-model-garden')
  def benchmark_2x2_tpu_mrpc(self):
    """Test BERT model performance with 2x2 TPU."""
    summary_path = self._prepare_run('benchmark_2x2_tpu_mrpc', batch_size=32)
    FLAGS.steps_per_loop = 50
    self._run_and_report_benchmark(summary_path, use_ds=False)

286

davidmochen's avatar
davidmochen committed
287
class BertClassifyAccuracy(BertClassifyBenchmarkBase):
  """Short accuracy test for BERT model.

  Tests BERT classification task model accuracy. The naming
  convention of below test cases follow
  `benchmark_(number of gpus)_gpu_(dataset type)` format.
  """

  def __init__(self, output_dir=TMP_DIR, tpu=None, **kwargs):
    """Initializes the accuracy benchmark.

    Args:
      output_dir: Forwarded to the base class constructor.
      tpu: Forwarded to the base class constructor.
      **kwargs: Accepted for call-site compatibility; not used here.
    """
    self.train_data_path = CLASSIFIER_TRAIN_DATA_PATH
    self.eval_data_path = CLASSIFIER_EVAL_DATA_PATH
    self.bert_config_file = MODEL_CONFIG_FILE_PATH
    self.input_meta_data_path = CLASSIFIER_INPUT_META_DATA_PATH
    self.pretrained_checkpoint_path = PRETRAINED_CHECKPOINT_PATH

    super(BertClassifyAccuracy, self).__init__(output_dir=output_dir, tpu=tpu)

  @benchmark_wrappers.enable_runtime_flags
  def _run_and_report_benchmark(self,
                                training_summary_path,
                                min_accuracy=0.84,
                                max_accuracy=0.88):
    """Starts BERT accuracy benchmark test.

    Args:
      training_summary_path: Not read by this method; kept so existing callers
        keep working.
      min_accuracy: Forwarded to `_report_benchmark`.
      max_accuracy: Forwarded to `_report_benchmark`.
    """
    start_time_sec = time.time()
    summary = self._run_bert_classifier(callbacks=[self.timer_callback])
    wall_time_sec = time.time() - start_time_sec

    super(BertClassifyAccuracy, self)._report_benchmark(
        stats=summary,
        wall_time_sec=wall_time_sec,
        min_accuracy=min_accuracy,
        max_accuracy=max_accuracy)

  def _setup(self):
    """Runs the parent setup, then points the flags at the MRPC inputs."""
    super(BertClassifyAccuracy, self)._setup()
    FLAGS.train_data_path = self.train_data_path
    FLAGS.eval_data_path = self.eval_data_path
    FLAGS.input_meta_data_path = self.input_meta_data_path
    FLAGS.bert_config_file = self.bert_config_file
    # Accuracy runs start from the pretrained checkpoint.
    FLAGS.init_checkpoint = self.pretrained_checkpoint_path

  @owner_utils.Owner('tf-model-garden')
  def benchmark_8_gpu_mrpc(self):
    """Run BERT model accuracy test with 8 GPUs.

    Due to the comparatively small cardinality of the MRPC dataset, the
    training accuracy metric has high variance between trainings. We therefore
    allow a wide accuracy range (84% to 88%).
    """
    self._setup()
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_mrpc')

    summary_path = os.path.join(FLAGS.model_dir,
                                'summaries/training_summary.txt')
    self._run_and_report_benchmark(summary_path)

  def benchmark_8_gpu_mrpc_xla(self):
    """Run BERT model accuracy test with 8 GPUs with XLA."""
    self._setup()
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_mrpc_xla')
    FLAGS.enable_xla = True

    summary_path = os.path.join(FLAGS.model_dir,
                                'summaries/training_summary.txt')
    self._run_and_report_benchmark(summary_path)

  @owner_utils.Owner('tf-model-garden')
  def benchmark_2x2_tpu_mrpc(self):
    """Run BERT model accuracy test on 2x2 TPU."""
    self._setup()
    FLAGS.steps_per_loop = 50
    FLAGS.model_dir = self._get_model_dir('benchmark_2x2_tpu_mrpc')

    summary_path = os.path.join(FLAGS.model_dir,
                                'summaries/training_summary.txt')
    self._run_and_report_benchmark(summary_path)

364
365
366

# Script entry point: hand control to TensorFlow's test runner.
if __name__ == '__main__':
  tf.test.main()