"docs/vscode:/vscode.git/clone" did not exist on "6e95f5e5bd24b8d0fa269865b8a89cb3f8dc2491"
bert_benchmark.py 14.4 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes BERT benchmarks and accuracy tests."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

Rajagopal Ananthanarayanan's avatar
Rajagopal Ananthanarayanan committed
21
import functools
22
import json
23
import math
24
25
26
import os
import time

27
# pylint: disable=g-bad-import-order
28
29
from absl import flags
from absl.testing import flagsaver
30
import tensorflow as tf
31
# pylint: enable=g-bad-import-order
32

33
from official.benchmark import bert_benchmark_utils as benchmark_utils
34
from official.nlp.bert import configs
35
from official.nlp.bert import run_classifier
36
from official.utils.misc import distribution_utils
37
from official.utils.testing import benchmark_wrappers
38
39

# pylint: disable=line-too-long
# Pretrained BERT checkpoint; the accuracy tests in this module use it as the
# initial checkpoint for fine-tuning.
PRETRAINED_CHECKPOINT_PATH = 'gs://cloud-tpu-checkpoints/bert/keras_bert/uncased_L-24_H-1024_A-16/bert_model.ckpt'
# MRPC classification inputs: training records, evaluation records, and a JSON
# metadata file from which this module reads `train_data_size`,
# `eval_data_size` and `max_seq_length`.
CLASSIFIER_TRAIN_DATA_PATH = 'gs://tf-perfzero-data/bert/classification/mrpc_train.tf_record'
CLASSIFIER_EVAL_DATA_PATH = 'gs://tf-perfzero-data/bert/classification/mrpc_eval.tf_record'
CLASSIFIER_INPUT_META_DATA_PATH = 'gs://tf-perfzero-data/bert/classification/mrpc_meta_data'
# BERT model configuration JSON, stored next to the checkpoint above.
MODEL_CONFIG_FILE_PATH = 'gs://cloud-tpu-checkpoints/bert/keras_bert/uncased_L-24_H-1024_A-16/bert_config.json'
# pylint: enable=line-too-long

# Default `output_dir` for the benchmark classes. `os.getenv` returns None
# when TMPDIR is not set in the environment.
TMP_DIR = os.getenv('TMPDIR')
48
49
50
# Module-level alias for absl's flag container; the benchmarks in this module
# read and override flag values through it.
FLAGS = flags.FLAGS


davidmochen's avatar
davidmochen committed
51
class BertClassifyBenchmarkBase(benchmark_utils.BertBenchmarkBase):
  """Shared plumbing for the BERT classification benchmark classes.

  Attributes:
    num_epochs: Optional epoch-count override. When falsy,
      `FLAGS.num_train_epochs` is used instead.
    num_steps_per_epoch: Optional steps-per-epoch override. When falsy, the
      value is derived from the input metadata and `FLAGS.train_batch_size`.
    tpu: TPU address passed to the distribution strategy, or None to use the
      GPU / no-strategy path.
  """

  def __init__(self, output_dir=None, tpu=None):
    super(BertClassifyBenchmarkBase, self).__init__(output_dir)
    self.tpu = tpu
    self.num_epochs = None
    self.num_steps_per_epoch = None

  @flagsaver.flagsaver
  def _run_bert_classifier(self, callbacks=None, use_ds=True):
    """Runs the BERT classification task configured by the current FLAGS.

    Args:
      callbacks: Optional callbacks, forwarded to
        `run_classifier.run_bert_classifier` as `custom_callbacks`.
      use_ds: Only consulted when `self.tpu` is falsy. True selects the
        'mirrored' distribution strategy, False selects 'off'.
    """
    with tf.io.gfile.GFile(FLAGS.input_meta_data_path, 'rb') as meta_file:
      raw_meta = meta_file.read()
    input_meta_data = json.loads(raw_meta.decode('utf-8'))

    bert_config = configs.BertConfig.from_json_file(FLAGS.bert_config_file)

    # Per-instance overrides take precedence over flag / metadata values.
    epochs = self.num_epochs or FLAGS.num_train_epochs
    steps_per_epoch = self.num_steps_per_epoch
    if not steps_per_epoch:
      steps_per_epoch = int(
          input_meta_data['train_data_size'] / FLAGS.train_batch_size)
    # warmup_steps is 10% of the total number of training steps.
    warmup_steps = int(epochs * steps_per_epoch * 0.1)
    eval_steps = int(
        math.ceil(input_meta_data['eval_data_size'] / FLAGS.eval_batch_size))

    if self.tpu:
      strategy_args = {
          'distribution_strategy': 'tpu',
          'tpu_address': self.tpu,
      }
    else:
      strategy_args = {
          'distribution_strategy': 'mirrored' if use_ds else 'off',
          'num_gpus': self.num_gpus,
      }
    strategy = distribution_utils.get_distribution_strategy(**strategy_args)

    steps_per_loop = 100

    max_seq_length = input_meta_data['max_seq_length']
    train_input_fn = run_classifier.get_dataset_fn(
        FLAGS.train_data_path, max_seq_length, FLAGS.train_batch_size,
        is_training=True)
    eval_input_fn = run_classifier.get_dataset_fn(
        FLAGS.eval_data_path, max_seq_length, FLAGS.eval_batch_size,
        is_training=False)

    run_classifier.run_bert_classifier(
        strategy, bert_config, input_meta_data, FLAGS.model_dir, epochs,
        steps_per_epoch, steps_per_loop, eval_steps, warmup_steps,
        FLAGS.learning_rate, FLAGS.init_checkpoint, train_input_fn,
        eval_input_fn, custom_callbacks=callbacks)


davidmochen's avatar
davidmochen committed
114
class BertClassifyBenchmarkReal(BertClassifyBenchmarkBase):
  """Short benchmark performance tests for BERT model.

  Tests BERT classification performance in different GPU, TPU configurations.
  The naming convention of the test cases below follows
  `benchmark_(number of gpus)_gpu_(dataset type)` for GPUs and
  `benchmark_(topology)_tpu_(dataset type)` for TPUs.
  """

  def __init__(self, output_dir=TMP_DIR, tpu=None, **kwargs):
    """Initializes the benchmark with the MRPC inputs and a short schedule.

    Args:
      output_dir: Directory handed to the base class for benchmark output.
      tpu: Optional TPU address handed to the base class.
      **kwargs: Accepted for call-site compatibility and otherwise unused.
    """
    super(BertClassifyBenchmarkReal, self).__init__(
        output_dir=output_dir, tpu=tpu)

    self.train_data_path = CLASSIFIER_TRAIN_DATA_PATH
    self.eval_data_path = CLASSIFIER_EVAL_DATA_PATH
    self.bert_config_file = MODEL_CONFIG_FILE_PATH
    self.input_meta_data_path = CLASSIFIER_INPUT_META_DATA_PATH

    # Since we only care about performance metrics, we limit
    # the number of training steps and epochs to prevent unnecessarily
    # long tests.
    self.num_steps_per_epoch = 110
    self.num_epochs = 1

  def _setup(self):
    """Runs the inherited setup, then points the input flags at MRPC data.

    Every benchmark in this class uses the same data, metadata and model
    config, so those flags are set here once instead of in each test case.
    """
    super(BertClassifyBenchmarkReal, self)._setup()
    FLAGS.train_data_path = self.train_data_path
    FLAGS.eval_data_path = self.eval_data_path
    FLAGS.input_meta_data_path = self.input_meta_data_path
    FLAGS.bert_config_file = self.bert_config_file

  @benchmark_wrappers.enable_runtime_flags
  def _run_and_report_benchmark(self,
                                training_summary_path,
                                min_accuracy=0,
                                max_accuracy=1,
                                use_ds=True):
    """Starts BERT performance benchmark test.

    Args:
      training_summary_path: Path of the JSON training summary that is read
        back once the classifier run has finished.
      min_accuracy: Lower accuracy bound forwarded to `_report_benchmark`.
      max_accuracy: Upper accuracy bound forwarded to `_report_benchmark`.
      use_ds: Forwarded to `_run_bert_classifier`; False turns the
        distribution strategy off on the non-TPU path.
    """
    start_time_sec = time.time()
    self._run_bert_classifier(callbacks=[self.timer_callback], use_ds=use_ds)
    wall_time_sec = time.time() - start_time_sec

    with tf.io.gfile.GFile(training_summary_path, 'rb') as reader:
      summary = json.loads(reader.read().decode('utf-8'))

    # Since we do not load from any pretrained checkpoints, we ignore all
    # accuracy metrics.
    summary.pop('eval_metrics', None)
    summary['start_time_sec'] = start_time_sec

    super(BertClassifyBenchmarkReal, self)._report_benchmark(
        stats=summary,
        wall_time_sec=wall_time_sec,
        min_accuracy=min_accuracy,
        max_accuracy=max_accuracy)

  def benchmark_1_gpu_mrpc(self):
    """Test BERT model performance with 1 GPU."""

    self._setup()
    self.num_gpus = 1
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_mrpc')
    FLAGS.train_batch_size = 4
    FLAGS.eval_batch_size = 4

    summary_path = os.path.join(FLAGS.model_dir,
                                'summaries/training_summary.txt')
    self._run_and_report_benchmark(summary_path)

  def benchmark_1_gpu_mrpc_xla(self):
    """Test BERT model performance with 1 GPU and XLA enabled."""

    self._setup()
    self.num_gpus = 1
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_mrpc_xla')
    FLAGS.train_batch_size = 4
    FLAGS.eval_batch_size = 4
    FLAGS.enable_xla = True

    summary_path = os.path.join(FLAGS.model_dir,
                                'summaries/training_summary.txt')
    self._run_and_report_benchmark(summary_path)

  def benchmark_1_gpu_mrpc_no_dist_strat(self):
    """Test BERT model performance with 1 GPU, no distribution strategy."""

    self._setup()
    self.num_gpus = 1
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_mrpc_no_dist_strat')
    FLAGS.train_batch_size = 4
    FLAGS.eval_batch_size = 4

    summary_path = os.path.join(FLAGS.model_dir,
                                'summaries/training_summary.txt')
    self._run_and_report_benchmark(summary_path, use_ds=False)

  def benchmark_2_gpu_mrpc(self):
    """Test BERT model performance with 2 GPUs."""

    self._setup()
    self.num_gpus = 2
    FLAGS.model_dir = self._get_model_dir('benchmark_2_gpu_mrpc')
    FLAGS.train_batch_size = 8
    FLAGS.eval_batch_size = 8

    summary_path = os.path.join(FLAGS.model_dir,
                                'summaries/training_summary.txt')
    self._run_and_report_benchmark(summary_path)

  def benchmark_4_gpu_mrpc(self):
    """Test BERT model performance with 4 GPUs."""

    self._setup()
    self.num_gpus = 4
    FLAGS.model_dir = self._get_model_dir('benchmark_4_gpu_mrpc')
    # NOTE(review): only the train batch size is overridden here, so
    # eval_batch_size keeps whatever value `_setup()` left in place - confirm
    # this is intended, since the 1- and 2-GPU cases set both.
    FLAGS.train_batch_size = 16

    summary_path = os.path.join(FLAGS.model_dir,
                                'summaries/training_summary.txt')
    self._run_and_report_benchmark(summary_path)

  def benchmark_8_gpu_mrpc(self):
    """Test BERT model performance with 8 GPUs."""

    self._setup()
    # NOTE(review): num_gpus and the batch sizes are not overridden here; this
    # case presumably relies on values provided by the base class / flag
    # defaults - confirm they correspond to 8 GPUs.
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_mrpc')

    summary_path = os.path.join(FLAGS.model_dir,
                                'summaries/training_summary.txt')
    self._run_and_report_benchmark(summary_path)

  def benchmark_1_gpu_amp_mrpc_no_dist_strat(self):
    """Performance for 1 GPU no DS with automatic mixed precision."""

    self._setup()
    self.num_gpus = 1
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_1_gpu_amp_mrpc_no_dist_strat')
    FLAGS.train_batch_size = 4
    FLAGS.eval_batch_size = 4
    FLAGS.dtype = 'fp16'
    FLAGS.fp16_implementation = 'graph_rewrite'

    summary_path = os.path.join(FLAGS.model_dir,
                                'summaries/training_summary.txt')
    self._run_and_report_benchmark(summary_path, use_ds=False)

  def benchmark_8_gpu_amp_mrpc(self):
    """Test BERT model performance with 8 GPUs with automatic mixed precision.
    """

    self._setup()
    self.num_gpus = 8
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_amp_mrpc')
    FLAGS.train_batch_size = 32
    FLAGS.eval_batch_size = 32
    FLAGS.dtype = 'fp16'
    FLAGS.fp16_implementation = 'graph_rewrite'

    summary_path = os.path.join(FLAGS.model_dir,
                                'summaries/training_summary.txt')
    # Keep the default distribution strategy: with `use_ds=False` the strategy
    # would be 'off', which contradicts requesting 8 GPUs above. Only the
    # `*_no_dist_strat` cases disable it.
    self._run_and_report_benchmark(summary_path)

  def benchmark_2x2_tpu_mrpc(self):
    """Test BERT model performance with 2x2 TPU."""

    self._setup()
    FLAGS.model_dir = self._get_model_dir('benchmark_2x2_tpu_mrpc')
    FLAGS.train_batch_size = 32
    FLAGS.eval_batch_size = 32

    summary_path = os.path.join(FLAGS.model_dir,
                                'summaries/training_summary.txt')
    # `use_ds` is only consulted when no TPU address was given; with `tpu` set
    # the TPU strategy is selected regardless of this argument.
    self._run_and_report_benchmark(summary_path, use_ds=False)

317

davidmochen's avatar
davidmochen committed
318
class BertClassifyAccuracy(BertClassifyBenchmarkBase):
  """Short accuracy tests for the BERT classification model.

  Each case fine-tunes from the pretrained checkpoint and reports the result
  against an accepted accuracy range. Test names follow the
  `benchmark_(number of gpus)_gpu_(dataset type)` format.
  """

  def __init__(self, output_dir=TMP_DIR, **kwargs):
    # Extra keyword arguments are accepted and not used.
    self.pretrained_checkpoint_path = PRETRAINED_CHECKPOINT_PATH
    self.input_meta_data_path = CLASSIFIER_INPUT_META_DATA_PATH
    self.bert_config_file = MODEL_CONFIG_FILE_PATH
    self.eval_data_path = CLASSIFIER_EVAL_DATA_PATH
    self.train_data_path = CLASSIFIER_TRAIN_DATA_PATH

    super(BertClassifyAccuracy, self).__init__(output_dir=output_dir)

  @benchmark_wrappers.enable_runtime_flags
  def _run_and_report_benchmark(self,
                                training_summary_path,
                                min_accuracy=0.84,
                                max_accuracy=0.88):
    """Runs the classifier and reports its training summary.

    Args:
      training_summary_path: Path of the JSON training summary that is read
        back once the classifier run has finished.
      min_accuracy: Lower accuracy bound forwarded to `_report_benchmark`.
      max_accuracy: Upper accuracy bound forwarded to `_report_benchmark`.
    """
    began_at = time.time()
    self._run_bert_classifier(callbacks=[self.timer_callback])
    elapsed_sec = time.time() - began_at

    with tf.io.gfile.GFile(training_summary_path, 'rb') as summary_file:
      raw_summary = summary_file.read()
    training_stats = json.loads(raw_summary.decode('utf-8'))

    report_args = {
        'stats': training_stats,
        'wall_time_sec': elapsed_sec,
        'min_accuracy': min_accuracy,
        'max_accuracy': max_accuracy,
    }
    super(BertClassifyAccuracy, self)._report_benchmark(**report_args)

  def _setup(self):
    """Runs the inherited setup, then copies this instance's inputs to FLAGS."""
    super(BertClassifyAccuracy, self)._setup()
    # These flags share their name with the attribute holding their value.
    for flag_name in ('train_data_path', 'eval_data_path',
                      'input_meta_data_path', 'bert_config_file'):
      setattr(FLAGS, flag_name, getattr(self, flag_name))
    FLAGS.init_checkpoint = self.pretrained_checkpoint_path

  def benchmark_8_gpu_mrpc(self):
    """Run BERT model accuracy test with 8 GPUs.

    MRPC is a comparatively small dataset, so the accuracy metric varies a lot
    from one training run to the next. The accepted accuracy range is
    therefore kept wide (84% to 88%).
    """
    self._setup()
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_mrpc')

    training_summary = os.path.join(FLAGS.model_dir,
                                    'summaries/training_summary.txt')
    self._run_and_report_benchmark(training_summary)

  def benchmark_8_gpu_mrpc_xla(self):
    """Run BERT model accuracy test with 8 GPUs and XLA enabled."""
    self._setup()
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_mrpc_xla')
    FLAGS.enable_xla = True

    training_summary = os.path.join(FLAGS.model_dir,
                                    'summaries/training_summary.txt')
    self._run_and_report_benchmark(training_summary)
385

386
387
388

# When this module is executed as a script, hand control to `tf.test.main()`.
if __name__ == '__main__':
  tf.test.main()