bert_benchmark.py 12.5 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes BERT benchmarks and accuracy tests."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import json
22
import math
23
24
25
import os
import time

26
# pylint: disable=g-bad-import-order
27
28
from absl import flags
from absl.testing import flagsaver
29
30
import tensorflow as tf
# pylint: enable=g-bad-import-order
31

32
from official.bert import modeling
33
from official.bert import run_classifier
davidmochen's avatar
davidmochen committed
34
from official.bert.benchmark import benchmark_utils
35
from official.utils.misc import distribution_utils
36
37

# Remote (GCS) locations of the artifacts consumed by the benchmarks below.
# pylint: disable=line-too-long

# Pretrained model artifacts: model configuration and matching checkpoint.
MODEL_CONFIG_FILE_PATH = 'gs://cloud-tpu-checkpoints/bert/tf_20/uncased_L-24_H-1024_A-16/bert_config'
PRETRAINED_CHECKPOINT_PATH = 'gs://cloud-tpu-checkpoints/bert/tf_20/uncased_L-24_H-1024_A-16/bert_model.ckpt'

# MRPC classification inputs: meta data plus train/eval TFRecord files.
CLASSIFIER_INPUT_META_DATA_PATH = 'gs://tf-perfzero-data/bert/classification/mrpc_meta_data'
CLASSIFIER_TRAIN_DATA_PATH = 'gs://tf-perfzero-data/bert/classification/mrpc_train.tf_record'
CLASSIFIER_EVAL_DATA_PATH = 'gs://tf-perfzero-data/bert/classification/mrpc_eval.tf_record'
# pylint: enable=line-too-long

# Module-level handle to absl's global flag container. The benchmark methods
# below read flag values from it and override them before each run.
FLAGS = flags.FLAGS


davidmochen's avatar
davidmochen committed
48
class BertClassifyBenchmarkBase(benchmark_utils.BertBenchmarkBase):
  """Base class to hold methods common to test classes in the module."""

  def __init__(self, output_dir=None):
    """Initializes the state shared by the classifier benchmarks.

    Args:
      output_dir: Forwarded unchanged to the parent class constructor.
    """
    super(BertClassifyBenchmarkBase, self).__init__(output_dir)
    # Optional overrides. While these stay falsy, `_run_bert_classifier`
    # derives the values from flags and the input meta data instead.
    self.num_epochs = None
    self.num_steps_per_epoch = None

  @flagsaver.flagsaver
  def _run_bert_classifier(self, callbacks=None, use_ds=True):
    """Starts BERT classification task.

    Reads the input meta data and the BERT config from the locations given by
    `FLAGS.input_meta_data_path` and `FLAGS.bert_config_file`, derives the
    training schedule and calls `run_classifier.run_customized_training`.

    Args:
      callbacks: Passed to the training call as `custom_callbacks`.
      use_ds: When True the 'mirrored' distribution strategy is requested,
        otherwise 'off'.
    """
    with tf.io.gfile.GFile(FLAGS.input_meta_data_path, 'rb') as reader:
      input_meta_data = json.loads(reader.read().decode('utf-8'))

    bert_config = modeling.BertConfig.from_json_file(FLAGS.bert_config_file)

    # Instance-level overrides win over the flag / meta data derived values.
    epochs = self.num_epochs if self.num_epochs else FLAGS.num_train_epochs
    if self.num_steps_per_epoch:
      steps_per_epoch = self.num_steps_per_epoch
    else:
      train_data_size = input_meta_data['train_data_size']
      steps_per_epoch = int(train_data_size / FLAGS.train_batch_size)

    # Warm up over the first 10% of all training steps.
    warmup_steps = int(epochs * steps_per_epoch * 0.1)
    # Round up so that a trailing partial eval batch still gets a step.
    eval_steps = int(
        math.ceil(input_meta_data['eval_data_size'] / FLAGS.eval_batch_size))

    # `self.num_gpus` is not assigned in this class; it is expected to be set
    # by the parent class or by the benchmark method before this call.
    strategy = distribution_utils.get_distribution_strategy(
        distribution_strategy='mirrored' if use_ds else 'off',
        num_gpus=self.num_gpus)

    steps_per_loop = 1

    run_classifier.run_customized_training(
        strategy,
        bert_config,
        input_meta_data,
        FLAGS.model_dir,
        epochs,
        steps_per_epoch,
        steps_per_loop,
        eval_steps,
        warmup_steps,
        FLAGS.learning_rate,
        FLAGS.init_checkpoint,
        custom_callbacks=callbacks)


davidmochen's avatar
davidmochen committed
93
class BertClassifyBenchmarkReal(BertClassifyBenchmarkBase):
  """Short benchmark performance tests for BERT model.

  Tests BERT classification performance in different GPU configurations.
  The naming convention of below test cases follow
  `benchmark_(number of gpus)_gpu_(dataset type)` format.
  """

  def __init__(self, output_dir=None, **kwargs):
    """Initializes the performance benchmark.

    Args:
      output_dir: Forwarded to the base class constructor.
      **kwargs: Accepted for call compatibility and otherwise ignored.
    """
    super(BertClassifyBenchmarkReal, self).__init__(output_dir=output_dir)

    self.train_data_path = CLASSIFIER_TRAIN_DATA_PATH
    self.eval_data_path = CLASSIFIER_EVAL_DATA_PATH
    self.bert_config_file = MODEL_CONFIG_FILE_PATH
    self.input_meta_data_path = CLASSIFIER_INPUT_META_DATA_PATH

    # Since we only care about performance metrics, we limit
    # the number of training steps and epochs to prevent unnecessarily
    # long tests.
    self.num_steps_per_epoch = 110
    self.num_epochs = 1

  def _prepare_run(self,
                   test_name,
                   num_gpus,
                   train_batch_size=None,
                   eval_batch_size=None):
    """Resets state and applies the flag overrides shared by every test.

    Args:
      test_name: Name of the benchmark; used to derive `FLAGS.model_dir`.
      num_gpus: Value stored in `self.num_gpus` for this run.
      train_batch_size: If not None, assigned to `FLAGS.train_batch_size`.
      eval_batch_size: If not None, assigned to `FLAGS.eval_batch_size`.

    Returns:
      Path of the `training_summary.txt` file inside the model directory.
    """
    self._setup()
    self.num_gpus = num_gpus
    FLAGS.model_dir = self._get_model_dir(test_name)
    FLAGS.train_data_path = self.train_data_path
    FLAGS.eval_data_path = self.eval_data_path
    FLAGS.input_meta_data_path = self.input_meta_data_path
    FLAGS.bert_config_file = self.bert_config_file
    # Batch size flags are only touched when the test asks for it, so tests
    # that never set them keep whatever value `_setup()` left in place.
    if train_batch_size is not None:
      FLAGS.train_batch_size = train_batch_size
    if eval_batch_size is not None:
      FLAGS.eval_batch_size = eval_batch_size
    return os.path.join(FLAGS.model_dir, 'training_summary.txt')

  def _run_and_report_benchmark(self,
                                training_summary_path,
                                min_accuracy=0,
                                max_accuracy=1,
                                use_ds=True):
    """Starts BERT performance benchmark test.

    Args:
      training_summary_path: Location of the JSON training summary that is
        read back after the run.
      min_accuracy: Forwarded to `_report_benchmark`.
      max_accuracy: Forwarded to `_report_benchmark`.
      use_ds: Forwarded to `_run_bert_classifier`; False requests the 'off'
        distribution strategy.
    """
    start_time_sec = time.time()
    self._run_bert_classifier(callbacks=[self.timer_callback], use_ds=use_ds)
    wall_time_sec = time.time() - start_time_sec

    with tf.io.gfile.GFile(training_summary_path, 'rb') as reader:
      summary = json.loads(reader.read().decode('utf-8'))

    # Since we do not load from any pretrained checkpoints, we ignore all
    # accuracy metrics.
    summary.pop('eval_metrics', None)
    super(BertClassifyBenchmarkReal, self)._report_benchmark(
        stats=summary,
        wall_time_sec=wall_time_sec,
        min_accuracy=min_accuracy,
        max_accuracy=max_accuracy)

  def benchmark_1_gpu_mrpc(self):
    """Test BERT model performance with 1 GPU."""
    summary_path = self._prepare_run(
        'benchmark_1_gpu_mrpc',
        num_gpus=1,
        train_batch_size=4,
        eval_batch_size=4)
    self._run_and_report_benchmark(summary_path)

  def benchmark_1_gpu_mrpc_xla(self):
    """Test BERT model performance with 1 GPU and XLA enabled."""
    summary_path = self._prepare_run(
        'benchmark_1_gpu_mrpc_xla',
        num_gpus=1,
        train_batch_size=4,
        eval_batch_size=4)
    FLAGS.enable_xla = True
    self._run_and_report_benchmark(summary_path)

  def benchmark_1_gpu_mrpc_no_dist_strat(self):
    """Test BERT model performance with 1 GPU, no distribution strategy."""
    summary_path = self._prepare_run(
        'benchmark_1_gpu_mrpc_no_dist_strat',
        num_gpus=1,
        train_batch_size=4,
        eval_batch_size=4)
    self._run_and_report_benchmark(summary_path, use_ds=False)

  def benchmark_2_gpu_mrpc(self):
    """Test BERT model performance with 2 GPUs."""
    summary_path = self._prepare_run(
        'benchmark_2_gpu_mrpc',
        num_gpus=2,
        train_batch_size=8,
        eval_batch_size=8)
    self._run_and_report_benchmark(summary_path)

  def benchmark_4_gpu_mrpc(self):
    """Test BERT model performance with 4 GPUs."""
    # Only the train batch size is overridden for this configuration.
    summary_path = self._prepare_run(
        'benchmark_4_gpu_mrpc', num_gpus=4, train_batch_size=16)
    self._run_and_report_benchmark(summary_path)

  def benchmark_8_gpu_mrpc(self):
    """Test BERT model performance with 8 GPUs."""
    # The GPU count is set explicitly, like in every other test of this
    # class, so the run does not depend on a value left on the instance.
    # Batch size flags are intentionally not overridden here.
    summary_path = self._prepare_run('benchmark_8_gpu_mrpc', num_gpus=8)
    self._run_and_report_benchmark(summary_path)

  def benchmark_1_gpu_amp_mrpc_no_dist_strat(self):
    """Test BERT model performance with 1 GPU and automatic mixed precision.

    Runs without a distribution strategy.
    """
    summary_path = self._prepare_run(
        'benchmark_1_gpu_amp_mrpc_no_dist_strat',
        num_gpus=1,
        train_batch_size=4,
        eval_batch_size=4)
    FLAGS.dtype = 'fp16'
    FLAGS.fp16_implementation = 'graph_rewrite'
    self._run_and_report_benchmark(summary_path, use_ds=False)

  def benchmark_8_gpu_amp_mrpc(self):
    """Test BERT model performance with 8 GPUs and automatic mixed precision."""
    summary_path = self._prepare_run(
        'benchmark_8_gpu_amp_mrpc',
        num_gpus=8,
        train_batch_size=32,
        eval_batch_size=32)
    FLAGS.dtype = 'fp16'
    FLAGS.fp16_implementation = 'graph_rewrite'
    # Keep the default (mirrored) strategy: requesting 'off' together with
    # eight GPUs would not give a distributed 8-GPU run.
    self._run_and_report_benchmark(summary_path)
267

davidmochen's avatar
davidmochen committed
268
class BertClassifyAccuracy(BertClassifyBenchmarkBase):
  """Short accuracy test for BERT model.

  Tests BERT classification task model accuracy. The naming
  convention of below test cases follow
  `benchmark_(number of gpus)_gpu_(dataset type)` format.
  """

  def __init__(self, output_dir=None, **kwargs):
    """Initializes the accuracy benchmark.

    Args:
      output_dir: Forwarded to the base class constructor.
      **kwargs: Accepted for call compatibility and otherwise ignored.
    """
    self.train_data_path = CLASSIFIER_TRAIN_DATA_PATH
    self.eval_data_path = CLASSIFIER_EVAL_DATA_PATH
    self.bert_config_file = MODEL_CONFIG_FILE_PATH
    self.input_meta_data_path = CLASSIFIER_INPUT_META_DATA_PATH
    self.pretrained_checkpoint_path = PRETRAINED_CHECKPOINT_PATH

    super(BertClassifyAccuracy, self).__init__(output_dir=output_dir)

  def _run_and_report_benchmark(self,
                                training_summary_path,
                                min_accuracy=0.84,
                                max_accuracy=0.88):
    """Starts BERT accuracy benchmark test.

    Args:
      training_summary_path: Location of the JSON training summary that is
        read back after the run.
      min_accuracy: Forwarded to `_report_benchmark`.
      max_accuracy: Forwarded to `_report_benchmark`.
    """
    start_time_sec = time.time()
    self._run_bert_classifier(callbacks=[self.timer_callback])
    wall_time_sec = time.time() - start_time_sec

    with tf.io.gfile.GFile(training_summary_path, 'rb') as reader:
      summary = json.loads(reader.read().decode('utf-8'))

    # Unlike the performance benchmark, the full summary (including any
    # evaluation metrics it contains) is reported.
    super(BertClassifyAccuracy, self)._report_benchmark(
        stats=summary,
        wall_time_sec=wall_time_sec,
        min_accuracy=min_accuracy,
        max_accuracy=max_accuracy)

  def _setup(self):
    """Runs the inherited setup, then points the flags at the MRPC inputs."""
    super(BertClassifyAccuracy, self)._setup()
    FLAGS.train_data_path = self.train_data_path
    FLAGS.eval_data_path = self.eval_data_path
    FLAGS.input_meta_data_path = self.input_meta_data_path
    FLAGS.bert_config_file = self.bert_config_file
    # Accuracy runs start from the pretrained checkpoint.
    FLAGS.init_checkpoint = self.pretrained_checkpoint_path

  def benchmark_8_gpu_mrpc(self):
    """Run BERT model accuracy test with 8 GPUs.

    Due to comparatively small cardinality of MRPC dataset, training
    accuracy metric has high variance between trainings. As so, we
    set the wide range of allowed accuracy (84% to 88%).
    """
    self._setup()
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_mrpc')

    summary_path = os.path.join(FLAGS.model_dir, 'training_summary.txt')
    self._run_and_report_benchmark(summary_path)

  def benchmark_8_gpu_mrpc_xla(self):
    """Run BERT model accuracy test with 8 GPUs with XLA."""
    self._setup()
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_mrpc_xla')
    FLAGS.enable_xla = True

    summary_path = os.path.join(FLAGS.model_dir, 'training_summary.txt')
    self._run_and_report_benchmark(summary_path)
332

333
334
335

# Hand control to TensorFlow's test runner when the module is run as a script.
if __name__ == '__main__':
  tf.test.main()