bert_benchmark.py 12 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes BERT benchmarks and accuracy tests."""

import json
18
import math
19
20
21
22
23
import os
import time

from absl import flags
from absl.testing import flagsaver
24
import tensorflow as tf
25

26
from official.benchmark import benchmark_wrappers
27
from official.benchmark import bert_benchmark_utils as benchmark_utils
Jing Li's avatar
Jing Li committed
28
from official.benchmark import owner_utils
29
from official.common import distribute_utils
Le Hou's avatar
Le Hou committed
30
31
from official.legacy.bert import configs
from official.legacy.bert import run_classifier
32
33

# Cloud Storage locations of the inputs used by the benchmarks in this module.
# The URLs are kept on one line each, hence the pylint suppression.
# pylint: disable=line-too-long
# Checkpoint used as `FLAGS.init_checkpoint` by the accuracy tests.
PRETRAINED_CHECKPOINT_PATH = 'gs://cloud-tpu-checkpoints/bert/keras_bert/uncased_L-24_H-1024_A-16/bert_model.ckpt'
# MRPC classification train / eval records and their meta data file.
CLASSIFIER_TRAIN_DATA_PATH = 'gs://tf-perfzero-data/bert/classification/mrpc_train.tf_record'
CLASSIFIER_EVAL_DATA_PATH = 'gs://tf-perfzero-data/bert/classification/mrpc_eval.tf_record'
CLASSIFIER_INPUT_META_DATA_PATH = 'gs://tf-perfzero-data/bert/classification/mrpc_meta_data'
# JSON model configuration matching the checkpoint above.
MODEL_CONFIG_FILE_PATH = 'gs://cloud-tpu-checkpoints/bert/keras_bert/uncased_L-24_H-1024_A-16/bert_config.json'
# pylint: enable=line-too-long

# Default output directory for the benchmark classes; `None` when the TMPDIR
# environment variable is not set.
TMP_DIR = os.getenv('TMPDIR')
42
43
44
# Shared absl flag holder. The benchmark methods in this module configure a
# run by assigning to attributes of this object (model_dir, batch sizes, ...).
FLAGS = flags.FLAGS


davidmochen's avatar
davidmochen committed
45
class BertClassifyBenchmarkBase(benchmark_utils.BertBenchmarkBase):
  """Base class to hold methods common to test classes in the module."""

  def __init__(self, output_dir=None, tpu=None):
    """Initializes the benchmark and clears the per-test step overrides.

    Args:
      output_dir: Forwarded positionally to the parent constructor.
      tpu: Forwarded to the parent constructor as `tpu`.
    """
    super().__init__(output_dir, tpu=tpu)
    # A falsy value means "no override": `_run_bert_classifier` then falls
    # back to FLAGS.num_train_epochs and to a step count derived from the
    # training set size.
    self.num_epochs = None
    self.num_steps_per_epoch = None
    FLAGS.steps_per_loop = 1

  @flagsaver.flagsaver
  def _run_bert_classifier(self, callbacks=None, use_ds=True):
    """Starts BERT classification task.

    Reads the input meta data JSON from `FLAGS.input_meta_data_path` (keys
    used here: `train_data_size`, `eval_data_size`, `max_seq_length`), builds
    a distribution strategy and the train/eval dataset functions, and runs
    `run_classifier.run_bert_classifier`.

    NOTE(review): `self.tpu` and `self.num_gpus` are not assigned in this
    class; presumably the parent class provides them -- confirm.

    Args:
      callbacks: Passed to `run_classifier.run_bert_classifier` as
        `custom_callbacks`.
      use_ds: When `self.tpu` is falsy, selects the 'mirrored' distribution
        strategy if true and 'off' otherwise. Not consulted when `self.tpu`
        is set.

    Returns:
      The second element of the pair returned by
      `run_classifier.run_bert_classifier`.
    """
    with tf.io.gfile.GFile(FLAGS.input_meta_data_path, 'rb') as reader:
      input_meta_data = json.loads(reader.read().decode('utf-8'))

    bert_config = configs.BertConfig.from_json_file(FLAGS.bert_config_file)

    # Per-instance overrides win over the flag / dataset-derived values.
    epochs = self.num_epochs or FLAGS.num_train_epochs
    if self.num_steps_per_epoch:
      steps_per_epoch = self.num_steps_per_epoch
    else:
      train_data_size = input_meta_data['train_data_size']
      steps_per_epoch = int(train_data_size / FLAGS.train_batch_size)
    # Warmup length is 10% of the total number of training steps.
    warmup_steps = int(epochs * steps_per_epoch * 0.1)
    # Round up so that a final partial batch is still evaluated.
    eval_steps = int(
        math.ceil(input_meta_data['eval_data_size'] / FLAGS.eval_batch_size))

    if self.tpu:
      strategy = distribute_utils.get_distribution_strategy(
          distribution_strategy='tpu', tpu_address=self.tpu)
    else:
      strategy = distribute_utils.get_distribution_strategy(
          distribution_strategy='mirrored' if use_ds else 'off',
          num_gpus=self.num_gpus)

    max_seq_length = input_meta_data['max_seq_length']
    train_input_fn = run_classifier.get_dataset_fn(
        FLAGS.train_data_path,
        max_seq_length,
        FLAGS.train_batch_size,
        is_training=True)
    eval_input_fn = run_classifier.get_dataset_fn(
        FLAGS.eval_data_path,
        max_seq_length,
        FLAGS.eval_batch_size,
        is_training=False)
    _, summary = run_classifier.run_bert_classifier(
        strategy,
        bert_config,
        input_meta_data,
        FLAGS.model_dir,
        epochs,
        steps_per_epoch,
        FLAGS.steps_per_loop,
        eval_steps,
        warmup_steps,
        FLAGS.learning_rate,
        FLAGS.init_checkpoint,
        train_input_fn,
        eval_input_fn,
        training_callbacks=False,
        custom_callbacks=callbacks)
    return summary
106
107


davidmochen's avatar
davidmochen committed
108
class BertClassifyBenchmarkReal(BertClassifyBenchmarkBase):
  """Short benchmark performance tests for BERT model.

  Tests BERT classification performance in different GPU, TPU configurations.
  The naming convention of the test cases below follows
  `benchmark_(number of gpus)_gpu_(dataset type)` for GPUs and
  `benchmark_(topology)_tpu_(dataset type)` for TPUs.
  """

  def __init__(self, output_dir=TMP_DIR, tpu=None, **kwargs):
    """Sets the MRPC input locations and shortens the training run.

    Args:
      output_dir: Forwarded to the parent constructor; defaults to `TMP_DIR`.
      tpu: Forwarded to the parent constructor.
      **kwargs: Accepted and ignored.
    """
    super().__init__(output_dir=output_dir, tpu=tpu)

    self.train_data_path = CLASSIFIER_TRAIN_DATA_PATH
    self.eval_data_path = CLASSIFIER_EVAL_DATA_PATH
    self.bert_config_file = MODEL_CONFIG_FILE_PATH
    self.input_meta_data_path = CLASSIFIER_INPUT_META_DATA_PATH

    # Since we only care about performance metrics, we limit
    # the number of training steps and epochs to prevent unnecessarily
    # long tests.
    self.num_steps_per_epoch = 100
    self.num_epochs = 1

  @benchmark_wrappers.enable_runtime_flags
  def _run_and_report_benchmark(self,
                                training_summary_path,
                                min_accuracy=0,
                                max_accuracy=1,
                                use_ds=True):
    """Starts BERT performance benchmark test.

    Args:
      training_summary_path: Not read by this method; kept so existing
        callers continue to work.
      min_accuracy: Forwarded to `_report_benchmark`.
      max_accuracy: Forwarded to `_report_benchmark`.
      use_ds: Forwarded to `_run_bert_classifier`.
    """
    start_time_sec = time.time()
    summary = self._run_bert_classifier(
        callbacks=[self.timer_callback], use_ds=use_ds)
    wall_time_sec = time.time() - start_time_sec

    # Since we do not load from any pretrained checkpoints, we ignore all
    # accuracy metrics.
    summary.pop('eval_metrics', None)
    summary['start_time_sec'] = start_time_sec

    super()._report_benchmark(
        stats=summary,
        wall_time_sec=wall_time_sec,
        min_accuracy=min_accuracy,
        max_accuracy=max_accuracy)

  def _configure_mrpc_flags(self, test_name, batch_size=None):
    """Points the global flags at this benchmark's inputs and model dir.

    Args:
      test_name: Name passed to `_get_model_dir` to obtain `FLAGS.model_dir`.
      batch_size: When not None, assigned to both `FLAGS.train_batch_size`
        and `FLAGS.eval_batch_size`; when None those flags are left alone.

    Returns:
      Path of the training summary file under `FLAGS.model_dir`.
    """
    FLAGS.model_dir = self._get_model_dir(test_name)
    FLAGS.train_data_path = self.train_data_path
    FLAGS.eval_data_path = self.eval_data_path
    FLAGS.input_meta_data_path = self.input_meta_data_path
    FLAGS.bert_config_file = self.bert_config_file
    if batch_size is not None:
      FLAGS.train_batch_size = batch_size
      FLAGS.eval_batch_size = batch_size
    return os.path.join(FLAGS.model_dir, 'summaries/training_summary.txt')

  def benchmark_1_gpu_mrpc(self):
    """Test BERT model performance with 1 GPU."""
    self._setup()
    self.num_gpus = 1
    summary_path = self._configure_mrpc_flags(
        'benchmark_1_gpu_mrpc', batch_size=4)
    self._run_and_report_benchmark(summary_path)

  def benchmark_1_gpu_mrpc_xla(self):
    """Test BERT model performance with 1 GPU and XLA enabled."""
    self._setup()
    self.num_gpus = 1
    summary_path = self._configure_mrpc_flags(
        'benchmark_1_gpu_mrpc_xla', batch_size=4)
    FLAGS.enable_xla = True
    self._run_and_report_benchmark(summary_path)

  def benchmark_1_gpu_mrpc_no_dist_strat(self):
    """Test BERT model performance with 1 GPU, no distribution strategy."""
    self._setup()
    self.num_gpus = 1
    summary_path = self._configure_mrpc_flags(
        'benchmark_1_gpu_mrpc_no_dist_strat', batch_size=4)
    self._run_and_report_benchmark(summary_path, use_ds=False)

  @owner_utils.Owner('tf-model-garden')
  def benchmark_8_gpu_mrpc(self):
    """Test BERT model performance with 8 GPUs."""
    self._setup()
    # NOTE(review): neither `self.num_gpus` nor the batch-size flags are set
    # here; presumably the inherited defaults describe the 8 GPU setup --
    # confirm against the parent class.
    summary_path = self._configure_mrpc_flags('benchmark_8_gpu_mrpc')
    self._run_and_report_benchmark(summary_path)

  @owner_utils.Owner('tf-model-garden')
  def benchmark_2x2_tpu_mrpc(self):
    """Test BERT model performance with 2x2 TPU."""
    self._setup()
    FLAGS.steps_per_loop = 50
    summary_path = self._configure_mrpc_flags(
        'benchmark_2x2_tpu_mrpc', batch_size=32)
    self._run_and_report_benchmark(summary_path, use_ds=False)

240

davidmochen's avatar
davidmochen committed
241
class BertClassifyAccuracy(BertClassifyBenchmarkBase):
  """Short accuracy test for BERT model.

  Tests BERT classification task model accuracy. The naming convention of the
  test cases below follows `benchmark_(number of gpus)_gpu_(dataset type)`
  for GPUs and `benchmark_(topology)_tpu_(dataset type)` for TPUs.
  """

  def __init__(self, output_dir=TMP_DIR, tpu=None, **kwargs):
    """Records the MRPC inputs and the pretrained checkpoint to start from.

    Args:
      output_dir: Forwarded to the parent constructor; defaults to `TMP_DIR`.
      tpu: Forwarded to the parent constructor.
      **kwargs: Accepted and ignored.
    """
    self.train_data_path = CLASSIFIER_TRAIN_DATA_PATH
    self.eval_data_path = CLASSIFIER_EVAL_DATA_PATH
    self.bert_config_file = MODEL_CONFIG_FILE_PATH
    self.input_meta_data_path = CLASSIFIER_INPUT_META_DATA_PATH
    self.pretrained_checkpoint_path = PRETRAINED_CHECKPOINT_PATH

    super().__init__(output_dir=output_dir, tpu=tpu)

  @benchmark_wrappers.enable_runtime_flags
  def _run_and_report_benchmark(self,
                                training_summary_path,
                                min_accuracy=0.84,
                                max_accuracy=0.88):
    """Starts BERT accuracy benchmark test.

    Args:
      training_summary_path: Not read by this method; kept so existing
        callers continue to work.
      min_accuracy: Forwarded to `_report_benchmark`.
      max_accuracy: Forwarded to `_report_benchmark`.
    """
    start_time_sec = time.time()
    summary = self._run_bert_classifier(callbacks=[self.timer_callback])
    wall_time_sec = time.time() - start_time_sec

    super()._report_benchmark(
        stats=summary,
        wall_time_sec=wall_time_sec,
        min_accuracy=min_accuracy,
        max_accuracy=max_accuracy)

  def _setup(self):
    """Runs the parent setup, then points the flags at the MRPC inputs."""
    super()._setup()
    FLAGS.train_data_path = self.train_data_path
    FLAGS.eval_data_path = self.eval_data_path
    FLAGS.input_meta_data_path = self.input_meta_data_path
    FLAGS.bert_config_file = self.bert_config_file
    FLAGS.init_checkpoint = self.pretrained_checkpoint_path

  @owner_utils.Owner('tf-model-garden')
  def benchmark_8_gpu_mrpc(self):
    """Run BERT model accuracy test with 8 GPUs.

    Due to the comparatively small size of the MRPC dataset, the training
    accuracy metric has high variance between runs. For that reason a wide
    range of accuracy values is accepted (84% to 88%).
    """
    self._setup()
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_mrpc')

    summary_path = os.path.join(FLAGS.model_dir,
                                'summaries/training_summary.txt')
    self._run_and_report_benchmark(summary_path)

  def benchmark_8_gpu_mrpc_xla(self):
    """Run BERT model accuracy test with 8 GPUs with XLA."""
    self._setup()
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_mrpc_xla')
    FLAGS.enable_xla = True

    summary_path = os.path.join(FLAGS.model_dir,
                                'summaries/training_summary.txt')
    self._run_and_report_benchmark(summary_path)

  @owner_utils.Owner('tf-model-garden')
  def benchmark_2x2_tpu_mrpc(self):
    """Run BERT model accuracy test on 2x2 TPU."""
    self._setup()
    FLAGS.steps_per_loop = 50
    FLAGS.model_dir = self._get_model_dir('benchmark_2x2_tpu_mrpc')

    summary_path = os.path.join(FLAGS.model_dir,
                                'summaries/training_summary.txt')
    self._run_and_report_benchmark(summary_path)

318
319
320

# When executed as a script, hand control to the TensorFlow test runner.
if __name__ == '__main__':
  tf.test.main()