# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes Keras benchmarks and accuracy tests."""
from __future__ import print_function

import os
import time

from absl import flags
import tensorflow as tf  # pylint: disable=g-bad-import-order

from official.resnet import imagenet_main
from official.resnet.keras import keras_benchmark
from official.resnet.keras import keras_common
from official.resnet.keras import keras_imagenet_main

MIN_TOP_1_ACCURACY = 0.76
MAX_TOP_1_ACCURACY = 0.77
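# ResNet50 trained for 90 epochs on ImageNet typically converges to roughly
# 76-77% top-1 accuracy; these bounds are passed to _report_benchmark as
# top_1_min/top_1_max so that runs falling outside the window can be flagged.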

FLAGS = flags.FLAGS


class Resnet50KerasAccuracy(keras_benchmark.KerasBenchmark):
  """Benchmark accuracy tests for ResNet50 in Keras."""

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    """A benchmark class.

    Args:
      output_dir: directory where output such as log files should be written.
      root_data_dir: directory under which to look for the dataset.
      **kwargs: arbitrary named arguments. This is needed to make the
                constructor forward compatible in case PerfZero provides more
                named arguments before updating the constructor.
    """

    flag_methods = [
        keras_common.define_keras_flags,
        lambda: imagenet_main.define_imagenet_flags(dynamic_loss_scale=True)
    ]

    self.data_dir = os.path.join(root_data_dir, 'imagenet')
    super(Resnet50KerasAccuracy, self).__init__(
        output_dir=output_dir, flag_methods=flag_methods)

  def benchmark_graph_8_gpu(self):
    """Test Keras model with Keras fit/dist_strat and 8 GPUs."""
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 128 * 8
    FLAGS.train_epochs = 90
    FLAGS.epochs_between_evals = 10
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_8_gpu')
    FLAGS.dtype = 'fp32'
    FLAGS.use_tensor_lr = True
    self._run_and_report_benchmark()

  def benchmark_8_gpu(self):
    """Test Keras model with eager, dist_strat and 8 GPUs."""
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 128 * 8
    FLAGS.train_epochs = 90
    FLAGS.epochs_between_evals = 10
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu')
    FLAGS.dtype = 'fp32'
    FLAGS.enable_eager = True
    # Add some thread tuning to improve performance.
    FLAGS.datasets_num_private_threads = 14
    FLAGS.use_tensor_lr = True
    self._run_and_report_benchmark()

  def benchmark_8_gpu_fp16(self):
    """Test Keras model with eager, dist_strat, 8 GPUs, and fp16."""
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 256 * 8
    FLAGS.train_epochs = 90
    FLAGS.epochs_between_evals = 10
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16')
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    # Thread tuning to improve performance: 'gpu_private' gives each GPU a
    # dedicated private threadpool for launching its ops.
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.use_tensor_lr = True
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_fp16(self):
    """Test Keras model with XLA, eager, dist_strat, 8 GPUs and fp16."""
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 256 * 8
    FLAGS.train_epochs = 90
    FLAGS.epochs_between_evals = 10
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_fp16')
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    # Thread tuning to improve performance.
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.use_tensor_lr = True
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_fp16_dynamic(self):
    """Test Keras model with XLA, eager, dist_strat, 8 GPUs, dynamic fp16."""
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 256 * 8
    FLAGS.train_epochs = 90
    FLAGS.epochs_between_evals = 10
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_fp16_dynamic')
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.loss_scale = 'dynamic'
    # Thread tuning to improve performance.
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.use_tensor_lr = True
    self._run_and_report_benchmark()

  def _run_and_report_benchmark(self):
    start_time_sec = time.time()
    stats = keras_imagenet_main.run(FLAGS)
    wall_time_sec = time.time() - start_time_sec

    super(Resnet50KerasAccuracy, self)._report_benchmark(
        stats,
        wall_time_sec,
        top_1_min=MIN_TOP_1_ACCURACY,
        top_1_max=MAX_TOP_1_ACCURACY,
        total_batch_size=FLAGS.batch_size,
        log_steps=100)

  def _get_model_dir(self, folder_name):
    return os.path.join(self.output_dir, folder_name)


class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
  """Resnet50 benchmarks."""

  def __init__(self, output_dir=None, default_flags=None):
    flag_methods = [
        keras_common.define_keras_flags,
        lambda: imagenet_main.define_imagenet_flags(dynamic_loss_scale=True)
    ]

    super(Resnet50KerasBenchmarkBase, self).__init__(
        output_dir=output_dir,
        flag_methods=flag_methods,
        default_flags=default_flags)

  def _run_and_report_benchmark(self):
    start_time_sec = time.time()
    stats = keras_imagenet_main.run(FLAGS)
    wall_time_sec = time.time() - start_time_sec

    # Number of logged step-time entries to exclude from the performance
    # report; this keeps only the results from the last 100 batches.
    warmup = (FLAGS.train_steps - 100) // FLAGS.log_steps
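    # For example, with the defaults used by the benchmark classes below
    # (train_steps=110, log_steps=10), warmup = (110 - 100) // 10 = 1 and the
    # first logged entry is dropped as warmup.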

    super(Resnet50KerasBenchmarkBase, self)._report_benchmark(
        stats,
        wall_time_sec,
        total_batch_size=FLAGS.batch_size,
        log_steps=FLAGS.log_steps,
        warmup=warmup)

  def benchmark_1_gpu_no_dist_strat(self):
    """Test Keras model with 1 GPU, no distribution strategy."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_no_dist_strat')
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_run_eagerly(self):
    """Test Keras model with 1 GPU, no distribution strategy, run eagerly."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_1_gpu_no_dist_strat_run_eagerly')
    FLAGS.batch_size = 64
    self._run_and_report_benchmark()

  def benchmark_graph_1_gpu_no_dist_strat(self):
    """Test Keras model in legacy graph mode with 1 GPU, no dist strat."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = False
    FLAGS.distribution_strategy = 'off'
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu_no_dist_strat')
    FLAGS.batch_size = 96  # BatchNorm is less efficient in legacy graph mode
                           # due to its reliance on v1 cond.
    self._run_and_report_benchmark()

  def benchmark_1_gpu(self):
    """Test Keras model with 1 GPU."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu')
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu(self):
    """Test Keras model with XLA and 1 GPU."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu')
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

  def benchmark_1_gpu_fp16(self):
    """Test Keras model with 1 GPU and fp16."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_fp16')
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = 256
    self._run_and_report_benchmark()

  def benchmark_1_gpu_fp16_dynamic(self):
    """Test Keras model with 1 GPU, fp16, and dynamic loss scaling."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_fp16_dynamic')
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = 256
    FLAGS.loss_scale = 'dynamic'
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu_fp16(self):
    """Test Keras model with XLA, 1 GPU and fp16."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_fp16')
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = 256
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu_fp16_tweaked(self):
    """Test Keras model with XLA, 1 GPU, fp16, and manual config tuning."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_fp16_tweaked')
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = 256
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.data_delay_prefetch = True
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu_fp16_slack(self):
    """Test Keras model with XLA, 1 GPU, fp16, and tf.data's experimental_slack
       functionality."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_fp16_slack')
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = 256
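    # experimental_slack asks tf.data to add slack to the final prefetch in
    # the pipeline, which can reduce CPU contention at the start of a step.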
    FLAGS.tf_data_experimental_slack = True
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu_fp16_dynamic(self):
    """Test Keras model with XLA, 1 GPU, fp16, and dynamic loss scaling."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_fp16_dynamic')
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = 256
    FLAGS.loss_scale = 'dynamic'
    self._run_and_report_benchmark()

  def benchmark_graph_1_gpu(self):
    """Test Keras model in legacy graph mode with 1 GPU."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = False
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu')
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

  def benchmark_graph_xla_1_gpu(self):
    """Test Keras model in legacy graph mode with XLA and 1 GPU."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = False
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_xla_1_gpu')
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

  def benchmark_graph_1_gpu_fp16(self):
    """Test Keras model in legacy graph mode with 1 GPU and fp16."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = False
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu_fp16')
    FLAGS.batch_size = 256
    self._run_and_report_benchmark()

  def benchmark_graph_xla_1_gpu_fp16(self):
    """Test Keras model in legacy graph mode with 1 GPU, fp16 and XLA."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = False
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_xla_1_gpu_fp16')
    FLAGS.batch_size = 256
    self._run_and_report_benchmark()

  def benchmark_graph_xla_1_gpu_fp16_tweaked(self):
    """Test Keras model in legacy graph mode with 1 GPU, fp16, XLA, and manual
       config tuning.
    """
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = False
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_graph_xla_1_gpu_fp16_tweaked')
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = 256
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    self._run_and_report_benchmark()

  def benchmark_graph_xla_1_gpu_fp16_slack(self):
    """Test Keras model in legacy graph mode with 1 GPU, fp16, XLA, and
       tf.data's experimental_slack functionality.
    """
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = False
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_graph_xla_1_gpu_fp16_slack')
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = 256
    FLAGS.tf_data_experimental_slack = True
    self._run_and_report_benchmark()

  def benchmark_8_gpu(self):
    """Test Keras model with 8 GPUs."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu')
    FLAGS.batch_size = 128 * 8  # 8 GPUs
    self._run_and_report_benchmark()

  def benchmark_8_gpu_cloning(self):
    """Test Keras model with 8 GPUs and cloning."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.clone_model_in_keras_dist_strat = True
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_cloning')
    FLAGS.batch_size = 128 * 8  # 8 GPUs
    self._run_and_report_benchmark()

  def benchmark_8_gpu_tweaked(self):
    """Test Keras model with manual config tuning and 8 GPUs."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_tweaked')
    FLAGS.batch_size = 128 * 8  # 8 GPUs
    FLAGS.use_tensor_lr = True
    FLAGS.datasets_num_private_threads = 14
    FLAGS.data_delay_prefetch = True
    self._run_and_report_benchmark()

  def benchmark_8_gpu_slack(self):
    """Test Keras model with tf.data's experimental_slack and 8 GPUs."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_slack')
    FLAGS.batch_size = 128 * 8  # 8 GPUs
    FLAGS.tf_data_experimental_slack = True
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu(self):
    """Test Keras model with XLA and 8 GPUs."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu')
    FLAGS.batch_size = 128 * 8  # 8 GPUs
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_tweaked(self):
    """Test Keras model with manual config tuning, 8 GPUs, and XLA."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_tweaked')
    FLAGS.batch_size = 128 * 8
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.datasets_num_private_threads = 24
    FLAGS.data_delay_prefetch = True
    self._run_and_report_benchmark()

  def benchmark_8_gpu_fp16(self):
    """Test Keras model with 8 GPUs and fp16."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    self._run_and_report_benchmark()

  def benchmark_8_gpu_fp16_cloning(self):
    """Test Keras model with 8 GPUs, fp16 and cloning."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.clone_model_in_keras_dist_strat = True
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16_cloning')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    self._run_and_report_benchmark()

  def benchmark_8_gpu_fp16_tweaked(self):
    """Test Keras model with 8 GPUs, fp16, and manual config tuning."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16_tweaked')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.data_delay_prefetch = True
    self._run_and_report_benchmark()

  def benchmark_8_gpu_fp16_cloning_tweaked(self):
    """Test Keras model with 8 GPUs, fp16, cloning, and manual config tuning."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.clone_model_in_keras_dist_strat = True
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_8_gpu_fp16_cloning_tweaked')
    FLAGS.batch_size = 256 * 8
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.data_delay_prefetch = True
    self._run_and_report_benchmark()

  def benchmark_8_gpu_fp16_dynamic_tweaked(self):
    """Test Keras model with 8 GPUs, fp16, dynamic loss scaling, and manual
       config tuning.
    """
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_8_gpu_fp16_dynamic_tweaked')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.loss_scale = 'dynamic'
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.data_delay_prefetch = True
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_fp16(self):
    """Test Keras model with XLA, 8 GPUs and fp16."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_fp16')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_fp16_cloning(self):
    """Test Keras model with XLA, 8 GPUs, fp16 and cloning."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.clone_model_in_keras_dist_strat = True
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_fp16_cloning')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_fp16_tweaked(self):
    """Test Keras model with manual config tuning, XLA, 8 GPUs and fp16."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_fp16_tweaked')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.use_tensor_lr = True
    # FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.data_delay_prefetch = True
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_fp16_cloning_tweaked(self):
    """Test Keras model with manual config tuning, XLA, 8 GPUs, fp16, and
       cloning.
    """
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.clone_model_in_keras_dist_strat = True
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_xla_8_gpu_fp16_cloning_tweaked')
    FLAGS.batch_size = 256 * 8
    FLAGS.use_tensor_lr = True
    # FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.data_delay_prefetch = True
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_fp16_tweaked_delay_measure(self):
    """Test Keras model with manual config tuning, XLA, 8 GPUs and fp16. Delay
       performance measurement for stable performance on 96 vCPU platforms.
    """
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_xla_8_gpu_fp16_tweaked_delay_measure')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.data_delay_prefetch = True
    FLAGS.train_steps = 310
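    # With the subclass defaults (log_steps=10), warmup = (310 - 100) // 10
    # = 21 logged entries, so measurement effectively covers only the final
    # 100 steps.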
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_fp16_cloning_tweaked_delay_measure(self):
    """Test Keras model with manual config tuning, XLA, 8 GPUs, fp16, and
       cloning. Delay performance measurement for stable performance on 96 vCPU
       platforms.
    """
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.clone_model_in_keras_dist_strat = True
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_xla_8_gpu_fp16_cloning_tweaked_delay_measure')
    FLAGS.batch_size = 256 * 8
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.data_delay_prefetch = True
    FLAGS.train_steps = 310
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_fp16_tweaked_optional_next(self):
    """Test Keras model with manual config tuning, XLA, 8 GPUs, fp16.

    This test also enables get_next_as_optional.
    """
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_xla_8_gpu_fp16_tweaked_optional_next')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.use_tensor_lr = True
    # FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.data_delay_prefetch = True
    FLAGS.enable_get_next_as_optional = True
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_fp16_slack(self):
    """Test Keras model with XLA, 8 GPUs and fp16.

    This test also enables tf.data's experimental_slack functionality.
    """
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_fp16_slack')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.tf_data_experimental_slack = True
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_fp16_dynamic_tweaked(self):
    """Test Keras model with config tuning, XLA, 8 GPUs and dynamic fp16."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_xla_8_gpu_fp16_dynamic_tweaked')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.loss_scale = 'dynamic'
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.data_delay_prefetch = True
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_fp16_tensorboard_tweaked(self):
    """Test to track Tensorboard performance overhead."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_xla_8_gpu_fp16_tensorboard_tweaked')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.data_delay_prefetch = True
    FLAGS.enable_tensorboard = True
    self._run_and_report_benchmark()

  def benchmark_graph_8_gpu(self):
    """Test Keras model in legacy graph mode with 8 GPUs."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = False
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_8_gpu')
    FLAGS.batch_size = 128 * 8  # 8 GPUs
    self._run_and_report_benchmark()

  def benchmark_graph_xla_8_gpu(self):
    """Test Keras model in legacy graph mode with XLA and 8 GPUs."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = False
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_xla_8_gpu')
    FLAGS.batch_size = 128 * 8  # 8 GPUs
    self._run_and_report_benchmark()

  def benchmark_graph_8_gpu_fp16(self):
    """Test Keras model in legacy graph mode with 8 GPUs and fp16."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = False
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_8_gpu_fp16')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    self._run_and_report_benchmark()

  def benchmark_graph_xla_8_gpu_fp16(self):
    """Test Keras model in legacy graph mode with XLA, 8 GPUs and fp16."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = False
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_xla_8_gpu_fp16')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    self._run_and_report_benchmark()

  def benchmark_graph_8_gpu_fp16_tweaked(self):
    """Test Keras model in legacy graph mode with manual config tuning, 8 GPUs
       and fp16.
    """
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = False
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_8_gpu_fp16_tweaked')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    self._run_and_report_benchmark()

  def benchmark_graph_xla_8_gpu_fp16_tweaked(self):
    """Test Keras model in legacy graph mode with manual config tuning, XLA,
       8 GPUs and fp16.
    """
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = False
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_graph_xla_8_gpu_fp16_tweaked')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    self._run_and_report_benchmark()

  def benchmark_graph_xla_8_gpu_fp16_tweaked_delay_measure(self):
    """Test Keras model in legacy graph mode with manual config tuning, XLA,
       8 GPUs and fp16. Delay performance measurement for stable performance
       on 96 vCPU platforms.
    """
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = False
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_graph_xla_8_gpu_fp16_tweaked_delay_measure')
    FLAGS.batch_size = 256 * 8
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.train_steps = 310
    self._run_and_report_benchmark()

  def benchmark_graph_xla_8_gpu_fp16_tweaked_optional_next(self):
    """Test Keras model in legacy graph mode with manual config tuning, XLA,
       8 GPUs and fp16.

    This test also enables get_next_as_optional.
    """
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = False
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_graph_xla_8_gpu_fp16_tweaked_optional_next')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.enable_get_next_as_optional = True
    self._run_and_report_benchmark()

  def benchmark_graph_xla_8_gpu_fp16_slack(self):
    """Test Keras model in legacy graph mode with tf.data's experimental_slack
       functionality, XLA, 8 GPUs and fp16.
    """
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = False
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_graph_xla_8_gpu_fp16_slack')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.tf_data_experimental_slack = True
    self._run_and_report_benchmark()

  def benchmark_graph_8_gpu_fp16_dynamic_tweaked(self):
    """Test graph Keras with config tuning, 8 GPUs and dynamic fp16."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = False
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_graph_8_gpu_fp16_dynamic_tweaked')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.loss_scale = 'dynamic'
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    self._run_and_report_benchmark()

  def benchmark_graph_xla_8_gpu_fp16_dynamic_tweaked(self):
    """Test graph Keras with config tuning, XLA, 8 GPUs and dynamic fp16."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = False
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_graph_xla_8_gpu_fp16_dynamic_tweaked')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.use_tensor_lr = True
    FLAGS.loss_scale = 'dynamic'
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    self._run_and_report_benchmark()

  def fill_report_object(self, stats):
    super(Resnet50KerasBenchmarkBase, self).fill_report_object(
        stats,
        total_batch_size=FLAGS.batch_size,
        log_steps=FLAGS.log_steps)


class Resnet50KerasBenchmarkSynth(Resnet50KerasBenchmarkBase):
  """Resnet50 synthetic benchmark tests."""

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    def_flags = {}
    def_flags['skip_eval'] = True
    def_flags['report_accuracy_metrics'] = False
    def_flags['use_synthetic_data'] = True
    def_flags['train_steps'] = 110
    def_flags['log_steps'] = 10
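    # train_steps=110 with log_steps=10 pairs with the warmup computation in
    # _run_and_report_benchmark: the first logged entry is treated as warmup
    # and only the final 100 steps are reported.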

    super(Resnet50KerasBenchmarkSynth, self).__init__(
        output_dir=output_dir, default_flags=def_flags)


class Resnet50KerasBenchmarkReal(Resnet50KerasBenchmarkBase):
  """Resnet50 real data benchmark tests."""

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    def_flags = {}
    def_flags['skip_eval'] = True
    def_flags['report_accuracy_metrics'] = False
    def_flags['data_dir'] = os.path.join(root_data_dir, 'imagenet')
    def_flags['train_steps'] = 110
    def_flags['log_steps'] = 10

    super(Resnet50KerasBenchmarkReal, self).__init__(
        output_dir=output_dir, default_flags=def_flags)


class TrivialKerasBenchmarkReal(keras_benchmark.KerasBenchmark):
  """Trivial model with real data benchmark tests."""

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    flag_methods = [
        keras_common.define_keras_flags,
        lambda: imagenet_main.define_imagenet_flags(dynamic_loss_scale=True)
    ]
    def_flags = {}
    def_flags['skip_eval'] = True
    def_flags['report_accuracy_metrics'] = False
    def_flags['dtype'] = 'fp16'
    def_flags['enable_xla'] = True
    def_flags['data_dir'] = os.path.join(root_data_dir, 'imagenet')
    def_flags['train_steps'] = 600
    def_flags['log_steps'] = 100
    def_flags['distribution_strategy'] = 'default'

    super(TrivialKerasBenchmarkReal, self).__init__(
        output_dir=output_dir,
        flag_methods=flag_methods,
        default_flags=def_flags)

  def _run_and_report_benchmark(self):
    start_time_sec = time.time()
    stats = keras_imagenet_main.run(FLAGS)
    wall_time_sec = time.time() - start_time_sec

    super(TrivialKerasBenchmarkReal, self)._report_benchmark(
        stats,
        wall_time_sec,
        total_batch_size=FLAGS.batch_size,
        log_steps=FLAGS.log_steps)

  def benchmark_8_gpu_warmup(self):
    """Dummy test that runs over an epoch to warmup the machine."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = True
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_warmup')
    FLAGS.batch_size = 256
    FLAGS.train_steps = 700
    self._run_and_report_benchmark()

  def benchmark_1_gpu(self):
    """Test trivial Keras model (input pipeline) with 1 GPU."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu')
    FLAGS.batch_size = 256
    self._run_and_report_benchmark()

  def benchmark_graph_1_gpu(self):
    """Test trivial Keras model (input pipeline) with 1 GPU."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = False
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu')
    FLAGS.batch_size = 256
    self._run_and_report_benchmark()

  def benchmark_8_gpu(self):
    """Test trivial Keras model (input pipeline) with 8 GPUs."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = True
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu')
    FLAGS.batch_size = 256 * 8
    self._run_and_report_benchmark()

  def benchmark_8_gpu_tweaked(self):
    """Test trivial Keras model (input pipeline) with manual config tuning and
       8 GPUs.
    """
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = True
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_tweaked')
    FLAGS.batch_size = 256 * 8
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.data_delay_prefetch = True
    self._run_and_report_benchmark()

  def benchmark_8_gpu_slack(self):
    """Test trivial Keras model (input pipeline) with tf.data's
       experimental_slack and 8 GPUs.
    """
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = True
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_slack')
    FLAGS.batch_size = 256 * 8
    FLAGS.tf_data_experimental_slack = True
    self._run_and_report_benchmark()

  def benchmark_graph_8_gpu(self):
    """Test trivial Keras model (input pipeline) in legacy graph mode with 8
       GPUs.
    """
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = False
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_8_gpu')
    FLAGS.batch_size = 256 * 8
    self._run_and_report_benchmark()

  def benchmark_graph_8_gpu_tweaked(self):
    """Test trivial Keras model (input pipeline) in legacy graph mode with
       manual config tuning and 8 GPUs.
    """
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = False
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_8_gpu_tweaked')
    FLAGS.batch_size = 256 * 8
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    self._run_and_report_benchmark()

  def fill_report_object(self, stats):
    super(TrivialKerasBenchmarkReal, self).fill_report_object(
        stats,
        total_batch_size=FLAGS.batch_size,
        log_steps=FLAGS.log_steps)


if __name__ == '__main__':
  tf.test.main()