keras_imagenet_benchmark.py 35.2 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes Keras benchmarks and accuracy tests."""
from __future__ import print_function

import os
19
import time
20
21

from absl import flags
22
import tensorflow as tf # pylint: disable=g-bad-import-order
23
24

from official.resnet import imagenet_main
Toby Boyd's avatar
Toby Boyd committed
25
from official.resnet.keras import keras_benchmark
26
27
28
from official.resnet.keras import keras_common
from official.resnet.keras import keras_imagenet_main

Toby Boyd's avatar
Toby Boyd committed
29
30
# Accepted top-1 accuracy band for a converged ResNet50 on ImageNet; the
# accuracy benchmarks below report runs against these bounds.
MIN_TOP_1_ACCURACY = 0.76
MAX_TOP_1_ACCURACY = 0.77
31

Toby Boyd's avatar
Toby Boyd committed
32
# Module-level alias for the global absl flag container; every benchmark
# method below configures its run by mutating attributes on this object.
FLAGS = flags.FLAGS
33
34


Toby Boyd's avatar
Toby Boyd committed
35
36
class Resnet50KerasAccuracy(keras_benchmark.KerasBenchmark):
  """Benchmark accuracy tests for ResNet50 in Keras."""

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    """A benchmark class.

    Args:
      output_dir: directory where to output e.g. log files
      root_data_dir: directory under which to look for dataset
      **kwargs: arbitrary named arguments. This is needed to make the
                constructor forward compatible in case PerfZero provides more
                named arguments before updating the constructor.
    """
    flag_methods = [
        keras_common.define_keras_flags,
        lambda: imagenet_main.define_imagenet_flags(dynamic_loss_scale=True)
    ]

    self.data_dir = os.path.join(root_data_dir, 'imagenet')
    super(Resnet50KerasAccuracy, self).__init__(
        output_dir=output_dir, flag_methods=flag_methods)

  def benchmark_graph_8_gpu(self):
    """Test Keras model with Keras fit/dist_strat and 8 GPUs."""
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 128 * 8
    FLAGS.train_epochs = 90
    FLAGS.epochs_between_evals = 10
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_8_gpu')
    FLAGS.dtype = 'fp32'
    FLAGS.use_tensor_lr = True
    self._run_and_report_benchmark()

  def benchmark_8_gpu(self):
    """Test Keras model with eager, dist_strat and 8 GPUs."""
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 128 * 8
    FLAGS.train_epochs = 90
    FLAGS.epochs_between_evals = 10
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu')
    FLAGS.dtype = 'fp32'
    FLAGS.enable_eager = True
    # Add some thread tunings to improve performance.
    FLAGS.datasets_num_private_threads = 14
    FLAGS.use_tensor_lr = True
    self._run_and_report_benchmark()

  def benchmark_8_gpu_fp16(self):
    """Test Keras model with eager, dist_strat, 8 GPUs, and fp16."""
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 256 * 8
    FLAGS.train_epochs = 90
    FLAGS.epochs_between_evals = 10
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16')
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    # Thread tuning to improve performance.
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.use_tensor_lr = True
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_fp16(self):
    """Test Keras model with XLA, eager, dist_strat, 8 GPUs and fp16."""
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 256 * 8
    FLAGS.train_epochs = 90
    FLAGS.epochs_between_evals = 10
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_fp16')
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    # Thread tuning to improve performance.
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.use_tensor_lr = True
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_fp16_dynamic(self):
    """Test Keras model with XLA, eager, dist_strat, 8 GPUs, dynamic fp16."""
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 256 * 8
    FLAGS.train_epochs = 90
    FLAGS.epochs_between_evals = 10
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_fp16_dynamic')
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.loss_scale = 'dynamic'
    # Thread tuning to improve performance.
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.use_tensor_lr = True
    self._run_and_report_benchmark()

  def _run_and_report_benchmark(self):
    """Runs the full training job and reports stats with accuracy bounds."""
    start_time_sec = time.time()
    # Use the module-level FLAGS alias for consistency with
    # Resnet50KerasBenchmarkBase; flags.FLAGS is the same object.
    stats = keras_imagenet_main.run(FLAGS)
    wall_time_sec = time.time() - start_time_sec

    super(Resnet50KerasAccuracy, self)._report_benchmark(
        stats,
        wall_time_sec,
        top_1_min=MIN_TOP_1_ACCURACY,
        top_1_max=MAX_TOP_1_ACCURACY,
        total_batch_size=FLAGS.batch_size,
        log_steps=100)

  def _get_model_dir(self, folder_name):
    """Returns a per-benchmark model directory under the output dir."""
    return os.path.join(self.output_dir, folder_name)


Toby Boyd's avatar
Toby Boyd committed
154
155
156
157
158

class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
  """Resnet50 benchmarks."""

  def __init__(self, output_dir=None, default_flags=None):
    """Sets up flag definitions and forwards defaults to the parent class."""
    flag_methods = [
        keras_common.define_keras_flags,
        lambda: imagenet_main.define_imagenet_flags(dynamic_loss_scale=True)
    ]

    super(Resnet50KerasBenchmarkBase, self).__init__(
        output_dir=output_dir,
        flag_methods=flag_methods,
        default_flags=default_flags)

  def _run_and_report_benchmark(self):
    """Runs training under the current FLAGS and reports perf stats.

    Wall time is measured around the whole run; logged step-time entries
    before the final 100 batches are treated as warmup and excluded from
    the reported throughput.
    """
    start_time_sec = time.time()
    stats = keras_imagenet_main.run(FLAGS)
    wall_time_sec = time.time() - start_time_sec

    # Number of logged step time entries that are excluded in performance
    # report. We keep results from last 100 batches in this case. Clamp at
    # zero so a short run (train_steps < 100) cannot produce a negative
    # warmup count.
    warmup = max(0, (FLAGS.train_steps - 100) // FLAGS.log_steps)

    super(Resnet50KerasBenchmarkBase, self)._report_benchmark(
        stats,
        wall_time_sec,
        total_batch_size=FLAGS.batch_size,
        log_steps=FLAGS.log_steps,
        warmup=warmup)

  def benchmark_1_gpu_no_dist_strat(self):
    """Benchmarks the Keras model on 1 GPU with distribution strategy off."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.batch_size = 128
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_no_dist_strat')
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_run_eagerly(self):
    """Benchmarks on 1 GPU, distribution strategy off, running eagerly."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.batch_size = 64
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_1_gpu_no_dist_strat_run_eagerly')
    self._run_and_report_benchmark()

  def benchmark_graph_1_gpu_no_dist_strat(self):
    """Benchmarks legacy graph mode on 1 GPU, distribution strategy off."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = False
    FLAGS.distribution_strategy = 'off'
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu_no_dist_strat')
    FLAGS.batch_size = 96  # BatchNorm is less efficient in legacy graph mode
                           # due to its reliance on v1 cond.
    self._run_and_report_benchmark()

  def benchmark_1_gpu(self):
    """Benchmarks the Keras model on a single GPU."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.batch_size = 128
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu')
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu(self):
    """Benchmarks the Keras model with XLA enabled on a single GPU."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.batch_size = 128
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu')
    self._run_and_report_benchmark()

  def benchmark_1_gpu_fp16(self):
    """Benchmarks the Keras model on a single GPU with fp16."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.batch_size = 256
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_fp16')
    self._run_and_report_benchmark()

  def benchmark_1_gpu_fp16_dynamic(self):
    """Benchmarks 1 GPU with fp16 and dynamic loss scaling."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.batch_size = 256
    FLAGS.loss_scale = 'dynamic'
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_fp16_dynamic')
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu_fp16(self):
    """Benchmarks 1 GPU with XLA and fp16."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.batch_size = 256
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_fp16')
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu_fp16_tweaked(self):
    """Benchmarks 1 GPU with XLA, fp16, and manual performance tuning."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.batch_size = 256
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_fp16_tweaked')
    # Manual tuning knobs.
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.data_delay_prefetch = True
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu_fp16_slack(self):
    """Benchmarks 1 GPU with XLA, fp16, and tf.data experimental_slack."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.batch_size = 256
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_fp16_slack')
    FLAGS.tf_data_experimental_slack = True
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu_fp16_dynamic(self):
    """Benchmarks 1 GPU with XLA, fp16, and dynamic loss scaling."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.batch_size = 256
    FLAGS.loss_scale = 'dynamic'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_fp16_dynamic')
    self._run_and_report_benchmark()

  def benchmark_graph_1_gpu(self):
    """Benchmarks legacy graph mode on a single GPU."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = False
    FLAGS.distribution_strategy = 'default'
    FLAGS.batch_size = 128
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu')
    self._run_and_report_benchmark()

  def benchmark_graph_xla_1_gpu(self):
    """Benchmarks legacy graph mode with XLA on a single GPU."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = False
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.batch_size = 128
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_xla_1_gpu')
    self._run_and_report_benchmark()

  def benchmark_graph_1_gpu_fp16(self):
    """Benchmarks legacy graph mode on 1 GPU with fp16."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = False
    FLAGS.distribution_strategy = 'default'
    FLAGS.batch_size = 256
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu_fp16')
    self._run_and_report_benchmark()

  def benchmark_graph_xla_1_gpu_fp16(self):
    """Benchmarks legacy graph mode on 1 GPU with fp16 and XLA."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = False
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.batch_size = 256
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_xla_1_gpu_fp16')
    self._run_and_report_benchmark()

  def benchmark_graph_xla_1_gpu_fp16_tweaked(self):
    """Benchmarks legacy graph mode, 1 GPU, fp16, XLA, with manual tuning."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = False
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.batch_size = 256
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_graph_xla_1_gpu_fp16_tweaked')
    # Manual tuning knobs.
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    self._run_and_report_benchmark()

  def benchmark_graph_xla_1_gpu_fp16_slack(self):
    """Benchmarks legacy graph mode, 1 GPU, fp16, XLA, with tf.data slack."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = False
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.batch_size = 256
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_graph_xla_1_gpu_fp16_slack')
    FLAGS.tf_data_experimental_slack = True
    self._run_and_report_benchmark()

  def benchmark_8_gpu(self):
    """Benchmarks the Keras model on 8 GPUs."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.batch_size = 128 * 8  # 8 GPUs
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu')
    self._run_and_report_benchmark()

  def benchmark_8_gpu_cloning(self):
    """Benchmarks 8 GPUs with model cloning in the distribution strategy."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.clone_model_in_keras_dist_strat = True
    FLAGS.batch_size = 128 * 8  # 8 GPUs
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_cloning')
    self._run_and_report_benchmark()

  def benchmark_8_gpu_tweaked(self):
    """Benchmarks 8 GPUs with manual performance tuning."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.batch_size = 128 * 8  # 8 GPUs
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_tweaked')
    # Manual tuning knobs.
    FLAGS.use_tensor_lr = True
    FLAGS.datasets_num_private_threads = 14
    FLAGS.data_delay_prefetch = True
    self._run_and_report_benchmark()

  def benchmark_8_gpu_slack(self):
    """Benchmarks 8 GPUs with tf.data experimental_slack enabled."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.batch_size = 128 * 8  # 8 GPUs
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_slack')
    FLAGS.tf_data_experimental_slack = True
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu(self):
    """Benchmarks 8 GPUs with XLA enabled."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.batch_size = 128 * 8  # 8 GPUs
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu')
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_tweaked(self):
    """Benchmarks 8 GPUs with XLA and manual performance tuning."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.batch_size = 128 * 8
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_tweaked')
    # Manual tuning knobs.
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.datasets_num_private_threads = 24
    FLAGS.data_delay_prefetch = True
    self._run_and_report_benchmark()

  def benchmark_8_gpu_fp16(self):
    """Benchmarks 8 GPUs with fp16."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16')
    self._run_and_report_benchmark()

  def benchmark_8_gpu_fp16_cloning(self):
    """Benchmarks 8 GPUs with fp16 and model cloning."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.clone_model_in_keras_dist_strat = True
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16_cloning')
    self._run_and_report_benchmark()

  def benchmark_8_gpu_fp16_tweaked(self):
    """Benchmarks 8 GPUs with fp16 and manual performance tuning."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16_tweaked')
    # Manual tuning knobs.
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.data_delay_prefetch = True
    self._run_and_report_benchmark()

  def benchmark_8_gpu_fp16_cloning_tweaked(self):
    """Benchmarks 8 GPUs with fp16, cloning, and manual tuning."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.clone_model_in_keras_dist_strat = True
    FLAGS.batch_size = 256 * 8
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_8_gpu_fp16_cloning_tweaked')
    # Manual tuning knobs.
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.data_delay_prefetch = True
    self._run_and_report_benchmark()

  def benchmark_8_gpu_fp16_dynamic_tweaked(self):
    """Benchmarks 8 GPUs with fp16, dynamic loss scaling, and tuning."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.loss_scale = 'dynamic'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_8_gpu_fp16_dynamic_tweaked')
    # Manual tuning knobs.
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.data_delay_prefetch = True
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_fp16(self):
    """Benchmarks 8 GPUs with XLA and fp16."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_fp16')
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_fp16_cloning(self):
    """Benchmarks 8 GPUs with XLA, fp16, and model cloning."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.clone_model_in_keras_dist_strat = True
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_fp16_cloning')
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_fp16_tweaked(self):
    """Benchmarks 8 GPUs with XLA, fp16, and manual tuning."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_fp16_tweaked')
    # Manual tuning knobs; gpu_private thread mode is deliberately disabled
    # for this variant.
    FLAGS.use_tensor_lr = True
    # FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.data_delay_prefetch = True
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_fp16_cloning_tweaked(self):
    """Benchmarks 8 GPUs with XLA, fp16, cloning, and manual tuning."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.clone_model_in_keras_dist_strat = True
    FLAGS.batch_size = 256 * 8
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_xla_8_gpu_fp16_cloning_tweaked')
    # Manual tuning knobs; gpu_private thread mode is deliberately disabled
    # for this variant.
    FLAGS.use_tensor_lr = True
    # FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.data_delay_prefetch = True
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_fp16_tweaked_delay_measure(self):
    """Benchmarks 8 GPUs with XLA, fp16, and manual tuning.

    Delay performance measurement for stable performance on 96 vCPU platforms.
    """
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_xla_8_gpu_fp16_tweaked_delay_measure')
    # Manual tuning knobs.
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.data_delay_prefetch = True
    # Longer run so measurement starts after startup jitter settles.
    FLAGS.train_steps = 310
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_fp16_cloning_tweaked_delay_measure(self):
    """Benchmarks 8 GPUs with XLA, fp16, cloning, and manual tuning.

    Delay performance measurement for stable performance on 96 vCPU platforms.
    """
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.clone_model_in_keras_dist_strat = True
    FLAGS.batch_size = 256 * 8
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_xla_8_gpu_fp16_cloning_tweaked_delay_measure')
    # Manual tuning knobs.
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.data_delay_prefetch = True
    # Longer run so measurement starts after startup jitter settles.
    FLAGS.train_steps = 310
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_fp16_tweaked_optional_next(self):
    """Benchmarks 8 GPUs with XLA, fp16, and manual tuning.

    This test also enables get_next_as_optional.
    """
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_xla_8_gpu_fp16_tweaked_optional_next')
    # Manual tuning knobs; gpu_private thread mode is deliberately disabled
    # for this variant.
    FLAGS.use_tensor_lr = True
    # FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.data_delay_prefetch = True
    FLAGS.enable_get_next_as_optional = True
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_fp16_slack(self):
    """Benchmarks 8 GPUs with XLA and fp16.

    This test also enable tf.data's experimental_slack functionality.
    """
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_fp16_slack')
    FLAGS.tf_data_experimental_slack = True
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_fp16_dynamic_tweaked(self):
    """Benchmarks 8 GPUs with XLA, dynamic fp16 loss scaling, and tuning."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.loss_scale = 'dynamic'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_xla_8_gpu_fp16_dynamic_tweaked')
    # Manual tuning knobs.
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.data_delay_prefetch = True
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_fp16_tensorboard_tweaked(self):
    """Tracks the performance overhead of enabling TensorBoard."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_xla_8_gpu_fp16_tensorboard_tweaked')
    # Manual tuning knobs.
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.data_delay_prefetch = True
    FLAGS.enable_tensorboard = True
    self._run_and_report_benchmark()

  def benchmark_graph_8_gpu(self):
    """Benchmarks legacy graph mode on 8 GPUs."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = False
    FLAGS.distribution_strategy = 'default'
    FLAGS.batch_size = 128 * 8  # 8 GPUs
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_8_gpu')
    self._run_and_report_benchmark()

  def benchmark_graph_xla_8_gpu(self):
    """Benchmarks legacy graph mode with XLA on 8 GPUs."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = False
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.batch_size = 128 * 8  # 8 GPUs
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_xla_8_gpu')
    self._run_and_report_benchmark()

  def benchmark_graph_8_gpu_fp16(self):
    """Benchmarks legacy graph mode on 8 GPUs with fp16."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = False
    FLAGS.distribution_strategy = 'default'
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_8_gpu_fp16')
    self._run_and_report_benchmark()

  def benchmark_graph_xla_8_gpu_fp16(self):
    """Benchmarks legacy graph mode on 8 GPUs with XLA and fp16."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = False
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_xla_8_gpu_fp16')
    self._run_and_report_benchmark()

  def benchmark_graph_8_gpu_fp16_tweaked(self):
    """Benchmark graph-mode Keras, 8 GPUs, fp16, with manual config tuning."""
    self._setup()

    # Graph mode with half-precision compute.
    FLAGS.enable_eager = False
    FLAGS.dtype = 'fp16'
    FLAGS.num_gpus = 8
    FLAGS.distribution_strategy = 'default'
    FLAGS.batch_size = 256 * 8  # 256 images per GPU across 8 GPUs.
    # Manual tuning knobs: tensor-based LR schedule and private GPU threads.
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_8_gpu_fp16_tweaked')
    self._run_and_report_benchmark()

804
805
806
807
808
809
810
811
812
813
814
815
816
817
  def benchmark_graph_xla_8_gpu_fp16_tweaked(self):
    """Benchmark graph-mode Keras, XLA, 8 GPUs, fp16, with config tuning."""
    self._setup()

    # Graph mode, half precision, XLA JIT compilation.
    FLAGS.enable_eager = False
    FLAGS.enable_xla = True
    FLAGS.dtype = 'fp16'
    FLAGS.num_gpus = 8
    FLAGS.distribution_strategy = 'default'
    FLAGS.batch_size = 256 * 8  # 256 images per GPU across 8 GPUs.
    # Manual tuning knobs: tensor-based LR schedule and private GPU threads.
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_graph_xla_8_gpu_fp16_tweaked')
    self._run_and_report_benchmark()

822
  def benchmark_graph_xla_8_gpu_fp16_tweaked_delay_measure(self):
    """Benchmark graph-mode Keras with config tuning, XLA, 8 GPUs, fp16.

    Delay performance measurement for stable performance on 96 vCPU platforms.
    """
    self._setup()

    # Graph mode, half precision, XLA JIT compilation.
    FLAGS.enable_eager = False
    FLAGS.enable_xla = True
    FLAGS.dtype = 'fp16'
    FLAGS.num_gpus = 8
    FLAGS.distribution_strategy = 'default'
    FLAGS.batch_size = 256 * 8
    # Manual tuning knobs: tensor-based LR schedule and private GPU threads.
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    # Extra steps so the measured window starts after throughput stabilizes.
    FLAGS.train_steps = 310
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_graph_xla_8_gpu_fp16_tweaked_delay_measure')
    self._run_and_report_benchmark()

842
  def benchmark_graph_xla_8_gpu_fp16_tweaked_optional_next(self):
    """Benchmark graph-mode Keras with config tuning, XLA, 8 GPUs, fp16.

    This test also enables get_next_as_optional.
    """
    self._setup()

    # Graph mode, half precision, XLA JIT compilation.
    FLAGS.enable_eager = False
    FLAGS.enable_xla = True
    FLAGS.dtype = 'fp16'
    FLAGS.num_gpus = 8
    FLAGS.distribution_strategy = 'default'
    FLAGS.batch_size = 256 * 8  # 256 images per GPU across 8 GPUs.
    # Manual tuning knobs: tensor-based LR schedule and private GPU threads.
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    # Variant under test: fetch input batches via get_next_as_optional.
    FLAGS.enable_get_next_as_optional = True
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_graph_xla_8_gpu_fp16_tweaked_optional_next')
    self._run_and_report_benchmark()

862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
  def benchmark_graph_xla_8_gpu_fp16_slack(self):
    """Benchmark graph-mode Keras, XLA, 8 GPUs, fp16, with tf.data slack.

    Enables tf.data's experimental_slack option in the input pipeline.
    """
    self._setup()

    # Graph mode, half precision, XLA JIT compilation.
    FLAGS.enable_eager = False
    FLAGS.enable_xla = True
    FLAGS.dtype = 'fp16'
    FLAGS.num_gpus = 8
    FLAGS.distribution_strategy = 'default'
    FLAGS.batch_size = 256 * 8  # 256 images per GPU across 8 GPUs.
    # Variant under test: tf.data experimental_slack.
    FLAGS.tf_data_experimental_slack = True
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_graph_xla_8_gpu_fp16_slack')
    self._run_and_report_benchmark()

879
880
881
882
883
884
885
886
887
888
889
890
  def benchmark_graph_8_gpu_fp16_dynamic_tweaked(self):
    """Benchmark graph-mode Keras, 8 GPUs, dynamic-loss-scale fp16, tuned."""
    self._setup()

    # Graph mode, half precision with dynamic loss scaling.
    FLAGS.enable_eager = False
    FLAGS.dtype = 'fp16'
    FLAGS.loss_scale = 'dynamic'
    FLAGS.num_gpus = 8
    FLAGS.distribution_strategy = 'default'
    FLAGS.batch_size = 256 * 8  # 256 images per GPU across 8 GPUs.
    # Manual tuning knobs: tensor-based LR schedule and private GPU threads.
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_graph_8_gpu_fp16_dynamic_tweaked')
    self._run_and_report_benchmark()

895
896
897
898
899
900
901
902
903
904
905
906
  def benchmark_graph_xla_8_gpu_fp16_dynamic_tweaked(self):
    """Benchmark graph-mode Keras, XLA, 8 GPUs, dynamic fp16, tuned."""
    self._setup()

    # Graph mode, half precision with dynamic loss scaling, XLA compilation.
    FLAGS.enable_eager = False
    FLAGS.enable_xla = True
    FLAGS.dtype = 'fp16'
    FLAGS.loss_scale = 'dynamic'
    FLAGS.num_gpus = 8
    FLAGS.distribution_strategy = 'default'
    FLAGS.batch_size = 256 * 8  # 256 images per GPU across 8 GPUs.
    # Manual tuning knobs: tensor-based LR schedule and private GPU threads.
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_graph_xla_8_gpu_fp16_dynamic_tweaked')
    self._run_and_report_benchmark()

Toby Boyd's avatar
Toby Boyd committed
912
913
914
915
916
917
  def fill_report_object(self, stats):
    """Forwards stats to the base class, attaching batch size and log cadence."""
    parent = super(Resnet50KerasBenchmarkBase, self)
    parent.fill_report_object(
        stats, total_batch_size=FLAGS.batch_size, log_steps=FLAGS.log_steps)

Toby Boyd's avatar
Toby Boyd committed
918
919
920
921

class Resnet50KerasBenchmarkSynth(Resnet50KerasBenchmarkBase):
  """Resnet50 synthetic benchmark tests."""

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    """Initializes defaults for synthetic-data benchmarks.

    Args:
      output_dir: directory where to output e.g. log files
      root_data_dir: unused here; synthetic data needs no dataset on disk.
      **kwargs: arbitrary named arguments, accepted for forward compatibility
                with the PerfZero harness.
    """
    # Throughput-only defaults: no eval, no accuracy reporting, short run.
    def_flags = {
        'skip_eval': True,
        'report_accuracy_metrics': False,
        'use_synthetic_data': True,
        'train_steps': 110,
        'log_steps': 10,
    }

    super(Resnet50KerasBenchmarkSynth, self).__init__(
        output_dir=output_dir, default_flags=def_flags)
Toby Boyd's avatar
Toby Boyd committed
932
933
934
935
936


class Resnet50KerasBenchmarkReal(Resnet50KerasBenchmarkBase):
  """Resnet50 real data benchmark tests."""

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    """Initializes defaults for real-data benchmarks.

    Args:
      output_dir: directory where to output e.g. log files
      root_data_dir: directory under which to look for the ImageNet dataset.
      **kwargs: arbitrary named arguments, accepted for forward compatibility
                with the PerfZero harness.
    """
    # Throughput-only defaults: no eval, no accuracy reporting, short run.
    def_flags = {
        'skip_eval': True,
        'report_accuracy_metrics': False,
        'data_dir': os.path.join(root_data_dir, 'imagenet'),
        'train_steps': 110,
        'log_steps': 10,
    }

    super(Resnet50KerasBenchmarkReal, self).__init__(
        output_dir=output_dir, default_flags=def_flags)
947
948


949
class TrivialKerasBenchmarkReal(keras_benchmark.KerasBenchmark):
  """Trivial model with real data benchmark tests.

  The trivial model does almost no compute, so these benchmarks primarily
  measure the input pipeline.
  """

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    """Initializes flag definitions and defaults for the trivial model.

    Args:
      output_dir: directory where to output e.g. log files
      root_data_dir: directory under which to look for the ImageNet dataset.
      **kwargs: arbitrary named arguments, accepted for forward compatibility
                with the PerfZero harness.
    """
    flag_methods = [
        keras_common.define_keras_flags,
        lambda: imagenet_main.define_imagenet_flags(dynamic_loss_scale=True)
    ]
    # Defaults shared by every benchmark method below.
    def_flags = {
        'skip_eval': True,
        'report_accuracy_metrics': False,
        'dtype': 'fp16',
        'enable_xla': True,
        'data_dir': os.path.join(root_data_dir, 'imagenet'),
        'train_steps': 600,
        'log_steps': 100,
        'distribution_strategy': 'default',
    }

    super(TrivialKerasBenchmarkReal, self).__init__(
        output_dir=output_dir,
        flag_methods=flag_methods,
        default_flags=def_flags)

  def _run_and_report_benchmark(self):
    """Runs the Keras ImageNet job and reports stats with wall-clock time."""
    started = time.time()
    stats = keras_imagenet_main.run(FLAGS)
    elapsed = time.time() - started

    super(TrivialKerasBenchmarkReal, self)._report_benchmark(
        stats,
        elapsed,
        total_batch_size=FLAGS.batch_size,
        log_steps=FLAGS.log_steps)

  def benchmark_8_gpu_warmup(self):
    """Dummy test that runs over an epoch to warmup the machine."""
    self._setup()

    FLAGS.enable_eager = True
    FLAGS.num_gpus = 8
    FLAGS.batch_size = 256
    FLAGS.train_steps = 700
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_warmup')
    self._run_and_report_benchmark()

  def benchmark_1_gpu(self):
    """Benchmark the trivial model (input pipeline) with 1 GPU, eager."""
    self._setup()

    FLAGS.enable_eager = True
    FLAGS.num_gpus = 1
    FLAGS.batch_size = 256
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu')
    self._run_and_report_benchmark()

  def benchmark_graph_1_gpu(self):
    """Benchmark the trivial model (input pipeline) with 1 GPU, graph mode."""
    self._setup()

    FLAGS.enable_eager = False
    FLAGS.num_gpus = 1
    FLAGS.batch_size = 256
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu')
    self._run_and_report_benchmark()

  def benchmark_8_gpu(self):
    """Benchmark the trivial model (input pipeline) with 8 GPUs, eager."""
    self._setup()

    FLAGS.enable_eager = True
    FLAGS.num_gpus = 8
    FLAGS.batch_size = 256 * 8  # 256 images per GPU across 8 GPUs.
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu')
    self._run_and_report_benchmark()

  def benchmark_8_gpu_tweaked(self):
    """Benchmark the trivial model with 8 GPUs and manual config tuning."""
    self._setup()

    FLAGS.enable_eager = True
    FLAGS.num_gpus = 8
    FLAGS.batch_size = 256 * 8  # 256 images per GPU across 8 GPUs.
    # Manual tuning knobs: private GPU threads and delayed data prefetch.
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.data_delay_prefetch = True
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_tweaked')
    self._run_and_report_benchmark()

  def benchmark_8_gpu_slack(self):
    """Benchmark the trivial model with 8 GPUs and tf.data slack.

    Enables tf.data's experimental_slack option in the input pipeline.
    """
    self._setup()

    FLAGS.enable_eager = True
    FLAGS.num_gpus = 8
    FLAGS.batch_size = 256 * 8  # 256 images per GPU across 8 GPUs.
    FLAGS.tf_data_experimental_slack = True
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_slack')
    self._run_and_report_benchmark()

  def benchmark_graph_8_gpu(self):
    """Benchmark the trivial model with 8 GPUs in legacy graph mode."""
    self._setup()

    FLAGS.enable_eager = False
    FLAGS.num_gpus = 8
    FLAGS.batch_size = 256 * 8  # 256 images per GPU across 8 GPUs.
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_8_gpu')
    self._run_and_report_benchmark()

  def benchmark_graph_8_gpu_tweaked(self):
    """Benchmark the trivial model, graph mode, 8 GPUs, config tuning."""
    self._setup()

    FLAGS.enable_eager = False
    FLAGS.num_gpus = 8
    FLAGS.batch_size = 256 * 8  # 256 images per GPU across 8 GPUs.
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_8_gpu_tweaked')
    self._run_and_report_benchmark()

  def fill_report_object(self, stats):
    """Forwards stats to the base class, attaching batch size and log cadence."""
    parent = super(TrivialKerasBenchmarkReal, self)
    parent.fill_report_object(
        stats, total_batch_size=FLAGS.batch_size, log_steps=FLAGS.log_steps)
1081
1082
1083
1084


if __name__ == '__main__':
  # Delegate to the TensorFlow test runner, which discovers and executes the
  # benchmark/test classes defined in this module.
  tf.test.main()