# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes Keras benchmarks and accuracy tests."""
from __future__ import print_function

import os
import time

from absl import flags
import tensorflow as tf  # pylint: disable=g-bad-import-order

from official.resnet import imagenet_main
from official.resnet.keras import keras_benchmark
from official.resnet.keras import keras_common
from official.resnet.keras import keras_imagenet_main
# Accepted top-1 accuracy band for the ResNet50 accuracy benchmarks below;
# runs outside [MIN, MAX] are reported as failures by _report_benchmark.
MIN_TOP_1_ACCURACY = 0.76
MAX_TOP_1_ACCURACY = 0.77

FLAGS = flags.FLAGS
class Resnet50KerasAccuracy(keras_benchmark.KerasBenchmark):
  """Benchmark accuracy tests for ResNet50 in Keras.

  Each benchmark_* method configures absl FLAGS for a full 90-epoch training
  run on real ImageNet data and reports wall time plus top-1 accuracy against
  the [MIN_TOP_1_ACCURACY, MAX_TOP_1_ACCURACY] band.
  """

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    """A benchmark class.

    Args:
      output_dir: directory where to output e.g. log files
      root_data_dir: directory under which to look for dataset
      **kwargs: arbitrary named arguments. This is needed to make the
                constructor forward compatible in case PerfZero provides more
                named arguments before updating the constructor.
    """
    flag_methods = [
        keras_common.define_keras_flags,
        lambda: imagenet_main.define_imagenet_flags(dynamic_loss_scale=True)
    ]

    self.data_dir = os.path.join(root_data_dir, 'imagenet')
    super(Resnet50KerasAccuracy, self).__init__(
        output_dir=output_dir, flag_methods=flag_methods)

  def benchmark_graph_8_gpu(self):
    """Test Keras model with Keras fit/dist_strat and 8 GPUs."""
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 128 * 8
    FLAGS.train_epochs = 90
    FLAGS.epochs_between_evals = 10
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_8_gpu')
    FLAGS.dtype = 'fp32'
    self._run_and_report_benchmark()

  def benchmark_8_gpu(self):
    """Test Keras model with eager, dist_strat and 8 GPUs."""
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 128 * 8
    FLAGS.train_epochs = 90
    FLAGS.epochs_between_evals = 10
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu')
    FLAGS.dtype = 'fp32'
    FLAGS.enable_eager = True
    # Add some thread tunings to improve performance.
    FLAGS.datasets_num_private_threads = 14
    self._run_and_report_benchmark()

  def benchmark_8_gpu_fp16(self):
    """Test Keras model with eager, dist_strat, 8 GPUs, and fp16."""
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 256 * 8
    FLAGS.train_epochs = 90
    FLAGS.epochs_between_evals = 10
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16')
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    # Thread tuning to improve performance.
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_fp16(self):
    """Test Keras model with XLA, eager, dist_strat, 8 GPUs and fp16."""
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 256 * 8
    FLAGS.train_epochs = 90
    FLAGS.epochs_between_evals = 10
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_fp16')
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    # Thread tuning to improve performance.
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_fp16_dynamic(self):
    """Test Keras model with XLA, eager, dist_strat, 8 GPUs, dynamic fp16."""
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 256 * 8
    FLAGS.train_epochs = 90
    FLAGS.epochs_between_evals = 10
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_fp16_dynamic')
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.loss_scale = 'dynamic'
    # Thread tuning to improve performance.
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    self._run_and_report_benchmark()

  def _run_and_report_benchmark(self):
    """Run the configured training and report stats with accuracy bounds."""
    start_time_sec = time.time()
    stats = keras_imagenet_main.run(flags.FLAGS)
    wall_time_sec = time.time() - start_time_sec

    super(Resnet50KerasAccuracy, self)._report_benchmark(
        stats,
        wall_time_sec,
        top_1_min=MIN_TOP_1_ACCURACY,
        top_1_max=MAX_TOP_1_ACCURACY,
        total_batch_size=FLAGS.batch_size,
        log_steps=100)

  def _get_model_dir(self, folder_name):
    """Return a per-benchmark model directory under the output dir."""
    return os.path.join(self.output_dir, folder_name)
class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
  """Resnet50 benchmarks.

  Speed (not accuracy) benchmarks: subclasses supply default flags (synthetic
  vs. real data); each benchmark_* method sets the per-run FLAGS and reports
  throughput via _report_benchmark.
  """

  def __init__(self, output_dir=None, default_flags=None):
    """Initialize with flag definitions and subclass-provided defaults."""
    flag_methods = [
        keras_common.define_keras_flags,
        lambda: imagenet_main.define_imagenet_flags(dynamic_loss_scale=True)
    ]

    super(Resnet50KerasBenchmarkBase, self).__init__(
        output_dir=output_dir,
        flag_methods=flag_methods,
        default_flags=default_flags)

  def _run_and_report_benchmark(self):
    """Run the configured training and report wall time and throughput."""
    start_time_sec = time.time()
    stats = keras_imagenet_main.run(FLAGS)
    wall_time_sec = time.time() - start_time_sec

    super(Resnet50KerasBenchmarkBase, self)._report_benchmark(
        stats,
        wall_time_sec,
        total_batch_size=FLAGS.batch_size,
        log_steps=FLAGS.log_steps)

  def benchmark_1_gpu_no_dist_strat(self):
    """Test Keras model with 1 GPU, no distribution strategy."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_no_dist_strat')
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

  def benchmark_graph_1_gpu_no_dist_strat(self):
    """Test Keras model in legacy graph mode with 1 GPU, no dist strat."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = False
    FLAGS.distribution_strategy = 'off'
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu_no_dist_strat')
    FLAGS.batch_size = 96  # BatchNorm is less efficient in legacy graph mode
                           # due to its reliance on v1 cond.
    self._run_and_report_benchmark()

  def benchmark_1_gpu(self):
    """Test Keras model with 1 GPU."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu')
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_cloning(self):
    """Test Keras model with 1 GPU and no-cloning."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_no_cloning')
    FLAGS.batch_size = 128
    FLAGS.clone_model_in_keras_dist_strat = False
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu(self):
    """Test Keras model with XLA and 1 GPU."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu')
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

  def benchmark_1_gpu_fp16(self):
    """Test Keras model with 1 GPU and fp16."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_fp16')
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = 256
    self._run_and_report_benchmark()

  def benchmark_1_gpu_fp16_dynamic(self):
    """Test Keras model with 1 GPU, fp16, and dynamic loss scaling."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_fp16_dynamic')
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = 256
    FLAGS.loss_scale = 'dynamic'
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu_fp16(self):
    """Test Keras model with XLA, 1 GPU and fp16."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_fp16')
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = 256
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu_fp16_tweaked(self):
    """Test Keras model with XLA, 1 GPU, fp16, and manual config tuning."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_fp16_tweaked')
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = 256
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.data_prefetch_with_slack = True
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu_fp16_dynamic(self):
    """Test Keras model with XLA, 1 GPU, fp16, and dynamic loss scaling."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_fp16_dynamic')
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = 256
    FLAGS.loss_scale = 'dynamic'
    self._run_and_report_benchmark()

  def benchmark_graph_1_gpu(self):
    """Test Keras model in legacy graph mode with 1 GPU."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = False
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu')
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

  def benchmark_graph_xla_1_gpu(self):
    """Test Keras model in legacy graph mode with XLA and 1 GPU."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = False
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_xla_1_gpu')
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

  def benchmark_graph_1_gpu_fp16(self):
    """Test Keras model in legacy graph mode with 1 GPU and fp16."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = False
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu_fp16')
    FLAGS.batch_size = 256
    self._run_and_report_benchmark()

  def benchmark_graph_xla_1_gpu_fp16(self):
    """Test Keras model in legacy graph mode with 1 GPU, fp16 and XLA."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = False
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_xla_1_gpu_fp16')
    FLAGS.batch_size = 256
    self._run_and_report_benchmark()

  def benchmark_graph_xla_1_gpu_fp16_tweaked(self):
    """Test Keras model in legacy graph mode with 1 GPU, fp16, XLA, and manual
       config tuning.
    """
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = False
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_graph_xla_1_gpu_fp16_tweaked')
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = 256
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    self._run_and_report_benchmark()

  def benchmark_8_gpu(self):
    """Test Keras model with 8 GPUs."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu')
    FLAGS.batch_size = 128 * 8  # 8 GPUs
    self._run_and_report_benchmark()

  def benchmark_8_gpu_no_cloning(self):
    """Test Keras model with 8 GPUs and no-cloning."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_no_cloning')
    FLAGS.clone_model_in_keras_dist_strat = False
    FLAGS.batch_size = 128 * 8  # 8 GPUs
    self._run_and_report_benchmark()

  def benchmark_8_gpu_tweaked(self):
    """Test Keras model with manual config tuning and 8 GPUs."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_tweaked')
    FLAGS.batch_size = 128 * 8  # 8 GPUs
    FLAGS.use_tensor_lr = True
    FLAGS.datasets_num_private_threads = 14
    FLAGS.data_prefetch_with_slack = True
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu(self):
    """Test Keras model with XLA and 8 GPUs."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu')
    FLAGS.batch_size = 128 * 8  # 8 GPUs
    self._run_and_report_benchmark()

  def benchmark_8_gpu_fp16(self):
    """Test Keras model with 8 GPUs and fp16."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    self._run_and_report_benchmark()

  def benchmark_8_gpu_fp16_tweaked(self):
    """Test Keras model with 8 GPUs and fp16."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16_tweaked')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.data_prefetch_with_slack = True
    self._run_and_report_benchmark()

  def benchmark_8_gpu_fp16_dynamic_tweaked(self):
    """Test Keras model with 8 GPUs, fp16, and dynamic loss scaling."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_8_gpu_fp16_dynamic_tweaked')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.loss_scale = 'dynamic'
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.data_prefetch_with_slack = True
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_fp16(self):
    """Test Keras model with XLA, 8 GPUs and fp16."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_fp16')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_fp16_tweaked(self):
    """Test Keras model with manual config tuning, XLA, 8 GPUs and fp16."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_fp16_tweaked')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.use_tensor_lr = True
    # NOTE(review): gpu_private thread mode is deliberately left disabled
    # here in the original; confirm before re-enabling.
    # FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.data_prefetch_with_slack = True
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_fp16_dynamic_tweaked(self):
    """Test Keras model with config tuning, XLA, 8 GPUs and dynamic fp16."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_xla_8_gpu_fp16_dynamic_tweaked')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.loss_scale = 'dynamic'
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.data_prefetch_with_slack = True
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_fp16_tensorboard_tweaked(self):
    """Test to track Tensorboard performance overhead."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_xla_8_gpu_fp16_tensorboard_tweaked')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.data_prefetch_with_slack = True
    FLAGS.enable_tensorboard = True
    self._run_and_report_benchmark()

  def benchmark_graph_8_gpu(self):
    """Test Keras model in legacy graph mode with 8 GPUs."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = False
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_8_gpu')
    FLAGS.batch_size = 128 * 8  # 8 GPUs
    self._run_and_report_benchmark()

  def benchmark_graph_xla_8_gpu(self):
    """Test Keras model in legacy graph mode with XLA and 8 GPUs."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = False
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_xla_8_gpu')
    FLAGS.batch_size = 128 * 8  # 8 GPUs
    self._run_and_report_benchmark()

  def benchmark_graph_8_gpu_fp16(self):
    """Test Keras model in legacy graph mode with 8 GPUs and fp16."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = False
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_8_gpu_fp16')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    self._run_and_report_benchmark()

  def benchmark_graph_xla_8_gpu_fp16(self):
    """Test Keras model in legacy graph mode with XLA, 8 GPUs and fp16."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = False
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_xla_8_gpu_fp16')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    self._run_and_report_benchmark()

  def benchmark_graph_8_gpu_fp16_tweaked(self):
    """Test Keras model in legacy graph mode with manual config tuning, 8 GPUs
       and fp16.
    """
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = False
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_8_gpu_fp16_tweaked')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    self._run_and_report_benchmark()

  def benchmark_graph_xla_8_gpu_fp16_tweaked(self):
    """Test Keras model in legacy graph mode with manual config tuning, XLA,
       8 GPUs and fp16.
    """
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = False
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_graph_xla_8_gpu_fp16_tweaked')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    self._run_and_report_benchmark()

  def benchmark_graph_8_gpu_fp16_dynamic_tweaked(self):
    """Test graph Keras with config tuning, 8 GPUs and dynamic fp16."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = False
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_graph_8_gpu_fp16_dynamic_tweaked')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.loss_scale = 'dynamic'
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    self._run_and_report_benchmark()

  def benchmark_graph_xla_8_gpu_fp16_dynamic_tweaked(self):
    """Test graph Keras with config tuning, XLA, 8 GPUs and dynamic fp16."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = False
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_graph_xla_8_gpu_fp16_dynamic_tweaked')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.use_tensor_lr = True
    FLAGS.loss_scale = 'dynamic'
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    self._run_and_report_benchmark()

  def fill_report_object(self, stats):
    """Fill the report object with batch size and logging cadence."""
    super(Resnet50KerasBenchmarkBase, self).fill_report_object(
        stats,
        total_batch_size=FLAGS.batch_size,
        log_steps=FLAGS.log_steps)
class Resnet50KerasBenchmarkSynth(Resnet50KerasBenchmarkBase):
  """Resnet50 synthetic benchmark tests."""

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    """Configure defaults for short synthetic-data speed runs.

    Args:
      output_dir: directory where to output e.g. log files
      root_data_dir: unused here (synthetic data); kept for a uniform
                     constructor signature across benchmark classes.
      **kwargs: arbitrary named arguments for forward compatibility.
    """
    def_flags = {}
    def_flags['skip_eval'] = True
    def_flags['report_accuracy_metrics'] = False
    def_flags['use_synthetic_data'] = True
    def_flags['train_steps'] = 110
    def_flags['log_steps'] = 10

    super(Resnet50KerasBenchmarkSynth, self).__init__(
        output_dir=output_dir, default_flags=def_flags)
class Resnet50KerasBenchmarkReal(Resnet50KerasBenchmarkBase):
  """Resnet50 real data benchmark tests."""

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    """Configure defaults for short real-data speed runs.

    Args:
      output_dir: directory where to output e.g. log files
      root_data_dir: directory under which to look for the imagenet dataset
      **kwargs: arbitrary named arguments for forward compatibility.
    """
    def_flags = {}
    def_flags['skip_eval'] = True
    def_flags['report_accuracy_metrics'] = False
    def_flags['data_dir'] = os.path.join(root_data_dir, 'imagenet')
    def_flags['train_steps'] = 110
    def_flags['log_steps'] = 10

    super(Resnet50KerasBenchmarkReal, self).__init__(
        output_dir=output_dir, default_flags=def_flags)
class TrivialKerasBenchmarkReal(keras_benchmark.KerasBenchmark):
  """Trivial model with real data benchmark tests.

  Benchmarks the input pipeline: the model is trivial, so throughput is
  dominated by data loading rather than compute.
  """

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    """Configure defaults for input-pipeline benchmark runs.

    Args:
      output_dir: directory where to output e.g. log files
      root_data_dir: directory under which to look for the imagenet dataset
      **kwargs: arbitrary named arguments for forward compatibility.
    """
    flag_methods = [
        keras_common.define_keras_flags,
        lambda: imagenet_main.define_imagenet_flags(dynamic_loss_scale=True)
    ]
    def_flags = {}
    def_flags['skip_eval'] = True
    def_flags['report_accuracy_metrics'] = False
    def_flags['dtype'] = 'fp16'
    def_flags['enable_xla'] = True
    def_flags['data_dir'] = os.path.join(root_data_dir, 'imagenet')
    def_flags['train_steps'] = 600
    def_flags['log_steps'] = 100
    def_flags['distribution_strategy'] = 'default'

    super(TrivialKerasBenchmarkReal, self).__init__(
        output_dir=output_dir,
        flag_methods=flag_methods,
        default_flags=def_flags)

  def _run_and_report_benchmark(self):
    """Run the configured training and report wall time and throughput."""
    start_time_sec = time.time()
    stats = keras_imagenet_main.run(FLAGS)
    wall_time_sec = time.time() - start_time_sec

    super(TrivialKerasBenchmarkReal, self)._report_benchmark(
        stats,
        wall_time_sec,
        total_batch_size=FLAGS.batch_size,
        log_steps=FLAGS.log_steps)

  def benchmark_8_gpu_warmup(self):
    """Dummy test that runs over an epoch to warmup the machine."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = True
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_warmup')
    FLAGS.batch_size = 256
    FLAGS.train_steps = 700
    self._run_and_report_benchmark()

  def benchmark_1_gpu(self):
    """Test trivial Keras model (input pipeline) with 1 GPU."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu')
    FLAGS.batch_size = 256
    self._run_and_report_benchmark()

  def benchmark_graph_1_gpu(self):
    """Test trivial Keras model (input pipeline) with 1 GPU."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = False
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu')
    FLAGS.batch_size = 256
    self._run_and_report_benchmark()

  def benchmark_8_gpu(self):
    """Test trivial Keras model (input pipeline) with 8 GPUs."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = True
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu')
    FLAGS.batch_size = 256 * 8
    self._run_and_report_benchmark()

  def benchmark_8_gpu_tweaked(self):
    """Test trivial Keras model (input pipeline) with manual config tuning and
       8 GPUs.
    """
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = True
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_tweaked')
    FLAGS.batch_size = 256 * 8
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.data_prefetch_with_slack = True
    self._run_and_report_benchmark()

  def benchmark_graph_8_gpu(self):
    """Test trivial Keras model (input pipeline) in legacy graph mode with 8
       GPUs.
    """
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = False
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_8_gpu')
    FLAGS.batch_size = 256 * 8
    self._run_and_report_benchmark()

  def benchmark_graph_8_gpu_tweaked(self):
    """Test trivial Keras model (input pipeline) in legacy graph mode with
       manual config tuning and 8 GPUs.
    """
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = False
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_8_gpu_tweaked')
    FLAGS.batch_size = 256 * 8
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    self._run_and_report_benchmark()

  def fill_report_object(self, stats):
    """Fill the report object with batch size and logging cadence."""
    super(TrivialKerasBenchmarkReal, self).fill_report_object(
        stats,
        total_batch_size=FLAGS.batch_size,
        log_steps=FLAGS.log_steps)
if __name__ == '__main__':
  # Delegate to the TensorFlow test runner, which discovers and executes
  # the benchmark_* methods defined above.
  tf.test.main()