# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes Keras benchmarks and accuracy tests."""
from __future__ import print_function

import os
import time

from absl import flags
import tensorflow as tf  # pylint: disable=g-bad-import-order

from official.benchmark import keras_benchmark
from official.vision.image_classification import resnet_imagenet_main

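# Expected top-1 accuracy range for a converged ResNet50 on ImageNet; used
# below as the default top_1_min/top_1_max bounds when reporting accuracy
# benchmarks.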
MIN_TOP_1_ACCURACY = 0.76
MAX_TOP_1_ACCURACY = 0.77

FLAGS = flags.FLAGS


class Resnet50KerasAccuracy(keras_benchmark.KerasBenchmark):
  """Benchmark accuracy tests for ResNet50 in Keras."""

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    """A benchmark class.

    Args:
      output_dir: directory where outputs (e.g. log files) are written
      root_data_dir: directory under which to look for the dataset
      **kwargs: arbitrary named arguments. This is needed to make the
                constructor forward compatible in case PerfZero provides more
                named arguments before updating the constructor.
    """
    flag_methods = [resnet_imagenet_main.define_imagenet_keras_flags]

    self.data_dir = os.path.join(root_data_dir, 'imagenet')
    super(Resnet50KerasAccuracy, self).__init__(
        output_dir=output_dir, flag_methods=flag_methods)

  def benchmark_graph_8_gpu(self):
    """Test Keras model with Keras fit/dist_strat and 8 GPUs."""
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 128 * 8
    FLAGS.train_epochs = 90
    FLAGS.epochs_between_evals = 10
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_8_gpu')
    FLAGS.dtype = 'fp32'
    FLAGS.use_tensor_lr = True
    self._run_and_report_benchmark()

  def benchmark_8_gpu(self):
    """Test Keras model with eager, dist_strat and 8 GPUs."""
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 128 * 8
    FLAGS.train_epochs = 90
    FLAGS.epochs_between_evals = 10
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu')
    FLAGS.dtype = 'fp32'
    FLAGS.enable_eager = True
    # Add some thread tunings to improve performance.
    FLAGS.datasets_num_private_threads = 14
    FLAGS.use_tensor_lr = True
    self._run_and_report_benchmark()

  def benchmark_8_gpu_fp16(self):
    """Test Keras model with eager, dist_strat, 8 GPUs, and fp16."""
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 256 * 8
    FLAGS.train_epochs = 90
    FLAGS.epochs_between_evals = 10
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16')
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    # Thread tuning to improve performance.
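    # 'gpu_private' gives each GPU its own dedicated threads for launching
    # kernels, reducing contention between GPUs on many-core hosts.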
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.use_tensor_lr = True
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_fp16(self):
    """Test Keras model with XLA, eager, dist_strat, 8 GPUs and fp16."""
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 256 * 8
    FLAGS.train_epochs = 90
    FLAGS.epochs_between_evals = 10
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_fp16')
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    # Thread tuning to improve performance.
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.use_tensor_lr = True
    self._run_and_report_benchmark()

  def benchmark_8_gpu_mlperf_like(self):
    """Test similar to the rules for MLPerf 0.5.

    This is not a to-spec MLPerf comparison, but it is still a decent
    directional measurement:
      - Eval runs every 4 epochs and again at the end, roughly 2 extra times.
      - The learning rate is not tuned to hit 75%, but we know the model is
        correct.
      - We measure total time, while MLPerf 0.5 excluded some startup time.
      - Eval does not cover the full validation set; eval batch_size must be
        set so that 8 * batch_size divides the 50K validation set evenly.
        250 is a good number.
      - Due to epoch bleed, we may run slightly too many or too few steps.
    """
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 256 * 8
    FLAGS.train_epochs = 61
    FLAGS.epochs_between_evals = 4
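    # Per the docstring above: with 8 GPUs, an eval batch_size of 250 divides
    # the 50K validation set evenly (50000 / (8 * 250) = 25 eval steps).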
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_mlperf_like')
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    self._run_and_report_benchmark(top_1_min=0.736)

  def benchmark_xla_8_gpu_fp16_dynamic(self):
    """Test Keras model with XLA, eager, dist_strat, 8 GPUs, dynamic fp16."""
    self._setup()
    FLAGS.num_gpus = 8
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 256 * 8
    FLAGS.train_epochs = 90
    FLAGS.epochs_between_evals = 10
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_fp16_dynamic')
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
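    # 'dynamic' adjusts the fp16 loss scale automatically during training,
    # instead of using a fixed loss scale.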
    FLAGS.loss_scale = 'dynamic'
    # Thread tuning to improve performance.
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.use_tensor_lr = True
    self._run_and_report_benchmark(top_1_min=0.736)

  def _run_and_report_benchmark(self,
                                top_1_min=MIN_TOP_1_ACCURACY,
                                top_1_max=MAX_TOP_1_ACCURACY):
    start_time_sec = time.time()
    stats = resnet_imagenet_main.run(flags.FLAGS)
    wall_time_sec = time.time() - start_time_sec

    super(Resnet50KerasAccuracy, self)._report_benchmark(
        stats,
        wall_time_sec,
        top_1_min=top_1_min,
        top_1_max=top_1_max,
        total_batch_size=FLAGS.batch_size,
        log_steps=100)

  def _get_model_dir(self, folder_name):
    return os.path.join(self.output_dir, folder_name)


class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
  """Resnet50 benchmarks."""

  def __init__(self, output_dir=None, default_flags=None):
    flag_methods = [resnet_imagenet_main.define_imagenet_keras_flags]

    super(Resnet50KerasBenchmarkBase, self).__init__(
        output_dir=output_dir,
        flag_methods=flag_methods,
        default_flags=default_flags)

  def _run_and_report_benchmark(self):
    start_time_sec = time.time()
    stats = resnet_imagenet_main.run(FLAGS)
    wall_time_sec = time.time() - start_time_sec
    # Number of logged step time entries that are excluded from the
    # performance report. We keep results from the last 100 batches here.
    warmup = (FLAGS.train_steps - 100) // FLAGS.log_steps
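    # For example, the synthetic and real-data benchmarks below default to
    # train_steps=110 and log_steps=10, so warmup = (110 - 100) // 10 = 1 and
    # only the final 100 steps are measured.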

    super(Resnet50KerasBenchmarkBase, self)._report_benchmark(
        stats,
        wall_time_sec,
        total_batch_size=FLAGS.batch_size,
        log_steps=FLAGS.log_steps,
        warmup=warmup)

  def benchmark_1_gpu_no_dist_strat(self):
    """Test Keras model with 1 GPU, no distribution strategy."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_no_dist_strat')
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_tweaked(self):
    """Test with 1 GPU, no distribution strategy, and manual tuning."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.explicit_gpu_placement = True
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.set_learning_phase_to_train = False
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_1_gpu_no_dist_strat_tweaked')
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_run_eagerly(self):
    """Test Keras model with 1 GPU, no distribution strategy, run eagerly."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_1_gpu_no_dist_strat_run_eagerly')
    FLAGS.batch_size = 64
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_run_eagerly_tweaked(self):
    """Test with 1 GPU, no distribution strategy, run eagerly, and tuning."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = True
    FLAGS.explicit_gpu_placement = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_1_gpu_no_dist_strat_run_eagerly_tweaked')
    FLAGS.batch_size = 64
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_run_eagerly_fp16(self):
    """Test with 1 GPU, no distribution strategy, fp16, run eagerly."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_1_gpu_no_dist_strat_run_eagerly_fp16')
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_run_eagerly_fp16_tweaked(self):
    """Test with 1 GPU, no dist strat, fp16, run eagerly, and tuning."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = True
    FLAGS.explicit_gpu_placement = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_1_gpu_no_dist_strat_run_eagerly_fp16_tweaked')
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

  def benchmark_graph_1_gpu_no_dist_strat(self):
    """Test Keras model in legacy graph mode with 1 GPU, no dist strat."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = False
    FLAGS.distribution_strategy = 'off'
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu_no_dist_strat')
    FLAGS.batch_size = 96  # BatchNorm is less efficient in legacy graph mode
                           # due to its reliance on v1 cond.
    self._run_and_report_benchmark()

  def benchmark_1_gpu(self):
    """Test Keras model with 1 GPU."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu')
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu(self):
    """Test Keras model with XLA and 1 GPU."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu')
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

  def benchmark_1_gpu_fp16(self):
    """Test Keras model with 1 GPU and fp16."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_fp16')
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = 256
    self._run_and_report_benchmark()

  def benchmark_1_gpu_fp16_dynamic(self):
    """Test Keras model with 1 GPU, fp16, and dynamic loss scaling."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_fp16_dynamic')
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = 256
    FLAGS.loss_scale = 'dynamic'
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu_fp16(self):
    """Test Keras model with XLA, 1 GPU and fp16."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_fp16')
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = 256
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu_fp16_tweaked(self):
    """Test Keras model with XLA, 1 GPU, fp16, and manual config tuning."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_fp16_tweaked')
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = 256
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu_fp16_dynamic(self):
    """Test Keras model with XLA, 1 GPU, fp16, and dynamic loss scaling."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_1_gpu_fp16_dynamic')
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = 256
    FLAGS.loss_scale = 'dynamic'
    self._run_and_report_benchmark()

  def benchmark_graph_1_gpu(self):
    """Test Keras model in legacy graph mode with 1 GPU."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = False
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu')
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

  def benchmark_graph_xla_1_gpu(self):
    """Test Keras model in legacy graph mode with XLA and 1 GPU."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = False
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_xla_1_gpu')
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

  def benchmark_graph_1_gpu_fp16(self):
    """Test Keras model in legacy graph mode with 1 GPU and fp16."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = False
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu_fp16')
    FLAGS.batch_size = 256
    self._run_and_report_benchmark()

  def benchmark_graph_xla_1_gpu_fp16(self):
    """Test Keras model in legacy graph mode with 1 GPU, fp16 and XLA."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = False
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_xla_1_gpu_fp16')
    FLAGS.batch_size = 256
    self._run_and_report_benchmark()

  def benchmark_graph_xla_1_gpu_fp16_tweaked(self):
    """Test Keras model in legacy graph with 1 GPU, fp16, XLA, and tuning."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = False
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_graph_xla_1_gpu_fp16_tweaked')
    FLAGS.dtype = 'fp16'
    FLAGS.batch_size = 256
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    self._run_and_report_benchmark()

  def benchmark_8_gpu(self):
    """Test Keras model with 8 GPUs."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu')
    FLAGS.batch_size = 128 * 8  # 8 GPUs
    self._run_and_report_benchmark()

  def benchmark_8_gpu_tweaked(self):
    """Test Keras model with manual config tuning and 8 GPUs."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_tweaked')
    FLAGS.batch_size = 128 * 8  # 8 GPUs
    FLAGS.use_tensor_lr = True
    FLAGS.datasets_num_private_threads = 14
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu(self):
    """Test Keras model with XLA and 8 GPUs."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu')
    FLAGS.batch_size = 128 * 8  # 8 GPUs
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_tweaked(self):
    """Test Keras model with manual config tuning, 8 GPUs, and XLA."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_tweaked')
    FLAGS.batch_size = 128 * 8
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.datasets_num_private_threads = 24
    self._run_and_report_benchmark()

  def benchmark_8_gpu_fp16(self):
    """Test Keras model with 8 GPUs and fp16."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    self._run_and_report_benchmark()

  def benchmark_8_gpu_fp16_tweaked(self):
    """Test Keras model with 8 GPUs, fp16, and manual config tuning."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_fp16_tweaked')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    self._run_and_report_benchmark()

  def benchmark_8_gpu_fp16_dynamic_tweaked(self):
    """Test Keras model with 8 GPUs, fp16, dynamic loss scaling, and tuning."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_8_gpu_fp16_dynamic_tweaked')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.loss_scale = 'dynamic'
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_fp16(self):
    """Test Keras model with XLA, 8 GPUs and fp16."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_fp16')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_fp16_tweaked(self):
    """Test Keras model with manual config tuning, XLA, 8 GPUs and fp16."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_xla_8_gpu_fp16_tweaked')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.datasets_num_private_threads = 48
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_fp16_tweaked_delay_measure(self):
    """Test with manual config tuning, XLA, 8 GPUs and fp16.

    Delays measurement until performance has stabilized on 96 vCPU platforms.
    """
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_xla_8_gpu_fp16_tweaked_delay_measure')
    FLAGS.batch_size = 256 * 8
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.train_steps = 310
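    # With the warmup computation in _run_and_report_benchmark, only the
    # final 100 of these 310 steps are measured, skipping startup noise.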
    self._run_and_report_benchmark()

  def benchmark_xla_8_gpu_fp16_dynamic_tweaked(self):
    """Test Keras model with config tuning, XLA, 8 GPUs and dynamic fp16."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_xla_8_gpu_fp16_dynamic_tweaked')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.loss_scale = 'dynamic'
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.datasets_num_private_threads = 48
    self._run_and_report_benchmark()

  def benchmark_graph_8_gpu(self):
    """Test Keras model in legacy graph mode with 8 GPUs."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = False
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_8_gpu')
    FLAGS.batch_size = 128 * 8  # 8 GPUs
    self._run_and_report_benchmark()

  def benchmark_graph_xla_8_gpu(self):
    """Test Keras model in legacy graph mode with XLA and 8 GPUs."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = False
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_xla_8_gpu')
    FLAGS.batch_size = 128 * 8  # 8 GPUs
    self._run_and_report_benchmark()

  def benchmark_graph_8_gpu_fp16(self):
    """Test Keras model in legacy graph mode with 8 GPUs and fp16."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = False
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_8_gpu_fp16')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    self._run_and_report_benchmark()

  def benchmark_graph_xla_8_gpu_fp16(self):
    """Test Keras model in legacy graph mode with XLA, 8 GPUs and fp16."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = False
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_xla_8_gpu_fp16')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    self._run_and_report_benchmark()

  def benchmark_graph_8_gpu_fp16_tweaked(self):
    """Test Keras model in legacy graph mode with tuning, 8 GPUs, and fp16."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = False
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_8_gpu_fp16_tweaked')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    self._run_and_report_benchmark()

  def benchmark_graph_xla_8_gpu_fp16_tweaked(self):
    """Test Keras model in legacy graph mode with tuning, XLA, 8 GPUs, fp16."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = False
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_graph_xla_8_gpu_fp16_tweaked')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    self._run_and_report_benchmark()

  def benchmark_graph_xla_8_gpu_fp16_tweaked_delay_measure(self):
    """Test in legacy graph mode with manual config tuning, XLA, 8 GPUs, fp16.

    Delays measurement until performance has stabilized on 96 vCPU platforms.
    """
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = False
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_graph_xla_8_gpu_fp16_tweaked_delay_measure')
    FLAGS.batch_size = 256 * 8
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.train_steps = 310
    self._run_and_report_benchmark()

  def benchmark_graph_8_gpu_fp16_dynamic_tweaked(self):
    """Test graph Keras with config tuning, 8 GPUs and dynamic fp16."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = False
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_graph_8_gpu_fp16_dynamic_tweaked')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.loss_scale = 'dynamic'
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    self._run_and_report_benchmark()

  def benchmark_graph_xla_8_gpu_fp16_dynamic_tweaked(self):
    """Test graph Keras with config tuning, XLA, 8 GPUs and dynamic fp16."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = False
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_graph_xla_8_gpu_fp16_dynamic_tweaked')
    FLAGS.batch_size = 256 * 8  # 8 GPUs
    FLAGS.use_tensor_lr = True
    FLAGS.loss_scale = 'dynamic'
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    self._run_and_report_benchmark()

  def fill_report_object(self, stats):
    super(Resnet50KerasBenchmarkBase, self).fill_report_object(
        stats,
        total_batch_size=FLAGS.batch_size,
        log_steps=FLAGS.log_steps)


class Resnet50KerasBenchmarkSynth(Resnet50KerasBenchmarkBase):
  """Resnet50 synthetic benchmark tests."""

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    def_flags = {}
    def_flags['skip_eval'] = True
    def_flags['report_accuracy_metrics'] = False
    def_flags['use_synthetic_data'] = True
    def_flags['train_steps'] = 110
    def_flags['log_steps'] = 10

    super(Resnet50KerasBenchmarkSynth, self).__init__(
        output_dir=output_dir, default_flags=def_flags)


class Resnet50KerasBenchmarkReal(Resnet50KerasBenchmarkBase):
  """Resnet50 real data benchmark tests."""

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    def_flags = {}
    def_flags['skip_eval'] = True
    def_flags['report_accuracy_metrics'] = False
    def_flags['data_dir'] = os.path.join(root_data_dir, 'imagenet')
    def_flags['train_steps'] = 110
    def_flags['log_steps'] = 10

    super(Resnet50KerasBenchmarkReal, self).__init__(
        output_dir=output_dir, default_flags=def_flags)


class TrivialKerasBenchmarkReal(keras_benchmark.KerasBenchmark):
  """Trivial model with real data benchmark tests."""

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    flag_methods = [resnet_imagenet_main.define_imagenet_keras_flags]

    def_flags = {}
    def_flags['use_trivial_model'] = True
    def_flags['skip_eval'] = True
    def_flags['report_accuracy_metrics'] = False
    def_flags['use_tensor_lr'] = True
    def_flags['dtype'] = 'fp16'
    def_flags['data_dir'] = os.path.join(root_data_dir, 'imagenet')
    def_flags['train_steps'] = 600
    def_flags['log_steps'] = 100
    def_flags['distribution_strategy'] = 'default'

    super(TrivialKerasBenchmarkReal, self).__init__(
        output_dir=output_dir,
        flag_methods=flag_methods,
        default_flags=def_flags)

  def _run_and_report_benchmark(self):
    start_time_sec = time.time()
    stats = resnet_imagenet_main.run(FLAGS)
    wall_time_sec = time.time() - start_time_sec

    super(TrivialKerasBenchmarkReal, self)._report_benchmark(
        stats,
        wall_time_sec,
        total_batch_size=FLAGS.batch_size,
        log_steps=FLAGS.log_steps)

  def benchmark_8_gpu_warmup(self):
    """Dummy test that runs over an epoch to warm up the machine."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = True
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_warmup')
    FLAGS.batch_size = 256 * 8
    FLAGS.train_steps = 700
    self._run_and_report_benchmark()

  def benchmark_1_gpu(self):
    """Test trivial Keras model (input pipeline) with 1 GPU."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu')
    FLAGS.batch_size = 256
    self._run_and_report_benchmark()

  def benchmark_graph_1_gpu(self):
    """Test trivial Keras model (input pipeline) with 1 GPU."""
    self._setup()

    FLAGS.num_gpus = 1
    FLAGS.enable_eager = False
    FLAGS.enable_xla = True
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu')
    FLAGS.batch_size = 256
    self._run_and_report_benchmark()

  def benchmark_8_gpu(self):
    """Test trivial Keras model (input pipeline) with 8 GPUs."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu')
    FLAGS.batch_size = 256 * 8
    self._run_and_report_benchmark()

  def benchmark_8_gpu_tweaked(self):
    """Test trivial Keras model with tuning and 8 GPUs."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = True
    FLAGS.enable_xla = True
    FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_tweaked')
    FLAGS.batch_size = 256 * 8
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.datasets_num_private_threads = 48
    self._run_and_report_benchmark()

  def benchmark_graph_8_gpu(self):
    """Test trivial Keras model in legacy graph mode with 8 GPUs."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = False
    FLAGS.enable_xla = True
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_8_gpu')
    FLAGS.batch_size = 256 * 8
    self._run_and_report_benchmark()

  def benchmark_graph_8_gpu_tweaked(self):
    """Test trivial Keras model in legacy graph mode with tuning and 8 GPUs."""
    self._setup()

    FLAGS.num_gpus = 8
    FLAGS.enable_eager = False
    FLAGS.enable_xla = True
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_8_gpu_tweaked')
    FLAGS.batch_size = 256 * 8
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.datasets_num_private_threads = 48
    self._run_and_report_benchmark()

  def fill_report_object(self, stats):
    super(TrivialKerasBenchmarkReal, self).fill_report_object(
        stats,
        total_batch_size=FLAGS.batch_size,
        log_steps=FLAGS.log_steps)


class Resnet50MultiWorkerKerasBenchmark(Resnet50KerasBenchmarkBase):
  """Resnet50 distributed benchmark tests with multiple workers."""

  def __init__(self, output_dir=None, default_flags=None):
    super(Resnet50MultiWorkerKerasBenchmark, self).__init__(
        output_dir=output_dir, default_flags=default_flags)

  def _benchmark_common(self, eager, num_workers, all_reduce_alg):
    """Common to all benchmarks in this class."""
    self._setup()

    num_gpus = 8
    FLAGS.num_gpus = num_gpus
    FLAGS.dtype = 'fp16'
    FLAGS.enable_eager = eager
    FLAGS.enable_xla = False
    FLAGS.distribution_strategy = 'multi_worker_mirrored'
    FLAGS.use_tensor_lr = True
    FLAGS.tf_gpu_thread_mode = 'gpu_private'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_graph_8_gpu_{}_worker_fp16_{}_tweaked'.format(
            num_workers, all_reduce_alg))
    FLAGS.batch_size = 256 * num_gpus * num_workers
    FLAGS.all_reduce_alg = all_reduce_alg

    self._run_and_report_benchmark()

  def benchmark_graph_8_gpu_1_worker_fp16_ring_tweaked(self):
    """Legacy graph, 8 GPUs per worker, 1 worker, fp16, ring all-reduce."""
    self._benchmark_common(eager=False, num_workers=1, all_reduce_alg='ring')

  def benchmark_graph_8_gpu_1_worker_fp16_nccl_tweaked(self):
    """Legacy graph, 8 GPUs per worker, 1 worker, fp16, nccl all-reduce."""
    self._benchmark_common(eager=False, num_workers=1, all_reduce_alg='nccl')

  def benchmark_graph_8_gpu_2_workers_fp16_ring_tweaked(self):
    """Legacy graph, 8 GPUs per worker, 2 workers, fp16, ring all-reduce."""
    self._benchmark_common(eager=False, num_workers=2, all_reduce_alg='ring')

  def benchmark_graph_8_gpu_2_workers_fp16_nccl_tweaked(self):
    """Legacy graph, 8 GPUs per worker, 2 workers, fp16, nccl all-reduce."""
    self._benchmark_common(eager=False, num_workers=2, all_reduce_alg='nccl')

  def benchmark_graph_8_gpu_8_workers_fp16_ring_tweaked(self):
    """Legacy graph, 8 GPUs per worker, 8 workers, fp16, ring all-reduce."""
    self._benchmark_common(eager=False, num_workers=8, all_reduce_alg='ring')

  def benchmark_graph_8_gpu_8_workers_fp16_nccl_tweaked(self):
    """Legacy graph, 8 GPUs per worker, 8 workers, fp16, nccl all-reduce."""
    self._benchmark_common(eager=False, num_workers=8, all_reduce_alg='nccl')

  def benchmark_eager_8_gpu_1_worker_fp16_ring_tweaked(self):
    """Eager, 8 GPUs per worker, 1 worker, fp16, ring all-reduce."""
    self._benchmark_common(eager=True, num_workers=1, all_reduce_alg='ring')

  def benchmark_eager_8_gpu_1_worker_fp16_nccl_tweaked(self):
    """Eager, 8 GPUs per worker, 1 worker, fp16, nccl all-reduce."""
    self._benchmark_common(eager=True, num_workers=1, all_reduce_alg='nccl')

  def benchmark_eager_8_gpu_2_workers_fp16_ring_tweaked(self):
    """Eager, 8 GPUs per worker, 2 workers, fp16, ring all-reduce."""
    self._benchmark_common(eager=True, num_workers=2, all_reduce_alg='ring')

  def benchmark_eager_8_gpu_2_workers_fp16_nccl_tweaked(self):
    """Eager, 8 GPUs per worker, 2 workers, fp16, nccl all-reduce."""
    self._benchmark_common(eager=True, num_workers=2, all_reduce_alg='nccl')

  def benchmark_eager_8_gpu_8_workers_fp16_ring_tweaked(self):
    """Eager, 8 GPUs per worker, 8 workers, fp16, ring all-reduce."""
    self._benchmark_common(eager=True, num_workers=8, all_reduce_alg='ring')

  def benchmark_eager_8_gpu_8_workers_fp16_nccl_tweaked(self):
    """Eager, 8 GPUs per worker, 8 workers, fp16, nccl all-reduce."""
    self._benchmark_common(eager=True, num_workers=8, all_reduce_alg='nccl')


class Resnet50MultiWorkerKerasBenchmarkSynth(Resnet50MultiWorkerKerasBenchmark):
  """Resnet50 multi-worker synthetic data benchmark tests."""

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    def_flags = {}
    def_flags['skip_eval'] = True
    def_flags['report_accuracy_metrics'] = False
    def_flags['use_synthetic_data'] = True
    def_flags['train_steps'] = 110
    def_flags['log_steps'] = 10

    super(Resnet50MultiWorkerKerasBenchmarkSynth, self).__init__(
        output_dir=output_dir, default_flags=def_flags)


class Resnet50MultiWorkerKerasBenchmarkReal(Resnet50MultiWorkerKerasBenchmark):
  """Resnet50 multi-worker real data benchmark tests."""

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    def_flags = {}
    def_flags['skip_eval'] = True
    def_flags['report_accuracy_metrics'] = False
    def_flags['data_dir'] = os.path.join(root_data_dir, 'imagenet')
    def_flags['train_steps'] = 110
    def_flags['log_steps'] = 10

    super(Resnet50MultiWorkerKerasBenchmarkReal, self).__init__(
        output_dir=output_dir, default_flags=def_flags)


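# When run directly, individual benchmarks are typically selected via the
# --benchmarks regex filter of the tf.test benchmark harness (assuming the
# standard TF benchmark tooling, e.g. under PerfZero), for example:
#   python keras_imagenet_benchmark.py \
#     --benchmarks=Resnet50KerasBenchmarkSynth.benchmark_1_gpu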
if __name__ == '__main__':
  tf.test.main()