# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes Keras benchmarks and accuracy tests."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import time
from absl import flags
import tensorflow as tf  # pylint: disable=g-bad-import-order

from official.benchmark import keras_benchmark
from official.utils.testing import benchmark_wrappers
from official.vision.image_classification import resnet_cifar_main

# Bounds passed as top_1_min / top_1_max when accuracy results are reported.
MIN_TOP_1_ACCURACY = 0.929
MAX_TOP_1_ACCURACY = 0.938

# Shared absl flag registry; benchmark methods assign their settings to it.
FLAGS = flags.FLAGS
# Directory name joined onto root_data_dir to locate the CIFAR-10 data.
CIFAR_DATA_DIR_NAME = 'cifar-10-batches-bin'
Toby Boyd's avatar
Toby Boyd committed
34

35

Toby Boyd's avatar
Toby Boyd committed
36
37
class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark):
  """Accuracy tests for ResNet56 Keras CIFAR-10.

  Every benchmark method configures batch size 128, 182 train epochs and
  fp32, then reports through _run_and_report_benchmark, which passes
  MIN_TOP_1_ACCURACY and MAX_TOP_1_ACCURACY as the top-1 bounds.
  """

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    """A benchmark class.

    Args:
      output_dir: directory where to output e.g. log files
      root_data_dir: directory under which to look for dataset. When omitted
        (None), the dataset directory name is used as a relative path.
      **kwargs: arbitrary named arguments. This is needed to make the
                constructor forward compatible in case PerfZero provides more
                named arguments before updating the constructor.
    """
    # os.path.join raises TypeError when handed None, which made the
    # documented default unusable; fall back to an empty root instead.
    self.data_dir = os.path.join(root_data_dir or '', CIFAR_DATA_DIR_NAME)
    flag_methods = [resnet_cifar_main.define_cifar_flags]

    super(Resnet56KerasAccuracy, self).__init__(
        output_dir=output_dir, flag_methods=flag_methods)

  def _configure_accuracy_flags(self, benchmark_name, num_gpus):
    """Runs _setup() and sets the flags shared by every accuracy benchmark.

    Args:
      benchmark_name: name of the calling benchmark method; passed to
        _get_model_dir to choose the model directory.
      num_gpus: value assigned to FLAGS.num_gpus.
    """
    self._setup()
    FLAGS.num_gpus = num_gpus
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 128
    FLAGS.train_epochs = 182
    FLAGS.model_dir = self._get_model_dir(benchmark_name)
    FLAGS.dtype = 'fp32'

  def benchmark_graph_1_gpu(self):
    """Test keras based model with Keras fit and distribution strategies."""
    self._configure_accuracy_flags('benchmark_graph_1_gpu', num_gpus=1)
    self._run_and_report_benchmark()

  def benchmark_1_gpu(self):
    """Test keras based model with eager and distribution strategies."""
    self._configure_accuracy_flags('benchmark_1_gpu', num_gpus=1)
    FLAGS.enable_eager = True
    self._run_and_report_benchmark()

  def benchmark_cpu(self):
    """Test keras based model on CPU."""
    self._configure_accuracy_flags('benchmark_cpu', num_gpus=0)
    FLAGS.enable_eager = True
    FLAGS.data_format = 'channels_last'
    self._run_and_report_benchmark()

  def benchmark_cpu_no_dist_strat(self):
    """Test keras based model on CPU without distribution strategies."""
    self._configure_accuracy_flags('benchmark_cpu_no_dist_strat', num_gpus=0)
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.data_format = 'channels_last'
    self._run_and_report_benchmark()

  def benchmark_cpu_no_dist_strat_run_eagerly(self):
    """Test keras based model on CPU w/forced eager and no dist_strat."""
    self._configure_accuracy_flags(
        'benchmark_cpu_no_dist_strat_run_eagerly', num_gpus=0)
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.data_format = 'channels_last'
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat(self):
    """Test keras based model with eager and no dist strat."""
    self._configure_accuracy_flags('benchmark_1_gpu_no_dist_strat', num_gpus=1)
    # Only this accuracy benchmark turns on explicit GPU placement.
    FLAGS.explicit_gpu_placement = True
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'off'
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_run_eagerly(self):
    """Test keras based model w/forced eager and no dist_strat."""
    self._configure_accuracy_flags(
        'benchmark_1_gpu_no_dist_strat_run_eagerly', num_gpus=1)
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = True
    FLAGS.distribution_strategy = 'off'
    self._run_and_report_benchmark()

  def benchmark_graph_1_gpu_no_dist_strat(self):
    """Test keras based model with Keras fit but not distribution strategies."""
    self._configure_accuracy_flags(
        'benchmark_graph_1_gpu_no_dist_strat', num_gpus=1)
    FLAGS.distribution_strategy = 'off'
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_force_v1_path(self):
    """No dist strat forced v1 execution path."""
    self._configure_accuracy_flags(
        'benchmark_1_gpu_no_dist_strat_force_v1_path', num_gpus=1)
    FLAGS.distribution_strategy = 'off'
    FLAGS.enable_eager = True
    FLAGS.force_v2_in_keras_compile = False
    self._run_and_report_benchmark()

  def benchmark_2_gpu(self):
    """Test keras based model with eager and distribution strategies."""
    self._configure_accuracy_flags('benchmark_2_gpu', num_gpus=2)
    FLAGS.enable_eager = True
    self._run_and_report_benchmark()

  def benchmark_graph_2_gpu(self):
    """Test keras based model with Keras fit and distribution strategies."""
    self._configure_accuracy_flags('benchmark_graph_2_gpu', num_gpus=2)
    self._run_and_report_benchmark()

  @benchmark_wrappers.enable_runtime_flags
  def _run_and_report_benchmark(self):
    """Runs resnet_cifar_main with FLAGS and reports stats and wall time.

    The report includes the module-level top-1 accuracy bounds and a fixed
    log_steps of 100.
    """
    start_time_sec = time.time()
    stats = resnet_cifar_main.run(FLAGS)
    wall_time_sec = time.time() - start_time_sec

    super(Resnet56KerasAccuracy, self)._report_benchmark(
        stats,
        wall_time_sec,
        top_1_min=MIN_TOP_1_ACCURACY,
        top_1_max=MAX_TOP_1_ACCURACY,
        total_batch_size=FLAGS.batch_size,
        log_steps=100)


class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
  """Short performance tests for ResNet56 via Keras and CIFAR-10."""

  def __init__(self, output_dir=None, default_flags=None):
    """Registers the CIFAR flag definitions and forwards to the parent.

    Args:
      output_dir: forwarded unchanged to the parent constructor.
      default_flags: forwarded unchanged to the parent constructor.
    """
    super(Resnet56KerasBenchmarkBase, self).__init__(
        output_dir=output_dir,
        flag_methods=[resnet_cifar_main.define_cifar_flags],
        default_flags=default_flags)

  @benchmark_wrappers.enable_runtime_flags
  def _run_and_report_benchmark(self):
    """Runs resnet_cifar_main with FLAGS and reports stats and wall time."""
    began_at_sec = time.time()
    run_stats = resnet_cifar_main.run(FLAGS)
    elapsed_sec = time.time() - began_at_sec

    super(Resnet56KerasBenchmarkBase, self)._report_benchmark(
        run_stats,
        elapsed_sec,
        total_batch_size=FLAGS.batch_size,
        log_steps=FLAGS.log_steps)

  def _configure_flags(self, benchmark_name, num_gpus, batch_size=128):
    """Runs _setup() and sets the flags every benchmark here assigns.

    Args:
      benchmark_name: name of the calling benchmark method; passed to
        _get_model_dir to choose the model directory.
      num_gpus: value assigned to FLAGS.num_gpus.
      batch_size: value assigned to FLAGS.batch_size.
    """
    self._setup()
    FLAGS.num_gpus = num_gpus
    FLAGS.batch_size = batch_size
    FLAGS.model_dir = self._get_model_dir(benchmark_name)

  def benchmark_1_gpu(self):
    """Benchmark on 1 GPU."""
    self._configure_flags('benchmark_1_gpu', num_gpus=1)
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    self._run_and_report_benchmark()

  def benchmark_1_gpu_xla(self):
    """Benchmark on 1 GPU with XLA turned on."""
    self._configure_flags('benchmark_1_gpu_xla', num_gpus=1)
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = False
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    self._run_and_report_benchmark()

  def benchmark_1_gpu_force_v1_path(self):
    """Benchmark on 1 GPU with the v1 execution path forced."""
    self._configure_flags('benchmark_1_gpu_force_v1_path', num_gpus=1)
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.force_v2_in_keras_compile = False
    self._run_and_report_benchmark()

  def benchmark_graph_1_gpu(self):
    """Benchmark on 1 GPU with eager disabled."""
    self._configure_flags('benchmark_graph_1_gpu', num_gpus=1)
    FLAGS.enable_eager = False
    FLAGS.run_eagerly = False
    FLAGS.distribution_strategy = 'default'
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat(self):
    """Benchmark on 1 GPU with distribution strategy off."""
    self._configure_flags('benchmark_1_gpu_no_dist_strat', num_gpus=1)
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'off'
    self._run_and_report_benchmark()

  def benchmark_graph_1_gpu_no_dist_strat(self):
    """Benchmark on 1 GPU, eager disabled, distribution strategy off."""
    self._configure_flags('benchmark_graph_1_gpu_no_dist_strat', num_gpus=1)
    FLAGS.enable_eager = False
    FLAGS.distribution_strategy = 'off'
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_run_eagerly(self):
    """Benchmark on 1 GPU, distribution strategy off, run_eagerly forced."""
    self._configure_flags(
        'benchmark_1_gpu_no_dist_strat_run_eagerly', num_gpus=1)
    FLAGS.dtype = 'fp32'
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = True
    FLAGS.distribution_strategy = 'off'
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_force_v1_path(self):
    """Benchmark on 1 GPU, distribution strategy off, v1 path forced."""
    self._configure_flags(
        'benchmark_1_gpu_no_dist_strat_force_v1_path', num_gpus=1)
    FLAGS.dtype = 'fp32'
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.force_v2_in_keras_compile = False
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_force_v1_path_run_eagerly(self):
    """Benchmark on 1 GPU with v1 path forced and run_eagerly forced."""
    self._configure_flags(
        'benchmark_1_gpu_no_dist_strat_force_v1_path_run_eagerly', num_gpus=1)
    FLAGS.dtype = 'fp32'
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.force_v2_in_keras_compile = False
    self._run_and_report_benchmark()

  def benchmark_2_gpu(self):
    """Benchmark on 2 GPUs."""
    self._configure_flags(
        'benchmark_2_gpu', num_gpus=2, batch_size=128 * 2)  # 2 GPUs
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = False
    FLAGS.distribution_strategy = 'default'
    self._run_and_report_benchmark()

  def benchmark_graph_2_gpu(self):
    """Benchmark on 2 GPUs with eager disabled."""
    self._configure_flags(
        'benchmark_graph_2_gpu', num_gpus=2, batch_size=128 * 2)  # 2 GPUs
    FLAGS.enable_eager = False
    FLAGS.run_eagerly = False
    FLAGS.distribution_strategy = 'default'
    self._run_and_report_benchmark()

  def benchmark_cpu(self):
    """Benchmark on CPU."""
    self._configure_flags('benchmark_cpu', num_gpus=0)
    FLAGS.enable_eager = True
    FLAGS.data_format = 'channels_last'
    self._run_and_report_benchmark()

  def benchmark_graph_cpu(self):
    """Benchmark on CPU with eager disabled."""
    self._configure_flags('benchmark_graph_cpu', num_gpus=0)
    FLAGS.enable_eager = False
    FLAGS.data_format = 'channels_last'
    self._run_and_report_benchmark()

  def benchmark_cpu_no_dist_strat_run_eagerly(self):
    """Benchmark on CPU, distribution strategy off, run_eagerly forced."""
    self._configure_flags(
        'benchmark_cpu_no_dist_strat_run_eagerly', num_gpus=0)
    FLAGS.distribution_strategy = 'off'
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = True
    FLAGS.data_format = 'channels_last'
    self._run_and_report_benchmark()

  def benchmark_cpu_no_dist_strat(self):
    """Benchmark on CPU with distribution strategy off."""
    self._configure_flags('benchmark_cpu_no_dist_strat', num_gpus=0)
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.data_format = 'channels_last'
    self._run_and_report_benchmark()

  def benchmark_cpu_no_dist_strat_force_v1_path(self):
    """Benchmark on CPU, distribution strategy off, v1 path forced."""
    self._configure_flags(
        'benchmark_cpu_no_dist_strat_force_v1_path', num_gpus=0)
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.data_format = 'channels_last'
    FLAGS.force_v2_in_keras_compile = False
    self._run_and_report_benchmark()

  def benchmark_graph_cpu_no_dist_strat(self):
    """Benchmark on CPU, eager disabled, distribution strategy off."""
    self._configure_flags('benchmark_graph_cpu_no_dist_strat', num_gpus=0)
    FLAGS.enable_eager = False
    FLAGS.distribution_strategy = 'off'
    FLAGS.data_format = 'channels_last'
    self._run_and_report_benchmark()

Toby Boyd's avatar
Toby Boyd committed
433
434
435
436

class Resnet56KerasBenchmarkSynth(Resnet56KerasBenchmarkBase):
  """Synthetic benchmarks for ResNet56 and Keras."""

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    """Builds the synthetic-data default flags and forwards to the base.

    Args:
      output_dir: forwarded unchanged to the base constructor.
      root_data_dir: accepted but not used by this class.
      **kwargs: accepted but not used by this class.
    """
    synthetic_defaults = {
        'skip_eval': True,
        'use_synthetic_data': True,
        'train_steps': 110,
        'log_steps': 10,
    }

    super(Resnet56KerasBenchmarkSynth, self).__init__(
        output_dir=output_dir, default_flags=synthetic_defaults)
Toby Boyd's avatar
Toby Boyd committed
446
447
448
449
450


class Resnet56KerasBenchmarkReal(Resnet56KerasBenchmarkBase):
  """Real data benchmarks for ResNet56 and Keras."""

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    """Builds the real-data default flags and forwards to the base.

    Args:
      output_dir: forwarded unchanged to the base constructor.
      root_data_dir: directory under which to look for the dataset. When
        omitted (None), the dataset directory name is used as a relative
        path.
      **kwargs: accepted but not used by this class.
    """
    # os.path.join raises TypeError when handed None, which made the
    # documented default unusable; fall back to an empty root instead.
    data_dir = os.path.join(root_data_dir or '', CIFAR_DATA_DIR_NAME)

    default_flags = {
        'skip_eval': True,
        'data_dir': data_dir,
        'train_steps': 110,
        'log_steps': 10,
    }

    super(Resnet56KerasBenchmarkReal, self).__init__(
        output_dir=output_dir, default_flags=default_flags)
460
461
462
463


# When executed as a script (not imported), delegate to tf.test.main().
if __name__ == '__main__':
  tf.test.main()