# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
Toby Boyd's avatar
Toby Boyd committed
15
"""Executes Keras benchmarks and accuracy tests."""
Shining Sun's avatar
Shining Sun committed
16
17
from __future__ import absolute_import
from __future__ import division
Toby Boyd's avatar
Toby Boyd committed
18
19
from __future__ import print_function

20
import os
21
import time
Toby Boyd's avatar
Toby Boyd committed
22
from absl import flags
23
import tensorflow as tf  # pylint: disable=g-bad-import-order
Toby Boyd's avatar
Toby Boyd committed
24

25
26
from official.benchmark import keras_benchmark
from official.vision.image_classification import resnet_cifar_main
27

28
MIN_TOP_1_ACCURACY = 0.929
29
MAX_TOP_1_ACCURACY = 0.938
Toby Boyd's avatar
Toby Boyd committed
30

Toby Boyd's avatar
Toby Boyd committed
31
FLAGS = flags.FLAGS
32
CIFAR_DATA_DIR_NAME = 'cifar-10-batches-bin'
Toby Boyd's avatar
Toby Boyd committed
33

34

Toby Boyd's avatar
Toby Boyd committed
35
36
class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark):
  """Accuracy tests for ResNet56 Keras CIFAR-10.

  Every benchmark in this class trains for 182 epochs with a batch size of
  128 and reports its result against the MIN_TOP_1_ACCURACY /
  MAX_TOP_1_ACCURACY bounds.
  """

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    """A benchmark class.

    Args:
      output_dir: directory where to output e.g. log files
      root_data_dir: directory under which to look for dataset. It is joined
                with CIFAR_DATA_DIR_NAME via os.path.join, so in practice a
                path must be supplied even though the default is None.
      **kwargs: arbitrary named arguments. This is needed to make the
                constructor forward compatible in case PerfZero provides more
                named arguments before updating the constructor.
    """

    self.data_dir = os.path.join(root_data_dir, CIFAR_DATA_DIR_NAME)
    flag_methods = [resnet_cifar_main.define_cifar_flags]

    super(Resnet56KerasAccuracy, self).__init__(
        output_dir=output_dir, flag_methods=flag_methods)

  def benchmark_graph_1_gpu(self):
    """Test keras based model with Keras fit and distribution strategies."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 128
    FLAGS.train_epochs = 182
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu')
    FLAGS.dtype = 'fp32'
    self._run_and_report_benchmark()

  def benchmark_1_gpu(self):
    """Test keras based model with eager and distribution strategies."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 128
    FLAGS.train_epochs = 182
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu')
    FLAGS.dtype = 'fp32'
    FLAGS.enable_eager = True
    self._run_and_report_benchmark()

  def benchmark_cpu(self):
    """Test keras based model on CPU."""
    self._setup()
    FLAGS.num_gpus = 0
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 128
    FLAGS.train_epochs = 182
    FLAGS.model_dir = self._get_model_dir('benchmark_cpu')
    FLAGS.dtype = 'fp32'
    FLAGS.enable_eager = True
    FLAGS.data_format = 'channels_last'
    self._run_and_report_benchmark()

  def benchmark_cpu_no_dist_strat(self):
    """Test keras based model on CPU without distribution strategies."""
    self._setup()
    FLAGS.num_gpus = 0
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 128
    FLAGS.train_epochs = 182
    FLAGS.model_dir = self._get_model_dir('benchmark_cpu_no_dist_strat')
    FLAGS.dtype = 'fp32'
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.data_format = 'channels_last'
    self._run_and_report_benchmark()

  def benchmark_cpu_no_dist_strat_run_eagerly(self):
    """Test keras based model on CPU w/forced eager and no dist_strat."""
    self._setup()
    FLAGS.num_gpus = 0
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 128
    FLAGS.train_epochs = 182
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_cpu_no_dist_strat_run_eagerly')
    FLAGS.dtype = 'fp32'
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.data_format = 'channels_last'
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat(self):
    """Test keras based model with eager and no dist strat."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.explicit_gpu_placement = True
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 128
    FLAGS.train_epochs = 182
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_no_dist_strat')
    FLAGS.dtype = 'fp32'
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'off'
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_run_eagerly(self):
    """Test keras based model w/forced eager and no dist_strat."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 128
    FLAGS.train_epochs = 182
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_1_gpu_no_dist_strat_run_eagerly')
    FLAGS.dtype = 'fp32'
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = True
    FLAGS.distribution_strategy = 'off'
    self._run_and_report_benchmark()

  def benchmark_graph_1_gpu_no_dist_strat(self):
    """Test keras based model with Keras fit but not distribution strategies."""
    self._setup()
    FLAGS.distribution_strategy = 'off'
    FLAGS.num_gpus = 1
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 128
    FLAGS.train_epochs = 182
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu_no_dist_strat')
    FLAGS.dtype = 'fp32'
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_force_v1_path(self):
    """No dist strat forced v1 execution path."""
    self._setup()
    FLAGS.distribution_strategy = 'off'
    FLAGS.num_gpus = 1
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 128
    FLAGS.train_epochs = 182
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_1_gpu_no_dist_strat_force_v1_path')
    FLAGS.dtype = 'fp32'
    FLAGS.enable_eager = True
    FLAGS.force_v2_in_keras_compile = False
    self._run_and_report_benchmark()

  def benchmark_2_gpu(self):
    """Test keras based model with eager and distribution strategies."""
    self._setup()
    FLAGS.num_gpus = 2
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 128
    FLAGS.train_epochs = 182
    FLAGS.model_dir = self._get_model_dir('benchmark_2_gpu')
    FLAGS.dtype = 'fp32'
    FLAGS.enable_eager = True
    self._run_and_report_benchmark()

  def benchmark_graph_2_gpu(self):
    """Test keras based model with Keras fit and distribution strategies."""
    self._setup()
    FLAGS.num_gpus = 2
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 128
    FLAGS.train_epochs = 182
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_2_gpu')
    FLAGS.dtype = 'fp32'
    self._run_and_report_benchmark()

  def _run_and_report_benchmark(self):
    """Runs the CIFAR training with the current FLAGS and reports the result.

    Wall time is measured around the `resnet_cifar_main.run` call. The
    returned stats are forwarded to the parent class `_report_benchmark`
    together with the accepted top-1 accuracy bounds, the configured batch
    size and a fixed `log_steps` of 100.
    """
    start_time_sec = time.time()
    stats = resnet_cifar_main.run(FLAGS)
    wall_time_sec = time.time() - start_time_sec

    super(Resnet56KerasAccuracy, self)._report_benchmark(
        stats,
        wall_time_sec,
        top_1_min=MIN_TOP_1_ACCURACY,
        top_1_max=MAX_TOP_1_ACCURACY,
        total_batch_size=FLAGS.batch_size,
        log_steps=100)


class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
  """Short performance tests for ResNet56 via Keras and CIFAR-10."""

  def __init__(self, output_dir=None, default_flags=None):
    """Registers the CIFAR flag definitions and forwards to the parent class.

    Args:
      output_dir: passed through unchanged to the parent constructor.
      default_flags: passed through unchanged to the parent constructor;
        subclasses in this file supply a dict of flag name to value.
    """
    flag_methods = [resnet_cifar_main.define_cifar_flags]

    super(Resnet56KerasBenchmarkBase, self).__init__(
        output_dir=output_dir,
        flag_methods=flag_methods,
        default_flags=default_flags)

  def _run_and_report_benchmark(self):
    """Runs the CIFAR training with the current FLAGS and reports the result.

    Wall time is measured around the `resnet_cifar_main.run` call. The
    returned stats are forwarded to the parent class `_report_benchmark`
    with the configured batch size and `FLAGS.log_steps`.
    """
    start_time_sec = time.time()
    stats = resnet_cifar_main.run(FLAGS)
    wall_time_sec = time.time() - start_time_sec

    super(Resnet56KerasBenchmarkBase, self)._report_benchmark(
        stats,
        wall_time_sec,
        total_batch_size=FLAGS.batch_size,
        log_steps=FLAGS.log_steps)

  def benchmark_1_gpu(self):
    """Test 1 gpu."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu')
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

  def benchmark_1_gpu_xla(self):
    """Test 1 gpu with xla enabled."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = False
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_xla')
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

  def benchmark_1_gpu_force_v1_path(self):
    """Test 1 gpu using forced v1 execution path."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_force_v1_path')
    FLAGS.batch_size = 128
    FLAGS.force_v2_in_keras_compile = False
    self._run_and_report_benchmark()

  def benchmark_graph_1_gpu(self):
    """Test 1 gpu graph."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.enable_eager = False
    FLAGS.run_eagerly = False
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu')
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat(self):
    """Test 1 gpu without distribution strategies."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_no_dist_strat')
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

  def benchmark_graph_1_gpu_no_dist_strat(self):
    """Test 1 gpu graph mode without distribution strategies."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.enable_eager = False
    FLAGS.distribution_strategy = 'off'
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu_no_dist_strat')
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_run_eagerly(self):
    """Test 1 gpu without distribution strategy and forced eager."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.batch_size = 128
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_1_gpu_no_dist_strat_run_eagerly')
    FLAGS.dtype = 'fp32'
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = True
    FLAGS.distribution_strategy = 'off'
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_force_v1_path(self):
    """No dist strat but forced v1 execution path."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.batch_size = 128
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_1_gpu_no_dist_strat_force_v1_path')
    FLAGS.dtype = 'fp32'
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.force_v2_in_keras_compile = False
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_force_v1_path_run_eagerly(self):
    """Forced v1 execution path and forced eager."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.batch_size = 128
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_1_gpu_no_dist_strat_force_v1_path_run_eagerly')
    FLAGS.dtype = 'fp32'
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.force_v2_in_keras_compile = False
    self._run_and_report_benchmark()

  def benchmark_2_gpu(self):
    """Test 2 gpu."""
    self._setup()
    FLAGS.num_gpus = 2
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = False
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_2_gpu')
    FLAGS.batch_size = 128 * 2  # 2 GPUs
    self._run_and_report_benchmark()

  def benchmark_graph_2_gpu(self):
    """Test 2 gpu graph mode."""
    self._setup()
    FLAGS.num_gpus = 2
    FLAGS.enable_eager = False
    FLAGS.run_eagerly = False
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_2_gpu')
    FLAGS.batch_size = 128 * 2  # 2 GPUs
    self._run_and_report_benchmark()

  def benchmark_cpu(self):
    """Test cpu."""
    self._setup()
    FLAGS.num_gpus = 0
    FLAGS.enable_eager = True
    FLAGS.model_dir = self._get_model_dir('benchmark_cpu')
    FLAGS.batch_size = 128
    FLAGS.data_format = 'channels_last'
    self._run_and_report_benchmark()

  def benchmark_graph_cpu(self):
    """Test cpu graph mode."""
    self._setup()
    FLAGS.num_gpus = 0
    FLAGS.enable_eager = False
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_cpu')
    FLAGS.batch_size = 128
    FLAGS.data_format = 'channels_last'
    self._run_and_report_benchmark()

  def benchmark_cpu_no_dist_strat_run_eagerly(self):
    """Test cpu without distribution strategy and forced eager."""
    self._setup()
    FLAGS.num_gpus = 0
    FLAGS.distribution_strategy = 'off'
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = True
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_cpu_no_dist_strat_run_eagerly')
    FLAGS.batch_size = 128
    FLAGS.data_format = 'channels_last'
    self._run_and_report_benchmark()

  def benchmark_cpu_no_dist_strat(self):
    """Test cpu without distribution strategies."""
    self._setup()
    FLAGS.num_gpus = 0
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.model_dir = self._get_model_dir('benchmark_cpu_no_dist_strat')
    FLAGS.batch_size = 128
    FLAGS.data_format = 'channels_last'
    self._run_and_report_benchmark()

  def benchmark_cpu_no_dist_strat_force_v1_path(self):
    """Test cpu without dist strat and force v1 in model.compile."""
    self._setup()
    FLAGS.num_gpus = 0
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_cpu_no_dist_strat_force_v1_path')
    FLAGS.batch_size = 128
    FLAGS.data_format = 'channels_last'
    FLAGS.force_v2_in_keras_compile = False
    self._run_and_report_benchmark()

  def benchmark_graph_cpu_no_dist_strat(self):
    """Test cpu graph mode without distribution strategies."""
    self._setup()
    FLAGS.num_gpus = 0
    FLAGS.enable_eager = False
    FLAGS.distribution_strategy = 'off'
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_cpu_no_dist_strat')
    FLAGS.batch_size = 128
    FLAGS.data_format = 'channels_last'
    self._run_and_report_benchmark()


class Resnet56KerasBenchmarkSynth(Resnet56KerasBenchmarkBase):
  """Synthetic benchmarks for ResNet56 and Keras."""

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    """Configures the base benchmarks to run on synthetic data.

    Args:
      output_dir: passed through to the base class constructor.
      root_data_dir: accepted but not used by this constructor.
      **kwargs: accepted but not used by this constructor.
    """
    default_flags = {
        'skip_eval': True,
        'use_synthetic_data': True,
        'train_steps': 110,
        'log_steps': 10,
    }

    super(Resnet56KerasBenchmarkSynth, self).__init__(
        output_dir=output_dir, default_flags=default_flags)


class Resnet56KerasBenchmarkReal(Resnet56KerasBenchmarkBase):
  """Real data benchmarks for ResNet56 and Keras."""

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    """Configures the base benchmarks to read CIFAR-10 data from disk.

    Args:
      output_dir: passed through to the base class constructor.
      root_data_dir: directory joined with CIFAR_DATA_DIR_NAME to form the
        `data_dir` default flag. It is passed to os.path.join, so in practice
        a path must be supplied even though the default is None.
      **kwargs: accepted but not used by this constructor.
    """
    default_flags = {
        'skip_eval': True,
        'data_dir': os.path.join(root_data_dir, CIFAR_DATA_DIR_NAME),
        'train_steps': 110,
        'log_steps': 10,
    }

    super(Resnet56KerasBenchmarkReal, self).__init__(
        output_dir=output_dir, default_flags=default_flags)


# Hand control to TensorFlow's test entry point when run as a script.
if __name__ == '__main__':
  tf.test.main()