# keras_cifar_benchmark.py
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes Keras ResNet56 CIFAR-10 benchmarks and accuracy tests."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import time
from absl import flags
import tensorflow as tf  # pylint: disable=g-bad-import-order

from official.benchmark import keras_benchmark
from official.utils.testing import benchmark_wrappers
from official.benchmark.models import resnet_cifar_main

# Bounds handed to the benchmark report as `top_1_min` / `top_1_max` by the
# accuracy tests below.
MIN_TOP_1_ACCURACY = 0.929
MAX_TOP_1_ACCURACY = 0.938

# Process-wide absl flag registry; every benchmark method mutates it before
# launching a run.
FLAGS = flags.FLAGS
# Directory name that is joined onto the caller-supplied root data directory
# to locate the CIFAR-10 data.
CIFAR_DATA_DIR_NAME = 'cifar-10-batches-bin'


class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark):
  """Accuracy tests for ResNet56 Keras CIFAR-10.

  Every `benchmark_*` method follows the same recipe: reset the flags via
  `_setup()`, assign the flags describing the configuration under test, then
  call `_run_and_report_benchmark()`. All configurations train for 182 epochs
  with a batch size of 128 and `fp32`, and are reported against the
  `MIN_TOP_1_ACCURACY` / `MAX_TOP_1_ACCURACY` bounds.
  """

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    """A benchmark class.

    Args:
      output_dir: directory where to output e.g. log files
      root_data_dir: directory under which to look for dataset. Although it
                defaults to None, a value is required in practice: it is
                joined with `CIFAR_DATA_DIR_NAME` through `os.path.join`,
                which raises `TypeError` for None.
      **kwargs: arbitrary named arguments. This is needed to make the
                constructor forward compatible in case PerfZero provides more
                named arguments before updating the constructor. They are
                accepted and ignored.
    """

    self.data_dir = os.path.join(root_data_dir, CIFAR_DATA_DIR_NAME)
    flag_methods = [resnet_cifar_main.define_cifar_flags]

    super(Resnet56KerasAccuracy, self).__init__(
        output_dir=output_dir, flag_methods=flag_methods)

  def _setup(self):
    """Runs the base-class setup, then turns the `use_tensor_lr` flag off."""
    super(Resnet56KerasAccuracy, self)._setup()
    FLAGS.use_tensor_lr = False

  def benchmark_graph_1_gpu(self):
    """Test keras based model with Keras fit and distribution strategies."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 128
    FLAGS.train_epochs = 182
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu')
    FLAGS.dtype = 'fp32'
    self._run_and_report_benchmark()

  def benchmark_1_gpu(self):
    """Test keras based model with eager and distribution strategies."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 128
    FLAGS.train_epochs = 182
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu')
    FLAGS.dtype = 'fp32'
    FLAGS.enable_eager = True
    self._run_and_report_benchmark()

  def benchmark_cpu(self):
    """Test keras based model on CPU."""
    self._setup()
    FLAGS.num_gpus = 0
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 128
    FLAGS.train_epochs = 182
    FLAGS.model_dir = self._get_model_dir('benchmark_cpu')
    FLAGS.dtype = 'fp32'
    FLAGS.enable_eager = True
    FLAGS.data_format = 'channels_last'
    self._run_and_report_benchmark()

  def benchmark_cpu_no_dist_strat(self):
    """Test keras based model on CPU without distribution strategies."""
    self._setup()
    FLAGS.num_gpus = 0
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 128
    FLAGS.train_epochs = 182
    FLAGS.model_dir = self._get_model_dir('benchmark_cpu_no_dist_strat')
    FLAGS.dtype = 'fp32'
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.data_format = 'channels_last'
    self._run_and_report_benchmark()

  def benchmark_cpu_no_dist_strat_run_eagerly(self):
    """Test keras based model on CPU w/forced eager and no dist_strat."""
    self._setup()
    FLAGS.num_gpus = 0
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 128
    FLAGS.train_epochs = 182
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_cpu_no_dist_strat_run_eagerly')
    FLAGS.dtype = 'fp32'
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.data_format = 'channels_last'
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat(self):
    """Test keras based model with eager and no dist strat."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 128
    FLAGS.train_epochs = 182
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_no_dist_strat')
    FLAGS.dtype = 'fp32'
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'off'
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_run_eagerly(self):
    """Test keras based model w/forced eager and no dist_strat."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 128
    FLAGS.train_epochs = 182
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_1_gpu_no_dist_strat_run_eagerly')
    FLAGS.dtype = 'fp32'
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = True
    FLAGS.distribution_strategy = 'off'
    self._run_and_report_benchmark()

  def benchmark_graph_1_gpu_no_dist_strat(self):
    """Test keras based model with Keras fit but not distribution strategies."""
    self._setup()
    FLAGS.distribution_strategy = 'off'
    FLAGS.num_gpus = 1
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 128
    FLAGS.train_epochs = 182
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu_no_dist_strat')
    FLAGS.dtype = 'fp32'
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_force_v1_path(self):
    """No dist strat forced v1 execution path."""
    self._setup()
    FLAGS.distribution_strategy = 'off'
    FLAGS.num_gpus = 1
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 128
    FLAGS.train_epochs = 182
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_1_gpu_no_dist_strat_force_v1_path')
    FLAGS.dtype = 'fp32'
    FLAGS.enable_eager = True
    FLAGS.force_v2_in_keras_compile = False
    self._run_and_report_benchmark()

  def benchmark_2_gpu(self):
    """Test keras based model with eager and distribution strategies."""
    self._setup()
    FLAGS.num_gpus = 2
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 128
    FLAGS.train_epochs = 182
    FLAGS.model_dir = self._get_model_dir('benchmark_2_gpu')
    FLAGS.dtype = 'fp32'
    FLAGS.enable_eager = True
    self._run_and_report_benchmark()

  def benchmark_graph_2_gpu(self):
    """Test keras based model with Keras fit and distribution strategies."""
    self._setup()
    FLAGS.num_gpus = 2
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 128
    FLAGS.train_epochs = 182
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_2_gpu')
    FLAGS.dtype = 'fp32'
    self._run_and_report_benchmark()

  @benchmark_wrappers.enable_runtime_flags
  def _run_and_report_benchmark(self):
    """Runs the CIFAR model with the current flags and reports the result.

    The call to `resnet_cifar_main.run` is timed with wall-clock time. The
    returned stats and elapsed seconds are forwarded to the base class
    reporter together with the accepted top-1 accuracy bounds, the batch
    size taken from the flags, and a fixed `log_steps` of 100.
    """
    start_time_sec = time.time()
    stats = resnet_cifar_main.run(FLAGS)
    wall_time_sec = time.time() - start_time_sec

    super(Resnet56KerasAccuracy, self)._report_benchmark(
        stats,
        wall_time_sec,
        top_1_min=MIN_TOP_1_ACCURACY,
        top_1_max=MAX_TOP_1_ACCURACY,
        total_batch_size=FLAGS.batch_size,
        log_steps=100)


class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
  """Short performance tests for ResNet56 via Keras and CIFAR-10.

  Subclasses supply the `default_flags` that decide which data and how many
  steps are used. Every `benchmark_*` method here resets the flags through
  `_setup()`, assigns the flags for one device / execution-mode combination,
  and then calls `_run_and_report_benchmark()`.
  """

  def __init__(self, output_dir=None, default_flags=None):
    """Registers the CIFAR flag definitions and forwards to the base class.

    Args:
      output_dir: directory where to output e.g. log files
      default_flags: mapping of flag name to value, passed through unchanged
        to the base class constructor.
    """
    flag_methods = [resnet_cifar_main.define_cifar_flags]

    super(Resnet56KerasBenchmarkBase, self).__init__(
        output_dir=output_dir,
        flag_methods=flag_methods,
        default_flags=default_flags)

  @benchmark_wrappers.enable_runtime_flags
  def _run_and_report_benchmark(self):
    """Runs the CIFAR model with the current flags and reports the result.

    The call to `resnet_cifar_main.run` is timed with wall-clock time. The
    returned stats and elapsed seconds are forwarded to the base class
    reporter along with the batch size and `log_steps` read from the flags.
    """
    start_time_sec = time.time()
    stats = resnet_cifar_main.run(FLAGS)
    wall_time_sec = time.time() - start_time_sec

    super(Resnet56KerasBenchmarkBase, self)._report_benchmark(
        stats,
        wall_time_sec,
        total_batch_size=FLAGS.batch_size,
        log_steps=FLAGS.log_steps)

  def benchmark_1_gpu(self):
    """Test 1 gpu."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'one_device'
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu')
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

  def benchmark_1_gpu_xla(self):
    """Test 1 gpu with xla enabled."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = False
    FLAGS.enable_xla = True
    FLAGS.distribution_strategy = 'one_device'
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_xla')
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

  def benchmark_1_gpu_force_v1_path(self):
    """Test 1 gpu using forced v1 execution path."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'one_device'
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_force_v1_path')
    FLAGS.batch_size = 128
    FLAGS.force_v2_in_keras_compile = False
    self._run_and_report_benchmark()

  def benchmark_graph_1_gpu(self):
    """Test 1 gpu graph."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.enable_eager = False
    FLAGS.run_eagerly = False
    FLAGS.distribution_strategy = 'one_device'
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu')
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat(self):
    """Test 1 gpu without distribution strategies."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_no_dist_strat')
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

  def benchmark_graph_1_gpu_no_dist_strat(self):
    """Test 1 gpu graph mode without distribution strategies."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.enable_eager = False
    FLAGS.distribution_strategy = 'off'
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu_no_dist_strat')
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_run_eagerly(self):
    """Test 1 gpu without distribution strategy and forced eager."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.batch_size = 128
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_1_gpu_no_dist_strat_run_eagerly')
    FLAGS.dtype = 'fp32'
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = True
    FLAGS.distribution_strategy = 'off'
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_force_v1_path(self):
    """No dist strat but forced v1 execution path."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.batch_size = 128
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_1_gpu_no_dist_strat_force_v1_path')
    FLAGS.dtype = 'fp32'
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.force_v2_in_keras_compile = False
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_force_v1_path_run_eagerly(self):
    """Forced v1 execution path and forced eager."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.batch_size = 128
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_1_gpu_no_dist_strat_force_v1_path_run_eagerly')
    FLAGS.dtype = 'fp32'
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.force_v2_in_keras_compile = False
    self._run_and_report_benchmark()

  def benchmark_2_gpu(self):
    """Test 2 gpu."""
    self._setup()
    FLAGS.num_gpus = 2
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = False
    FLAGS.distribution_strategy = 'mirrored'
    FLAGS.model_dir = self._get_model_dir('benchmark_2_gpu')
    FLAGS.batch_size = 128 * 2  # 2 GPUs
    self._run_and_report_benchmark()

  def benchmark_graph_2_gpu(self):
    """Test 2 gpu graph mode."""
    self._setup()
    FLAGS.num_gpus = 2
    FLAGS.enable_eager = False
    FLAGS.run_eagerly = False
    FLAGS.distribution_strategy = 'mirrored'
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_2_gpu')
    FLAGS.batch_size = 128 * 2  # 2 GPUs
    self._run_and_report_benchmark()

  def benchmark_cpu(self):
    """Test cpu."""
    self._setup()
    FLAGS.num_gpus = 0
    FLAGS.enable_eager = True
    FLAGS.model_dir = self._get_model_dir('benchmark_cpu')
    FLAGS.batch_size = 128
    FLAGS.data_format = 'channels_last'
    self._run_and_report_benchmark()

  def benchmark_graph_cpu(self):
    """Test cpu graph mode."""
    self._setup()
    FLAGS.num_gpus = 0
    FLAGS.enable_eager = False
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_cpu')
    FLAGS.batch_size = 128
    FLAGS.data_format = 'channels_last'
    self._run_and_report_benchmark()

  def benchmark_cpu_no_dist_strat_run_eagerly(self):
    """Test cpu without distribution strategy and forced eager."""
    self._setup()
    FLAGS.num_gpus = 0
    FLAGS.distribution_strategy = 'off'
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = True
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_cpu_no_dist_strat_run_eagerly')
    FLAGS.batch_size = 128
    FLAGS.data_format = 'channels_last'
    self._run_and_report_benchmark()

  def benchmark_cpu_no_dist_strat(self):
    """Test cpu without distribution strategies."""
    self._setup()
    FLAGS.num_gpus = 0
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.model_dir = self._get_model_dir('benchmark_cpu_no_dist_strat')
    FLAGS.batch_size = 128
    FLAGS.data_format = 'channels_last'
    self._run_and_report_benchmark()

  def benchmark_cpu_no_dist_strat_force_v1_path(self):
    """Test cpu without dist strat and force v1 in model.compile."""
    self._setup()
    FLAGS.num_gpus = 0
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_cpu_no_dist_strat_force_v1_path')
    FLAGS.batch_size = 128
    FLAGS.data_format = 'channels_last'
    FLAGS.force_v2_in_keras_compile = False
    self._run_and_report_benchmark()

  def benchmark_graph_cpu_no_dist_strat(self):
    """Test cpu graph mode without distribution strategies."""
    self._setup()
    FLAGS.num_gpus = 0
    FLAGS.enable_eager = False
    FLAGS.distribution_strategy = 'off'
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_cpu_no_dist_strat')
    FLAGS.batch_size = 128
    FLAGS.data_format = 'channels_last'
    self._run_and_report_benchmark()


class Resnet56KerasBenchmarkSynth(Resnet56KerasBenchmarkBase):
  """Synthetic benchmarks for ResNet56 and Keras."""

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    """Configures the base benchmarks to run on synthetic data.

    Args:
      output_dir: directory where to output e.g. log files
      root_data_dir: accepted for signature compatibility with the other
        benchmark classes in this file; not used here.
      **kwargs: arbitrary named arguments; accepted and ignored.
    """
    default_flags = {
        'skip_eval': True,
        'use_synthetic_data': True,
        'train_steps': 110,
        'log_steps': 10,
        'use_tensor_lr': False,
    }

    super(Resnet56KerasBenchmarkSynth, self).__init__(
        output_dir=output_dir, default_flags=default_flags)


class Resnet56KerasBenchmarkReal(Resnet56KerasBenchmarkBase):
  """Real data benchmarks for ResNet56 and Keras."""

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    """Configures the base benchmarks to read data from disk.

    Args:
      output_dir: directory where to output e.g. log files
      root_data_dir: directory under which to look for dataset. Although it
        defaults to None, a value is required in practice: it is joined with
        `CIFAR_DATA_DIR_NAME` through `os.path.join`, which raises
        `TypeError` for None.
      **kwargs: arbitrary named arguments; accepted and ignored.
    """
    default_flags = {
        'skip_eval': True,
        'data_dir': os.path.join(root_data_dir, CIFAR_DATA_DIR_NAME),
        'train_steps': 110,
        'log_steps': 10,
        'use_tensor_lr': False,
    }

    super(Resnet56KerasBenchmarkReal, self).__init__(
        output_dir=output_dir, default_flags=default_flags)


# Script entry point: hand control to TensorFlow's test main when this file is
# executed directly rather than imported.
if __name__ == '__main__':
  tf.test.main()