# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes Keras benchmarks and accuracy tests."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import time
from absl import flags
import tensorflow as tf  # pylint: disable=g-bad-import-order

from official.resnet.keras import keras_benchmark
from official.resnet.keras import keras_cifar_main

# Top-1 accuracy bounds handed to `_report_benchmark` by the accuracy tests.
MIN_TOP_1_ACCURACY = 0.925
MAX_TOP_1_ACCURACY = 0.938

FLAGS = flags.FLAGS
# Name of the directory, under the benchmark's root data dir, that is used as
# the CIFAR-10 `data_dir`.
CIFAR_DATA_DIR_NAME = 'cifar-10-batches-bin'


class Resnet56KerasAccuracy(keras_benchmark.KerasBenchmark):
  """Accuracy tests for ResNet56 Keras CIFAR-10.

  Each `benchmark_*` method calls `self._setup()`, overrides the flags that
  define its configuration (every one uses batch size 128, 182 training epochs
  and `self.data_dir`), and then delegates to `_run_and_report_benchmark`,
  which passes MIN_TOP_1_ACCURACY / MAX_TOP_1_ACCURACY to the base class
  reporter.
  """

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    """A benchmark class.

    Args:
      output_dir: directory where to output e.g. log files
      root_data_dir: directory under which to look for dataset. It is joined
        with CIFAR_DATA_DIR_NAME unconditionally, so a real path has to be
        supplied even though the signature defaults to None.
      **kwargs: arbitrary named arguments. This is needed to make the
                constructor forward compatible in case PerfZero provides more
                named arguments before updating the constructor.
    """

    self.data_dir = os.path.join(root_data_dir, CIFAR_DATA_DIR_NAME)
    flag_methods = [keras_cifar_main.define_cifar_flags]

    super(Resnet56KerasAccuracy, self).__init__(
        output_dir=output_dir, flag_methods=flag_methods)

  def benchmark_graph_1_gpu(self):
    """Test keras based model with Keras fit and distribution strategies."""
    self._setup()
    # NOTE(review): enable_eager is not set in the graph variants of this
    # class; presumably they rely on the flag's value after _setup() - confirm.
    FLAGS.num_gpus = 1
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 128
    FLAGS.train_epochs = 182
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu')
    FLAGS.dtype = 'fp32'
    self._run_and_report_benchmark()

  def benchmark_1_gpu(self):
    """Test keras based model with eager and distribution strategies."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 128
    FLAGS.train_epochs = 182
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu')
    FLAGS.dtype = 'fp32'
    FLAGS.enable_eager = True
    self._run_and_report_benchmark()

  def benchmark_cpu(self):
    """Test keras based model on CPU."""
    self._setup()
    FLAGS.num_gpus = 0
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 128
    FLAGS.train_epochs = 182
    FLAGS.model_dir = self._get_model_dir('benchmark_cpu')
    FLAGS.dtype = 'fp32'
    FLAGS.enable_eager = True
    FLAGS.data_format = 'channels_last'
    self._run_and_report_benchmark()

  def benchmark_cpu_no_dist_strat(self):
    """Test keras based model on CPU without distribution strategies."""
    self._setup()
    FLAGS.num_gpus = 0
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 128
    FLAGS.train_epochs = 182
    FLAGS.model_dir = self._get_model_dir('benchmark_cpu_no_dist_strat')
    FLAGS.dtype = 'fp32'
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.data_format = 'channels_last'
    self._run_and_report_benchmark()

  def benchmark_cpu_no_dist_strat_run_eagerly(self):
    """Test keras based model on CPU w/forced eager and no dist_strat."""
    self._setup()
    FLAGS.num_gpus = 0
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 128
    FLAGS.train_epochs = 182
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_cpu_no_dist_strat_run_eagerly')
    FLAGS.dtype = 'fp32'
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.data_format = 'channels_last'
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat(self):
    """Test keras based model with eager and no dist strat."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.explicit_gpu_placement = True
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 128
    FLAGS.train_epochs = 182
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_no_dist_strat')
    FLAGS.dtype = 'fp32'
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'off'
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_run_eagerly(self):
    """Test keras based model w/forced eager and no dist_strat."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 128
    FLAGS.train_epochs = 182
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_1_gpu_no_dist_strat_run_eagerly')
    FLAGS.dtype = 'fp32'
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = True
    FLAGS.distribution_strategy = 'off'
    self._run_and_report_benchmark()

  def benchmark_2_gpu(self):
    """Test keras based model with eager and distribution strategies."""
    self._setup()
    FLAGS.num_gpus = 2
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 128
    FLAGS.train_epochs = 182
    FLAGS.model_dir = self._get_model_dir('benchmark_2_gpu')
    FLAGS.dtype = 'fp32'
    FLAGS.enable_eager = True
    self._run_and_report_benchmark()

  def benchmark_graph_2_gpu(self):
    """Test keras based model with Keras fit and distribution strategies."""
    self._setup()
    FLAGS.num_gpus = 2
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 128
    FLAGS.train_epochs = 182
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_2_gpu')
    FLAGS.dtype = 'fp32'
    self._run_and_report_benchmark()

  def benchmark_graph_1_gpu_no_dist_strat(self):
    """Test keras based model with Keras fit but not distribution strategies."""
    self._setup()
    FLAGS.distribution_strategy = 'off'
    FLAGS.num_gpus = 1
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 128
    FLAGS.train_epochs = 182
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu_no_dist_strat')
    FLAGS.dtype = 'fp32'
    self._run_and_report_benchmark()

  def _run_and_report_benchmark(self):
    """Runs `keras_cifar_main.run(FLAGS)`, times it and reports the result.

    The wall time covers only the `run` call. The returned stats are forwarded
    to the base class `_report_benchmark` together with the module-level top-1
    accuracy bounds.
    """
    start_time_sec = time.time()
    stats = keras_cifar_main.run(FLAGS)
    wall_time_sec = time.time() - start_time_sec

    super(Resnet56KerasAccuracy, self)._report_benchmark(
        stats,
        wall_time_sec,
        top_1_min=MIN_TOP_1_ACCURACY,
        top_1_max=MAX_TOP_1_ACCURACY,
        total_batch_size=FLAGS.batch_size,
        # Fixed at 100 here rather than read from FLAGS.log_steps.
        log_steps=100)


class Resnet56KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
  """Short performance tests for ResNet56 via Keras and CIFAR-10.

  Each `benchmark_*` method calls `self._setup()`, overrides the flags that
  define its configuration and delegates to `_run_and_report_benchmark`.
  Subclasses provide the `default_flags` passed to the constructor.
  """

  def __init__(self, output_dir=None, default_flags=None):
    """Initializes the benchmark with the CIFAR flag definitions.

    Args:
      output_dir: forwarded unchanged to the base class constructor.
      default_flags: forwarded unchanged to the base class constructor.
    """
    flag_methods = [keras_cifar_main.define_cifar_flags]

    super(Resnet56KerasBenchmarkBase, self).__init__(
        output_dir=output_dir,
        flag_methods=flag_methods,
        default_flags=default_flags)

  def _run_and_report_benchmark(self):
    """Runs `keras_cifar_main.run(FLAGS)`, times it and reports the result.

    The wall time covers only the `run` call. Unlike the accuracy tests, no
    top-1 bounds are passed and `log_steps` is taken from FLAGS.log_steps.
    """
    start_time_sec = time.time()
    stats = keras_cifar_main.run(FLAGS)
    wall_time_sec = time.time() - start_time_sec

    super(Resnet56KerasBenchmarkBase, self)._report_benchmark(
        stats,
        wall_time_sec,
        total_batch_size=FLAGS.batch_size,
        log_steps=FLAGS.log_steps)

  def benchmark_1_gpu(self):
    """Test 1 gpu."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu')
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

  def benchmark_graph_1_gpu(self):
    """Test 1 gpu graph."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.enable_eager = False
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu')
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat(self):
    """Test 1 gpu without distribution strategies."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_no_dist_strat')
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

  def benchmark_graph_1_gpu_no_dist_strat(self):
    """Test 1 gpu graph mode without distribution strategies."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.enable_eager = False
    FLAGS.distribution_strategy = 'off'
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_1_gpu_no_dist_strat')
    FLAGS.batch_size = 128
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_run_eagerly(self):
    """Test 1 gpu without distribution strategy and forced eager."""
    self._setup()
    FLAGS.num_gpus = 1
    FLAGS.batch_size = 128
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_1_gpu_no_dist_strat_run_eagerly')
    FLAGS.dtype = 'fp32'
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = True
    FLAGS.distribution_strategy = 'off'
    self._run_and_report_benchmark()

  def benchmark_2_gpu(self):
    """Test 2 gpu."""
    self._setup()
    FLAGS.num_gpus = 2
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_2_gpu')
    FLAGS.batch_size = 128 * 2  # 2 GPUs
    self._run_and_report_benchmark()

  def benchmark_graph_2_gpu(self):
    """Test 2 gpu graph mode."""
    self._setup()
    FLAGS.num_gpus = 2
    FLAGS.enable_eager = False
    FLAGS.distribution_strategy = 'default'
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_2_gpu')
    FLAGS.batch_size = 128 * 2  # 2 GPUs
    self._run_and_report_benchmark()

  def benchmark_cpu(self):
    """Test cpu."""
    self._setup()
    FLAGS.num_gpus = 0
    FLAGS.enable_eager = True
    FLAGS.model_dir = self._get_model_dir('benchmark_cpu')
    FLAGS.batch_size = 128
    FLAGS.data_format = 'channels_last'
    self._run_and_report_benchmark()

  def benchmark_graph_cpu(self):
    """Test cpu graph mode."""
    self._setup()
    FLAGS.num_gpus = 0
    FLAGS.enable_eager = False
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_cpu')
    FLAGS.batch_size = 128
    FLAGS.data_format = 'channels_last'
    self._run_and_report_benchmark()

  def benchmark_cpu_no_dist_strat_run_eagerly(self):
    """Test cpu without distribution strategy and forced eager."""
    self._setup()
    FLAGS.num_gpus = 0
    FLAGS.distribution_strategy = 'off'
    FLAGS.enable_eager = True
    FLAGS.run_eagerly = True
    FLAGS.model_dir = self._get_model_dir(
        'benchmark_cpu_no_dist_strat_run_eagerly')
    FLAGS.batch_size = 128
    FLAGS.data_format = 'channels_last'
    self._run_and_report_benchmark()

  def benchmark_cpu_no_dist_strat(self):
    """Test cpu without distribution strategies."""
    self._setup()
    FLAGS.num_gpus = 0
    FLAGS.enable_eager = True
    FLAGS.distribution_strategy = 'off'
    FLAGS.model_dir = self._get_model_dir('benchmark_cpu_no_dist_strat')
    FLAGS.batch_size = 128
    FLAGS.data_format = 'channels_last'
    self._run_and_report_benchmark()

  def benchmark_graph_cpu_no_dist_strat(self):
    """Test cpu graph mode without distribution strategies."""
    self._setup()
    FLAGS.num_gpus = 0
    FLAGS.enable_eager = False
    FLAGS.distribution_strategy = 'off'
    FLAGS.model_dir = self._get_model_dir('benchmark_graph_cpu_no_dist_strat')
    FLAGS.batch_size = 128
    FLAGS.data_format = 'channels_last'
    self._run_and_report_benchmark()


class Resnet56KerasBenchmarkSynth(Resnet56KerasBenchmarkBase):
  """Synthetic benchmarks for ResNet56 and Keras."""

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    """Configures the base benchmarks to run on synthetic data.

    Args:
      output_dir: forwarded to the base class constructor.
      root_data_dir: accepted for signature parity with the other benchmark
        classes; not used here.
      **kwargs: accepted and ignored.
    """
    default_flags = {
        'skip_eval': True,
        'use_synthetic_data': True,
        'train_steps': 110,
        'log_steps': 10,
    }

    super(Resnet56KerasBenchmarkSynth, self).__init__(
        output_dir=output_dir, default_flags=default_flags)


class Resnet56KerasBenchmarkReal(Resnet56KerasBenchmarkBase):
  """Real data benchmarks for ResNet56 and Keras."""

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    """Configures the base benchmarks to read data from disk.

    Args:
      output_dir: forwarded to the base class constructor.
      root_data_dir: directory that is joined with CIFAR_DATA_DIR_NAME to form
        the `data_dir` default flag. The join is unconditional, so a real path
        has to be supplied even though the signature defaults to None.
      **kwargs: accepted and ignored.
    """
    default_flags = {
        'skip_eval': True,
        'data_dir': os.path.join(root_data_dir, CIFAR_DATA_DIR_NAME),
        'train_steps': 110,
        'log_steps': 10,
    }

    super(Resnet56KerasBenchmarkReal, self).__init__(
        output_dir=output_dir, default_flags=default_flags)


if __name__ == '__main__':
  # When executed as a script, hand control to the TensorFlow test runner.
  tf.test.main()