keras_imagenet_benchmark.py 9.12 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes Keras benchmarks and accuracy tests."""
from __future__ import print_function

import os
19
import time
20
21
22
23

from absl import flags

from official.resnet import imagenet_main
Toby Boyd's avatar
Toby Boyd committed
24
from official.resnet.keras import keras_benchmark
25
26
27
from official.resnet.keras import keras_common
from official.resnet.keras import keras_imagenet_main

Toby Boyd's avatar
Toby Boyd committed
28
29
# Top-1 accuracy range handed to the reporter as top_1_min / top_1_max by the
# accuracy benchmarks in this file.
MIN_TOP_1_ACCURACY = 0.76
MAX_TOP_1_ACCURACY = 0.77
30

Toby Boyd's avatar
Toby Boyd committed
31
# Module-level alias for absl's flag container; the benchmarks below assign
# flag values on it before each run.
FLAGS = flags.FLAGS
32
33


Toby Boyd's avatar
Toby Boyd committed
34
35
class Resnet50KerasAccuracy(keras_benchmark.KerasBenchmark):
  """Accuracy benchmarks for ResNet50 in Keras on ImageNet.

  Both benchmarks run 90 epochs on 8 GPUs and pass MIN_TOP_1_ACCURACY and
  MAX_TOP_1_ACCURACY to the base class reporter as the top-1 range.
  """

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    """Registers the flag definitions and records the ImageNet location.

    Args:
      output_dir: directory where to output e.g. log files
      root_data_dir: directory under which to look for dataset; the data is
        read from its 'imagenet' subdirectory. The None default is kept for
        signature compatibility only, a real path is required.
      **kwargs: arbitrary named arguments. This is needed to make the
        constructor forward compatible in case PerfZero provides more
        named arguments before updating the constructor.

    Raises:
      ValueError: if root_data_dir is None.
    """
    # os.path.join(None, ...) fails with an unhelpful error, so reject the
    # missing directory up front with a message naming the argument.
    if root_data_dir is None:
      raise ValueError(
          'root_data_dir must be provided; ImageNet data is read from '
          '<root_data_dir>/imagenet.')

    flag_methods = [
        keras_common.define_keras_flags, imagenet_main.define_imagenet_flags
    ]

    self.data_dir = os.path.join(root_data_dir, 'imagenet')
    super(Resnet50KerasAccuracy, self).__init__(
        output_dir=output_dir, flag_methods=flag_methods)

  def benchmark_graph_8_gpu(self):
    """Test Keras model with Keras fit/dist_strat and 8 GPUs."""
    self._setup()
    self._set_accuracy_flags('benchmark_graph_8_gpu')
    self._run_and_report_benchmark()

  def benchmark_8_gpu(self):
    """Test Keras model with eager, dist_strat and 8 GPUs."""
    self._setup()
    self._set_accuracy_flags('benchmark_8_gpu')
    FLAGS.enable_eager = True
    # Add some thread tunings to improve performance.
    FLAGS.datasets_num_private_threads = 14
    self._run_and_report_benchmark()

  def _set_accuracy_flags(self, test_name):
    """Assigns the flag values shared by both 8 GPU accuracy benchmarks.

    Args:
      test_name: benchmark name, used as the model directory name under
        output_dir.
    """
    FLAGS.num_gpus = 8
    FLAGS.data_dir = self.data_dir
    FLAGS.batch_size = 128 * 8
    FLAGS.train_epochs = 90
    FLAGS.model_dir = self._get_model_dir(test_name)
    FLAGS.dtype = 'fp32'

  def _run_and_report_benchmark(self):
    """Runs keras_imagenet_main with the current flags and reports the stats.

    The wall time of the run is measured here and passed to the base class
    reporter together with the expected top-1 accuracy range.
    """
    start_time_sec = time.time()
    stats = keras_imagenet_main.run(FLAGS)
    wall_time_sec = time.time() - start_time_sec

    super(Resnet50KerasAccuracy, self)._report_benchmark(
        stats,
        wall_time_sec,
        top_1_min=MIN_TOP_1_ACCURACY,
        top_1_max=MAX_TOP_1_ACCURACY,
        total_batch_size=FLAGS.batch_size,
        log_steps=100)

  def _get_model_dir(self, folder_name):
    """Returns the path of folder_name inside self.output_dir."""
    return os.path.join(self.output_dir, folder_name)

Toby Boyd's avatar
Toby Boyd committed
97
98
99
100
101

class Resnet50KerasBenchmarkBase(keras_benchmark.KerasBenchmark):
  """Shared ResNet50 Keras benchmark configurations.

  Subclasses supply the default flags; each benchmark_* method here applies
  the flags for one GPU / eager / XLA combination and runs it.
  """

  def __init__(self, output_dir=None, default_flags=None):
    """Registers the flag definitions and forwards the defaults.

    Args:
      output_dir: directory where to output e.g. log files
      default_flags: default flag values forwarded to the base class (the
        subclasses in this file pass a dict of flag name to value).
    """
    super(Resnet50KerasBenchmarkBase, self).__init__(
        output_dir=output_dir,
        flag_methods=[
            keras_common.define_keras_flags,
            imagenet_main.define_imagenet_flags,
        ],
        default_flags=default_flags)

  def _run_and_report_benchmark(self):
    """Runs keras_imagenet_main with the current flags and reports the stats."""
    started_at = time.time()
    stats = keras_imagenet_main.run(FLAGS)
    elapsed_sec = time.time() - started_at

    super(Resnet50KerasBenchmarkBase, self)._report_benchmark(
        stats,
        elapsed_sec,
        total_batch_size=FLAGS.batch_size,
        log_steps=FLAGS.log_steps)

  def _run_config(self, test_name, num_gpus, batch_size, enable_eager,
                  distribution_strategy='default', enable_xla=None,
                  datasets_num_private_threads=None):
    """Calls _setup(), applies one benchmark configuration and runs it.

    Args:
      test_name: benchmark name, used as the model directory name.
      num_gpus: value for FLAGS.num_gpus.
      batch_size: value for FLAGS.batch_size (total across GPUs).
      enable_eager: value for FLAGS.enable_eager.
      distribution_strategy: value for FLAGS.distribution_strategy.
      enable_xla: value for FLAGS.enable_xla; the flag is left untouched
        when None.
      datasets_num_private_threads: value for the flag of the same name; the
        flag is left untouched when None.
    """
    self._setup()

    FLAGS.num_gpus = num_gpus
    FLAGS.enable_eager = enable_eager
    if enable_xla is not None:
      FLAGS.enable_xla = enable_xla
    FLAGS.distribution_strategy = distribution_strategy
    FLAGS.model_dir = self._get_model_dir(test_name)
    FLAGS.batch_size = batch_size
    if datasets_num_private_threads is not None:
      FLAGS.datasets_num_private_threads = datasets_num_private_threads
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat(self):
    """Test Keras model with 1 GPU, no distribution strategy."""
    self._run_config(
        'benchmark_1_gpu_no_dist_strat',
        num_gpus=1,
        batch_size=128,
        enable_eager=True,
        distribution_strategy='off')

  def benchmark_graph_1_gpu_no_dist_strat(self):
    """Test Keras model in legacy graph mode with 1 GPU, no dist strat."""
    self._run_config(
        'benchmark_graph_1_gpu_no_dist_strat',
        num_gpus=1,
        batch_size=128,
        enable_eager=False,
        distribution_strategy='off')

  def benchmark_1_gpu(self):
    """Test Keras model with 1 GPU."""
    self._run_config(
        'benchmark_1_gpu',
        num_gpus=1,
        batch_size=128,
        enable_eager=True)

  def benchmark_xla_1_gpu(self):
    """Test Keras model with XLA and 1 GPU."""
    self._run_config(
        'benchmark_xla_1_gpu',
        num_gpus=1,
        batch_size=128,
        enable_eager=True,
        enable_xla=True)

  def benchmark_graph_1_gpu(self):
    """Test Keras model in legacy graph mode with 1 GPU."""
    self._run_config(
        'benchmark_graph_1_gpu',
        num_gpus=1,
        batch_size=128,
        enable_eager=False)

  def benchmark_graph_xla_1_gpu(self):
    """Test Keras model in legacy graph mode with XLA and 1 GPU."""
    self._run_config(
        'benchmark_graph_xla_1_gpu',
        num_gpus=1,
        batch_size=128,
        enable_eager=False,
        enable_xla=True)

  def benchmark_8_gpu(self):
    """Test Keras model with 8 GPUs."""
    self._run_config(
        'benchmark_8_gpu',
        num_gpus=8,
        batch_size=128 * 8,  # 8 GPUs
        enable_eager=True)

  def benchmark_8_gpu_tweaked(self):
    """Test Keras model with manual config tuning and 8 GPUs."""
    self._run_config(
        'benchmark_8_gpu_tweaked',
        num_gpus=8,
        batch_size=128 * 8,  # 8 GPUs
        enable_eager=True,
        datasets_num_private_threads=14)

  def benchmark_xla_8_gpu(self):
    """Test Keras model with XLA and 8 GPUs."""
    # TODO(haoyuzhang): Set size to 128 per GPU when multi-GPU XLA OOM is fixed
    self._run_config(
        'benchmark_xla_8_gpu',
        num_gpus=8,
        batch_size=64 * 8,  # 8 GPUs
        enable_eager=True,
        enable_xla=True)

  def benchmark_graph_8_gpu(self):
    """Test Keras model in legacy graph mode with 8 GPUs."""
    self._run_config(
        'benchmark_graph_8_gpu',
        num_gpus=8,
        batch_size=128 * 8,  # 8 GPUs
        enable_eager=False)

  def benchmark_graph_xla_8_gpu(self):
    """Test Keras model in legacy graph mode with XLA and 8 GPUs."""
    # TODO(haoyuzhang): Set size to 128 per GPU when multi-GPU XLA OOM is fixed
    self._run_config(
        'benchmark_graph_xla_8_gpu',
        num_gpus=8,
        batch_size=64 * 8,  # 8 GPUs
        enable_eager=False,
        enable_xla=True)

  def fill_report_object(self, stats):
    """Forwards stats plus the current batch size and log steps to the base."""
    super(Resnet50KerasBenchmarkBase, self).fill_report_object(
        stats,
        total_batch_size=FLAGS.batch_size,
        log_steps=FLAGS.log_steps)

Toby Boyd's avatar
Toby Boyd committed
256
257
258
259

class Resnet50KerasBenchmarkSynth(Resnet50KerasBenchmarkBase):
  """Resnet50 benchmarks run with synthetic data."""

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    """Configures short, eval-free runs on synthetic data.

    Args:
      output_dir: directory where to output e.g. log files
      root_data_dir: accepted for a uniform constructor signature; not used
        by this class.
      **kwargs: additional named arguments, accepted and ignored.
    """
    synthetic_defaults = {
        'skip_eval': True,
        'use_synthetic_data': True,
        'train_steps': 110,
        'log_steps': 10,
    }

    super(Resnet50KerasBenchmarkSynth, self).__init__(
        output_dir=output_dir, default_flags=synthetic_defaults)
Toby Boyd's avatar
Toby Boyd committed
269
270
271
272
273


class Resnet50KerasBenchmarkReal(Resnet50KerasBenchmarkBase):
  """Resnet50 real data benchmark tests."""

  def __init__(self, output_dir=None, root_data_dir=None, **kwargs):
    """Configures short, eval-free runs on the ImageNet data.

    Args:
      output_dir: directory where to output e.g. log files
      root_data_dir: directory under which to look for dataset; the data is
        read from its 'imagenet' subdirectory. The None default is kept for
        signature compatibility only, a real path is required.
      **kwargs: additional named arguments, accepted and ignored.

    Raises:
      ValueError: if root_data_dir is None.
    """
    # os.path.join(None, ...) fails with an unhelpful error, so reject the
    # missing directory up front with a message naming the argument.
    if root_data_dir is None:
      raise ValueError(
          'root_data_dir must be provided; ImageNet data is read from '
          '<root_data_dir>/imagenet.')

    def_flags = {
        'skip_eval': True,
        'data_dir': os.path.join(root_data_dir, 'imagenet'),
        'train_steps': 110,
        'log_steps': 10,
    }

    super(Resnet50KerasBenchmarkReal, self).__init__(
        output_dir=output_dir, default_flags=def_flags)