# ncf_keras_benchmark.py
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes Keras benchmarks and accuracy tests."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import time

from absl import flags
from absl.testing import flagsaver
import tensorflow as tf  # pylint: disable=g-bad-import-order

from official.recommendation import ncf_common
from official.recommendation import ncf_keras_main
from official.utils.flags import core

# Process-wide absl flag registry; the benchmarks below read and mutate it.
FLAGS = flags.FLAGS

# Sub-directory of the benchmark root data directory that is joined onto
# `root_data_dir` to build the `data_dir` flag default.
NCF_DATA_DIR_NAME = 'movielens_data'


class NCFKerasBenchmarkBase(tf.test.Benchmark):
  """Base class for NCF model benchmark.

  Subclasses pass a dictionary of flag defaults to `__init__`; every benchmark
  method is expected to call `_setup()` first, then adjust individual flags,
  and finally call `_run_and_report_benchmark()`.
  """

  # Snapshot of all flag values, taken by the first `_setup()` call in the
  # process and restored by every later call.
  # NOTE(review): the snapshot is stored on this base class, so it is shared
  # by all subclasses. If two different benchmark classes run in one process,
  # the `default_flags` of the second one are never applied -- confirm that
  # the harness runs each benchmark class in its own process.
  local_flags = None

  def __init__(self,
               output_dir=None,
               default_flags=None,
               **kwargs):
    """Stores the benchmark configuration.

    Args:
      output_dir: Stored as `self.output_dir`; not otherwise used by this
        class.
      default_flags: Dictionary mapping flag names to the default values
        applied by `_setup()`. `None` is treated as an empty dictionary.
      **kwargs: Accepted so callers may pass extra constructor arguments;
        ignored.
    """
    # Let the benchmark base class run whatever initialisation it defines.
    super(NCFKerasBenchmarkBase, self).__init__()
    self.output_dir = output_dir
    self.default_flags = default_flags or {}

  def _setup(self):
    """Sets up and resets flags before each test."""
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.DEBUG)

    if NCFKerasBenchmarkBase.local_flags is not None:
      # Flags were already defined: roll back to the saved snapshot so that
      # changes made by a previous test do not leak into this one.
      flagsaver.restore_flag_values(NCFKerasBenchmarkBase.local_flags)
      return

    # First call in this process: define the flags, apply the benchmark
    # defaults and remember the resulting state.
    ncf_common.define_ncf_flags()
    # Loads flags to get defaults to then override. List cannot be empty.
    flags.FLAGS(['foo'])
    core.set_defaults(**self.default_flags)
    NCFKerasBenchmarkBase.local_flags = flagsaver.save_flag_values()

  def _run_and_report_benchmark(self, hr_at_10_min=0, hr_at_10_max=0):
    """Runs NCF with the current flags and reports the collected metrics.

    `exp_per_second` is always reported. `hr_at_10` and `train_loss` are only
    reported when `hr_at_10_min` is greater than zero.

    Args:
      hr_at_10_min: Value reported as `min_value` of the `hr_at_10` metric.
      hr_at_10_max: Value reported as `max_value` of the `hr_at_10` metric.
    """
    start_time_sec = time.time()
    stats = ncf_keras_main.run_ncf(FLAGS)
    wall_time_sec = time.time() - start_time_sec

    metrics = [{'name': 'exp_per_second',
                'value': stats['avg_exp_per_second']}]

    if hr_at_10_min > 0:
      metrics.extend([
          {'name': 'hr_at_10',
           'value': stats['eval_hit_rate'],
           'min_value': hr_at_10_min,
           'max_value': hr_at_10_max},
          {'name': 'train_loss',
           'value': stats['loss']},
      ])

    self.report_benchmark(iters=-1, wall_time=wall_time_sec, metrics=metrics)


81
class NCFKerasAccuracy(NCFKerasBenchmarkBase):
  """Benchmark NCF model using real data."""

  def __init__(self,
               output_dir=None,
               root_data_dir=None,
               default_flags=None,
               **kwargs):
    """Builds the flag defaults for the real-data benchmarks.

    Args:
      output_dir: Forwarded to the base class.
      root_data_dir: Directory containing the `NCF_DATA_DIR_NAME`
        sub-directory. When `None`, no `data_dir` default is set and the
        flag keeps whatever default its definition provides.
      default_flags: Optional dictionary of flag defaults that override the
        built-in defaults below.
      **kwargs: Forwarded to the base class.
    """
    defaults = {
        'dataset': 'ml-20m',
        'num_gpus': 1,
        'train_epochs': 10,
        'clean': True,
        'batch_size': 99000,
        'learning_rate': 0.00382059,
        'beta1': 0.783529,
        'beta2': 0.909003,
        'epsilon': 1.45439e-07,
        'layers': [256, 256, 128, 64],
        'num_factors': 64,
        'hr_threshold': 0.635,
        'ml_perf': True,
        'use_synthetic_data': False,
    }
    # os.path.join() raises TypeError on None, so only derive the data
    # directory when a root was actually supplied.
    if root_data_dir is not None:
      defaults['data_dir'] = os.path.join(root_data_dir, NCF_DATA_DIR_NAME)
    # Caller-supplied defaults take precedence over the built-in ones.
    if default_flags:
      defaults.update(default_flags)

    super(NCFKerasAccuracy, self).__init__(
        output_dir=output_dir,
        default_flags=defaults,
        **kwargs)

  def _run_and_report_benchmark_mlperf_like(self):
    """Run test and report results.

    Note: MLPerf like tests are not tuned to hit a specific hr@10 value, but
    we want it recorded.
    """
    self._run_and_report_benchmark(hr_at_10_min=0.61)

  def _run_and_report_benchmark(self, hr_at_10_min=0.630, hr_at_10_max=0.645):
    """Run test and report results.

    Note: Target is 0.635, but some runs are below that level. Until we have
    multi-run tests, we have to accept a lower target.

    Args:
      hr_at_10_min: Minimum acceptable hr@10 value.
      hr_at_10_max: Maximum acceptable hr@10 value.
    """
    super(NCFKerasAccuracy, self)._run_and_report_benchmark(
        hr_at_10_min=hr_at_10_min,
        hr_at_10_max=hr_at_10_max)

  def _set_8_gpu_defaults(self):
    """Sets the flag values shared by all 8 GPU MLPerf-like tests."""
    FLAGS.num_gpus = 8
    FLAGS.train_epochs = 17
    FLAGS.batch_size = 1048576
    FLAGS.eval_batch_size = 160000
    FLAGS.learning_rate = 0.0045
    FLAGS.beta1 = 0.25
    FLAGS.beta2 = 0.5
    FLAGS.epsilon = 1e-8

  def benchmark_1_gpu_early_stop(self):
    """1 GPU with early stopping."""
    self._setup()
    FLAGS.early_stopping = True
    self._run_and_report_benchmark()

  def benchmark_1_gpu_force_v1_path_early_stop(self):
    """1 GPU with early stopping and force_v2_in_keras_compile disabled."""
    self._setup()
    FLAGS.early_stopping = True
    FLAGS.force_v2_in_keras_compile = False
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_early_stop(self):
    """1 GPU with early stopping and distribution strategy off."""
    self._setup()
    FLAGS.distribution_strategy = 'off'
    FLAGS.early_stopping = True
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_force_v1_path_early_stop(self):
    """1 GPU, early stopping, no dist_strat, v2 compile path disabled."""
    self._setup()
    FLAGS.distribution_strategy = 'off'
    FLAGS.early_stopping = True
    FLAGS.force_v2_in_keras_compile = False
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_run_eagerly_early_stop(self):
    """1 GPU, early stopping, no dist_strat, run_eagerly enabled."""
    self._setup()
    FLAGS.distribution_strategy = 'off'
    FLAGS.early_stopping = True
    FLAGS.run_eagerly = True
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu_early_stop(self):
    """1 GPU with early stopping and XLA enabled."""
    self._setup()
    FLAGS.early_stopping = True
    FLAGS.enable_xla = True
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu_force_v1_path_early_stop(self):
    """1 GPU, early stopping, XLA enabled, v2 compile path disabled."""
    self._setup()
    FLAGS.early_stopping = True
    FLAGS.enable_xla = True
    FLAGS.force_v2_in_keras_compile = False
    self._run_and_report_benchmark()

  def benchmark_1_gpu_ctl_early_stop(self):
    """1 GPU using CTL with early stopping."""
    self._setup()
    FLAGS.keras_use_ctl = True
    FLAGS.early_stopping = True
    self._run_and_report_benchmark()

  def benchmark_1_gpu_ctl_run_eagerly_early_stop(self):
    """1 GPU using CTL with early stopping and run_eagerly enabled."""
    self._setup()
    FLAGS.keras_use_ctl = True
    FLAGS.early_stopping = True
    FLAGS.run_eagerly = True
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu_ctl_early_stop(self):
    """1 GPU using CTL with early stopping and XLA enabled."""
    self._setup()
    FLAGS.keras_use_ctl = True
    FLAGS.early_stopping = True
    FLAGS.enable_xla = True
    self._run_and_report_benchmark()

  def benchmark_2_gpus_early_stop(self):
    """2 GPUs with early stopping."""
    self._setup()
    FLAGS.early_stopping = True
    FLAGS.num_gpus = 2
    FLAGS.eval_batch_size = 160000
    self._run_and_report_benchmark()

  def benchmark_2_gpus_ctl_early_stop(self):
    """NCF with custom training loop. Works only in TF 2.0."""
    self._setup()
    FLAGS.keras_use_ctl = True
    FLAGS.early_stopping = True
    FLAGS.num_gpus = 2
    FLAGS.eval_batch_size = 160000
    self._run_and_report_benchmark()

  #############################################
  # Tests below with mlperf in the test name are of two types:
  #  1) 1 GPU tests are based on MLPerf 0.5 and the TensorFlow pulled
  #     submission.
  #  2) 8 GPU tests are based on MLPerf 0.5 and use NVIDIA's hyperparameters.
  #
  # The purpose of both is to get a number to compare to existing results. To
  # do this the number of epochs is held constant rather than a race to a
  # given accuracy. The accuracy validation is done by the "early_stop" tests.
  #############################################

  def benchmark_1_gpu_mlperf_like(self):
    """1 GPU using keras fit/compile."""
    self._setup()
    FLAGS.train_epochs = 7
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_1_gpu_no_dist_strat_force_v1_path_mlperf_like(self):
    """1 GPU using compile/fit without dist_strat, v2 compile path disabled."""
    self._setup()
    FLAGS.train_epochs = 7
    FLAGS.distribution_strategy = 'off'
    FLAGS.force_v2_in_keras_compile = False
    # Use the MLPerf-like reporting like every other *_mlperf_like test; the
    # fixed-epoch runs are not tuned for the early-stop accuracy bounds.
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_1_gpu_no_dist_strat_mlperf_like(self):
    """1 GPU using compile/fit without dist_strat."""
    self._setup()
    FLAGS.train_epochs = 7
    FLAGS.distribution_strategy = 'off'
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_1_gpu_no_dist_strat_run_eagerly_mlperf_like(self):
    """1 GPU using compile/fit without dist_strat, run_eagerly enabled."""
    self._setup()
    FLAGS.train_epochs = 7
    FLAGS.distribution_strategy = 'off'
    FLAGS.run_eagerly = True
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_xla_1_gpu_mlperf_like(self):
    """1 GPU using compile/fit with XLA."""
    self._setup()
    FLAGS.train_epochs = 7
    FLAGS.enable_xla = True
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_1_gpu_ctl_mlperf_like(self):
    """1 GPU using CTL."""
    self._setup()
    FLAGS.keras_use_ctl = True
    FLAGS.train_epochs = 7
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_1_gpu_ctl_fp16_mlperf_like(self):
    """1 GPU using CTL with fp16."""
    self._setup()
    FLAGS.keras_use_ctl = True
    FLAGS.train_epochs = 7
    FLAGS.dtype = 'fp16'
    FLAGS.loss_scale = 8192
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_1_gpu_ctl_run_eagerly_mlperf_like(self):
    """1 GPU using CTL with eager and distribution strategy."""
    self._setup()
    FLAGS.keras_use_ctl = True
    FLAGS.run_eagerly = True
    FLAGS.train_epochs = 7
    # Use the MLPerf-like reporting like every other *_mlperf_like test.
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_xla_1_gpu_ctl_mlperf_like(self):
    """1 GPU using CTL with XLA."""
    self._setup()
    FLAGS.keras_use_ctl = True
    FLAGS.enable_xla = True
    FLAGS.train_epochs = 7
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_xla_1_gpu_ctl_fp16_mlperf_like(self):
    """1 GPU using CTL with XLA and fp16."""
    self._setup()
    FLAGS.keras_use_ctl = True
    FLAGS.enable_xla = True
    FLAGS.train_epochs = 7
    FLAGS.dtype = 'fp16'
    FLAGS.loss_scale = 8192
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_8_gpu_mlperf_like(self):
    """8 GPU using keras fit/compile."""
    self._setup()
    self._set_8_gpu_defaults()
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_8_gpu_force_v1_path_mlperf_like(self):
    """8 GPU using keras fit/compile v1 codepath."""
    self._setup()
    self._set_8_gpu_defaults()
    FLAGS.force_v2_in_keras_compile = False
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_8_gpu_ctl_mlperf_like(self):
    """8 GPU using CTL."""
    self._setup()
    FLAGS.keras_use_ctl = True
    self._set_8_gpu_defaults()
    self._run_and_report_benchmark_mlperf_like()


343
class NCFKerasSynth(NCFKerasBenchmarkBase):
  """Benchmark NCF model using synthetic data."""

  def __init__(self,
               output_dir=None,
               default_flags=None,
               **kwargs):
    """Builds the flag defaults for the synthetic-data benchmarks.

    Args:
      output_dir: Forwarded to the base class.
      default_flags: Optional dictionary of flag defaults that override the
        built-in defaults below.
      **kwargs: Forwarded to the base class.
    """
    defaults = {
        'dataset': 'ml-20m',
        'num_gpus': 1,
        'train_epochs': 8,
        'batch_size': 99000,
        'eval_batch_size': 160000,
        'learning_rate': 0.00382059,
        'beta1': 0.783529,
        'beta2': 0.909003,
        'epsilon': 1.45439e-07,
        'layers': [256, 256, 128, 64],
        'num_factors': 64,
        'hr_threshold': 0.635,
        'use_synthetic_data': True,
    }
    # Caller-supplied defaults take precedence over the built-in ones.
    if default_flags:
      defaults.update(default_flags)

    super(NCFKerasSynth, self).__init__(
        output_dir=output_dir,
        default_flags=defaults,
        **kwargs)

  def benchmark_1_gpu(self):
    """1 GPU with synthetic data."""
    self._setup()
    self._run_and_report_benchmark()

  def benchmark_2_gpus(self):
    """2 GPUs with synthetic data."""
    self._setup()
    FLAGS.num_gpus = 2
    self._run_and_report_benchmark()