# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes Keras benchmarks and accuracy tests."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import time

from absl import flags
from absl.testing import flagsaver
import tensorflow as tf  # pylint: disable=g-bad-import-order

from official.recommendation import ncf_common
from official.recommendation import ncf_keras_main
from official.utils.flags import core

# Global absl flag registry. Benchmark methods assign to attributes of this
# object to override individual flag values before each run.
FLAGS = flags.FLAGS
# Sub-directory name that is joined onto `root_data_dir` to build the default
# `data_dir` flag for the real-data benchmarks.
NCF_DATA_DIR_NAME = 'movielens_data'

class NCFKerasBenchmarkBase(tf.test.Benchmark):
  """Shared flag handling and result reporting for NCF Keras benchmarks.

  Subclasses pass a `default_flags` dict to the constructor. Each benchmark
  method is expected to call `_setup()` first, then override individual
  `FLAGS` values, and finally call `_run_and_report_benchmark()`.
  """

  # Snapshot of all flag values, taken by the first `_setup()` call and
  # restored by every later call.
  # NOTE(review): the snapshot is stored on NCFKerasBenchmarkBase itself, so
  # it is shared by all subclasses living in the same process; only the
  # `default_flags` of the first instance to run `_setup()` are applied.
  # Confirm that benchmarks of different subclasses never share a process.
  local_flags = None

  def __init__(self,
               output_dir=None,
               default_flags=None,
               **kwargs):
    """Stores the benchmark configuration on the instance.

    Args:
      output_dir: Saved as `self.output_dir`; not otherwise used by this class.
      default_flags: Dict mapping flag names to default values, applied by
        `_setup()`. A falsy value (None or empty) is replaced by a new empty
        dict.
      **kwargs: Accepted for call compatibility and ignored.
    """
    self.default_flags = default_flags or {}
    self.output_dir = output_dir

  def _setup(self):
    """Sets up and resets flags before each test."""
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.DEBUG)

    snapshot = NCFKerasBenchmarkBase.local_flags
    if snapshot is not None:
      # Flags were already defined earlier; roll back to the saved values so
      # that overrides made by a previous benchmark do not leak into this one.
      flagsaver.restore_flag_values(snapshot)
      return

    # First call: define the NCF flags, apply the defaults and snapshot them.
    ncf_common.define_ncf_flags()
    # Loads flags to get defaults to then override. List cannot be empty.
    flags.FLAGS(['foo'])
    core.set_defaults(**self.default_flags)
    NCFKerasBenchmarkBase.local_flags = flagsaver.save_flag_values()

  def _run_and_report_benchmark(self, hr_at_10_min=0, hr_at_10_max=0):
    """Runs NCF with the current flags and reports wall time and metrics.

    `exp_per_second` is always reported. `hr_at_10` (with its min/max bounds)
    and `train_loss` are only reported when `hr_at_10_min` is greater than 0.

    Args:
      hr_at_10_min: Value reported as `min_value` of the `hr_at_10` metric.
      hr_at_10_max: Value reported as `max_value` of the `hr_at_10` metric.
    """
    began_at_sec = time.time()
    stats = ncf_keras_main.run_ncf(FLAGS)
    elapsed_sec = time.time() - began_at_sec

    metrics = [{'name': 'exp_per_second',
                'value': stats['avg_exp_per_second']}]

    if hr_at_10_min > 0:
      metrics.extend([
          {'name': 'hr_at_10',
           'value': stats['eval_hit_rate'],
           'min_value': hr_at_10_min,
           'max_value': hr_at_10_max},
          {'name': 'train_loss',
           'value': stats['loss']},
      ])

    self.report_benchmark(iters=-1, wall_time=elapsed_sec, metrics=metrics)


class NCFKerasAccuracy(NCFKerasBenchmarkBase):
  """Benchmark NCF model using real data.

  Two families of tests live here:
    * `*_early_stop` tests set `FLAGS.early_stopping` and report hr@10
      against the bounds of `_run_and_report_benchmark`.
    * `*_mlperf_like` tests train for a fixed number of epochs and report
      hr@10 against the lower bound used by
      `_run_and_report_benchmark_mlperf_like`.
  """

  def __init__(self,
               output_dir=None,
               root_data_dir=None,
               default_flags=None,
               **kwargs):
    """Builds the default flag set for the real-data benchmarks.

    Args:
      output_dir: Forwarded unchanged to the base class constructor.
      root_data_dir: Directory containing the `NCF_DATA_DIR_NAME`
        sub-directory. It is handed to `os.path.join`, so it has to be a path
        string; leaving it as None raises TypeError.
      default_flags: Ignored. The defaults are always rebuilt from scratch
        below.
      **kwargs: Forwarded unchanged to the base class constructor.
    """
    default_flags = {}
    default_flags['dataset'] = 'ml-20m'
    default_flags['num_gpus'] = 1
    default_flags['train_epochs'] = 10
    default_flags['clean'] = True
    default_flags['batch_size'] = 99000
    default_flags['learning_rate'] = 0.00382059
    default_flags['beta1'] = 0.783529
    default_flags['beta2'] = 0.909003
    default_flags['epsilon'] = 1.45439e-07
    default_flags['layers'] = [256, 256, 128, 64]
    default_flags['num_factors'] = 64
    default_flags['hr_threshold'] = 0.635
    default_flags['ml_perf'] = True
    default_flags['use_synthetic_data'] = False
    default_flags['data_dir'] = os.path.join(root_data_dir, NCF_DATA_DIR_NAME)

    super(NCFKerasAccuracy, self).__init__(
        output_dir=output_dir,
        default_flags=default_flags,
        **kwargs)

  def _run_and_report_benchmark_mlperf_like(self):
    """Run test and report results.

    Note: MLPerf like tests are not tuned to hit a specific hr@10 value, but
    we want it recorded. The minimum is lowered to 0.61; the maximum keeps the
    default of `_run_and_report_benchmark`.
    """
    self._run_and_report_benchmark(hr_at_10_min=0.61)

  def _run_and_report_benchmark(self, hr_at_10_min=0.630, hr_at_10_max=0.645):
    """Run test and report results.

    Note: Target is 0.635, but some runs are below that level. Until we have
    multi-run tests, we have to accept a lower target.

    Args:
      hr_at_10_min: Minimum acceptable hr@10 value.
      hr_at_10_max: Maximum acceptable hr@10 value.
    """
    super(NCFKerasAccuracy, self)._run_and_report_benchmark(
        hr_at_10_min=hr_at_10_min,
        hr_at_10_max=hr_at_10_max)

  def _set_8_gpu_defaults(self):
    """Applies the flag overrides shared by all 8 GPU mlperf-like tests."""
    FLAGS.num_gpus = 8
    FLAGS.train_epochs = 17
    FLAGS.batch_size = 1048576
    FLAGS.eval_batch_size = 160000
    FLAGS.learning_rate = 0.0045
    FLAGS.beta1 = 0.25
    FLAGS.beta2 = 0.5
    FLAGS.epsilon = 1e-8

  def benchmark_1_gpu_early_stop(self):
    """Default flags with early stopping enabled."""
    self._setup()
    FLAGS.early_stopping = True
    self._run_and_report_benchmark()

  def benchmark_1_gpu_force_v1_path_early_stop(self):
    """Early stopping with `force_v2_in_keras_compile` disabled."""
    self._setup()
    FLAGS.early_stopping = True
    FLAGS.force_v2_in_keras_compile = False
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_early_stop(self):
    """Early stopping with the distribution strategy set to 'off'."""
    self._setup()
    FLAGS.distribution_strategy = 'off'
    FLAGS.early_stopping = True
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_force_v1_path_early_stop(self):
    """Early stopping, no dist strat, `force_v2_in_keras_compile` disabled."""
    self._setup()
    FLAGS.distribution_strategy = 'off'
    FLAGS.early_stopping = True
    FLAGS.force_v2_in_keras_compile = False
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_run_eagerly_early_stop(self):
    """Early stopping, no dist strat, `run_eagerly` enabled."""
    self._setup()
    FLAGS.distribution_strategy = 'off'
    FLAGS.early_stopping = True
    FLAGS.run_eagerly = True
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu_early_stop(self):
    """Early stopping with `enable_xla` set."""
    self._setup()
    FLAGS.early_stopping = True
    FLAGS.enable_xla = True
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu_force_v1_path_early_stop(self):
    """Early stopping, XLA, `force_v2_in_keras_compile` disabled."""
    self._setup()
    FLAGS.early_stopping = True
    FLAGS.enable_xla = True
    FLAGS.force_v2_in_keras_compile = False
    self._run_and_report_benchmark()

  def benchmark_1_gpu_ctl_early_stop(self):
    """Early stopping with `keras_use_ctl` set."""
    self._setup()
    FLAGS.keras_use_ctl = True
    FLAGS.early_stopping = True
    self._run_and_report_benchmark()

  def benchmark_1_gpu_ctl_run_eagerly_early_stop(self):
    """Early stopping with `keras_use_ctl` and `run_eagerly` set."""
    self._setup()
    FLAGS.keras_use_ctl = True
    FLAGS.early_stopping = True
    FLAGS.run_eagerly = True
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu_ctl_early_stop(self):
    """Early stopping with `keras_use_ctl` and `enable_xla` set."""
    self._setup()
    FLAGS.keras_use_ctl = True
    FLAGS.early_stopping = True
    FLAGS.enable_xla = True
    self._run_and_report_benchmark()

  def benchmark_2_gpus_early_stop(self):
    """Early stopping on 2 GPUs with an eval batch size of 160000."""
    self._setup()
    FLAGS.early_stopping = True
    FLAGS.num_gpus = 2
    FLAGS.eval_batch_size = 160000
    self._run_and_report_benchmark()

  def benchmark_2_gpus_ctl_early_stop(self):
    """NCF with custom training loop. Works only in TF 2.0."""
    self._setup()
    FLAGS.keras_use_ctl = True
    FLAGS.early_stopping = True
    FLAGS.num_gpus = 2
    FLAGS.eval_batch_size = 160000
    self._run_and_report_benchmark()

  #############################################
  # Tests below with mlperf in the test name are of two types:
  #  1) 1 GPU tests are based on MLPerf 0.5 and the TensorFlow pulled
  #     submission.
  #  2) 8 GPU tests are based on MLPerf 0.5 and use NVIDIA's hyper parameters.
  #
  # The purpose of both is to get a number to compare to existing results. To
  # do this the number of epochs is held constant rather than a race to a
  # given accuracy. The accuracy validation is done by the "early_stop" tests.
  #############################################

  def benchmark_1_gpu_mlperf_like(self):
    """1 GPU using keras fit/compile."""
    self._setup()
    FLAGS.train_epochs = 7
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_1_gpu_no_dist_strat_force_v1_path_mlperf_like(self):
    """1 GPU using compile/fit without dist_strat, v2 compile path disabled."""
    self._setup()
    FLAGS.train_epochs = 7
    FLAGS.distribution_strategy = 'off'
    FLAGS.force_v2_in_keras_compile = False
    # Report with the mlperf-like bounds, like every other *_mlperf_like test;
    # these fixed-epoch runs are not tuned to reach the early-stop hr@10 floor.
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_1_gpu_no_dist_strat_mlperf_like(self):
    """1 GPU using compile/fit without dist_strat."""
    self._setup()
    FLAGS.train_epochs = 7
    FLAGS.distribution_strategy = 'off'
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_1_gpu_no_dist_strat_run_eagerly_mlperf_like(self):
    """1 GPU without dist_strat and with `run_eagerly` set."""
    self._setup()
    FLAGS.train_epochs = 7
    FLAGS.distribution_strategy = 'off'
    FLAGS.run_eagerly = True
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_xla_1_gpu_mlperf_like(self):
    """1 GPU using compile/fit with XLA."""
    self._setup()
    FLAGS.train_epochs = 7
    FLAGS.enable_xla = True
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_1_gpu_ctl_mlperf_like(self):
    """1 GPU using CTL."""
    self._setup()
    FLAGS.keras_use_ctl = True
    FLAGS.train_epochs = 7
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_1_gpu_ctl_run_eagerly_mlperf_like(self):
    """1 GPU using CTL with eager and distribution strategy."""
    self._setup()
    FLAGS.keras_use_ctl = True
    FLAGS.run_eagerly = True
    FLAGS.train_epochs = 7
    # Report with the mlperf-like bounds, like every other *_mlperf_like test;
    # these fixed-epoch runs are not tuned to reach the early-stop hr@10 floor.
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_xla_1_gpu_ctl_mlperf_like(self):
    """1 GPU using CTL with XLA."""
    self._setup()
    FLAGS.keras_use_ctl = True
    FLAGS.enable_xla = True
    FLAGS.train_epochs = 7
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_8_gpu_mlperf_like(self):
    """8 GPU using keras fit/compile."""
    self._setup()
    self._set_8_gpu_defaults()
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_8_gpu_force_v1_path_mlperf_like(self):
    """8 GPU using keras fit/compile v1 codepath."""
    self._setup()
    self._set_8_gpu_defaults()
    FLAGS.force_v2_in_keras_compile = False
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_8_gpu_ctl_mlperf_like(self):
    """8 GPU using CTL."""
    self._setup()
    FLAGS.keras_use_ctl = True
    self._set_8_gpu_defaults()
    self._run_and_report_benchmark_mlperf_like()


class NCFKerasSynth(NCFKerasBenchmarkBase):
  """Benchmark NCF model using synthetic data.

  These tests use the base class reporter with its default bounds, so only
  wall time and `exp_per_second` are reported.
  """

  def __init__(self,
               output_dir=None,
               default_flags=None,
               **kwargs):
    """Builds the default flag set for the synthetic-data benchmarks.

    Args:
      output_dir: Forwarded unchanged to the base class constructor.
      default_flags: Ignored. The defaults below are always used instead.
      **kwargs: Forwarded unchanged to the base class constructor.
    """
    synth_defaults = {
        'dataset': 'ml-20m',
        'num_gpus': 1,
        'train_epochs': 8,
        'batch_size': 99000,
        'eval_batch_size': 160000,
        'learning_rate': 0.00382059,
        'beta1': 0.783529,
        'beta2': 0.909003,
        'epsilon': 1.45439e-07,
        'layers': [256, 256, 128, 64],
        'num_factors': 64,
        'hr_threshold': 0.635,
        'use_synthetic_data': True,
    }

    super(NCFKerasSynth, self).__init__(
        output_dir=output_dir,
        default_flags=synth_defaults,
        **kwargs)

  def benchmark_1_gpu(self):
    """Runs with the default flags (`num_gpus` defaults to 1)."""
    self._setup()
    self._run_and_report_benchmark()

  def benchmark_2_gpus(self):
    """Runs with the default flags, except `num_gpus` is set to 2."""
    self._setup()
    FLAGS.num_gpus = 2
    self._run_and_report_benchmark()