"src/lib/apis/modelfiles/index.ts" did not exist on "422159477809730c85fadd06ef9dd3cefb3deb32"
ncf_keras_benchmark.py 12 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes Keras benchmarks and accuracy tests."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import time

from absl import flags
from absl.testing import flagsaver
Nimit Nigania's avatar
Nimit Nigania committed
26
27
28
29

# TensorFlow C++ logging knobs. They are assigned here, ahead of the
# `import tensorflow` statement further down (which carries a
# g-bad-import-order suppression).
# NOTE(review): presumably they must be in the environment before TensorFlow
# is loaded so the native runtime picks them up -- confirm before reordering.
os.environ["TF_CPP_VMODULE"] = "meta_optimizer=2"
os.environ["TF_CPP_MIN_VLOG_LEVEL"] = "1"
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "0"
30
31
32
33
34
35
36
import tensorflow as tf  # pylint: disable=g-bad-import-order

from official.recommendation import ncf_common
from official.recommendation import ncf_keras_main
from official.utils.flags import core

# Module-wide alias for the absl flag registry. The benchmark methods below
# assign to its attributes directly and pass it to `ncf_keras_main.run_ncf`.
FLAGS = flags.FLAGS
Toby Boyd's avatar
Toby Boyd committed
37
38
# Name of the sub-directory that NCFKerasAccuracy joins onto `root_data_dir`
# to build its `data_dir` flag default.
NCF_DATA_DIR_NAME = 'movielens_data'

39

40
class NCFKerasBenchmarkBase(tf.test.Benchmark):
  """Shared plumbing for the NCF Keras benchmarks.

  A subclass hands a dict of flag defaults to `__init__`. Each benchmark method
  is then expected to call `_setup()`, adjust `FLAGS` as needed and finish with
  `_run_and_report_benchmark()`.
  """

  # Flag snapshot taken by the first `_setup()` call. It is stored on this
  # base class rather than on the concrete subclass, so all subclasses share it.
  # NOTE(review): as a consequence only the `default_flags` of the first
  # instance that runs `_setup()` in a process are ever applied.
  local_flags = None

  def __init__(self,
               output_dir=None,
               default_flags=None,
               **kwargs):
    """Records the benchmark configuration on the instance.

    Args:
      output_dir: Stored as `self.output_dir`.
      default_flags: Dict of flag name to default value that `_setup()` passes
        to `core.set_defaults`. Any falsy value becomes an empty dict.
      **kwargs: Accepted for call compatibility; not used here.
    """
    self.output_dir = output_dir
    self.default_flags = default_flags if default_flags else {}

  def _setup(self):
    """Brings the absl flags into a known state before a benchmark runs.

    The first call defines the NCF flags, parses a placeholder command line,
    applies `self.default_flags` and snapshots the resulting flag values. Every
    later call only restores that snapshot.
    """
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.DEBUG)

    base = NCFKerasBenchmarkBase
    if base.local_flags is not None:
      # Flags were already defined; just undo whatever the last test changed.
      flagsaver.restore_flag_values(base.local_flags)
      return

    ncf_common.define_ncf_flags()
    # Flags have to be loaded before their defaults can be overridden, and the
    # argument list may not be empty, hence the placeholder entry.
    flags.FLAGS(['foo'])
    core.set_defaults(**self.default_flags)
    base.local_flags = flagsaver.save_flag_values()

  def _run_and_report_benchmark(self, hr_at_10_min=0, hr_at_10_max=0):
    """Runs NCF with the current `FLAGS` and reports the collected metrics.

    `exp_per_second` is always reported. `hr_at_10` (with its bounds) and
    `train_loss` are added only when `hr_at_10_min` is greater than zero.

    Args:
      hr_at_10_min: Reported as `min_value` of the `hr_at_10` metric; a value
        of zero or less suppresses the accuracy metrics entirely.
      hr_at_10_max: Reported as `max_value` of the `hr_at_10` metric.
    """
    began_at = time.time()
    stats = ncf_keras_main.run_ncf(FLAGS)
    wall_time_sec = time.time() - began_at

    reported = [{'name': 'exp_per_second',
                 'value': stats['avg_exp_per_second']}]

    if hr_at_10_min > 0:
      reported.extend([
          {'name': 'hr_at_10',
           'value': stats['eval_hit_rate'],
           'min_value': hr_at_10_min,
           'max_value': hr_at_10_max},
          {'name': 'train_loss',
           'value': stats['loss']},
      ])

    self.report_benchmark(iters=-1, wall_time=wall_time_sec, metrics=reported)
83
84


85
class NCFKerasAccuracy(NCFKerasBenchmarkBase):
  """Benchmark NCF model using real data.

  Two families of tests live here:
    * `*_early_stop` tests enable `FLAGS.early_stopping` and are checked
      against the hr@10 bounds of `_run_and_report_benchmark`.
    * `*_mlperf_like` tests hold the number of epochs constant and report
      through `_run_and_report_benchmark_mlperf_like`.
  """

  def __init__(self,
               output_dir=None,
               root_data_dir=None,
               default_flags=None,
               **kwargs):
    """Builds the flag defaults for the real-data benchmarks.

    Args:
      output_dir: Forwarded to the base class.
      root_data_dir: Directory containing the `NCF_DATA_DIR_NAME` folder. When
        None, the `data_dir` flag default is not overridden.
      default_flags: Optional dict of flag defaults applied on top of the
        defaults defined here, so callers can override individual values.
      **kwargs: Forwarded to the base class.
    """
    accuracy_flags = {
        'dataset': 'ml-20m',
        'num_gpus': 1,
        'train_epochs': 10,
        'clean': True,
        'batch_size': 99000,
        'learning_rate': 0.00382059,
        'beta1': 0.783529,
        'beta2': 0.909003,
        'epsilon': 1.45439e-07,
        'layers': [256, 256, 128, 64],
        'num_factors': 64,
        'hr_threshold': 0.635,
        'ml_perf': True,
        'use_synthetic_data': False,
    }
    # `root_data_dir` defaults to None, which os.path.join rejects; only
    # override the data directory when a root was actually provided.
    if root_data_dir is not None:
      accuracy_flags['data_dir'] = os.path.join(root_data_dir,
                                                NCF_DATA_DIR_NAME)
    # Caller-provided defaults win over the ones defined above instead of
    # being silently discarded.
    if default_flags:
      accuracy_flags.update(default_flags)

    super(NCFKerasAccuracy, self).__init__(
        output_dir=output_dir,
        default_flags=accuracy_flags,
        **kwargs)

  def _run_and_report_benchmark_mlperf_like(self):
    """Run test and report results.

    Note: MLPerf like tests are not tuned to hit a specific hr@10 value, but
    we want it recorded.
    """
    self._run_and_report_benchmark(hr_at_10_min=0.61)

  def _run_and_report_benchmark(self, hr_at_10_min=0.630, hr_at_10_max=0.640):
    """Run test and report results.

    Note: Target is 0.635, but some runs are below that level. Until we have
    multi-run tests, we have to accept a lower target.

    Args:
      hr_at_10_min: Minimum acceptable hr@10 value.
      hr_at_10_max: Maximum acceptable hr@10 value.
    """
    super(NCFKerasAccuracy, self)._run_and_report_benchmark(
        hr_at_10_min=hr_at_10_min,
        hr_at_10_max=hr_at_10_max)

  def _set_8_gpu_mlperf_flags(self):
    """Sets the hyper parameters shared by all 8 GPU mlperf_like tests."""
    FLAGS.num_gpus = 8
    FLAGS.train_epochs = 17
    FLAGS.batch_size = 1048576
    FLAGS.learning_rate = 0.0045
    FLAGS.beta1 = 0.25
    FLAGS.beta2 = 0.5
    FLAGS.epsilon = 1e-8

  def benchmark_1_gpu_early_stop(self):
    """1 GPU with early stopping."""
    self._setup()
    FLAGS.early_stopping = True
    self._run_and_report_benchmark()

  def benchmark_1_gpu_force_v1_path_early_stop(self):
    """1 GPU with early stopping and force_v2_in_keras_compile disabled."""
    self._setup()
    FLAGS.early_stopping = True
    FLAGS.force_v2_in_keras_compile = False
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_early_stop(self):
    """1 GPU with early stopping and distribution strategy off."""
    self._setup()
    FLAGS.distribution_strategy = 'off'
    FLAGS.early_stopping = True
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_force_v1_path_early_stop(self):
    """1 GPU, early stopping, no dist_strat, force_v2_in_keras_compile off."""
    self._setup()
    FLAGS.distribution_strategy = 'off'
    FLAGS.early_stopping = True
    FLAGS.force_v2_in_keras_compile = False
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_run_eagerly_early_stop(self):
    """1 GPU, early stopping, no dist_strat, run_eagerly enabled."""
    self._setup()
    FLAGS.distribution_strategy = 'off'
    FLAGS.early_stopping = True
    FLAGS.run_eagerly = True
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu_early_stop(self):
    """1 GPU with early stopping and XLA enabled."""
    self._setup()
    FLAGS.early_stopping = True
    FLAGS.enable_xla = True
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu_force_v1_path_early_stop(self):
    """1 GPU, early stopping, XLA, force_v2_in_keras_compile disabled."""
    self._setup()
    FLAGS.early_stopping = True
    FLAGS.enable_xla = True
    FLAGS.force_v2_in_keras_compile = False
    self._run_and_report_benchmark()

  def benchmark_1_gpu_ctl_early_stop(self):
    """1 GPU with early stopping using keras_use_ctl."""
    self._setup()
    FLAGS.keras_use_ctl = True
    FLAGS.early_stopping = True
    self._run_and_report_benchmark()

  def benchmark_1_gpu_ctl_run_eagerly_early_stop(self):
    """1 GPU, early stopping, keras_use_ctl and run_eagerly enabled."""
    self._setup()
    FLAGS.keras_use_ctl = True
    FLAGS.early_stopping = True
    FLAGS.run_eagerly = True
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu_ctl_early_stop(self):
    """1 GPU, early stopping, keras_use_ctl and XLA enabled."""
    self._setup()
    FLAGS.keras_use_ctl = True
    FLAGS.early_stopping = True
    FLAGS.enable_xla = True
    self._run_and_report_benchmark()

  def benchmark_2_gpus_early_stop(self):
    """2 GPUs with early stopping."""
    self._setup()
    FLAGS.early_stopping = True
    FLAGS.num_gpus = 2
    self._run_and_report_benchmark()

  def benchmark_2_gpus_ctl_early_stop(self):
    """NCF with custom training loop. Works only in TF 2.0."""
    self._setup()
    FLAGS.keras_use_ctl = True
    FLAGS.early_stopping = True
    FLAGS.num_gpus = 2
    self._run_and_report_benchmark()

  #############################################
  # Tests below with mlperf in the test name are of two types:
  #  1) 1 GPU tests are based on MLPerf 0.5 and the TensorFlow pulled
  #     submission.
  #  2) 8 GPU tests are based on MLPerf 0.5 and use NVIDIA's hyper parameters.
  #
  # The purpose of both is to get a number to compare to existing results. To
  # do this the number of epochs is held constant rather than a race to a given
  # accuracy. The accuracy validation is done by the "early_stop" tests.
  #############################################

  def benchmark_1_gpu_mlperf_like(self):
    """1 GPU using keras fit/compile."""
    self._setup()
    FLAGS.train_epochs = 7
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_1_gpu_no_dist_strat_force_v1_path_mlperf_like(self):
    """1 GPU using compile/fit without dist_strat, v2 compile path disabled."""
    self._setup()
    FLAGS.train_epochs = 7
    FLAGS.distribution_strategy = 'off'
    FLAGS.force_v2_in_keras_compile = False
    # Report through the mlperf_like path like every other fixed-epoch test,
    # rather than against the stricter early_stop accuracy bounds.
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_1_gpu_no_dist_strat_mlperf_like(self):
    """1 GPU using compile/fit without dist_strat."""
    self._setup()
    FLAGS.train_epochs = 7
    FLAGS.distribution_strategy = 'off'
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_1_gpu_no_dist_strat_run_eagerly_mlperf_like(self):
    """1 GPU without dist_strat and with run_eagerly enabled."""
    self._setup()
    FLAGS.train_epochs = 7
    FLAGS.distribution_strategy = 'off'
    FLAGS.run_eagerly = True
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_xla_1_gpu_mlperf_like(self):
    """1 GPU using compile/fit with XLA."""
    self._setup()
    FLAGS.train_epochs = 7
    FLAGS.enable_xla = True
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_1_gpu_ctl_mlperf_like(self):
    """1 GPU using CTL."""
    self._setup()
    FLAGS.keras_use_ctl = True
    FLAGS.train_epochs = 7
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_1_gpu_ctl_run_eagerly_mlperf_like(self):
    """1 GPU using CTL with eager and distribution strategy."""
    self._setup()
    FLAGS.keras_use_ctl = True
    FLAGS.run_eagerly = True
    FLAGS.train_epochs = 7
    # Report through the mlperf_like path like every other fixed-epoch test,
    # rather than against the stricter early_stop accuracy bounds.
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_xla_1_gpu_ctl_mlperf_like(self):
    """1 GPU using CTL with XLA."""
    self._setup()
    FLAGS.keras_use_ctl = True
    FLAGS.enable_xla = True
    FLAGS.train_epochs = 7
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_8_gpu_mlperf_like(self):
    """8 GPU using keras fit/compile."""
    self._setup()
    self._set_8_gpu_mlperf_flags()
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_8_gpu_force_v1_path_mlperf_like(self):
    """8 GPU using keras fit/compile v1 codepath."""
    self._setup()
    self._set_8_gpu_mlperf_flags()
    FLAGS.force_v2_in_keras_compile = False
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_xla_8_gpu_mlperf_like(self):
    """8 GPU using keras fit/compile with XLA."""
    self._setup()
    self._set_8_gpu_mlperf_flags()
    FLAGS.enable_xla = True
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_8_gpu_ctl_mlperf_like(self):
    """8 GPU using CTL."""
    self._setup()
    self._set_8_gpu_mlperf_flags()
    FLAGS.keras_use_ctl = True
    self._run_and_report_benchmark_mlperf_like()

  def benchmark_xla_8_gpu_ctl_mlperf_like(self):
    """8 GPU using CTL with XLA."""
    self._setup()
    self._set_8_gpu_mlperf_flags()
    FLAGS.keras_use_ctl = True
    FLAGS.enable_xla = True
    self._run_and_report_benchmark_mlperf_like()
348
349


350
class NCFKerasSynth(NCFKerasBenchmarkBase):
  """Benchmark NCF model using synthetic data."""

  def __init__(self,
               output_dir=None,
               default_flags=None,
               **kwargs):
    """Builds the flag defaults for the synthetic-data benchmarks.

    Args:
      output_dir: Forwarded to the base class.
      default_flags: Optional dict of flag defaults applied on top of the
        defaults defined here, so callers can override individual values.
      **kwargs: Forwarded to the base class.
    """
    synth_flags = {
        'dataset': 'ml-20m',
        'num_gpus': 1,
        'train_epochs': 8,
        'batch_size': 99000,
        'learning_rate': 0.00382059,
        'beta1': 0.783529,
        'beta2': 0.909003,
        'epsilon': 1.45439e-07,
        'layers': [256, 256, 128, 64],
        'num_factors': 64,
        'hr_threshold': 0.635,
        'use_synthetic_data': True,
    }
    # Caller-provided defaults win over the ones defined above instead of
    # being silently discarded.
    if default_flags:
      synth_flags.update(default_flags)

    super(NCFKerasSynth, self).__init__(
        output_dir=output_dir,
        default_flags=synth_flags,
        **kwargs)

  def benchmark_1_gpu(self):
    """1 GPU with the default synthetic-data flags."""
    self._setup()
    self._run_and_report_benchmark()

  def benchmark_2_gpus(self):
    """2 GPUs with the default synthetic-data flags."""
    self._setup()
    FLAGS.num_gpus = 2
    self._run_and_report_benchmark()