# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes Keras benchmarks and accuracy tests."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import time

from absl import flags
from absl.testing import flagsaver
import tensorflow as tf  # pylint: disable=g-bad-import-order

from official.recommendation import ncf_common
from official.recommendation import ncf_keras_main
from official.utils.flags import core

FLAGS = flags.FLAGS
NCF_DATA_DIR_NAME = 'movielens_data'


class NCFKerasBenchmarkBase(tf.test.Benchmark):
  """Base class for NCF model benchmark."""
  local_flags = None

  def __init__(self,
               output_dir=None,
               default_flags=None,
               **kwargs):
    self.output_dir = output_dir
    self.default_flags = default_flags or {}

  def _setup(self):
    """Sets up and resets flags before each test."""
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.DEBUG)
    if NCFKerasBenchmarkBase.local_flags is None:
      ncf_common.define_ncf_flags()
      # Loads flags to get defaults to then override. List cannot be empty.
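      # The single element acts as a placeholder program name (argv[0]).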
      flags.FLAGS(['foo'])
      core.set_defaults(**self.default_flags)
      saved_flag_values = flagsaver.save_flag_values()
      NCFKerasBenchmarkBase.local_flags = saved_flag_values
    else:
      flagsaver.restore_flag_values(NCFKerasBenchmarkBase.local_flags)

  def _run_and_report_benchmark(self):
    start_time_sec = time.time()
    stats = ncf_keras_main.run_ncf(FLAGS)
    wall_time_sec = time.time() - start_time_sec

    metrics = self._extract_benchmark_report_extras(stats)
    self.report_benchmark(iters=-1, wall_time=wall_time_sec, metrics=metrics)

  def _extract_benchmark_report_extras(self, stats):
    raise NotImplementedError('Not implemented')


class NCFKerasAccuracy(NCFKerasBenchmarkBase):
  """Benchmark NCF model using real data."""

  def __init__(self,
               output_dir=None,
               root_data_dir=None,
               default_flags=None,
               **kwargs):

    default_flags = {}
    default_flags['dataset'] = 'ml-20m'
    default_flags['num_gpus'] = 1
    default_flags['train_epochs'] = 10
    default_flags['clean'] = True
    default_flags['batch_size'] = 99000
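    # Optimizer settings and model architecture (MLP layer sizes, number of
    # MF factors) used as the convergence baseline for these accuracy runs.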
    default_flags['learning_rate'] = 0.00382059
    default_flags['beta1'] = 0.783529
    default_flags['beta2'] = 0.909003
    default_flags['epsilon'] = 1.45439e-07
    default_flags['layers'] = [256, 256, 128, 64]
    default_flags['num_factors'] = 64
    default_flags['hr_threshold'] = 0.635
    default_flags['ml_perf'] = True
    default_flags['use_synthetic_data'] = False
    default_flags['data_dir'] = os.path.join(root_data_dir, NCF_DATA_DIR_NAME)

    super(NCFKerasAccuracy, self).__init__(
        output_dir=output_dir,
        default_flags=default_flags,
        **kwargs)

  def _extract_benchmark_report_extras(self, stats):
    metrics = []
    metrics.append({'name': 'exp_per_second',
                    'value': stats['avg_exp_per_second']})

    # Target is 0.635, but some runs are below that level. Until we have
    # multi-run tests, we have to accept a lower target.
    metrics.append({'name': 'hr_at_10',
                    'value': stats['eval_hit_rate'],
                    'min_value': 0.630,
                    'max_value': 0.640})

    metrics.append({'name': 'train_loss',
                    'value': stats['loss']})

    return metrics

  def benchmark_1_gpu(self):
    self._setup()
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_early_stop(self):
    self._setup()
    FLAGS.distribution_strategy = 'off'
    FLAGS.early_stopping = True
    self._run_and_report_benchmark()

  def benchmark_1_gpu_early_stop(self):
    self._setup()
    FLAGS.early_stopping = True
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_run_eagerly_early_stop(self):
    self._setup()
    FLAGS.distribution_strategy = 'off'
    FLAGS.early_stopping = True
    FLAGS.run_eagerly = True
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu_early_stop(self):
    self._setup()
    FLAGS.early_stopping = True
    FLAGS.enable_xla = True
    self._run_and_report_benchmark()

  def benchmark_1_gpu_ctl(self):
    """NCF with custom training loop. Works only in TF 2.0."""
    self._setup()
    FLAGS.keras_use_ctl = True
    self._run_and_report_benchmark()

  def benchmark_1_gpu_ctl_early_stop(self):
    """NCF with custom training loop. Works only in TF 2.0."""
    self._setup()
    FLAGS.keras_use_ctl = True
    FLAGS.early_stopping = True
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu_ctl_early_stop(self):
    """NCF with custom training loop and XLA. Works only in TF 2.0."""
    self._setup()
    FLAGS.keras_use_ctl = True
    FLAGS.early_stopping = True
    FLAGS.enable_xla = True
    self._run_and_report_benchmark()

  def benchmark_2_gpus(self):
    self._setup()
    FLAGS.num_gpus = 2
    self._run_and_report_benchmark()

  def benchmark_2_gpus_early_stop(self):
    self._setup()
    FLAGS.early_stopping = True
    FLAGS.num_gpus = 2
    self._run_and_report_benchmark()

  def benchmark_2_gpus_ctl(self):
    """NCF with custom training loop. Works only in TF 2.0."""
    self._setup()
    FLAGS.keras_use_ctl = True
    FLAGS.num_gpus = 2
    self._run_and_report_benchmark()

  def benchmark_2_gpus_ctl_early_stop(self):
    """NCF with custom training loop. Works only in TF 2.0."""
    self._setup()
    FLAGS.keras_use_ctl = True
    FLAGS.early_stopping = True
    FLAGS.num_gpus = 2
    self._run_and_report_benchmark()

  def benchmark_1_gpu_ctl_mlperf_like(self):
    """1-GPU test to compare Google implementation with MLPerf 0.5.

       Using similar rules as MLPerf 0.5
       - Using Google's convergence hparams as base for 1-GPU test.
       - Fixed the number of epochs to 7, to remove the perf variance.
       - MLPerf submission consistently converges in 7 epochs.
    """
    self._setup()
    FLAGS.keras_use_ctl = True
    FLAGS.train_epochs = 7
    self._run_and_report_benchmark()

  def benchmark_1_gpu_mlperf_like(self):
    """1-GPU MLPerf like test with compile/fit version."""
    self._setup()
    FLAGS.train_epochs = 7
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_mlperf_like(self):
    """1-GPU MLPerf like test with compile/fit version without dist_strat."""
    self._setup()
    FLAGS.train_epochs = 7
    FLAGS.distribution_strategy = 'off'
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_run_eagerly_mlperf_like(self):
    """1-GPU MLPerf like test with compile/fit, no dist_strat, run eagerly."""
    self._setup()
    FLAGS.train_epochs = 7
    FLAGS.distribution_strategy = 'off'
    FLAGS.run_eagerly = True
    self._run_and_report_benchmark()

  def benchmark_xla_1_gpu_mlperf_like(self):
    """1-GPU MLPerf like test with compile/fit version w/xla."""
    self._setup()
    FLAGS.train_epochs = 7
    FLAGS.enable_xla = True
    self._run_and_report_benchmark()

  def benchmark_8_gpu_ctl_mlperf_like(self):
    """8 GPU test meant to compare Google implementation.

       MLPerf 0.5 top line submission using the
       - hyper-parameters from the winning MLPerf0.5 submission.
       - Using similar rules as MLPerf0.5
       - Fixed epochs to MLPerf submission's convergence on 17 epochs
    """
    self._setup()
    FLAGS.keras_use_ctl = True
    FLAGS.num_gpus = 8
    FLAGS.train_epochs = 17
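    # Batch size and Adam hyperparameters from the winning MLPerf 0.5
    # submission (see docstring above).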
    FLAGS.batch_size = 1048576
    FLAGS.learning_rate = 0.0045
    FLAGS.beta1 = 0.25
    FLAGS.beta2 = 0.5
    FLAGS.epsilon = 1e-8
    self._run_and_report_benchmark()

class NCFKerasSynth(NCFKerasBenchmarkBase):
  """Benchmark NCF model using synthetic data."""

  def __init__(self,
               output_dir=None,
               default_flags=None,
               **kwargs):

    default_flags = {}
    default_flags['dataset'] = 'ml-20m'
    default_flags['num_gpus'] = 1
    default_flags['train_epochs'] = 8
    default_flags['batch_size'] = 99000
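    # Same optimizer and model settings as the accuracy benchmarks; synthetic
    # runs report throughput (exp_per_second) only.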
    default_flags['learning_rate'] = 0.00382059
    default_flags['beta1'] = 0.783529
    default_flags['beta2'] = 0.909003
    default_flags['epsilon'] = 1.45439e-07
    default_flags['layers'] = [256, 256, 128, 64]
    default_flags['num_factors'] = 64
    default_flags['hr_threshold'] = 0.635
    default_flags['use_synthetic_data'] = True

    super(NCFKerasSynth, self).__init__(
        output_dir=output_dir,
        default_flags=default_flags,
        **kwargs)

  def _extract_benchmark_report_extras(self, stats):
    metrics = []
    metrics.append({'name': 'exp_per_second',
                    'value': stats['avg_exp_per_second']})
    return metrics

  def benchmark_1_gpu(self):
    self._setup()
    self._run_and_report_benchmark()

  def benchmark_2_gpus(self):
    self._setup()
    FLAGS.num_gpus = 2
    self._run_and_report_benchmark()