# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes Keras benchmarks and accuracy tests."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import time

from absl import flags
from absl.testing import flagsaver
import tensorflow as tf  # pylint: disable=g-bad-import-order

from official.recommendation import ncf_common
from official.recommendation import ncf_keras_main
from official.utils.flags import core

FLAGS = flags.FLAGS
NCF_DATA_DIR_NAME = 'movielens_data'
class NCFKerasBenchmarkBase(tf.test.Benchmark):
  """Shared harness for NCF Keras benchmarks.

  Subclasses provide default flag values and implement
  `_extract_benchmark_report_extras` to turn the stats dict returned by
  `ncf_keras_main.run_ncf` into reportable metrics.
  """

  # Snapshot of parsed + overridden flag values, taken on the first
  # `_setup` call and restored on every later one so each test starts
  # from the same baseline.
  local_flags = None

  def __init__(self,
               output_dir=None,
               default_flags=None,
               **kwargs):
    """Stores the output directory and the flag overrides for this suite.

    Args:
      output_dir: Directory where benchmark output may be written.
      default_flags: Mapping of flag name -> value applied as defaults
        before each test; falsy values become an empty dict.
      **kwargs: Ignored; accepted for harness-constructor compatibility.
    """
    self.output_dir = output_dir
    self.default_flags = {} if not default_flags else default_flags

  def _setup(self):
    """Resets absl flags to this benchmark's defaults before each test."""
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.DEBUG)
    if NCFKerasBenchmarkBase.local_flags is None:
      ncf_common.define_ncf_flags()
      # Parse a non-empty (otherwise arbitrary) argv so every flag gets its
      # default value, then layer this suite's overrides on top and cache
      # the resulting flag state for later tests.
      flags.FLAGS(['foo'])
      core.set_defaults(**self.default_flags)
      NCFKerasBenchmarkBase.local_flags = flagsaver.save_flag_values()
    else:
      # Subsequent tests restart from the cached snapshot.
      flagsaver.restore_flag_values(NCFKerasBenchmarkBase.local_flags)

  def _run_and_report_benchmark(self):
    """Runs NCF once, then reports wall time plus subclass metrics."""
    started = time.time()
    run_stats = ncf_keras_main.run_ncf(FLAGS)
    elapsed = time.time() - started

    self.report_benchmark(
        iters=-1,
        wall_time=elapsed,
        metrics=self._extract_benchmark_report_extras(run_stats))

  def _extract_benchmark_report_extras(self, stats):
    """Converts a run's stats into a metrics list; subclasses must override."""
    raise NotImplementedError('Not implemented')
class NCFKerasAccuracy(NCFKerasBenchmarkBase):
  """Accuracy/convergence benchmarks for NCF on real MovieLens data."""

  def __init__(self,
               output_dir=None,
               root_data_dir=None,
               default_flags=None,
               **kwargs):
    """Builds the fixed flag baseline for the accuracy suite.

    Args:
      output_dir: Directory where benchmark output may be written.
      root_data_dir: Root directory under which the MovieLens data lives.
        NOTE(review): the `None` default would make `os.path.join` raise;
        presumably callers always pass a real path — confirm.
      default_flags: Ignored — the accuracy suite always runs from the
        fixed baseline constructed below.
      **kwargs: Forwarded to the base class.
    """
    default_flags = {
        'dataset': 'ml-20m',
        'num_gpus': 1,
        'train_epochs': 10,
        'clean': True,
        'batch_size': 99000,
        'learning_rate': 0.00382059,
        'beta1': 0.783529,
        'beta2': 0.909003,
        'epsilon': 1.45439e-07,
        'layers': [256, 256, 128, 64],
        'num_factors': 64,
        'hr_threshold': 0.635,
        'ml_perf': True,
        'use_synthetic_data': False,
        'data_dir': os.path.join(root_data_dir, NCF_DATA_DIR_NAME),
    }

    super(NCFKerasAccuracy, self).__init__(
        output_dir=output_dir,
        default_flags=default_flags,
        **kwargs)

  def _extract_benchmark_report_extras(self, stats):
    """Builds throughput, hit-rate, and loss metric entries from `stats`."""
    # Target is 0.635, but some runs land below that level. Until
    # multi-run tests exist, a slightly lower bound has to be accepted.
    return [
        {'name': 'exp_per_second',
         'value': stats['avg_exp_per_second']},
        {'name': 'hr_at_10',
         'value': stats['eval_hit_rate'],
         'min_value': 0.630,
         'max_value': 0.640},
        {'name': 'train_loss',
         'value': stats['loss']},
    ]

  def benchmark_1_gpu(self):
    self._setup()
    self._run_and_report_benchmark()

  def benchmark_1_gpu_no_dist_strat_early_stop(self):
    self._setup()
    FLAGS.distribution_strategy = 'off'
    FLAGS.early_stopping = True
    self._run_and_report_benchmark()

  def benchmark_1_gpu_early_stop(self):
    self._setup()
    FLAGS.early_stopping = True
    self._run_and_report_benchmark()

  def benchmark_1_gpu_ctl(self):
    # Custom training loop; works only in TF 2.0.
    self._setup()
    FLAGS.keras_use_ctl = True
    self._run_and_report_benchmark()

  def benchmark_1_gpu_ctl_early_stop(self):
    # Custom training loop; works only in TF 2.0.
    self._setup()
    FLAGS.keras_use_ctl = True
    FLAGS.early_stopping = True
    self._run_and_report_benchmark()

  def benchmark_2_gpus(self):
    self._setup()
    FLAGS.num_gpus = 2
    self._run_and_report_benchmark()

  def benchmark_2_gpus_early_stop(self):
    self._setup()
    FLAGS.num_gpus = 2
    FLAGS.early_stopping = True
    self._run_and_report_benchmark()

  def benchmark_2_gpus_ctl(self):
    # Custom training loop; works only in TF 2.0.
    self._setup()
    FLAGS.num_gpus = 2
    FLAGS.keras_use_ctl = True
    self._run_and_report_benchmark()

  def benchmark_2_gpus_ctl_early_stop(self):
    # Custom training loop; works only in TF 2.0.
    self._setup()
    FLAGS.num_gpus = 2
    FLAGS.keras_use_ctl = True
    FLAGS.early_stopping = True
    self._run_and_report_benchmark()

  def benchmark_1_gpu_ctl_mlperf_like(self):
    """1-GPU CTL run for comparing against MLPerf 0.5.

    Follows rules similar to MLPerf 0.5, using Google's convergence
    hyperparameters as the 1-GPU baseline. Epochs are pinned at 7 — the
    MLPerf submission consistently converges there — to remove
    epoch-count variance from the measurement.
    """
    self._setup()
    FLAGS.keras_use_ctl = True
    FLAGS.train_epochs = 7
    self._run_and_report_benchmark()

  def benchmark_1_gpu_mlperf_like(self):
    """1-GPU MLPerf-like run using the compile/fit path."""
    self._setup()
    FLAGS.train_epochs = 7
    self._run_and_report_benchmark()

  def benchmark_8_gpu_ctl_mlperf_like(self):
    """8-GPU run compared against the MLPerf 0.5 top-line submission.

    Follows rules similar to MLPerf 0.5 and uses the winning submission's
    hyperparameters, with epochs fixed at the 17 at which that submission
    converged.
    """
    self._setup()
    FLAGS.keras_use_ctl = True
    FLAGS.num_gpus = 8
    FLAGS.train_epochs = 17
    FLAGS.batch_size = 1048576
    FLAGS.learning_rate = 0.0045
    FLAGS.beta1 = 0.25
    FLAGS.beta2 = 0.5
    FLAGS.epsilon = 1e-8
    self._run_and_report_benchmark()
class NCFKerasSynth(NCFKerasBenchmarkBase):
  """Throughput benchmarks for NCF driven by synthetic data."""

  def __init__(self,
               output_dir=None,
               default_flags=None,
               **kwargs):
    """Builds the fixed flag baseline for the synthetic-data suite.

    Args:
      output_dir: Directory where benchmark output may be written.
      default_flags: Ignored — synthetic runs always start from the
        fixed baseline constructed below.
      **kwargs: Forwarded to the base class.
    """
    default_flags = {
        'dataset': 'ml-20m',
        'num_gpus': 1,
        'train_epochs': 8,
        'batch_size': 99000,
        'learning_rate': 0.00382059,
        'beta1': 0.783529,
        'beta2': 0.909003,
        'epsilon': 1.45439e-07,
        'layers': [256, 256, 128, 64],
        'num_factors': 64,
        'hr_threshold': 0.635,
        'use_synthetic_data': True,
    }

    super(NCFKerasSynth, self).__init__(
        output_dir=output_dir,
        default_flags=default_flags,
        **kwargs)

  def _extract_benchmark_report_extras(self, stats):
    """Reports only examples/sec; synthetic data has no meaningful accuracy."""
    return [{'name': 'exp_per_second',
             'value': stats['avg_exp_per_second']}]

  def benchmark_1_gpu(self):
    self._setup()
    self._run_and_report_benchmark()

  def benchmark_2_gpus(self):
    self._setup()
    FLAGS.num_gpus = 2
    self._run_and_report_benchmark()