ncf_keras_benchmark.py 5.65 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes Keras benchmarks and accuracy tests."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import os
import time
import json

from absl import flags
from absl.testing import flagsaver
import tensorflow as tf  # pylint: disable=g-bad-import-order

from official.recommendation import ncf_common
from official.recommendation import ncf_keras_main
from official.utils.flags import core

FLAGS = flags.FLAGS

class KerasNCFBenchmarkBase(tf.test.Benchmark):
  """Base class for NCF model benchmark."""
  local_flags = None

  def __init__(self,
               output_dir=None,
               root_data_dir=None,
               default_flags=None,
               **kwargs):

    self.output_dir = output_dir
    self.default_flags = default_flags or {}
    ncf_common.define_ncf_flags()

    if root_data_dir:
      FLAGS.data_dir = os.path.join(root_data_dir, 'movielens_data')

  def _setup(self):
    """Sets up and resets flags before each test."""
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.DEBUG)
    if KerasNCFBenchmarkBase.local_flags is None:
      # Loads flags to get defaults to then override. List cannot be empty.
      flags.FLAGS(['foo'])
      core.set_defaults(**self.default_flags)
      saved_flag_values = flagsaver.save_flag_values()
      KerasNCFBenchmarkBase.local_flags = saved_flag_values
    else:
      flagsaver.restore_flag_values(KerasNCFBenchmarkBase.local_flags)

  def _run_and_report_benchmark(self):
    start_time_sec = time.time()
    stats = ncf_keras_main.run_ncf(FLAGS)
    wall_time_sec = time.time() - start_time_sec

    extras = self._extract_benchmark_report_extras(stats)
    self.report_benchmark(iters=-1, wall_time=wall_time_sec, extras=extras)

  def _extract_benchmark_report_extras(self, stats):
    raise NotImplementedError("Not implemented")


class KerasNCFRealData(KerasNCFBenchmarkBase):
  """Benchmark NCF model using real data."""

  def __init__(self,
               output_dir=None,
               default_flags=None,
               **kwargs):

    default_flags = {}
    default_flags['dataset'] = 'ml-20m'
    default_flags['num_gpus'] = 1
    default_flags['train_epochs'] = 14
88
    default_flags['clean'] = True
89
    default_flags['batch_size'] = 160000
90
91
92
93
94
95
96
    default_flags['learning_rate'] = 0.00382059
    default_flags['beta1'] = 0.783529
    default_flags['beta2'] = 0.909003
    default_flags['epsilon'] = 1.45439e-07
    default_flags['layers'] = [256, 256, 128, 64]
    default_flags['num_factors'] = 64
    default_flags['hr_threshold'] = 0.635
97
    default_flags['ml_perf'] = True
98
99
100
101
102
103
104
105
106
107
    default_flags['use_synthetic_data'] = False

    super(KerasNCFRealData, self).__init__(
        output_dir=output_dir,
        default_flags=default_flags,
        **kwargs)

  def _extract_benchmark_report_extras(self, stats):
    extras = {}
    extras['train_loss'] = stats['loss']
Toby Boyd's avatar
Toby Boyd committed
108
109
    extras['hr_at_10'] = stats['eval_hit_rate']
    extras['exp_per_second'] = stats['avg_exp_per_second']
110
111
112
113
114
115
    return extras

  def benchmark_1_gpu(self):
    self._setup()
    self._run_and_report_benchmark()

116
117
118
119
120
  def benchmark_1_gpu_no_cloning(self):
    self._setup()
    FLAGS.clone_model_in_keras_dist_strat = False
    self._run_and_report_benchmark()

121
122
123
124
125
  def benchmark_2_gpus(self):
    self._setup()
    FLAGS.num_gpus = 2
    self._run_and_report_benchmark()

126
127
128
129
130
131
  def benchmark_2_gpus_no_cloning(self):
    self._setup()
    FLAGS.num_gpus = 2
    FLAGS.clone_model_in_keras_dist_strat = False
    self._run_and_report_benchmark()

132
133
134
135
136
137
138
139
140
141
142
143
144

class KerasNCFSyntheticData(KerasNCFBenchmarkBase):
  """Benchmark NCF model using synthetic data."""

  def __init__(self,
               output_dir=None,
               default_flags=None,
               **kwargs):

    default_flags = {}
    default_flags['dataset'] = 'ml-20m'
    default_flags['num_gpus'] = 1
    default_flags['train_epochs'] = 14
145
    default_flags['batch_size'] = 160000
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
    default_flags['learning_rate'] = 0.00382059
    default_flags['beta1'] = 0.783529
    default_flags['beta2'] = 0.909003
    default_flags['epsilon'] = 1.45439e-07
    default_flags['layers'] = [256, 256, 128, 64]
    default_flags['num_factors'] = 64
    default_flags['hr_threshold'] = 0.635
    default_flags['use_synthetic_data'] = True

    super(KerasNCFSyntheticData, self).__init__(
        output_dir=output_dir,
        default_flags=default_flags,
        **kwargs)

  def _extract_benchmark_report_extras(self, stats):
    extras = {}
Toby Boyd's avatar
Toby Boyd committed
162
    extras['exp_per_second'] = stats['avg_exp_per_second']
163
164
165
166
167
    return extras

  def benchmark_1_gpu(self):
    self._setup()
    self._run_and_report_benchmark()
168

169
170
171
172
173
  def benchmark_1_gpu_no_cloning(self):
    self._setup()
    FLAGS.clone_model_in_keras_dist_strat = False
    self._run_and_report_benchmark()

174
175
176
177
  def benchmark_2_gpus(self):
    self._setup()
    FLAGS.num_gpus = 2
    self._run_and_report_benchmark()
178
179
180
181
182
183

  def benchmark_2_gpus_no_cloning(self):
    self._setup()
    FLAGS.num_gpus = 2
    FLAGS.clone_model_in_keras_dist_strat = False
    self._run_and_report_benchmark()