# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests NCF."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math
import unittest

import mock
import numpy as np
import tensorflow as tf

from official.recommendation import constants as rconst
from official.recommendation import data_pipeline
from official.recommendation import neumf_model
from official.recommendation import ncf_common
from official.recommendation import ncf_estimator_main
from official.recommendation import ncf_keras_main
from official.utils.misc import keras_utils
from official.utils.testing import integration

from tensorflow.python.eager import context  # pylint: disable=ungrouped-imports


# Number of training negatives passed to neumf_model when building the metric
# ops in get_hit_rate_and_ndcg below.
NUM_TRAIN_NEG = 4


class NcfTest(tf.test.TestCase):
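  """Tests NCF metric computation and synthetic end-to-end training runs."""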

  @classmethod
  def setUpClass(cls):  # pylint: disable=invalid-name
    super(NcfTest, cls).setUpClass()
    ncf_common.define_ncf_flags()

  def setUp(self):
    self.top_k_old = rconst.TOP_K
    self.num_eval_negatives_old = rconst.NUM_EVAL_NEGATIVES
    rconst.NUM_EVAL_NEGATIVES = 2

  def tearDown(self):
    rconst.NUM_EVAL_NEGATIVES = self.num_eval_negatives_old
    rconst.TOP_K = self.top_k_old

  @unittest.skipIf(keras_utils.is_v2_0(), "TODO(b/136018594)")
  def get_hit_rate_and_ndcg(self, predicted_scores_by_user, items_by_user,
                            top_k=rconst.TOP_K, match_mlperf=False):
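    """Evaluates the HR and NDCG metric ops on synthetic predictions.

    Note: this helper overrides rconst.TOP_K and rconst.NUM_EVAL_NEGATIVES;
    tearDown restores both module-level constants.
    """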
    rconst.TOP_K = top_k
    rconst.NUM_EVAL_NEGATIVES = predicted_scores_by_user.shape[1] - 1
    batch_size = items_by_user.shape[0]

    users = np.repeat(np.arange(batch_size)[:, np.newaxis],
                      rconst.NUM_EVAL_NEGATIVES + 1, axis=1)
    users, items, duplicate_mask = \
      data_pipeline.BaseDataConstructor._assemble_eval_batch(
          users, items_by_user[:, -1:], items_by_user[:, :-1], batch_size)

    g = tf.Graph()
    with g.as_default():
      logits = tf.convert_to_tensor(
          predicted_scores_by_user.reshape((-1, 1)), tf.float32)
      softmax_logits = tf.concat([tf.zeros(logits.shape, dtype=logits.dtype),
                                  logits], axis=1)
      duplicate_mask = tf.convert_to_tensor(duplicate_mask, tf.float32)

      metric_ops = neumf_model._get_estimator_spec_with_metrics(
          logits=logits, softmax_logits=softmax_logits,
          duplicate_mask=duplicate_mask, num_training_neg=NUM_TRAIN_NEG,
          match_mlperf=match_mlperf).eval_metric_ops

      hr = metric_ops[rconst.HR_KEY]
      ndcg = metric_ops[rconst.NDCG_KEY]

      init = [tf.compat.v1.global_variables_initializer(),
              tf.compat.v1.local_variables_initializer()]

    with self.session(graph=g) as sess:
      sess.run(init)
      return sess.run([hr[1], ndcg[1]])

  def test_hit_rate_and_ndcg(self):
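    """Checks HR@K and NDCG@K against hand-computed expectations."""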
    # Test with no duplicate items
    predictions = np.array([
        [2., 0., 1.],  # In top 2
        [1., 0., 2.],  # In top 1
        [2., 1., 0.],  # In top 3
        [3., 4., 2.]   # In top 3
    ])
    items = np.array([
        [2, 3, 1],
        [3, 1, 2],
        [2, 1, 3],
        [1, 3, 2],
    ])
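    # Each user has exactly one true item (the last column of items_by_user),
    # so NDCG reduces to 1 / log2(rank + 1), averaged over the four users.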

    hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 1)
    self.assertAlmostEqual(hr, 1 / 4)
    self.assertAlmostEqual(ndcg, 1 / 4)

    hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 2)
    self.assertAlmostEqual(hr, 2 / 4)
    self.assertAlmostEqual(ndcg, (1 + math.log(2) / math.log(3)) / 4)

    hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 3)
    self.assertAlmostEqual(hr, 4 / 4)
    self.assertAlmostEqual(ndcg, (1 + math.log(2) / math.log(3) +
                                  2 * math.log(2) / math.log(4)) / 4)

    hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 1,
                                          match_mlperf=True)
    self.assertAlmostEqual(hr, 1 / 4)
    self.assertAlmostEqual(ndcg, 1 / 4)

    hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 2,
                                          match_mlperf=True)
    self.assertAlmostEqual(hr, 2 / 4)
    self.assertAlmostEqual(ndcg, (1 + math.log(2) / math.log(3)) / 4)

    hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 3,
                                          match_mlperf=True)
    self.assertAlmostEqual(hr, 4 / 4)
    self.assertAlmostEqual(ndcg, (1 + math.log(2) / math.log(3) +
                                  2 * math.log(2) / math.log(4)) / 4)

    # Test with duplicate items. In the MLPerf case, we treat the duplicates as
    # a single item. Otherwise, we treat the duplicates as separate items.
    predictions = np.array([
        [2., 2., 3., 1.],  # In top 4. MLPerf: In top 3
        [1., 0., 2., 3.],  # In top 1. MLPerf: In top 1
        [2., 3., 2., 0.],  # In top 4. MLPerf: In top 3
        [2., 4., 2., 3.]   # In top 2. MLPerf: In top 2
    ])
    items = np.array([
        [2, 2, 3, 1],
        [2, 3, 4, 1],
        [2, 3, 2, 1],
        [3, 2, 1, 4],
    ])
    hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 1)
    self.assertAlmostEqual(hr, 1 / 4)
    self.assertAlmostEqual(ndcg, 1 / 4)

    hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 2)
    self.assertAlmostEqual(hr, 2 / 4)
    self.assertAlmostEqual(ndcg, (1 + math.log(2) / math.log(3)) / 4)

    hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 3)
    self.assertAlmostEqual(hr, 2 / 4)
    self.assertAlmostEqual(ndcg, (1 + math.log(2) / math.log(3)) / 4)

    hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 4)
    self.assertAlmostEqual(hr, 4 / 4)
    self.assertAlmostEqual(ndcg, (1 + math.log(2) / math.log(3) +
                                  2 * math.log(2) / math.log(5)) / 4)

    hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 1,
                                          match_mlperf=True)
    self.assertAlmostEqual(hr, 1 / 4)
    self.assertAlmostEqual(ndcg, 1 / 4)

    hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 2,
                                          match_mlperf=True)
    self.assertAlmostEqual(hr, 2 / 4)
    self.assertAlmostEqual(ndcg, (1 + math.log(2) / math.log(3)) / 4)

    hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 3,
                                          match_mlperf=True)
    self.assertAlmostEqual(hr, 4 / 4)
    self.assertAlmostEqual(ndcg, (1 + math.log(2) / math.log(3) +
                                  2 * math.log(2) / math.log(4)) / 4)

    hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 4,
                                          match_mlperf=True)
    self.assertAlmostEqual(hr, 4 / 4)
    self.assertAlmostEqual(ndcg, (1 + math.log(2) / math.log(3) +
                                  2 * math.log(2) / math.log(4)) / 4)

  # Flags shared by all of the synthetic end-to-end tests below.
  _BASE_END_TO_END_FLAGS = ['-batch_size', '1024', '-train_epochs', '1']

  @unittest.skipIf(keras_utils.is_v2_0(), "TODO(b/136018594)")
  # Patch SYNTHETIC_BATCHES_PER_EPOCH so a synthetic epoch is only 100 batches,
  # keeping the end-to-end runs fast.
  @mock.patch.object(rconst, "SYNTHETIC_BATCHES_PER_EPOCH", 100)
  def test_end_to_end_estimator(self):
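    """Trains the Estimator-based model for one epoch on synthetic data."""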
    integration.run_synthetic(
        ncf_estimator_main.main, tmp_root=self.get_temp_dir(), max_train=None,
        extra_flags=self._BASE_END_TO_END_FLAGS)

  @unittest.skipIf(keras_utils.is_v2_0(), "TODO(b/136018594)")
  @mock.patch.object(rconst, "SYNTHETIC_BATCHES_PER_EPOCH", 100)
  def test_end_to_end_estimator_mlperf(self):
    integration.run_synthetic(
        ncf_estimator_main.main, tmp_root=self.get_temp_dir(), max_train=None,
        extra_flags=self._BASE_END_TO_END_FLAGS + ['-ml_perf', 'True'])

  @mock.patch.object(rconst, "SYNTHETIC_BATCHES_PER_EPOCH", 100)
  def test_end_to_end_keras_no_dist_strat(self):
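    """Runs the Keras model on synthetic data with distribution strategy off."""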
    integration.run_synthetic(
        ncf_keras_main.main, tmp_root=self.get_temp_dir(), max_train=None,
        extra_flags=self._BASE_END_TO_END_FLAGS +
        ['-distribution_strategy', 'off'])

  @mock.patch.object(rconst, "SYNTHETIC_BATCHES_PER_EPOCH", 100)
  @unittest.skipUnless(keras_utils.is_v2_0(), 'TF 2.0 only test.')
  def test_end_to_end_keras_dist_strat(self):
    integration.run_synthetic(
        ncf_keras_main.main, tmp_root=self.get_temp_dir(), max_train=None,
        extra_flags=self._BASE_END_TO_END_FLAGS + ['-num_gpus', '0'])

  @mock.patch.object(rconst, "SYNTHETIC_BATCHES_PER_EPOCH", 100)
  @unittest.skipUnless(keras_utils.is_v2_0(), 'TF 2.0 only test.')
  def test_end_to_end_keras_dist_strat_ctl(self):
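    """Runs the Keras model with the custom training loop (keras_use_ctl)."""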
    flags = (self._BASE_END_TO_END_FLAGS +
             ['-num_gpus', '0'] +
             ['-keras_use_ctl', 'True'])
    integration.run_synthetic(
        ncf_keras_main.main, tmp_root=self.get_temp_dir(), max_train=None,
        extra_flags=flags)

  @mock.patch.object(rconst, "SYNTHETIC_BATCHES_PER_EPOCH", 100)
  @unittest.skipUnless(keras_utils.is_v2_0(), 'TF 2.0 only test.')
  def test_end_to_end_keras_1_gpu_dist_strat(self):
    if context.num_gpus() < 1:
      self.skipTest(
          "This test requires {} GPU(s), but only {} are available.".format(
              1, context.num_gpus()))

    integration.run_synthetic(
        ncf_keras_main.main, tmp_root=self.get_temp_dir(), max_train=None,
        extra_flags=self._BASE_END_TO_END_FLAGS + ['-num_gpus', '1'])

  @mock.patch.object(rconst, "SYNTHETIC_BATCHES_PER_EPOCH", 100)
  @unittest.skipUnless(keras_utils.is_v2_0(), 'TF 2.0 only test.')
  def test_end_to_end_keras_2_gpu(self):
    if context.num_gpus() < 2:
      self.skipTest(
          "This test requires {} GPU(s), but only {} are available.".format(
              2, context.num_gpus()))

    integration.run_synthetic(
        ncf_keras_main.main, tmp_root=self.get_temp_dir(), max_train=None,
        extra_flags=self._BASE_END_TO_END_FLAGS + ['-num_gpus', '2'])

if __name__ == "__main__":
  tf.test.main()