# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests NCF."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import math

import numpy as np
import tensorflow as tf

from official.recommendation import constants as rconst
from official.recommendation import ncf_main
from official.recommendation import neumf_model
from official.recommendation import stat_utils


# Number of negative training examples per positive example; passed through to
# the eval metric computation as `num_training_neg`.
NUM_TRAIN_NEG = 4

class NcfTest(tf.test.TestCase):
  """Tests the NCF evaluation metrics (hit rate and NDCG).

  The tests monkey-patch the module-level constants in `rconst`
  (TOP_K, NUM_EVAL_NEGATIVES) per call; setUp/tearDown snapshot and
  restore them so tests do not leak state into each other.
  """

  def setUp(self):
    # Snapshot the rconst module constants so tearDown can restore them;
    # get_hit_rate_and_ndcg overwrites both during each test.
    self.top_k_old = rconst.TOP_K
    self.num_eval_negatives_old = rconst.NUM_EVAL_NEGATIVES
    rconst.NUM_EVAL_NEGATIVES = 2

  def tearDown(self):
    # Restore the constants saved in setUp.
    rconst.NUM_EVAL_NEGATIVES = self.num_eval_negatives_old
    rconst.TOP_K = self.top_k_old

  def get_hit_rate_and_ndcg(self, predicted_scores_by_user, items_by_user,
                            top_k=None, match_mlperf=False):
    """Computes HR@top_k and NDCG@top_k with the NCF metric ops.

    Builds a fresh graph, feeds the given scores through
    `neumf_model.compute_eval_loss_and_metrics`, and evaluates the
    resulting metric update ops in a session.

    Side effect: reassigns `rconst.TOP_K` and `rconst.NUM_EVAL_NEGATIVES`
    to match the arguments (restored by tearDown).

    Args:
      predicted_scores_by_user: [num_users, num_items] array of predicted
        scores, one row per user.
      items_by_user: [num_users, num_items] array of item ids; only used to
        mask duplicate items within a user's row.
      top_k: Cutoff for both metrics. Defaults to the current value of
        `rconst.TOP_K`, resolved at call time. (The previous signature used
        `top_k=rconst.TOP_K`, which bound the constant at import time and
        could go stale if the constant was reassigned first.)
      match_mlperf: Whether to use the MLPerf-compatible metric definition,
        which treats duplicated items as a single item.

    Returns:
      A [hit_rate, ndcg] pair of floats.
    """
    if top_k is None:
      top_k = rconst.TOP_K
    rconst.TOP_K = top_k
    # One column per row is the positive; the rest are negatives.
    rconst.NUM_EVAL_NEGATIVES = predicted_scores_by_user.shape[1] - 1

    g = tf.Graph()
    with g.as_default():
      logits = tf.convert_to_tensor(
          predicted_scores_by_user.reshape((-1, 1)), tf.float32)
      # The metric op expects two-class softmax logits; pair each score with
      # a zero baseline column.
      softmax_logits = tf.concat([tf.zeros(logits.shape, dtype=logits.dtype),
                                  logits], axis=1)
      duplicate_mask = tf.convert_to_tensor(
          stat_utils.mask_duplicates(items_by_user, axis=1), tf.float32)

      metric_ops = neumf_model.compute_eval_loss_and_metrics(
          logits=logits, softmax_logits=softmax_logits,
          duplicate_mask=duplicate_mask, num_training_neg=NUM_TRAIN_NEG,
          match_mlperf=match_mlperf).eval_metric_ops

      hr = metric_ops[rconst.HR_KEY]
      ndcg = metric_ops[rconst.NDCG_KEY]

      init = [tf.global_variables_initializer(),
              tf.local_variables_initializer()]

    with self.test_session(graph=g) as sess:
      sess.run(init)
      # Each metric is a (value, update_op) pair; running the update op
      # both accumulates and returns the metric value.
      return sess.run([hr[1], ndcg[1]])

  def test_hit_rate_and_ndcg(self):
    # Test with no duplicate items.
    predictions = np.array([
        [1., 2., 0.],  # In top 2
        [2., 1., 0.],  # In top 1
        [0., 2., 1.],  # In top 3
        [2., 3., 4.]   # In top 3
    ])
    items = np.array([
        [1, 2, 3],
        [2, 3, 1],
        [3, 2, 1],
        [2, 1, 3],
    ])

    hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 1)
    self.assertAlmostEqual(hr, 1 / 4)
    self.assertAlmostEqual(ndcg, 1 / 4)

    hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 2)
    self.assertAlmostEqual(hr, 2 / 4)
    self.assertAlmostEqual(ndcg, (1 + math.log(2) / math.log(3)) / 4)

    hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 3)
    self.assertAlmostEqual(hr, 4 / 4)
    self.assertAlmostEqual(ndcg, (1 + math.log(2) / math.log(3) +
                                  2 * math.log(2) / math.log(4)) / 4)

    # Without duplicates, the MLPerf and non-MLPerf metrics must agree.
    hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 1,
                                          match_mlperf=True)
    self.assertAlmostEqual(hr, 1 / 4)
    self.assertAlmostEqual(ndcg, 1 / 4)

    hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 2,
                                          match_mlperf=True)
    self.assertAlmostEqual(hr, 2 / 4)
    self.assertAlmostEqual(ndcg, (1 + math.log(2) / math.log(3)) / 4)

    hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 3,
                                          match_mlperf=True)
    self.assertAlmostEqual(hr, 4 / 4)
    self.assertAlmostEqual(ndcg, (1 + math.log(2) / math.log(3) +
                                  2 * math.log(2) / math.log(4)) / 4)

    # Test with duplicate items. In the MLPerf case, we treat the duplicates as
    # a single item. Otherwise, we treat the duplicates as separate items.
    predictions = np.array([
        [1., 2., 2., 3.],  # In top 4. MLPerf: In top 3
        [3., 1., 0., 2.],  # In top 1. MLPerf: In top 1
        [0., 2., 3., 2.],  # In top 4. MLPerf: In top 3
        [3., 2., 4., 2.]   # In top 2. MLPerf: In top 2
    ])
    items = np.array([
        [1, 2, 2, 3],
        [1, 2, 3, 4],
        [1, 2, 3, 2],
        [4, 3, 2, 1],
    ])

    hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 1)
    self.assertAlmostEqual(hr, 1 / 4)
    self.assertAlmostEqual(ndcg, 1 / 4)

    hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 2)
    self.assertAlmostEqual(hr, 2 / 4)
    self.assertAlmostEqual(ndcg, (1 + math.log(2) / math.log(3)) / 4)

    hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 3)
    self.assertAlmostEqual(hr, 2 / 4)
    self.assertAlmostEqual(ndcg, (1 + math.log(2) / math.log(3)) / 4)

    hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 4)
    self.assertAlmostEqual(hr, 4 / 4)
    self.assertAlmostEqual(ndcg, (1 + math.log(2) / math.log(3) +
                                  2 * math.log(2) / math.log(5)) / 4)

    hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 1,
                                          match_mlperf=True)
    self.assertAlmostEqual(hr, 1 / 4)
    self.assertAlmostEqual(ndcg, 1 / 4)

    hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 2,
                                          match_mlperf=True)
    self.assertAlmostEqual(hr, 2 / 4)
    self.assertAlmostEqual(ndcg, (1 + math.log(2) / math.log(3)) / 4)

    hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 3,
                                          match_mlperf=True)
    self.assertAlmostEqual(hr, 4 / 4)
    self.assertAlmostEqual(ndcg, (1 + math.log(2) / math.log(3) +
                                  2 * math.log(2) / math.log(4)) / 4)

    hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 4,
                                          match_mlperf=True)
    self.assertAlmostEqual(hr, 4 / 4)
    self.assertAlmostEqual(ndcg, (1 + math.log(2) / math.log(3) +
                                  2 * math.log(2) / math.log(4)) / 4)

    # Test with duplicate items, where the predictions for the same item can
    # differ. In the MLPerf case, we should take the first prediction.
    predictions = np.array([
        [3., 2., 4., 4.],  # In top 3. MLPerf: In top 2
        [3., 4., 2., 4.],  # In top 3. MLPerf: In top 3
        [2., 3., 4., 1.],  # In top 3. MLPerf: In top 2
        [4., 3., 5., 2.]   # In top 2. MLPerf: In top 1
    ])
    items = np.array([
        [1, 2, 2, 3],
        [4, 3, 3, 2],
        [2, 1, 1, 1],
        [4, 2, 2, 1],
    ])

    hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 1)
    self.assertAlmostEqual(hr, 0 / 4)
    self.assertAlmostEqual(ndcg, 0 / 4)

    hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 2)
    self.assertAlmostEqual(hr, 1 / 4)
    self.assertAlmostEqual(ndcg, (math.log(2) / math.log(3)) / 4)

    hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 3)
    self.assertAlmostEqual(hr, 4 / 4)
    self.assertAlmostEqual(ndcg, (math.log(2) / math.log(3) +
                                  3 * math.log(2) / math.log(4)) / 4)

    hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 4)
    self.assertAlmostEqual(hr, 4 / 4)
    self.assertAlmostEqual(ndcg, (math.log(2) / math.log(3) +
                                  3 * math.log(2) / math.log(4)) / 4)

    hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 1,
                                          match_mlperf=True)
    self.assertAlmostEqual(hr, 1 / 4)
    self.assertAlmostEqual(ndcg, 1 / 4)

    hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 2,
                                          match_mlperf=True)
    self.assertAlmostEqual(hr, 3 / 4)
    self.assertAlmostEqual(ndcg, (1 + 2 * math.log(2) / math.log(3)) / 4)

    hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 3,
                                          match_mlperf=True)
    self.assertAlmostEqual(hr, 4 / 4)
    self.assertAlmostEqual(ndcg, (1 + 2 * math.log(2) / math.log(3) +
                                  math.log(2) / math.log(4)) / 4)

    hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 4,
                                          match_mlperf=True)
    self.assertAlmostEqual(hr, 4 / 4)
    self.assertAlmostEqual(ndcg, (1 + 2 * math.log(2) / math.log(3) +
                                  math.log(2) / math.log(4)) / 4)


if __name__ == "__main__":
  # Emit INFO-level TF logs during the run, then hand off to the TF test
  # runner (which drives the tf.test.TestCase classes in this module).
  tf.logging.set_verbosity(tf.logging.INFO)
  tf.test.main()