ncf_keras_main.py 19.4 KB
Newer Older
Shining Sun's avatar
Shining Sun committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""NCF framework to train and evaluate the NeuMF model.

The NeuMF model assembles both MF and MLP models under the NCF framework. Check
`neumf_model.py` for more details about the models.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

25
import json
Shining Sun's avatar
Shining Sun committed
26
27
28
import os

# pylint: disable=g-bad-import-order
David Chen's avatar
David Chen committed
29
from absl import app
Shining Sun's avatar
Shining Sun committed
30
from absl import flags
31
from absl import logging
Hongkun Yu's avatar
Hongkun Yu committed
32
import tensorflow.compat.v2 as tf
Shining Sun's avatar
Shining Sun committed
33
34
# pylint: enable=g-bad-import-order

35
from official.recommendation import constants as rconst
36
from official.recommendation import movielens
Shining Sun's avatar
Shining Sun committed
37
from official.recommendation import ncf_common
38
from official.recommendation import ncf_input_pipeline
Shining Sun's avatar
Shining Sun committed
39
from official.recommendation import neumf_model
40
from official.utils.misc import distribution_utils
41
from official.utils.misc import keras_utils
Shining Sun's avatar
Shining Sun committed
42
from official.utils.misc import model_helpers
Nimit Nigania's avatar
Nimit Nigania committed
43
from official.utils.flags import core as flags_core
Shining Sun's avatar
Shining Sun committed
44
45
46
47

FLAGS = flags.FLAGS


guptapriya's avatar
guptapriya committed
48
49
def metric_fn(logits, dup_mask, params):
  dup_mask = tf.cast(dup_mask, tf.float32)
50
  logits = tf.slice(logits, [0, 1], [-1, -1])
guptapriya's avatar
guptapriya committed
51
52
53
  in_top_k, _, metric_weights, _ = neumf_model.compute_top_k_and_ndcg(
      logits,
      dup_mask,
guptapriya's avatar
cleanup  
guptapriya committed
54
      params["match_mlperf"])
guptapriya's avatar
guptapriya committed
55
56
57
58
  metric_weights = tf.cast(metric_weights, tf.float32)
  return in_top_k, metric_weights


59
60
61
62
63
64
class MetricLayer(tf.keras.layers.Layer):
  """Custom layer of metrics for NCF model."""

  def __init__(self, params):
    super(MetricLayer, self).__init__()
    self.params = params
guptapriya's avatar
guptapriya committed
65

A. Unique TensorFlower's avatar
A. Unique TensorFlower committed
66
  def call(self, inputs, training=False):
67
    logits, dup_mask = inputs
A. Unique TensorFlower's avatar
A. Unique TensorFlower committed
68
69
70
71
72
73
74
75
76
77
78

    if training:
      hr_sum = 0.0
      hr_count = 0.0
    else:
      metric, metric_weights = metric_fn(logits, dup_mask, self.params)
      hr_sum = tf.reduce_sum(metric * metric_weights)
      hr_count = tf.reduce_sum(metric_weights)

    self.add_metric(hr_sum, name="hr_sum", aggregation="mean")
    self.add_metric(hr_count, name="hr_count", aggregation="mean")
guptapriya's avatar
guptapriya committed
79
    return logits
80
81


82
83
84
85
class LossLayer(tf.keras.layers.Layer):
  """Pass-through loss layer for NCF model."""

  def __init__(self, loss_normalization_factor):
86
87
    # The loss may overflow in float16, so we use float32 instead.
    super(LossLayer, self).__init__(dtype="float32")
88
89
90
91
92
93
94
95
96
97
98
99
100
    self.loss_normalization_factor = loss_normalization_factor
    self.loss = tf.keras.losses.SparseCategoricalCrossentropy(
        from_logits=True, reduction="sum")

  def call(self, inputs):
    logits, labels, valid_pt_mask_input = inputs
    loss = self.loss(
        y_true=labels, y_pred=logits, sample_weight=valid_pt_mask_input)
    loss = loss * (1.0 / self.loss_normalization_factor)
    self.add_loss(loss)
    return logits


Shining Sun's avatar
Shining Sun committed
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
class IncrementEpochCallback(tf.keras.callbacks.Callback):
  """A callback to increase the requested epoch for the data producer.

  The reason why we need this is because we can only buffer a limited amount of
  data. So we keep a moving window to represent the buffer. This is to move the
  one of the window's boundaries for each epoch.
  """

  def __init__(self, producer):
    self._producer = producer

  def on_epoch_begin(self, epoch, logs=None):
    self._producer.increment_request_epoch()


116
117
118
119
120
121
122
123
class CustomEarlyStopping(tf.keras.callbacks.Callback):
  """Stop training has reached a desired hit rate."""

  def __init__(self, monitor, desired_value):
    super(CustomEarlyStopping, self).__init__()

    self.monitor = monitor
    self.desired = desired_value
124
    self.stopped_epoch = 0
125
126
127
128
129
130
131
132
133

  def on_epoch_end(self, epoch, logs=None):
    current = self.get_monitor_value(logs)
    if current and current >= self.desired:
      self.stopped_epoch = epoch
      self.model.stop_training = True

  def on_train_end(self, logs=None):
    if self.stopped_epoch > 0:
Haoyu Zhang's avatar
Haoyu Zhang committed
134
      print("Epoch %05d: early stopping" % (self.stopped_epoch + 1))
135
136
137
138
139

  def get_monitor_value(self, logs):
    logs = logs or {}
    monitor_value = logs.get(self.monitor)
    if monitor_value is None:
Haoyu Zhang's avatar
Haoyu Zhang committed
140
141
142
      logging.warning("Early stopping conditioned on metric `%s` "
                      "which is not available. Available metrics are: %s",
                      self.monitor, ",".join(list(logs.keys())))
143
144
145
    return monitor_value


Shining Sun's avatar
Shining Sun committed
146
147
def _get_keras_model(params):
  """Constructs and returns the model."""
Haoyu Zhang's avatar
Haoyu Zhang committed
148
  batch_size = params["batch_size"]
Shining Sun's avatar
Shining Sun committed
149
150

  user_input = tf.keras.layers.Input(
151
      shape=(1,), name=movielens.USER_COLUMN, dtype=tf.int32)
Shining Sun's avatar
Shining Sun committed
152
153

  item_input = tf.keras.layers.Input(
154
      shape=(1,), name=movielens.ITEM_COLUMN, dtype=tf.int32)
guptapriya's avatar
guptapriya committed
155

156
  valid_pt_mask_input = tf.keras.layers.Input(
157
      shape=(1,), name=rconst.VALID_POINT_MASK, dtype=tf.bool)
158
159

  dup_mask_input = tf.keras.layers.Input(
160
      shape=(1,), name=rconst.DUPLICATE_MASK, dtype=tf.int32)
161
162

  label_input = tf.keras.layers.Input(
163
      shape=(1,), name=rconst.TRAIN_LABEL_KEY, dtype=tf.bool)
Shining Sun's avatar
Shining Sun committed
164

165
  base_model = neumf_model.construct_model(user_input, item_input, params)
Shining Sun's avatar
Shining Sun committed
166

167
  logits = base_model.output
168

Shining Sun's avatar
Shining Sun committed
169
  zeros = tf.keras.layers.Lambda(
170
      lambda x: x * 0)(logits)
Shining Sun's avatar
Shining Sun committed
171
172

  softmax_logits = tf.keras.layers.concatenate(
173
      [zeros, logits],
Shining Sun's avatar
Shining Sun committed
174
175
      axis=-1)

176
177
  # Custom training loop calculates loss and metric as a part of
  # training/evaluation step function.
178
179
  if not params["keras_use_ctl"]:
    softmax_logits = MetricLayer(params)([softmax_logits, dup_mask_input])
180
181
182
183
    # TODO(b/134744680): Use model.add_loss() instead once the API is well
    # supported.
    softmax_logits = LossLayer(batch_size)(
        [softmax_logits, label_input, valid_pt_mask_input])
184

Shining Sun's avatar
Shining Sun committed
185
  keras_model = tf.keras.Model(
guptapriya's avatar
guptapriya committed
186
187
188
189
190
191
      inputs={
          movielens.USER_COLUMN: user_input,
          movielens.ITEM_COLUMN: item_input,
          rconst.VALID_POINT_MASK: valid_pt_mask_input,
          rconst.DUPLICATE_MASK: dup_mask_input,
          rconst.TRAIN_LABEL_KEY: label_input},
Shining Sun's avatar
Shining Sun committed
192
193
194
195
196
197
198
      outputs=softmax_logits)

  keras_model.summary()
  return keras_model


def run_ncf(_):
199
200
  """Run NCF training and eval with Keras."""

201
202
  keras_utils.set_session_config(enable_xla=FLAGS.enable_xla)

guptapriya's avatar
guptapriya committed
203
204
205
  if FLAGS.seed is not None:
    print("Setting tf seed")
    tf.random.set_seed(FLAGS.seed)
206

A. Unique TensorFlower's avatar
A. Unique TensorFlower committed
207
  model_helpers.apply_clean(FLAGS)
Shining Sun's avatar
Shining Sun committed
208

209
210
211
212
213
214
  if FLAGS.dtype == "fp16" and FLAGS.fp16_implementation == "keras":
    policy = tf.keras.mixed_precision.experimental.Policy(
        "mixed_float16",
        loss_scale=flags_core.get_loss_scale(FLAGS, default_for_fp16="dynamic"))
    tf.keras.mixed_precision.experimental.set_policy(policy)

215
216
  strategy = distribution_utils.get_distribution_strategy(
      distribution_strategy=FLAGS.distribution_strategy,
217
218
      num_gpus=FLAGS.num_gpus,
      tpu_address=FLAGS.tpu)
A. Unique TensorFlower's avatar
A. Unique TensorFlower committed
219
220

  params = ncf_common.parse_flags(FLAGS)
221
222
  params["distribute_strategy"] = strategy

223
  if not keras_utils.is_v2_0() and strategy is not None:
224
225
    logging.error("NCF Keras only works with distribution strategy in TF 2.0")
    return
guptapriya's avatar
guptapriya committed
226
  if (params["keras_use_ctl"] and (
227
      not keras_utils.is_v2_0() or strategy is None)):
228
    logging.error(
guptapriya's avatar
guptapriya committed
229
        "Custom training loop only works with tensorflow 2.0 and dist strat.")
230
    return
231
232
233
  if params["use_tpu"] and not params["keras_use_ctl"]:
    logging.error("Custom training loop must be used when using TPUStrategy.")
    return
234

235
  batch_size = params["batch_size"]
236
237
238
239
240
241
242
243
  time_callback = keras_utils.TimeHistory(batch_size, FLAGS.log_steps)
  callbacks = [time_callback]

  producer, input_meta_data = None, None
  generate_input_online = params["train_dataset_path"] is None

  if generate_input_online:
    # Start data producing thread.
244
    num_users, num_items, _, _, producer = ncf_common.get_inputs(params)
245
246
247
248
249
    producer.start()
    per_epoch_callback = IncrementEpochCallback(producer)
    callbacks.append(per_epoch_callback)
  else:
    assert params["eval_dataset_path"] and params["input_meta_data_path"]
250
    with tf.io.gfile.GFile(params["input_meta_data_path"], "rb") as reader:
251
252
253
      input_meta_data = json.loads(reader.read().decode("utf-8"))
      num_users = input_meta_data["num_users"]
      num_items = input_meta_data["num_items"]
Shining Sun's avatar
Shining Sun committed
254
255

  params["num_users"], params["num_items"] = num_users, num_items
256
257
258

  if FLAGS.early_stopping:
    early_stopping_callback = CustomEarlyStopping(
guptapriya's avatar
guptapriya committed
259
        "val_HR_METRIC", desired_value=FLAGS.hr_threshold)
260
    callbacks.append(early_stopping_callback)
261

262
263
264
265
266
267
268
269
270
271
272
273
274
  (train_input_dataset, eval_input_dataset,
   num_train_steps, num_eval_steps) = \
    (ncf_input_pipeline.create_ncf_input_data(
        params, producer, input_meta_data, strategy))
  steps_per_epoch = None if generate_input_online else num_train_steps

  with distribution_utils.get_strategy_scope(strategy):
    keras_model = _get_keras_model(params)
    optimizer = tf.keras.optimizers.Adam(
        learning_rate=params["learning_rate"],
        beta_1=params["beta1"],
        beta_2=params["beta2"],
        epsilon=params["epsilon"])
275
    if FLAGS.fp16_implementation == "graph_rewrite":
276
277
      optimizer = \
        tf.compat.v1.train.experimental.enable_mixed_precision_graph_rewrite(
278
            optimizer,
279
280
            loss_scale=flags_core.get_loss_scale(FLAGS,
                                                 default_for_fp16="dynamic"))
281
282
283
284
285
286
    elif FLAGS.dtype == "fp16" and params["keras_use_ctl"]:
      # When keras_use_ctl is False, instead Model.fit() automatically applies
      # loss scaling so we don't need to create a LossScaleOptimizer.
      optimizer = tf.keras.mixed_precision.experimental.LossScaleOptimizer(
          optimizer,
          tf.keras.mixed_precision.experimental.global_policy().loss_scale)
287
288
289
290
291
292
293
294
295
296
297
298
299
300

    if params["keras_use_ctl"]:
      train_loss, eval_results = run_ncf_custom_training(
          params,
          strategy,
          keras_model,
          optimizer,
          callbacks,
          train_input_dataset,
          eval_input_dataset,
          num_train_steps,
          num_eval_steps,
          generate_input_online=generate_input_online)
    else:
301
      keras_model.compile(optimizer=optimizer, run_eagerly=FLAGS.run_eagerly)
A. Unique TensorFlower's avatar
A. Unique TensorFlower committed
302
303
304
305
306
307
308
309
310
311

      if not FLAGS.ml_perf:
        # Create Tensorboard summary and checkpoint callbacks.
        summary_dir = os.path.join(FLAGS.model_dir, "summaries")
        summary_callback = tf.keras.callbacks.TensorBoard(summary_dir)
        checkpoint_path = os.path.join(FLAGS.model_dir, "checkpoint")
        checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
            checkpoint_path, save_weights_only=True)

        callbacks += [summary_callback, checkpoint_callback]
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342

      history = keras_model.fit(
          train_input_dataset,
          epochs=FLAGS.train_epochs,
          steps_per_epoch=steps_per_epoch,
          callbacks=callbacks,
          validation_data=eval_input_dataset,
          validation_steps=num_eval_steps,
          verbose=2)

      logging.info("Training done. Start evaluating")

      eval_loss_and_metrics = keras_model.evaluate(
          eval_input_dataset, steps=num_eval_steps, verbose=2)

      logging.info("Keras evaluation is done.")

      # Keras evaluate() API returns scalar loss and metric values from
      # evaluation as a list. Here, the returned list would contain
      # [evaluation loss, hr sum, hr count].
      eval_hit_rate = eval_loss_and_metrics[1] / eval_loss_and_metrics[2]

      # Format evaluation result into [eval loss, eval hit accuracy].
      eval_results = [eval_loss_and_metrics[0], eval_hit_rate]

      if history and history.history:
        train_history = history.history
        train_loss = train_history["loss"][-1]

  stats = build_stats(train_loss, eval_results, time_callback)
  return stats
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377


def run_ncf_custom_training(params,
                            strategy,
                            keras_model,
                            optimizer,
                            callbacks,
                            train_input_dataset,
                            eval_input_dataset,
                            num_train_steps,
                            num_eval_steps,
                            generate_input_online=True):
  """Runs custom training loop.

  Args:
    params: Dictionary containing training parameters.
    strategy: Distribution strategy to be used for distributed training.
    keras_model: Model used for training.
    optimizer: Optimizer used for training.
    callbacks: Callbacks to be invoked between batches/epochs.
    train_input_dataset: tf.data.Dataset used for training.
    eval_input_dataset: tf.data.Dataset used for evaluation.
    num_train_steps: Total number of steps to run for training.
    num_eval_steps: Total number of steps to run for evaluation.
    generate_input_online: Whether input data was generated by data producer.
      When data is generated by data producer, then train dataset must be
      re-initialized after every epoch.

  Returns:
    A tuple of train loss and a list of training and evaluation results.
  """
  loss_object = tf.keras.losses.SparseCategoricalCrossentropy(
      reduction="sum", from_logits=True)
  train_input_iterator = iter(
      strategy.experimental_distribute_dataset(train_input_dataset))
378

379
380
  def train_step(train_iterator):
    """Called once per step to train the model."""
381

382
383
384
385
    def step_fn(features):
      """Computes loss and applied gradient per replica."""
      with tf.GradientTape() as tape:
        softmax_logits = keras_model(features)
386
387
        # The loss can overflow in float16, so we cast to float32.
        softmax_logits = tf.cast(softmax_logits, "float32")
388
389
390
391
392
393
        labels = features[rconst.TRAIN_LABEL_KEY]
        loss = loss_object(
            labels,
            softmax_logits,
            sample_weight=features[rconst.VALID_POINT_MASK])
        loss *= (1.0 / params["batch_size"])
Nimit Nigania's avatar
Nimit Nigania committed
394
395
        if FLAGS.dtype == "fp16":
          loss = optimizer.get_scaled_loss(loss)
396
397

      grads = tape.gradient(loss, keras_model.trainable_variables)
Nimit Nigania's avatar
Nimit Nigania committed
398
399
      if FLAGS.dtype == "fp16":
        grads = optimizer.get_unscaled_gradients(grads)
400
401
402
403
404
405
      # Converting gradients to dense form helps in perf on GPU for NCF
      grads = neumf_model.sparse_to_dense_grads(
          list(zip(grads, keras_model.trainable_variables)))
      optimizer.apply_gradients(grads)
      return loss

Ken Franko's avatar
Ken Franko committed
406
    per_replica_losses = strategy.run(
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
        step_fn, args=(next(train_iterator),))
    mean_loss = strategy.reduce(
        tf.distribute.ReduceOp.SUM, per_replica_losses, axis=None)
    return mean_loss

  def eval_step(eval_iterator):
    """Called once per eval step to compute eval metrics."""

    def step_fn(features):
      """Computes eval metrics per replica."""
      softmax_logits = keras_model(features)
      in_top_k, metric_weights = metric_fn(softmax_logits,
                                           features[rconst.DUPLICATE_MASK],
                                           params)
      hr_sum = tf.reduce_sum(in_top_k * metric_weights)
      hr_count = tf.reduce_sum(metric_weights)
      return hr_sum, hr_count
424

425
    per_replica_hr_sum, per_replica_hr_count = (
Ken Franko's avatar
Ken Franko committed
426
        strategy.run(
427
428
429
430
431
432
            step_fn, args=(next(eval_iterator),)))
    hr_sum = strategy.reduce(
        tf.distribute.ReduceOp.SUM, per_replica_hr_sum, axis=None)
    hr_count = strategy.reduce(
        tf.distribute.ReduceOp.SUM, per_replica_hr_count, axis=None)
    return hr_sum, hr_count
433

434
435
436
  if not FLAGS.run_eagerly:
    train_step = tf.function(train_step)
    eval_step = tf.function(eval_step)
437

438
439
  for callback in callbacks:
    callback.on_train_begin()
440

A. Unique TensorFlower's avatar
A. Unique TensorFlower committed
441
442
443
444
445
446
447
448
449
450
  # Not writing tensorboard summaries if running in MLPerf.
  if FLAGS.ml_perf:
    eval_summary_writer, train_summary_writer = None, None
  else:
    summary_dir = os.path.join(FLAGS.model_dir, "summaries")
    eval_summary_writer = tf.summary.create_file_writer(
        os.path.join(summary_dir, "eval"))
    train_summary_writer = tf.summary.create_file_writer(
        os.path.join(summary_dir, "train"))

451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
  train_loss = 0
  for epoch in range(FLAGS.train_epochs):
    for cb in callbacks:
      cb.on_epoch_begin(epoch)

    # As NCF dataset is sampled with randomness, not repeating
    # data elements in each epoch has significant impact on
    # convergence. As so, offline-generated TF record files
    # contains all epoch worth of data. Thus we do not need
    # to initialize dataset when reading from tf record files.
    if generate_input_online:
      train_input_iterator = iter(
          strategy.experimental_distribute_dataset(train_input_dataset))

    train_loss = 0
    for step in range(num_train_steps):
      current_step = step + epoch * num_train_steps
      for c in callbacks:
        c.on_batch_begin(current_step)

      train_loss += train_step(train_input_iterator)

A. Unique TensorFlower's avatar
A. Unique TensorFlower committed
473
474
      # Write train loss once in every 1000 steps.
      if train_summary_writer and step % 1000 == 0:
A. Unique TensorFlower's avatar
A. Unique TensorFlower committed
475
476
477
478
        with train_summary_writer.as_default():
          tf.summary.scalar("training_loss", train_loss/(step + 1),
                            step=current_step)

479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
      for c in callbacks:
        c.on_batch_end(current_step)

    train_loss /= num_train_steps
    logging.info("Done training epoch %s, epoch loss=%s.", epoch + 1,
                 train_loss)

    eval_input_iterator = iter(
        strategy.experimental_distribute_dataset(eval_input_dataset))
    hr_sum = 0
    hr_count = 0
    for _ in range(num_eval_steps):
      step_hr_sum, step_hr_count = eval_step(eval_input_iterator)
      hr_sum += step_hr_sum
      hr_count += step_hr_count

A. Unique TensorFlower's avatar
A. Unique TensorFlower committed
495
496
497
498
499
    logging.info("Done eval epoch %s, hit_rate=%s.", epoch + 1,
                 hr_sum / hr_count)
    if eval_summary_writer:
      with eval_summary_writer.as_default():
        tf.summary.scalar("hit_rate", hr_sum / hr_count, step=current_step)
500
501
502
503
504
505
506
507

    if (FLAGS.early_stopping and
        float(hr_sum / hr_count) > params["hr_threshold"]):
      break

  for c in callbacks:
    c.on_train_end()

A. Unique TensorFlower's avatar
A. Unique TensorFlower committed
508
509
510
511
512
513
514
  # Saving the model at the end of training.
  if not FLAGS.ml_perf:
    checkpoint = tf.train.Checkpoint(model=keras_model, optimizer=optimizer)
    checkpoint_path = os.path.join(FLAGS.model_dir, "ctl_checkpoint")
    checkpoint.save(checkpoint_path)
    logging.info("Saving model as TF checkpoint: %s", checkpoint_path)

515
  return train_loss, [None, hr_sum / hr_count]
516
517


518
def build_stats(loss, eval_result, time_callback):
519
520
  """Normalizes and returns dictionary of stats.

Haoyu Zhang's avatar
Haoyu Zhang committed
521
522
523
524
525
526
527
528
  Args:
    loss: The final loss at training time.
    eval_result: Output of the eval step. Assumes first value is eval_loss and
      second value is accuracy_top_1.
    time_callback: Time tracking callback likely used during keras.fit.

  Returns:
    Dictionary of normalized results.
529
530
  """
  stats = {}
531
  if loss:
Haoyu Zhang's avatar
Haoyu Zhang committed
532
    stats["loss"] = loss
533
534

  if eval_result:
Haoyu Zhang's avatar
Haoyu Zhang committed
535
536
    stats["eval_loss"] = eval_result[0]
    stats["eval_hit_rate"] = eval_result[1]
537
538
539

  if time_callback:
    timestamp_log = time_callback.timestamp_log
Haoyu Zhang's avatar
Haoyu Zhang committed
540
541
    stats["step_timestamp_log"] = timestamp_log
    stats["train_finish_time"] = time_callback.train_finish_time
542
    if len(timestamp_log) > 1:
Haoyu Zhang's avatar
Haoyu Zhang committed
543
      stats["avg_exp_per_second"] = (
544
545
546
547
548
          time_callback.batch_size * time_callback.log_steps *
          (len(time_callback.timestamp_log)-1) /
          (timestamp_log[-1].timestamp - timestamp_log[0].timestamp))

  return stats
Shining Sun's avatar
Shining Sun committed
549
550
551


def main(_):
552
  logging.info("Result is %s", run_ncf(FLAGS))
Shining Sun's avatar
Shining Sun committed
553
554
555
556


if __name__ == "__main__":
  ncf_common.define_ncf_flags()
David Chen's avatar
David Chen committed
557
  app.run(main)