Commit 70704b94 authored by guptapriya

Add custom loss and metrics to NCF compile/fit version

parent dcdc45bd
@@ -45,53 +45,30 @@ from official.utils.misc import model_helpers
FLAGS = flags.FLAGS
def _keras_loss(y_true, y_pred):
# Here we are using the exact same loss used by the estimator
loss = tf.keras.losses.sparse_categorical_crossentropy(
y_pred=y_pred,
y_true=tf.cast(y_true, tf.int32),
from_logits=True)
return loss
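For reference, the _keras_loss being replaced by this commit computes a per-example sparse categorical crossentropy directly on logits. A toy invocation of the same API, with illustrative values not taken from the commit:

```python
import tensorflow as tf

# Two examples, two classes; y_pred holds raw logits, as in _keras_loss.
y_true = tf.constant([0, 1])
y_pred = tf.constant([[2.0, -1.0], [0.5, 1.5]])
loss = tf.keras.losses.sparse_categorical_crossentropy(
    y_true=tf.cast(y_true, tf.int32), y_pred=y_pred, from_logits=True)
print(loss.numpy())  # per-example losses, shape [2]
```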
def _get_metric_fn(params):
"""Get the metrix fn used by model compile."""
batch_size = params["batch_size"]
def metric_fn(y_true, y_pred):
"""Returns the in_top_k metric."""
softmax_logits = y_pred[0, :]
logits = tf.slice(softmax_logits, [0, 1], [batch_size, 1])
# The dup mask should come from the input data, but we have not yet found
# a good way to obtain it with Keras, so we set it to zeros here, which
# disables the duplicate-item correction.
dup_mask = tf.zeros([batch_size, 1])
_, _, in_top_k, _, _ = (
neumf_model.compute_eval_loss_and_metrics_helper(
logits,
softmax_logits,
dup_mask,
params["num_neg"],
params["match_mlperf"],
params["use_xla_for_gpu"]))
is_training = tf.keras.backend.learning_phase()
if isinstance(is_training, int):
is_training = tf.constant(bool(is_training), dtype=tf.bool)
in_top_k = tf.cond(
is_training,
lambda: tf.zeros(shape=in_top_k.shape, dtype=in_top_k.dtype),
lambda: in_top_k)
return in_top_k
return metric_fn
class MetricLayer(tf.keras.layers.Layer):
"""Custom layer of metrics for NCF model."""
def __init__(self, params):
super(MetricLayer, self).__init__()
self.params = params
def build(self, input_shape):
self.metric = tf.keras.metrics.Mean(name=rconst.HR_METRIC_NAME)
def call(self, inputs):
logits, dup_mask = inputs
dup_mask = tf.cast(dup_mask, tf.float32)
logits = tf.slice(logits, [0, 0, 1], [-1, -1, -1])
in_top_k, _, metric_weights, _ = neumf_model.compute_top_k_and_ndcg(
logits,
dup_mask,
self.params["match_mlperf"])
metric_weights = tf.cast(metric_weights, tf.float32)
self.add_metric(self.metric(in_top_k, metric_weights))
return inputs[0]
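MetricLayer above follows a general pass-through pattern: compute a streaming metric on the layer's inputs, attach it with add_metric, and return the inputs unchanged so the layer can be spliced anywhere in the graph. A minimal self-contained sketch of that pattern, with a toy mean metric standing in for the NCF hit-rate:

```python
import tensorflow as tf

class PassThroughMetricLayer(tf.keras.layers.Layer):
  """Attaches a streaming mean of its inputs as a model metric."""

  def build(self, input_shape):
    self.mean = tf.keras.metrics.Mean(name="mean_output")

  def call(self, inputs):
    # add_metric makes Keras track and report this value in fit/evaluate logs.
    self.add_metric(self.mean(inputs))
    return inputs  # pass-through, like MetricLayer returning inputs[0]

inputs = tf.keras.Input(shape=(4,))
outputs = PassThroughMetricLayer()(tf.keras.layers.Dense(1)(inputs))
model = tf.keras.Model(inputs, outputs)
```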
def _get_train_and_eval_data(producer, params):
"""Returns the datasets for training and evalutating."""
def preprocess_train_input(features, labels):
@@ -104,9 +81,10 @@ def _get_train_and_eval_data(producer, params):
fit.
- The label needs to be extended to be used in the loss fn
"""
if not params["keras_use_ctl"]:
features.pop(rconst.VALID_POINT_MASK)
labels = tf.expand_dims(labels, -1)
fake_dup_mask = tf.zeros_like(features[movielens.USER_COLUMN])
features[rconst.DUPLICATE_MASK] = fake_dup_mask
features[rconst.TRAIN_LABEL_KEY] = labels
return features, labels
train_input_fn = producer.make_input_fn(is_training=True)
@@ -125,10 +103,12 @@ def _get_train_and_eval_data(producer, params):
fit.
- The label needs to be extended to be used in the loss fn
"""
if not params["keras_use_ctl"]:
features.pop(rconst.DUPLICATE_MASK)
labels = tf.zeros_like(features[movielens.USER_COLUMN])
labels = tf.cast(tf.zeros_like(features[movielens.USER_COLUMN]), tf.bool)
labels = tf.expand_dims(labels, -1)
fake_valid_pt_mask = tf.cast(
tf.zeros_like(features[movielens.USER_COLUMN]), tf.bool)
features[rconst.VALID_POINT_MASK] = fake_valid_pt_mask
features[rconst.TRAIN_LABEL_KEY] = labels
return features, labels
eval_input_fn = producer.make_input_fn(is_training=False)
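Both preprocess functions pad the feature dict with fake tensors so the train and eval pipelines expose the same keys, now that masks and labels are wired in as model inputs rather than passed to compile(). A hypothetical sketch of how such a function attaches to the pipeline (key names are illustrative, not this repo's constants):

```python
import tensorflow as tf

def pad_eval_features(features, labels):
  # Eval batches lack the training-only keys, so fill them with zeros.
  zeros = tf.zeros_like(features["user_id"])
  features["valid_point_mask"] = tf.cast(zeros, tf.bool)
  features["train_labels"] = tf.expand_dims(tf.cast(zeros, tf.bool), -1)
  return features, labels

# eval_dataset = eval_dataset.map(pad_eval_features)
```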
@@ -202,6 +182,24 @@ def _get_keras_model(params):
batch_size=params["batches_per_step"],
name=movielens.ITEM_COLUMN,
dtype=tf.int32)
valid_pt_mask_input = tf.keras.layers.Input(
shape=(batch_size,),
batch_size=params["batches_per_step"],
name=rconst.VALID_POINT_MASK,
dtype=tf.bool)
dup_mask_input = tf.keras.layers.Input(
shape=(batch_size,),
batch_size=params["batches_per_step"],
name=rconst.DUPLICATE_MASK,
dtype=tf.int32)
label_input = tf.keras.layers.Input(
shape=(batch_size, 1),
batch_size=params["batches_per_step"],
name=rconst.TRAIN_LABEL_KEY,
dtype=tf.bool)
base_model = neumf_model.construct_model(
user_input, item_input, params, need_strip=True)
@@ -219,10 +217,26 @@ def _get_keras_model(params):
[zeros, logits],
axis=-1)
softmax_logits = MetricLayer(params)([softmax_logits, dup_mask_input])
keras_model = tf.keras.Model(
inputs=[user_input, item_input],
inputs=[
user_input,
item_input,
valid_pt_mask_input,
dup_mask_input,
label_input],
outputs=softmax_logits)
loss_obj = tf.keras.losses.SparseCategoricalCrossentropy(
from_logits=True,
reduction="sum")
keras_model.add_loss(loss_obj(
y_true=label_input,
y_pred=softmax_logits,
sample_weight=valid_pt_mask_input) * 1.0 / batch_size)
keras_model.summary()
return keras_model
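The new model wiring is the labels-as-inputs pattern: label and mask tensors are declared as Keras Inputs, the loss is attached with add_loss, and compile() no longer needs a loss argument (see the compile hunk below). A minimal sketch of the same pattern under assumed toy shapes, with illustrative names:

```python
import tensorflow as tf

batch_size = 32  # assumed fixed batch size, as in the NCF setup
feat = tf.keras.Input(shape=(8,), name="features")
label = tf.keras.Input(shape=(1,), name="label", dtype=tf.int32)
logits = tf.keras.layers.Dense(2)(feat)

model = tf.keras.Model(inputs=[feat, label], outputs=logits)
loss_obj = tf.keras.losses.SparseCategoricalCrossentropy(
    from_logits=True, reduction="sum")
# Summing per-example losses and dividing by the fixed batch size keeps the
# normalization explicit, mirroring the add_loss call above.
model.add_loss(loss_obj(y_true=label, y_pred=logits) / batch_size)
model.compile(optimizer="sgd")  # no loss= argument; add_loss supplies it
```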
@@ -269,7 +283,7 @@ def run_ncf(_):
time_callback = keras_utils.TimeHistory(batch_size, FLAGS.log_steps)
per_epoch_callback = IncrementEpochCallback(producer)
callbacks = [per_epoch_callback, time_callback]
callbacks = [per_epoch_callback]  # time_callback disabled for now
if FLAGS.early_stopping:
early_stopping_callback = CustomEarlyStopping(
@@ -374,8 +388,6 @@ def run_ncf(_):
with distribution_utils.get_strategy_scope(strategy):
keras_model.compile(
loss=_keras_loss,
metrics=[_get_metric_fn(params)],
optimizer=optimizer,
cloning=params["clone_model_in_keras_dist_strat"])
@@ -385,7 +397,7 @@ def run_ncf(_):
callbacks=callbacks,
validation_data=eval_input_dataset,
validation_steps=num_eval_steps,
verbose=2)
verbose=1)
logging.info("Training done. Start evaluating")