"examples/vscode:/vscode.git/clone" did not exist on "13ea9ddb069af447f3624dbcd9a1ac5a94973083"
Unverified Commit 0c0860ed authored by Reed's avatar Reed Committed by GitHub
Browse files

Add option to not use estimator. (#5623)

The option is --nouse_estimator
parent 4298c3a3
...@@ -593,49 +593,29 @@ def hash_pipeline(dataset, deterministic): ...@@ -593,49 +593,29 @@ def hash_pipeline(dataset, deterministic):
tf.logging.info(" [pipeline_hash] All batches hash: {}".format(overall_hash)) tf.logging.info(" [pipeline_hash] All batches hash: {}".format(overall_hash))
def make_input_fn(ncf_dataset, is_training): def make_input_fn(
# type: (typing.Optional[NCFDataset], bool) -> (typing.Callable, str, int) ncf_dataset, # type: typing.Optional[NCFDataset]
is_training, # type: bool
record_files=None # type: typing.Optional[tf.Tensor]
):
# type: (...) -> (typing.Callable, str, int)
"""Construct training input_fn for the current epoch.""" """Construct training input_fn for the current epoch."""
if ncf_dataset is None: if ncf_dataset is None:
return make_synthetic_input_fn(is_training) return make_synthetic_input_fn(is_training)
if not tf.gfile.Exists(ncf_dataset.cache_paths.subproc_alive): if record_files is not None:
# The generation subprocess must have been alive at some point, because we epoch_metadata = None
# earlier checked that the subproc_alive file existed. batch_count = None
raise ValueError("Generation subprocess unexpectedly died. Data will not " record_dir = None
"be available; exiting to avoid waiting forever.")
if is_training:
train_epoch_dir = ncf_dataset.cache_paths.train_epoch_dir
while not tf.gfile.Exists(train_epoch_dir):
tf.logging.info("Waiting for {} to exist.".format(train_epoch_dir))
time.sleep(1)
train_data_dirs = tf.gfile.ListDirectory(train_epoch_dir)
while not train_data_dirs:
tf.logging.info("Waiting for data folder to be created.")
time.sleep(1)
train_data_dirs = tf.gfile.ListDirectory(train_epoch_dir)
train_data_dirs.sort() # names are zfilled so that
# lexicographic sort == numeric sort
record_dir = os.path.join(train_epoch_dir, train_data_dirs[0])
template = rconst.TRAIN_RECORD_TEMPLATE
else: else:
record_dir = ncf_dataset.cache_paths.eval_data_subdir epoch_metadata, record_dir, template = get_epoch_info(is_training,
template = rconst.EVAL_RECORD_TEMPLATE ncf_dataset)
record_files = os.path.join(record_dir, template.format("*"))
ready_file = os.path.join(record_dir, rconst.READY_FILE) # This value is used to check that the batch count from the subprocess
while not tf.gfile.Exists(ready_file): # matches the batch count expected by the main thread.
tf.logging.info("Waiting for records in {} to be ready".format(record_dir)) batch_count = epoch_metadata["batch_count"]
time.sleep(1)
with tf.gfile.Open(ready_file, "r") as f:
epoch_metadata = json.load(f)
# This value is used to check that the batch count from the subprocess matches
# the batch count expected by the main thread.
batch_count = epoch_metadata["batch_count"]
def input_fn(params): def input_fn(params):
"""Generated input_fn for the given epoch.""" """Generated input_fn for the given epoch."""
...@@ -646,15 +626,13 @@ def make_input_fn(ncf_dataset, is_training): ...@@ -646,15 +626,13 @@ def make_input_fn(ncf_dataset, is_training):
# populates "batch_size" to the appropriate value. # populates "batch_size" to the appropriate value.
batch_size = params.get("eval_batch_size") or params["batch_size"] batch_size = params.get("eval_batch_size") or params["batch_size"]
if epoch_metadata["batch_size"] != batch_size: if epoch_metadata and epoch_metadata["batch_size"] != batch_size:
raise ValueError( raise ValueError(
"Records were constructed with batch size {}, but input_fn was given " "Records were constructed with batch size {}, but input_fn was given "
"a batch size of {}. This will result in a deserialization error in " "a batch size of {}. This will result in a deserialization error in "
"tf.parse_single_example." "tf.parse_single_example."
.format(epoch_metadata["batch_size"], batch_size)) .format(epoch_metadata["batch_size"], batch_size))
record_files_ds = tf.data.Dataset.list_files(record_files, shuffle=False)
record_files = tf.data.Dataset.list_files(
os.path.join(record_dir, template.format("*")), shuffle=False)
interleave = tf.contrib.data.parallel_interleave( interleave = tf.contrib.data.parallel_interleave(
tf.data.TFRecordDataset, tf.data.TFRecordDataset,
...@@ -665,7 +643,7 @@ def make_input_fn(ncf_dataset, is_training): ...@@ -665,7 +643,7 @@ def make_input_fn(ncf_dataset, is_training):
) )
deserialize = make_deserialize(params, batch_size, is_training) deserialize = make_deserialize(params, batch_size, is_training)
dataset = record_files.apply(interleave) dataset = record_files_ds.apply(interleave)
dataset = dataset.map(deserialize, num_parallel_calls=4) dataset = dataset.map(deserialize, num_parallel_calls=4)
dataset = dataset.prefetch(32) dataset = dataset.prefetch(32)
...@@ -677,6 +655,54 @@ def make_input_fn(ncf_dataset, is_training): ...@@ -677,6 +655,54 @@ def make_input_fn(ncf_dataset, is_training):
return input_fn, record_dir, batch_count return input_fn, record_dir, batch_count
def get_epoch_info(is_training, ncf_dataset):
"""Wait for the epoch input data to be ready and return various info about it.
Args:
is_training: If we should return info for a training or eval epoch.
ncf_dataset: An NCFDataset.
Returns:
epoch_metadata: A dict with epoch metadata.
record_dir: The directory with the TFRecord files storing the input data.
template: A string template of the files in `record_dir`.
`template.format('*')` is a glob that matches all the record files.
"""
if not tf.gfile.Exists(ncf_dataset.cache_paths.subproc_alive):
# The generation subprocess must have been alive at some point, because we
# earlier checked that the subproc_alive file existed.
raise ValueError("Generation subprocess unexpectedly died. Data will not "
"be available; exiting to avoid waiting forever.")
if is_training:
train_epoch_dir = ncf_dataset.cache_paths.train_epoch_dir
while not tf.gfile.Exists(train_epoch_dir):
tf.logging.info("Waiting for {} to exist.".format(train_epoch_dir))
time.sleep(1)
train_data_dirs = tf.gfile.ListDirectory(train_epoch_dir)
while not train_data_dirs:
tf.logging.info("Waiting for data folder to be created.")
time.sleep(1)
train_data_dirs = tf.gfile.ListDirectory(train_epoch_dir)
train_data_dirs.sort() # names are zfilled so that
# lexicographic sort == numeric sort
record_dir = os.path.join(train_epoch_dir, train_data_dirs[0])
template = rconst.TRAIN_RECORD_TEMPLATE
else:
record_dir = ncf_dataset.cache_paths.eval_data_subdir
template = rconst.EVAL_RECORD_TEMPLATE
ready_file = os.path.join(record_dir, rconst.READY_FILE)
while not tf.gfile.Exists(ready_file):
tf.logging.info("Waiting for records in {} to be ready".format(record_dir))
time.sleep(1)
with tf.gfile.Open(ready_file, "r") as f:
epoch_metadata = json.load(f)
return epoch_metadata, record_dir, template
def make_synthetic_input_fn(is_training): def make_synthetic_input_fn(is_training):
"""Construct training input_fn that uses synthetic data.""" """Construct training input_fn that uses synthetic data."""
def input_fn(params): def input_fn(params):
......
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains NcfModelRunner, which can train and evaluate an NCF model."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from collections import namedtuple
import os
import time
import tensorflow as tf
from tensorflow.contrib.compiler import xla
from official.recommendation import data_preprocessing
from official.recommendation import neumf_model
class NcfModelRunner(object):
"""Creates a graph to train/evaluate an NCF model, and runs it.
This class builds both a training model and evaluation model in the graph.
The two models share variables, so that during evaluation, the trained
variables are used.
"""
# _TrainModelProperties and _EvalModelProperties store useful properties of
# the training and evaluation models, respectively.
# _SHARED_MODEL_PROPERTY_FIELDS is their shared fields.
_SHARED_MODEL_PROPERTY_FIELDS = (
# A scalar tf.string placeholder tensor, that will be fed the path to the
# directory storing the TFRecord files for the input data.
"record_files_placeholder",
# The tf.data.Iterator to iterate over the input data.
"iterator",
# A scalar float tensor representing the model loss.
"loss",
# The batch size, as a Python int.
"batch_size",
# The op to run the model. For the training model, this trains the model
# for one step. For the evaluation model, this computes the metrics and
# updates the metric variables.
"run_model_op")
_TrainModelProperties = namedtuple("_TrainModelProperties", # pylint: disable=invalid-name
_SHARED_MODEL_PROPERTY_FIELDS)
_EvalModelProperties = namedtuple( # pylint: disable=invalid-name
"_EvalModelProperties", _SHARED_MODEL_PROPERTY_FIELDS + (
# A dict from metric name to (metric, update_op) tuple.
"metrics",
# Initializes the metric variables.
"metric_initializer",))
def __init__(self, ncf_dataset, params):
with tf.Graph().as_default() as self._graph:
if params["use_xla_for_gpu"]:
# The XLA functions we use require resource variables.
tf.enable_resource_variables()
self._ncf_dataset = ncf_dataset
self._global_step = tf.train.create_global_step()
self._train_model_properties = self._build_model(params, is_training=True)
self._eval_model_properties = self._build_model(params, is_training=False)
initializer = tf.global_variables_initializer()
self._graph.finalize()
self._session = tf.Session(graph=self._graph)
self._session.run(initializer)
def _build_model(self, params, is_training):
"""Builds the NCF model.
Args:
params: A dict of hyperparameters.
is_training: If True, build the training model. If False, build the
evaluation model.
Returns:
A _TrainModelProperties if is_training is True, or an _EvalModelProperties
otherwise.
"""
record_files_placeholder = tf.placeholder(tf.string, ())
input_fn, _, _ = \
data_preprocessing.make_input_fn(
ncf_dataset=self._ncf_dataset, is_training=is_training,
record_files=record_files_placeholder)
dataset = input_fn(params)
iterator = dataset.make_initializable_iterator()
model_fn = neumf_model.neumf_model_fn
if params["use_xla_for_gpu"]:
model_fn = xla.estimator_model_fn(model_fn)
if is_training:
features, labels = iterator.get_next()
estimator_spec = model_fn(
features, labels, tf.estimator.ModeKeys.TRAIN, params)
with tf.control_dependencies([estimator_spec.train_op]):
run_model_op = self._global_step.assign_add(1)
return self._TrainModelProperties(
record_files_placeholder, iterator,
estimator_spec.loss, params["batch_size"], run_model_op)
else:
features = iterator.get_next()
estimator_spec = model_fn(
features, None, tf.estimator.ModeKeys.EVAL, params)
run_model_op = tf.group(*(update_op for _, update_op in
estimator_spec.eval_metric_ops.values()))
metric_initializer = tf.variables_initializer(
tf.get_collection(tf.GraphKeys.METRIC_VARIABLES))
return self._EvalModelProperties(
record_files_placeholder, iterator, estimator_spec.loss,
params["eval_batch_size"], run_model_op,
estimator_spec.eval_metric_ops, metric_initializer)
def _train_or_eval(self, model_properties, num_steps, is_training):
"""Either trains or evaluates, depending on whether `is_training` is True.
Args:
model_properties: _TrainModelProperties or an _EvalModelProperties
containing the properties of the training or evaluation graph.
num_steps: The number of steps to train or evaluate for.
is_training: If True, run the training model. If False, run the evaluation
model.
Returns:
record_dir: The directory of TFRecords where the training/evaluation input
data was read from.
"""
if self._ncf_dataset is not None:
epoch_metadata, record_dir, template = data_preprocessing.get_epoch_info(
is_training=is_training, ncf_dataset=self._ncf_dataset)
batch_count = epoch_metadata["batch_count"]
if batch_count != num_steps:
raise ValueError(
"Step counts do not match. ({} vs. {}) The async process is "
"producing incorrect shards.".format(batch_count, num_steps))
record_files = os.path.join(record_dir, template.format("*"))
initializer_feed_dict = {
model_properties.record_files_placeholder: record_files}
del batch_count
else:
initializer_feed_dict = None
record_dir = None
self._session.run(model_properties.iterator.initializer,
initializer_feed_dict)
fetches = (model_properties.loss, model_properties.run_model_op)
mode = "Train" if is_training else "Eval"
start = None
for i in range(num_steps):
loss, _, = self._session.run(fetches)
if i % 100 == 0:
if start is None:
# Only start the timer after 100 steps so there is a warmup.
start = time.time()
start_step = i
tf.logging.info("{} Loss = {}".format(mode, loss))
end = time.time()
if start is not None:
print("{} peformance: {} examples/sec".format(
mode, (i - start_step) * model_properties.batch_size / (end - start)))
return record_dir
def train(self, num_train_steps):
"""Trains the graph for a single cycle.
Args:
num_train_steps: The number of steps per cycle to train for.
"""
record_dir = self._train_or_eval(self._train_model_properties,
num_train_steps, is_training=True)
if record_dir:
# We delete the record_dir because each cycle, new TFRecords is generated
# by the async process.
tf.gfile.DeleteRecursively(record_dir)
def eval(self, num_eval_steps):
"""Evaluates the graph on the eval data.
Args:
num_eval_steps: The number of steps to evaluate for.
Returns:
A dict of evaluation results.
"""
self._session.run(self._eval_model_properties.metric_initializer)
self._train_or_eval(self._eval_model_properties, num_eval_steps,
is_training=False)
eval_results = {
'global_step': self._session.run(self._global_step)}
for key, (val, _) in self._eval_model_properties.metrics.items():
val_ = self._session.run(val)
tf.logging.info("{} = {}".format(key, self._session.run(val)))
eval_results[key] = val_
return eval_results
...@@ -41,6 +41,7 @@ from tensorflow.contrib.compiler import xla ...@@ -41,6 +41,7 @@ from tensorflow.contrib.compiler import xla
from official.datasets import movielens from official.datasets import movielens
from official.recommendation import constants as rconst from official.recommendation import constants as rconst
from official.recommendation import data_preprocessing from official.recommendation import data_preprocessing
from official.recommendation import model_runner
from official.recommendation import neumf_model from official.recommendation import neumf_model
from official.utils.flags import core as flags_core from official.utils.flags import core as flags_core
from official.utils.logs import hooks_helper from official.utils.logs import hooks_helper
...@@ -177,30 +178,36 @@ def run_ncf(_): ...@@ -177,30 +178,36 @@ def run_ncf(_):
model_helpers.apply_clean(flags.FLAGS) model_helpers.apply_clean(flags.FLAGS)
train_estimator, eval_estimator = construct_estimator( params = {
num_gpus=num_gpus, model_dir=FLAGS.model_dir, params={ "use_seed": FLAGS.seed is not None,
"use_seed": FLAGS.seed is not None, "hash_pipeline": FLAGS.hash_pipeline,
"hash_pipeline": FLAGS.hash_pipeline, "batch_size": batch_size,
"batch_size": batch_size, "eval_batch_size": eval_batch_size,
"eval_batch_size": eval_batch_size, "learning_rate": FLAGS.learning_rate,
"learning_rate": FLAGS.learning_rate, "num_users": num_users,
"num_users": num_users, "num_items": num_items,
"num_items": num_items, "mf_dim": FLAGS.num_factors,
"mf_dim": FLAGS.num_factors, "model_layers": [int(layer) for layer in FLAGS.layers],
"model_layers": [int(layer) for layer in FLAGS.layers], "mf_regularization": FLAGS.mf_regularization,
"mf_regularization": FLAGS.mf_regularization, "mlp_reg_layers": [float(reg) for reg in FLAGS.mlp_regularization],
"mlp_reg_layers": [float(reg) for reg in FLAGS.mlp_regularization], "num_neg": FLAGS.num_neg,
"num_neg": FLAGS.num_neg, "use_tpu": FLAGS.tpu is not None,
"use_tpu": FLAGS.tpu is not None, "tpu": FLAGS.tpu,
"tpu": FLAGS.tpu, "tpu_zone": FLAGS.tpu_zone,
"tpu_zone": FLAGS.tpu_zone, "tpu_gcp_project": FLAGS.tpu_gcp_project,
"tpu_gcp_project": FLAGS.tpu_gcp_project, "beta1": FLAGS.beta1,
"beta1": FLAGS.beta1, "beta2": FLAGS.beta2,
"beta2": FLAGS.beta2, "epsilon": FLAGS.epsilon,
"epsilon": FLAGS.epsilon, "match_mlperf": FLAGS.ml_perf,
"match_mlperf": FLAGS.ml_perf, "use_xla_for_gpu": FLAGS.use_xla_for_gpu,
"use_xla_for_gpu": FLAGS.use_xla_for_gpu, "use_estimator": FLAGS.use_estimator,
}, batch_size=flags.FLAGS.batch_size, eval_batch_size=eval_batch_size) }
if FLAGS.use_estimator:
train_estimator, eval_estimator = construct_estimator(
num_gpus=num_gpus, model_dir=FLAGS.model_dir, params=params,
batch_size=flags.FLAGS.batch_size, eval_batch_size=eval_batch_size)
else:
runner = model_runner.NcfModelRunner(ncf_dataset, params)
# Create hooks that log information about the training and metric values # Create hooks that log information about the training and metric values
train_hooks = hooks_helper.get_train_hooks( train_hooks = hooks_helper.get_train_hooks(
...@@ -237,37 +244,46 @@ def run_ncf(_): ...@@ -237,37 +244,46 @@ def run_ncf(_):
value=cycle_index) value=cycle_index)
# Train the model # Train the model
train_input_fn, train_record_dir, batch_count = \ if FLAGS.use_estimator:
data_preprocessing.make_input_fn( train_input_fn, train_record_dir, batch_count = \
ncf_dataset=ncf_dataset, is_training=True) data_preprocessing.make_input_fn(
ncf_dataset=ncf_dataset, is_training=True)
if batch_count != num_train_steps:
raise ValueError( if batch_count != num_train_steps:
"Step counts do not match. ({} vs. {}) The async process is "
"producing incorrect shards.".format(batch_count, num_train_steps))
train_estimator.train(input_fn=train_input_fn, hooks=train_hooks,
steps=num_train_steps)
if train_record_dir:
tf.gfile.DeleteRecursively(train_record_dir)
tf.logging.info("Beginning evaluation.")
if pred_input_fn is None:
pred_input_fn, _, eval_batch_count = data_preprocessing.make_input_fn(
ncf_dataset=ncf_dataset, is_training=False)
if eval_batch_count != num_eval_steps:
raise ValueError( raise ValueError(
"Step counts do not match. ({} vs. {}) The async process is " "Step counts do not match. ({} vs. {}) The async process is "
"producing incorrect shards.".format( "producing incorrect shards.".format(batch_count, num_train_steps))
eval_batch_count, num_eval_steps))
train_estimator.train(input_fn=train_input_fn, hooks=train_hooks,
mlperf_helper.ncf_print(key=mlperf_helper.TAGS.EVAL_START, steps=num_train_steps)
value=cycle_index) if train_record_dir:
eval_results = eval_estimator.evaluate(pred_input_fn, steps=num_eval_steps) tf.gfile.DeleteRecursively(train_record_dir)
tf.logging.info("Beginning evaluation.")
if pred_input_fn is None:
pred_input_fn, _, eval_batch_count = data_preprocessing.make_input_fn(
ncf_dataset=ncf_dataset, is_training=False)
if eval_batch_count != num_eval_steps:
raise ValueError(
"Step counts do not match. ({} vs. {}) The async process is "
"producing incorrect shards.".format(
eval_batch_count, num_eval_steps))
mlperf_helper.ncf_print(key=mlperf_helper.TAGS.EVAL_START,
value=cycle_index)
eval_results = eval_estimator.evaluate(pred_input_fn,
steps=num_eval_steps)
tf.logging.info("Evaluation complete.")
else:
runner.train(num_train_steps)
tf.logging.info("Beginning evaluation.")
mlperf_helper.ncf_print(key=mlperf_helper.TAGS.EVAL_START,
value=cycle_index)
eval_results = runner.eval(num_eval_steps)
tf.logging.info("Evaluation complete.")
hr = float(eval_results[rconst.HR_KEY]) hr = float(eval_results[rconst.HR_KEY])
ndcg = float(eval_results[rconst.NDCG_KEY]) ndcg = float(eval_results[rconst.NDCG_KEY])
tf.logging.info("Evaluation complete.")
mlperf_helper.ncf_print( mlperf_helper.ncf_print(
key=mlperf_helper.TAGS.EVAL_TARGET, key=mlperf_helper.TAGS.EVAL_TARGET,
...@@ -472,6 +488,15 @@ def define_ncf_flags(): ...@@ -472,6 +488,15 @@ def define_ncf_flags():
def xla_validator(flag_dict): def xla_validator(flag_dict):
return not flag_dict["use_xla_for_gpu"] or not flag_dict["tpu"] return not flag_dict["use_xla_for_gpu"] or not flag_dict["tpu"]
flags.DEFINE_bool(
name="use_estimator", default=True, help=flags_core.help_wrap(
"If True, use Estimator to train. Setting to False is slightly "
"faster, but when False, the following are currently unsupported:\n"
" * Using TPUs\n"
" * Using more than 1 GPU\n"
" * Reloading from checkpoints\n"
" * Any hooks specified with --hooks\n"))
if __name__ == "__main__": if __name__ == "__main__":
tf.logging.set_verbosity(tf.logging.INFO) tf.logging.set_verbosity(tf.logging.INFO)
......
...@@ -24,6 +24,7 @@ import mock ...@@ -24,6 +24,7 @@ import mock
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
from absl import flags
from absl.testing import flagsaver from absl.testing import flagsaver
from official.recommendation import constants as rconst from official.recommendation import constants as rconst
from official.recommendation import data_preprocessing from official.recommendation import data_preprocessing
...@@ -249,6 +250,13 @@ class NcfTest(tf.test.TestCase): ...@@ -249,6 +250,13 @@ class NcfTest(tf.test.TestCase):
def test_end_to_end_mlperf(self): def test_end_to_end_mlperf(self):
ncf_main.main(None) ncf_main.main(None)
@flagsaver.flagsaver(use_estimator=False, **_BASE_END_TO_END_FLAGS)
@mock.patch.object(data_preprocessing, "SYNTHETIC_BATCHES_PER_EPOCH", 100)
def test_end_to_end_no_estimator(self):
ncf_main.main(None)
flags.FLAGS.ml_perf = True
ncf_main.main(None)
if __name__ == "__main__": if __name__ == "__main__":
tf.logging.set_verbosity(tf.logging.INFO) tf.logging.set_verbosity(tf.logging.INFO)
......
...@@ -78,7 +78,18 @@ def neumf_model_fn(features, labels, mode, params): ...@@ -78,7 +78,18 @@ def neumf_model_fn(features, labels, mode, params):
users = features[movielens.USER_COLUMN] users = features[movielens.USER_COLUMN]
items = tf.cast(features[movielens.ITEM_COLUMN], tf.int32) items = tf.cast(features[movielens.ITEM_COLUMN], tf.int32)
logits = construct_model(users=users, items=items, params=params) keras_model = params.get("keras_model")
if keras_model:
logits = keras_model([users, items],
training=mode == tf.estimator.ModeKeys.TRAIN)
else:
keras_model = construct_model(users=users, items=items, params=params)
logits = keras_model.output
if not params["use_estimator"] and "keras_model" not in params:
# When we are not using estimator, we need to reuse the Keras model when
# this model_fn is called again, so that the variables are shared between
# training and eval. So we mutate params to add the Keras model.
params["keras_model"] = keras_model
# Softmax with the first column of zeros is equivalent to sigmoid. # Softmax with the first column of zeros is equivalent to sigmoid.
softmax_logits = tf.concat([tf.zeros(logits.shape, dtype=logits.dtype), softmax_logits = tf.concat([tf.zeros(logits.shape, dtype=logits.dtype),
...@@ -242,10 +253,11 @@ def construct_model(users, items, params): ...@@ -242,10 +253,11 @@ def construct_model(users, items, params):
name=movielens.RATING_COLUMN)(predict_vector) name=movielens.RATING_COLUMN)(predict_vector)
# Print model topology. # Print model topology.
tf.keras.models.Model([user_input, item_input], logits).summary() model = tf.keras.models.Model([user_input, item_input], logits)
model.summary()
sys.stdout.flush() sys.stdout.flush()
return logits return model
def compute_eval_loss_and_metrics(logits, # type: tf.Tensor def compute_eval_loss_and_metrics(logits, # type: tf.Tensor
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment