Commit 441c8f40 authored by qianyj

update TF code

parent ec90ad8e
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Asynchronous data producer for the NCF pipeline."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import atexit
import functools
import os
import sys
import tempfile
import threading
import time
import timeit
import traceback
import typing
import numpy as np
import six
from six.moves import queue
import tensorflow as tf
from tensorflow.contrib.tpu.python.tpu.datasets import StreamingFilesDataset
from official.datasets import movielens
from official.recommendation import constants as rconst
from official.recommendation import popen_helper
from official.recommendation import stat_utils
SUMMARY_TEMPLATE = """General:
{spacer}Num users: {num_users}
{spacer}Num items: {num_items}
Training:
{spacer}Positive count: {train_pos_ct}
{spacer}Batch size: {train_batch_size} {multiplier}
{spacer}Batch count per epoch: {train_batch_ct}
Eval:
{spacer}Positive count: {eval_pos_ct}
{spacer}Batch size: {eval_batch_size} {multiplier}
{spacer}Batch count per epoch: {eval_batch_ct}"""
_TRAIN_FEATURE_MAP = {
movielens.USER_COLUMN: tf.FixedLenFeature([], dtype=tf.string),
movielens.ITEM_COLUMN: tf.FixedLenFeature([], dtype=tf.string),
rconst.MASK_START_INDEX: tf.FixedLenFeature([1], dtype=tf.string),
"labels": tf.FixedLenFeature([], dtype=tf.string),
}
_EVAL_FEATURE_MAP = {
movielens.USER_COLUMN: tf.FixedLenFeature([], dtype=tf.string),
movielens.ITEM_COLUMN: tf.FixedLenFeature([], dtype=tf.string),
rconst.DUPLICATE_MASK: tf.FixedLenFeature([], dtype=tf.string)
}
class DatasetManager(object):
"""Helper class for handling TensorFlow specific data tasks.
This class takes the (relatively) framework agnostic work done by the data
constructor classes and handles the TensorFlow specific portions (TFRecord
management, tf.Dataset creation, etc.).
"""
def __init__(self, is_training, stream_files, batches_per_epoch,
shard_root=None, deterministic=False):
# type: (bool, bool, int, typing.Optional[str], bool) -> None
"""Constructs a `DatasetManager` instance.
Args:
is_training: Boolean of whether the data provided is training or
evaluation data. This determines whether to reuse the data
(if is_training=False) and the exact structure to use when storing and
yielding data.
stream_files: Boolean indicating whether data should be serialized and
written to file shards.
batches_per_epoch: The number of batches in a single epoch.
shard_root: The base directory to be used when stream_files=True.
deterministic: Forgo non-deterministic speedups (i.e. use sloppy=False).
"""
self._is_training = is_training
self._deterministic = deterministic
self._stream_files = stream_files
self._writers = []
self._write_locks = [threading.RLock() for _ in
range(rconst.NUM_FILE_SHARDS)] if stream_files else []
self._batches_per_epoch = batches_per_epoch
self._epochs_completed = 0
self._epochs_requested = 0
self._shard_root = shard_root
self._result_queue = queue.Queue()
self._result_reuse = []
@property
def current_data_root(self):
subdir = (rconst.TRAIN_FOLDER_TEMPLATE.format(self._epochs_completed)
if self._is_training else rconst.EVAL_FOLDER)
return os.path.join(self._shard_root, subdir)
def buffer_reached(self):
# Only applicable for training.
return (self._epochs_completed - self._epochs_requested >=
rconst.CYCLES_TO_BUFFER and self._is_training)
@staticmethod
def _serialize(data):
"""Convert NumPy arrays into a TFRecords entry."""
feature_dict = {
k: tf.train.Feature(bytes_list=tf.train.BytesList(
value=[memoryview(v).tobytes()])) for k, v in data.items()}
return tf.train.Example(
features=tf.train.Features(feature=feature_dict)).SerializeToString()
def _deserialize(self, serialized_data, batch_size):
"""Convert serialized TFRecords into tensors.
Args:
serialized_data: A tensor containing serialized records.
batch_size: The data arrives pre-batched, so batch size is needed to
deserialize the data.
"""
feature_map = _TRAIN_FEATURE_MAP if self._is_training else _EVAL_FEATURE_MAP
features = tf.parse_single_example(serialized_data, feature_map)
users = tf.reshape(tf.decode_raw(
features[movielens.USER_COLUMN], rconst.USER_DTYPE), (batch_size,))
items = tf.reshape(tf.decode_raw(
features[movielens.ITEM_COLUMN], rconst.ITEM_DTYPE), (batch_size,))
def decode_binary(data_bytes):
# tf.decode_raw does not support bool as a decode type. As a result it is
# necessary to decode to int8 (7 of the bits will be ignored) and then
# cast to bool.
return tf.reshape(tf.cast(tf.decode_raw(data_bytes, tf.int8), tf.bool),
(batch_size,))
if self._is_training:
mask_start_index = tf.decode_raw(
features[rconst.MASK_START_INDEX], tf.int32)[0]
valid_point_mask = tf.less(tf.range(batch_size), mask_start_index)
return {
movielens.USER_COLUMN: users,
movielens.ITEM_COLUMN: items,
rconst.VALID_POINT_MASK: valid_point_mask,
}, decode_binary(features["labels"])
return {
movielens.USER_COLUMN: users,
movielens.ITEM_COLUMN: items,
rconst.DUPLICATE_MASK: decode_binary(features[rconst.DUPLICATE_MASK]),
}
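# Illustrative sketch (not part of the pipeline): the serialize/parse pair
# above is purely byte-level, so the round trip can be checked with NumPy
# alone, since tf.decode_raw over the stored bytes behaves like np.frombuffer.
# The int32 / int8 dtypes below are stand-ins for rconst.USER_DTYPE and the
# int8 encoding used for booleans.
def _serialization_roundtrip_sketch():
  import numpy as np
  users = np.array([0, 1, 2, 2], dtype=np.int32)
  labels = np.array([True, False, True, False])
  raw_users = memoryview(users).tobytes()  # as in _serialize() above
  raw_labels = memoryview(labels.astype(np.int8)).tobytes()
  assert np.array_equal(np.frombuffer(raw_users, dtype=np.int32), users)
  # Decode to int8 and cast, mirroring decode_binary() above.
  assert np.array_equal(
      np.frombuffer(raw_labels, dtype=np.int8).astype(bool), labels)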
def put(self, index, data):
# type: (int, dict) -> None
"""Store data for later consumption.
Because there are several paths for storing and yielding data (queues,
lists, files), the data producer simply provides the data in a standard
format, at which point the dataset manager stores it in the correct
form.
Args:
index: Used to select shards when writing to files.
data: A dict of the data to be stored. This method mutates data, and
therefore expects to be the only consumer.
"""
if self._stream_files:
example_bytes = self._serialize(data)
with self._write_locks[index % rconst.NUM_FILE_SHARDS]:
self._writers[index % rconst.NUM_FILE_SHARDS].write(example_bytes)
else:
if self._is_training:
mask_start_index = data.pop(rconst.MASK_START_INDEX)
batch_size = data[movielens.ITEM_COLUMN].shape[0]
data[rconst.VALID_POINT_MASK] = np.less(np.arange(batch_size),
mask_start_index)
data = (data, data.pop("labels"))
self._result_queue.put(data)
def start_construction(self):
if self._stream_files:
tf.gfile.MakeDirs(self.current_data_root)
template = os.path.join(self.current_data_root, rconst.SHARD_TEMPLATE)
self._writers = [tf.io.TFRecordWriter(template.format(i))
for i in range(rconst.NUM_FILE_SHARDS)]
def end_construction(self):
if self._stream_files:
for writer in self._writers:
  writer.close()
self._writers = []
self._result_queue.put(self.current_data_root)
self._epochs_completed += 1
def data_generator(self, epochs_between_evals):
"""Yields examples during local training."""
assert not self._stream_files
assert self._is_training or epochs_between_evals == 1
if self._is_training:
for _ in range(self._batches_per_epoch * epochs_between_evals):
yield self._result_queue.get(timeout=300)
else:
if self._result_reuse:
assert len(self._result_reuse) == self._batches_per_epoch
for i in self._result_reuse:
yield i
else:
# First epoch.
for _ in range(self._batches_per_epoch * epochs_between_evals):
result = self._result_queue.get(timeout=300)
self._result_reuse.append(result)
yield result
def get_dataset(self, batch_size, epochs_between_evals):
"""Construct the dataset to be used for training and eval.
For local training, data is provided through Dataset.from_generator. For
remote training (TPUs) the data is first serialized to files and then sent
to the TPU through a StreamingFilesDataset.
Args:
batch_size: The per-device batch size of the dataset.
epochs_between_evals: How many epochs worth of data to yield.
(Generator mode only.)
"""
self._epochs_requested += 1
if self._stream_files:
if epochs_between_evals > 1:
raise ValueError("epochs_between_evals > 1 not supported for file "
"based dataset.")
epoch_data_dir = self._result_queue.get(timeout=300)
if not self._is_training:
self._result_queue.put(epoch_data_dir) # Eval data is reused.
file_pattern = os.path.join(
epoch_data_dir, rconst.SHARD_TEMPLATE.format("*"))
dataset = StreamingFilesDataset(
files=file_pattern, worker_job="worker",
num_parallel_reads=rconst.NUM_FILE_SHARDS, num_epochs=1,
sloppy=not self._deterministic)
map_fn = functools.partial(self._deserialize, batch_size=batch_size)
dataset = dataset.map(map_fn, num_parallel_calls=16)
else:
types = {movielens.USER_COLUMN: rconst.USER_DTYPE,
movielens.ITEM_COLUMN: rconst.ITEM_DTYPE}
shapes = {movielens.USER_COLUMN: tf.TensorShape([batch_size]),
movielens.ITEM_COLUMN: tf.TensorShape([batch_size])}
if self._is_training:
types[rconst.VALID_POINT_MASK] = np.bool
shapes[rconst.VALID_POINT_MASK] = tf.TensorShape([batch_size])
types = (types, np.bool)
shapes = (shapes, tf.TensorShape([batch_size]))
else:
types[rconst.DUPLICATE_MASK] = np.bool
shapes[rconst.DUPLICATE_MASK] = tf.TensorShape([batch_size])
data_generator = functools.partial(
self.data_generator, epochs_between_evals=epochs_between_evals)
dataset = tf.data.Dataset.from_generator(
generator=data_generator, output_types=types,
output_shapes=shapes)
return dataset.prefetch(16)
def make_input_fn(self, batch_size):
"""Create an input_fn which checks for batch size consistency."""
def input_fn(params):
param_batch_size = (params["batch_size"] if self._is_training else
params["eval_batch_size"])
if batch_size != param_batch_size:
raise ValueError("producer batch size ({}) differs from params batch "
"size ({})".format(batch_size, param_batch_size))
epochs_between_evals = (params.get("epochs_between_evals", 1)
if self._is_training else 1)
return self.get_dataset(batch_size=batch_size,
epochs_between_evals=epochs_between_evals)
return input_fn
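# A minimal sketch (assuming the TF 1.x APIs used throughout this file) of
# the local, generator-backed path described in get_dataset() above: the
# generator yields whole pre-batched feature dicts, so the batch dimension
# appears in output_shapes and no further batching is applied. The training
# path additionally yields (features, labels) tuples; this is the simplified
# eval-style structure.
def _from_generator_sketch(batch_size=4):
  import numpy as np
  def gen():
    for _ in range(2):  # two pre-assembled batches
      yield {movielens.USER_COLUMN: np.zeros((batch_size,), dtype=np.int32)}
  return tf.data.Dataset.from_generator(
      generator=gen,
      output_types={movielens.USER_COLUMN: tf.int32},
      output_shapes={movielens.USER_COLUMN: tf.TensorShape([batch_size])})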
class BaseDataConstructor(threading.Thread):
"""Data constructor base class.
This class manages the control flow for constructing data. It is not meant
to be used directly, but instead subclasses should implement the following
two methods:
self.construct_lookup_variables
self.lookup_negative_items
"""
def __init__(self,
maximum_number_epochs, # type: int
num_users, # type: int
num_items, # type: int
user_map, # type: dict
item_map, # type: dict
train_pos_users, # type: np.ndarray
train_pos_items, # type: np.ndarray
train_batch_size, # type: int
batches_per_train_step, # type: int
num_train_negatives, # type: int
eval_pos_users, # type: np.ndarray
eval_pos_items, # type: np.ndarray
eval_batch_size, # type: int
batches_per_eval_step, # type: int
stream_files, # type: bool
deterministic=False # type: bool
):
# General constants
self._maximum_number_epochs = maximum_number_epochs
self._num_users = num_users
self._num_items = num_items
self.user_map = user_map
self.item_map = item_map
self._train_pos_users = train_pos_users
self._train_pos_items = train_pos_items
self.train_batch_size = train_batch_size
self._num_train_negatives = num_train_negatives
self._batches_per_train_step = batches_per_train_step
self._eval_pos_users = eval_pos_users
self._eval_pos_items = eval_pos_items
self.eval_batch_size = eval_batch_size
# Training
if self._train_pos_users.shape != self._train_pos_items.shape:
raise ValueError(
"User positives ({}) is different from item positives ({})".format(
self._train_pos_users.shape, self._train_pos_items.shape))
(self._train_pos_count,) = self._train_pos_users.shape
self._elements_in_epoch = (1 + num_train_negatives) * self._train_pos_count
self.train_batches_per_epoch = self._count_batches(
self._elements_in_epoch, train_batch_size, batches_per_train_step)
# Evaluation
if eval_batch_size % (1 + rconst.NUM_EVAL_NEGATIVES):
raise ValueError("Eval batch size {} is not divisible by {}".format(
eval_batch_size, 1 + rconst.NUM_EVAL_NEGATIVES))
self._eval_users_per_batch = int(
eval_batch_size // (1 + rconst.NUM_EVAL_NEGATIVES))
self._eval_elements_in_epoch = num_users * (1 + rconst.NUM_EVAL_NEGATIVES)
self.eval_batches_per_epoch = self._count_batches(
self._eval_elements_in_epoch, eval_batch_size, batches_per_eval_step)
# Intermediate artifacts
self._current_epoch_order = np.empty(shape=(0,))
self._shuffle_iterator = None
self._shuffle_with_forkpool = not stream_files
if stream_files:
self._shard_root = tempfile.mkdtemp(prefix="ncf_")
atexit.register(tf.gfile.DeleteRecursively, dirname=self._shard_root)
else:
self._shard_root = None
self._train_dataset = DatasetManager(
True, stream_files, self.train_batches_per_epoch, self._shard_root,
deterministic)
self._eval_dataset = DatasetManager(
False, stream_files, self.eval_batches_per_epoch, self._shard_root,
deterministic)
# Threading details
super(BaseDataConstructor, self).__init__()
self.daemon = True
self._stop_loop = False
self._fatal_exception = None
self.deterministic = deterministic
def __str__(self):
multiplier = ("(x{} devices)".format(self._batches_per_train_step)
if self._batches_per_train_step > 1 else "")
summary = SUMMARY_TEMPLATE.format(
spacer=" ", num_users=self._num_users, num_items=self._num_items,
train_pos_ct=self._train_pos_count,
train_batch_size=self.train_batch_size,
train_batch_ct=self.train_batches_per_epoch,
eval_pos_ct=self._num_users, eval_batch_size=self.eval_batch_size,
eval_batch_ct=self.eval_batches_per_epoch, multiplier=multiplier)
return super(BaseDataConstructor, self).__str__() + "\n" + summary
@staticmethod
def _count_batches(example_count, batch_size, batches_per_step):
"""Determine the number of batches, rounding up to fill all devices."""
x = (example_count + batch_size - 1) // batch_size
return (x + batches_per_step - 1) // batches_per_step * batches_per_step
def stop_loop(self):
self._stop_loop = True
def construct_lookup_variables(self):
"""Perform any one time pre-compute work."""
raise NotImplementedError
def lookup_negative_items(self, **kwargs):
"""Randomly sample negative items for given users."""
raise NotImplementedError
def _run(self):
atexit.register(self.stop_loop)
self._start_shuffle_iterator()
self.construct_lookup_variables()
self._construct_training_epoch()
self._construct_eval_epoch()
for _ in range(self._maximum_number_epochs - 1):
self._construct_training_epoch()
self.stop_loop()
def run(self):
try:
self._run()
except Exception as e:
# The Thread base class swallows stack traces, so unfortunately it is
# necessary to catch and re-raise to get debug output
traceback.print_exc()
self._fatal_exception = e
sys.stderr.flush()
raise
def _start_shuffle_iterator(self):
if self._shuffle_with_forkpool:
pool = popen_helper.get_forkpool(3, closing=False)
else:
pool = popen_helper.get_threadpool(1, closing=False)
atexit.register(pool.close)
args = [(self._elements_in_epoch, stat_utils.random_int32())
for _ in range(self._maximum_number_epochs)]
imap = pool.imap if self.deterministic else pool.imap_unordered
self._shuffle_iterator = imap(stat_utils.permutation, args)
def _get_training_batch(self, i):
"""Construct a single batch of training data.
Args:
i: The index of the batch. This is used when stream_files=True to assign
data to file shards.
"""
batch_indices = self._current_epoch_order[i * self.train_batch_size:
(i + 1) * self.train_batch_size]
(mask_start_index,) = batch_indices.shape
batch_ind_mod = np.mod(batch_indices, self._train_pos_count)
users = self._train_pos_users[batch_ind_mod]
negative_indices = np.greater_equal(batch_indices, self._train_pos_count)
negative_users = users[negative_indices]
negative_items = self.lookup_negative_items(negative_users=negative_users)
items = self._train_pos_items[batch_ind_mod]
items[negative_indices] = negative_items
labels = np.logical_not(negative_indices)
# Pad last partial batch
pad_length = self.train_batch_size - mask_start_index
if pad_length:
# We pad with arange rather than zeros because the network will still
# compute logits for padded examples, and padding with zeros would create
# a very "hot" embedding key which can have performance implications.
user_pad = np.arange(pad_length, dtype=users.dtype) % self._num_users
item_pad = np.arange(pad_length, dtype=items.dtype) % self._num_items
label_pad = np.zeros(shape=(pad_length,), dtype=labels.dtype)
users = np.concatenate([users, user_pad])
items = np.concatenate([items, item_pad])
labels = np.concatenate([labels, label_pad])
self._train_dataset.put(i, {
movielens.USER_COLUMN: users,
movielens.ITEM_COLUMN: items,
rconst.MASK_START_INDEX: np.array(mask_start_index, dtype=np.int32),
"labels": labels,
})
def _wait_to_construct_train_epoch(self):
count = 0
while self._train_dataset.buffer_reached() and not self._stop_loop:
time.sleep(0.01)
count += 1
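# np.log10(count) is integral only at powers of ten, so the wait message
# below is emitted at counts 100, 1000, 10000, ... rather than on every loop.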
if count >= 100 and np.log10(count) == np.round(np.log10(count)):
tf.logging.info(
"Waited {} times for training data to be consumed".format(count))
def _construct_training_epoch(self):
"""Loop to construct a batch of training data."""
self._wait_to_construct_train_epoch()
start_time = timeit.default_timer()
if self._stop_loop:
return
self._train_dataset.start_construction()
map_args = list(range(self.train_batches_per_epoch))
self._current_epoch_order = next(self._shuffle_iterator)
get_pool = (popen_helper.get_fauxpool if self.deterministic else
popen_helper.get_threadpool)
with get_pool(6) as pool:
pool.map(self._get_training_batch, map_args)
self._train_dataset.end_construction()
tf.logging.info("Epoch construction complete. Time: {:.1f} seconds".format(
timeit.default_timer() - start_time))
@staticmethod
def _assemble_eval_batch(users, positive_items, negative_items,
users_per_batch):
"""Construct duplicate_mask and structure data accordingly.
The positive items should be last so that they lose ties. However, they
should not be masked out if the true eval positive happens to be
selected as a negative. So instead, the positive is placed in the first
position, and then switched with the last element after the duplicate
mask has been computed.
Args:
users: An array of users in a batch. (should be identical along axis 1)
positive_items: An array (batch_size x 1) of positive item indices.
negative_items: An array of negative item indices.
users_per_batch: How many users should be in the batch. This is passed
as an argument so that ncf_test.py can use this method.
Returns:
User, item, and duplicate_mask arrays.
"""
items = np.concatenate([positive_items, negative_items], axis=1)
# We pad the users and items here so that the duplicate mask calculation
# will include padding. The metric function relies on all padded elements
# except the positive being marked as duplicate to mask out padded points.
if users.shape[0] < users_per_batch:
pad_rows = users_per_batch - users.shape[0]
padding = np.zeros(shape=(pad_rows, users.shape[1]), dtype=np.int32)
users = np.concatenate([users, padding.astype(users.dtype)], axis=0)
items = np.concatenate([items, padding.astype(items.dtype)], axis=0)
duplicate_mask = stat_utils.mask_duplicates(items, axis=1).astype(np.bool)
items[:, (0, -1)] = items[:, (-1, 0)]
duplicate_mask[:, (0, -1)] = duplicate_mask[:, (-1, 0)]
assert users.shape == items.shape == duplicate_mask.shape
return users, items, duplicate_mask
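# A small NumPy sketch (illustrative values, assuming mask_duplicates keeps
# the first occurrence) of the swap described in the docstring above: with
# positive item 7 computed in column 0 and negatives [3, 7], the repeated 7
# is the occurrence marked as a duplicate; swapping columns 0 and -1 then
# moves the true positive to the end, unmasked, where it loses ties.
def _eval_swap_sketch():
  import numpy as np
  items = np.array([[7, 3, 7]])             # positive first
  dupes = np.array([[False, False, True]])  # later occurrence masked
  items[:, (0, -1)] = items[:, (-1, 0)]
  dupes[:, (0, -1)] = dupes[:, (-1, 0)]
  assert items.tolist() == [[7, 3, 7]]
  assert dupes.tolist() == [[True, False, False]]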
def _get_eval_batch(self, i):
"""Construct a single batch of evaluation data.
Args:
i: The index of the batch.
"""
low_index = i * self._eval_users_per_batch
high_index = (i + 1) * self._eval_users_per_batch
users = np.repeat(self._eval_pos_users[low_index:high_index, np.newaxis],
1 + rconst.NUM_EVAL_NEGATIVES, axis=1)
positive_items = self._eval_pos_items[low_index:high_index, np.newaxis]
negative_items = (self.lookup_negative_items(negative_users=users[:, :-1])
.reshape(-1, rconst.NUM_EVAL_NEGATIVES))
users, items, duplicate_mask = self._assemble_eval_batch(
users, positive_items, negative_items, self._eval_users_per_batch)
self._eval_dataset.put(i, {
movielens.USER_COLUMN: users.flatten(),
movielens.ITEM_COLUMN: items.flatten(),
rconst.DUPLICATE_MASK: duplicate_mask.flatten(),
})
def _construct_eval_epoch(self):
"""Loop to construct data for evaluation."""
if self._stop_loop:
return
start_time = timeit.default_timer()
self._eval_dataset.start_construction()
map_args = list(range(self.eval_batches_per_epoch))
get_pool = (popen_helper.get_fauxpool if self.deterministic else
popen_helper.get_threadpool)
with get_pool(6) as pool:
pool.map(self._get_eval_batch, map_args)
self._eval_dataset.end_construction()
tf.logging.info("Eval construction complete. Time: {:.1f} seconds".format(
timeit.default_timer() - start_time))
def make_input_fn(self, is_training):
# It isn't feasible to provide a foolproof check, so this is designed to
# catch most failures rather than provide an exhaustive guard.
if self._fatal_exception is not None:
raise ValueError("Fatal exception in the data production loop: {}"
.format(self._fatal_exception))
return (
self._train_dataset.make_input_fn(self.train_batch_size) if is_training
else self._eval_dataset.make_input_fn(self.eval_batch_size))
class DummyConstructor(threading.Thread):
"""Class for running with synthetic data."""
def run(self):
pass
def stop_loop(self):
pass
@staticmethod
def make_input_fn(is_training):
"""Construct training input_fn that uses synthetic data."""
def input_fn(params):
"""Generated input_fn for the given epoch."""
batch_size = (params["batch_size"] if is_training else
params["eval_batch_size"])
num_users = params["num_users"]
num_items = params["num_items"]
users = tf.random_uniform([batch_size], dtype=tf.int32, minval=0,
maxval=num_users)
items = tf.random_uniform([batch_size], dtype=tf.int32, minval=0,
maxval=num_items)
if is_training:
valid_point_mask = tf.cast(tf.random_uniform(
[batch_size], dtype=tf.int32, minval=0, maxval=2), tf.bool)
labels = tf.cast(tf.random_uniform(
[batch_size], dtype=tf.int32, minval=0, maxval=2), tf.bool)
data = {
movielens.USER_COLUMN: users,
movielens.ITEM_COLUMN: items,
rconst.VALID_POINT_MASK: valid_point_mask,
}, labels
else:
dupe_mask = tf.cast(tf.random_uniform([batch_size], dtype=tf.int32,
minval=0, maxval=2), tf.bool)
data = {
movielens.USER_COLUMN: users,
movielens.ITEM_COLUMN: items,
rconst.DUPLICATE_MASK: dupe_mask,
}
dataset = tf.data.Dataset.from_tensors(data).repeat(
rconst.SYNTHETIC_BATCHES_PER_EPOCH * params["batches_per_step"])
dataset = dataset.prefetch(32)
return dataset
return input_fn
class MaterializedDataConstructor(BaseDataConstructor):
"""Materialize a table of negative examples for fast negative generation.
This class creates a table (num_users x num_items) containing all of the
negative examples for each user. This table is conceptually ragged; that is to
say the items dimension will have a number of unused elements at the end equal
to the number of positive elements for a given user. For instance:
num_users = 3
num_items = 5
positives = [[1, 3], [0], [1, 2, 3, 4]]
will generate a negative table:
[
[0 2 4 int32max int32max],
[1 2 3 4 int32max],
[0 int32max int32max int32max int32max],
]
and a vector of per-user negative counts, which in this case would be:
[3, 4, 1]
When sampling negatives, integers are (nearly) uniformly selected from the
range [0, per_user_neg_count[user]) which gives a column_index, at which
point the negative can be selected as:
negative_table[user, column_index]
This technique will not scale; however MovieLens is small enough that even
a pre-compute which is quadratic in problem size will still fit in memory. A
more scalable lookup method is in the works.
"""
def __init__(self, *args, **kwargs):
super(MaterializedDataConstructor, self).__init__(*args, **kwargs)
self._negative_table = None
self._per_user_neg_count = None
def construct_lookup_variables(self):
# Materialize negatives for fast lookup sampling.
start_time = timeit.default_timer()
inner_bounds = np.argwhere(self._train_pos_users[1:] -
self._train_pos_users[:-1])[:, 0] + 1
(upper_bound,) = self._train_pos_users.shape
index_bounds = [0] + inner_bounds.tolist() + [upper_bound]
self._negative_table = np.zeros(shape=(self._num_users, self._num_items),
dtype=rconst.ITEM_DTYPE)
# Set the table to the max value to make sure the embedding lookup will fail
# if we go out of bounds, rather than just overloading item zero.
self._negative_table += np.iinfo(rconst.ITEM_DTYPE).max
assert self._num_items < np.iinfo(rconst.ITEM_DTYPE).max
# Reuse arange during generation. np.delete will make a copy.
full_set = np.arange(self._num_items, dtype=rconst.ITEM_DTYPE)
self._per_user_neg_count = np.zeros(
shape=(self._num_users,), dtype=np.int32)
# Threading does not improve this loop. For some reason, the np.delete
# call does not parallelize well. Multiprocessing incurs too much
# serialization overhead to be worthwhile.
for i in range(self._num_users):
positives = self._train_pos_items[index_bounds[i]:index_bounds[i+1]]
negatives = np.delete(full_set, positives)
self._per_user_neg_count[i] = self._num_items - positives.shape[0]
self._negative_table[i, :self._per_user_neg_count[i]] = negatives
tf.logging.info("Negative sample table built. Time: {:.1f} seconds".format(
timeit.default_timer() - start_time))
def lookup_negative_items(self, negative_users, **kwargs):
negative_item_choice = stat_utils.very_slightly_biased_randint(
self._per_user_neg_count[negative_users])
return self._negative_table[negative_users, negative_item_choice]
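# A compact NumPy sketch (mirroring the toy example in the class docstring
# above, not part of the pipeline) of building the ragged negative table and
# sampling from it; int32 stands in for rconst.ITEM_DTYPE.
def _materialized_table_sketch():
  import numpy as np
  num_users, num_items = 3, 5
  positives = [[1, 3], [0], [1, 2, 3, 4]]
  table = np.full((num_users, num_items), np.iinfo(np.int32).max,
                  dtype=np.int32)
  neg_counts = np.zeros((num_users,), dtype=np.int32)
  full_set = np.arange(num_items, dtype=np.int32)
  for u in range(num_users):
    negatives = np.delete(full_set, positives[u])
    neg_counts[u] = negatives.shape[0]
    table[u, :neg_counts[u]] = negatives
  assert neg_counts.tolist() == [3, 4, 1]
  # Sampling: pick a column uniformly in [0, neg_counts[user]) per user.
  users = np.array([0, 2])
  columns = np.random.randint(0, neg_counts[users])
  sampled = table[users, columns]
  assert np.all(sampled < num_items)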
class BisectionDataConstructor(BaseDataConstructor):
"""Use bisection to index within positive examples.
This class tallies the number of negative items which appear before each
positive item for a user. This means that in order to select the ith negative
item for a user, it only needs to determine which two positive items bound
it, at which point the item id for the ith negative is a simple algebraic
expression.
"""
def __init__(self, *args, **kwargs):
super(BisectionDataConstructor, self).__init__(*args, **kwargs)
self.index_bounds = None
self._sorted_train_pos_items = None
self._total_negatives = None
def _index_segment(self, user):
lower, upper = self.index_bounds[user:user+2]
items = self._sorted_train_pos_items[lower:upper]
negatives_since_last_positive = np.concatenate(
[items[0][np.newaxis], items[1:] - items[:-1] - 1])
return np.cumsum(negatives_since_last_positive)
def construct_lookup_variables(self):
start_time = timeit.default_timer()
inner_bounds = np.argwhere(self._train_pos_users[1:] -
self._train_pos_users[:-1])[:, 0] + 1
(upper_bound,) = self._train_pos_users.shape
self.index_bounds = np.array([0] + inner_bounds.tolist() + [upper_bound])
# Later logic will assume that the users are in sequential ascending order.
assert np.array_equal(self._train_pos_users[self.index_bounds[:-1]],
np.arange(self._num_users))
self._sorted_train_pos_items = self._train_pos_items.copy()
for i in range(self._num_users):
lower, upper = self.index_bounds[i:i+2]
self._sorted_train_pos_items[lower:upper].sort()
self._total_negatives = np.concatenate([
self._index_segment(i) for i in range(self._num_users)])
tf.logging.info("Negative total vector built. Time: {:.1f} seconds".format(
timeit.default_timer() - start_time))
def lookup_negative_items(self, negative_users, **kwargs):
output = np.zeros(shape=negative_users.shape, dtype=rconst.ITEM_DTYPE) - 1
left_index = self.index_bounds[negative_users]
right_index = self.index_bounds[negative_users + 1] - 1
num_positives = right_index - left_index + 1
num_negatives = self._num_items - num_positives
neg_item_choice = stat_utils.very_slightly_biased_randint(num_negatives)
# Shortcuts:
# For points where the negative is greater than or equal to the tally before
# the last positive point, there is no need to bisect. Instead the item id
# corresponding to the negative item choice is simply:
# last_positive_index + 1 + (neg_choice - last_negative_tally)
# Similarly, if the selection is less than the tally at the first positive
# then the item_id is simply the selection.
#
# Because MovieLens organizes popular movies into low integers (which is
# preserved through the preprocessing), the first shortcut is very
# efficient, allowing ~60% of samples to bypass the bisection. For the same
# reason, the second shortcut is rarely triggered (<0.02%) and is therefore
# not worth implementing.
use_shortcut = neg_item_choice >= self._total_negatives[right_index]
output[use_shortcut] = (
self._sorted_train_pos_items[right_index] + 1 +
(neg_item_choice - self._total_negatives[right_index])
)[use_shortcut]
if np.all(use_shortcut):
# The bisection code is ill-posed when there are no elements.
return output
not_use_shortcut = np.logical_not(use_shortcut)
left_index = left_index[not_use_shortcut]
right_index = right_index[not_use_shortcut]
neg_item_choice = neg_item_choice[not_use_shortcut]
num_loops = np.max(
np.ceil(np.log2(num_positives[not_use_shortcut])).astype(np.int32))
for i in range(num_loops):
mid_index = (left_index + right_index) // 2
right_criteria = self._total_negatives[mid_index] > neg_item_choice
left_criteria = np.logical_not(right_criteria)
right_index[right_criteria] = mid_index[right_criteria]
left_index[left_criteria] = mid_index[left_criteria]
# Expected state after bisection pass:
# The right index is the smallest index whose tally is greater than the
# negative item choice index.
assert np.all((right_index - left_index) <= 1)
output[not_use_shortcut] = (
self._sorted_train_pos_items[right_index] -
(self._total_negatives[right_index] - neg_item_choice)
)
assert np.all(output >= 0)
return output
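# An illustrative standalone sketch of the tally/bisection mapping above. For
# one user with sorted positives [2, 5], the cumulative "negatives before
# each positive" tally is [2, 4]; the i-th negative is found from the first
# positive whose tally exceeds i (np.searchsorted plays the role of the
# bisection loop here), or via the shortcut past the last positive.
def _bisection_lookup_sketch():
  import numpy as np
  sorted_pos = np.array([2, 5])
  tally = np.cumsum(np.concatenate(
      [sorted_pos[:1], sorted_pos[1:] - sorted_pos[:-1] - 1]))
  assert tally.tolist() == [2, 4]
  def ith_negative(i):
    j = np.searchsorted(tally, i, side="right")  # first index with tally > i
    if j == len(tally):
      # Past the last positive: the shortcut branch above.
      return sorted_pos[-1] + 1 + (i - tally[-1])
    return sorted_pos[j] - (tally[j] - i)
  # Negatives for this user, in order: 0, 1, 3, 4, 6, 7
  assert [ith_negative(i) for i in range(6)] == [0, 1, 3, 4, 6, 7]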
def get_constructor(name):
if name == "bisection":
return BisectionDataConstructor
if name == "materialized":
return MaterializedDataConstructor
raise ValueError("Unrecognized constructor: {}".format(name))
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Preprocess dataset and construct any necessary artifacts."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import pickle
import time
import timeit
import typing
# pylint: disable=wrong-import-order
import numpy as np
import pandas as pd
import tensorflow as tf
# pylint: enable=wrong-import-order
from official.datasets import movielens
from official.recommendation import constants as rconst
from official.recommendation import data_pipeline
from official.utils.logs import mlperf_helper
DATASET_TO_NUM_USERS_AND_ITEMS = {
"ml-1m": (6040, 3706),
"ml-20m": (138493, 26744)
}
_EXPECTED_CACHE_KEYS = (
rconst.TRAIN_USER_KEY, rconst.TRAIN_ITEM_KEY, rconst.EVAL_USER_KEY,
rconst.EVAL_ITEM_KEY, rconst.USER_MAP, rconst.ITEM_MAP)
def _filter_index_sort(raw_rating_path, cache_path):
# type: (str, str) -> (dict, bool)
"""Read in data CSV, and output structured data.
This function reads in the raw CSV of positive items, and performs three
preprocessing transformations:
1) Filter out all users who have not rated at least a certain number
of items. (Typically 20 items)
2) Zero index the users and items such that the largest user_id is
`num_users - 1` and the largest item_id is `num_items - 1`
3) Sort the dataframe by user_id, with timestamp as a secondary sort key.
This allows the dataframe to be sliced by user in-place, and for the last
item to be selected simply by calling the `-1` index of a user's slice.
While all of these transformations are performed by Pandas (and are therefore
single-threaded), they only take ~2 minutes, and the overhead of applying a
MapReduce pattern to process the dataset in parallel adds significant
complexity for no computational gain. For a larger dataset, parallelizing
this preprocessing could yield speedups. (Also, this preprocessing step is
only performed once for an entire run.)
Args:
raw_rating_path: The path to the CSV which contains the raw dataset.
cache_path: The path to the file where results of this function are saved.
Returns:
A dict containing the filtered, zero-index remapped, and sorted train/eval
positive data along with the raw-to-regularized user and item ID maps, and
a bool indicating whether a valid cache file was used.
"""
valid_cache = tf.gfile.Exists(cache_path)
if valid_cache:
with tf.gfile.Open(cache_path, "rb") as f:
cached_data = pickle.load(f)
cache_age = time.time() - cached_data.get("create_time", 0)
if cache_age > rconst.CACHE_INVALIDATION_SEC:
valid_cache = False
for key in _EXPECTED_CACHE_KEYS:
if key not in cached_data:
valid_cache = False
if not valid_cache:
tf.logging.info("Removing stale raw data cache file.")
tf.gfile.Remove(cache_path)
if valid_cache:
data = cached_data
else:
with tf.gfile.Open(raw_rating_path) as f:
df = pd.read_csv(f)
# Keep only users who have rated at least rconst.MIN_NUM_RATINGS items.
grouped = df.groupby(movielens.USER_COLUMN)
df = grouped.filter(
lambda x: len(x) >= rconst.MIN_NUM_RATINGS) # type: pd.DataFrame
original_users = df[movielens.USER_COLUMN].unique()
original_items = df[movielens.ITEM_COLUMN].unique()
# Map user and item IDs to contiguous zero-based indices for later steps.
tf.logging.info("Generating user_map and item_map...")
user_map = {user: index for index, user in enumerate(original_users)}
item_map = {item: index for index, item in enumerate(original_items)}
df[movielens.USER_COLUMN] = df[movielens.USER_COLUMN].apply(
lambda user: user_map[user])
df[movielens.ITEM_COLUMN] = df[movielens.ITEM_COLUMN].apply(
lambda item: item_map[item])
num_users = len(original_users)
num_items = len(original_items)
mlperf_helper.ncf_print(key=mlperf_helper.TAGS.PREPROC_HP_NUM_EVAL,
value=rconst.NUM_EVAL_NEGATIVES)
assert num_users <= np.iinfo(rconst.USER_DTYPE).max
assert num_items <= np.iinfo(rconst.ITEM_DTYPE).max
assert df[movielens.USER_COLUMN].max() == num_users - 1
assert df[movielens.ITEM_COLUMN].max() == num_items - 1
# This sort is used to shard the dataframe by user, and later to select
# the last item for a user to be used in validation.
tf.logging.info("Sorting by user, timestamp...")
# This sort is equivalent to
# df.sort_values([movielens.USER_COLUMN, movielens.TIMESTAMP_COLUMN],
# inplace=True)
# except that the order of items with the same user and timestamp is
# sometimes different. For some reason, this sort results in a better
# hit-rate during evaluation, matching the performance of the MLPerf
# reference implementation.
df.sort_values(by=movielens.TIMESTAMP_COLUMN, inplace=True)
df.sort_values([movielens.USER_COLUMN, movielens.TIMESTAMP_COLUMN],
inplace=True, kind="mergesort")
df = df.reset_index() # The dataframe does not reconstruct indices in the
# sort or filter steps.
grouped = df.groupby(movielens.USER_COLUMN, group_keys=False)
eval_df, train_df = grouped.tail(1), grouped.apply(lambda x: x.iloc[:-1])
data = {
rconst.TRAIN_USER_KEY: train_df[movielens.USER_COLUMN]
.values.astype(rconst.USER_DTYPE),
rconst.TRAIN_ITEM_KEY: train_df[movielens.ITEM_COLUMN]
.values.astype(rconst.ITEM_DTYPE),
rconst.EVAL_USER_KEY: eval_df[movielens.USER_COLUMN]
.values.astype(rconst.USER_DTYPE),
rconst.EVAL_ITEM_KEY: eval_df[movielens.ITEM_COLUMN]
.values.astype(rconst.ITEM_DTYPE),
rconst.USER_MAP: user_map,
rconst.ITEM_MAP: item_map,
"create_time": time.time(),
}
tf.logging.info("Writing raw data cache.")
with tf.gfile.Open(cache_path, "wb") as f:
pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL)
# TODO(robieta): MLPerf cache clear.
return data, valid_cache
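# A minimal pandas sketch (synthetic rows, not part of preprocessing) of the
# two-pass stable sort above and the holdout selection it enables: after the
# mergesort pass, rows are grouped by user and time-ordered within each user,
# so tail(1) picks every user's most recent item.
def _stable_sort_sketch():
  import pandas as pd
  df = pd.DataFrame({"user_id": [1, 0, 0, 1],
                     "item_id": [9, 4, 7, 2],
                     "timestamp": [5, 3, 2, 1]})
  df = df.sort_values(by="timestamp")
  df = df.sort_values(["user_id", "timestamp"], kind="mergesort")
  assert df["item_id"].tolist() == [7, 4, 2, 9]
  holdout = df.groupby("user_id").tail(1)
  assert holdout["item_id"].tolist() == [4, 9]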
def instantiate_pipeline(dataset, data_dir, params, constructor_type=None,
deterministic=False):
# type: (str, str, dict, typing.Optional[str], bool) -> (int, int, data_pipeline.BaseDataConstructor)
"""Load and digest data CSV into a usable form.
Args:
dataset: The name of the dataset to be used.
data_dir: The root directory of the dataset.
params: dict of parameters for the run.
constructor_type: The name of the constructor subclass that should be used
for the input pipeline.
deterministic: Tell the data constructor to produce deterministically.
Returns:
The number of users, the number of items, and the constructed data
producer.
"""
tf.logging.info("Beginning data preprocessing.")
st = timeit.default_timer()
raw_rating_path = os.path.join(data_dir, dataset, movielens.RATINGS_FILE)
cache_path = os.path.join(data_dir, dataset, rconst.RAW_CACHE_FILE)
raw_data, _ = _filter_index_sort(raw_rating_path, cache_path)
user_map, item_map = raw_data["user_map"], raw_data["item_map"]
num_users, num_items = DATASET_TO_NUM_USERS_AND_ITEMS[dataset]
if num_users != len(user_map):
raise ValueError("Expected to find {} users, but found {}".format(
num_users, len(user_map)))
if num_items != len(item_map):
raise ValueError("Expected to find {} items, but found {}".format(
num_items, len(item_map)))
producer = data_pipeline.get_constructor(constructor_type or "materialized")(
maximum_number_epochs=params["train_epochs"],
num_users=num_users,
num_items=num_items,
user_map=user_map,
item_map=item_map,
train_pos_users=raw_data[rconst.TRAIN_USER_KEY],
train_pos_items=raw_data[rconst.TRAIN_ITEM_KEY],
train_batch_size=params["batch_size"],
batches_per_train_step=params["batches_per_step"],
num_train_negatives=params["num_neg"],
eval_pos_users=raw_data[rconst.EVAL_USER_KEY],
eval_pos_items=raw_data[rconst.EVAL_ITEM_KEY],
eval_batch_size=params["eval_batch_size"],
batches_per_eval_step=params["batches_per_step"],
stream_files=params["use_tpu"],
deterministic=deterministic
)
run_time = timeit.default_timer() - st
tf.logging.info("Data preprocessing complete. Time: {:.1f} sec."
.format(run_time))
print(producer)
return num_users, num_items, producer
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Test NCF data pipeline."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from collections import defaultdict
import hashlib
import os
import mock
import numpy as np
import scipy.stats
import tensorflow as tf
from official.datasets import movielens
from official.recommendation import constants as rconst
from official.recommendation import data_preprocessing
from official.recommendation import popen_helper
DATASET = "ml-test"
NUM_USERS = 1000
NUM_ITEMS = 2000
NUM_PTS = 50000
BATCH_SIZE = 2048
EVAL_BATCH_SIZE = 4000
NUM_NEG = 4
END_TO_END_TRAIN_MD5 = "b218738e915e825d03939c5e305a2698"
END_TO_END_EVAL_MD5 = "d753d0f3186831466d6e218163a9501e"
FRESH_RANDOMNESS_MD5 = "63d0dff73c0e5f1048fbdc8c65021e22"
def mock_download(*args, **kwargs):
return
# The forkpool used by data producers interacts badly with the threading
# used by TestCase. Without this patch tests will hang, and no amount
# of diligent closing and joining within the producer will prevent it.
@mock.patch.object(popen_helper, "get_forkpool", popen_helper.get_fauxpool)
class BaseTest(tf.test.TestCase):
def setUp(self):
self.temp_data_dir = self.get_temp_dir()
ratings_folder = os.path.join(self.temp_data_dir, DATASET)
tf.gfile.MakeDirs(ratings_folder)
np.random.seed(0)
raw_user_ids = np.arange(NUM_USERS * 3)
np.random.shuffle(raw_user_ids)
raw_user_ids = raw_user_ids[:NUM_USERS]
raw_item_ids = np.arange(NUM_ITEMS * 3)
np.random.shuffle(raw_item_ids)
raw_item_ids = raw_item_ids[:NUM_ITEMS]
users = np.random.choice(raw_user_ids, NUM_PTS)
items = np.random.choice(raw_item_ids, NUM_PTS)
scores = np.random.randint(low=0, high=5, size=NUM_PTS)
times = np.random.randint(low=1000000000, high=1200000000, size=NUM_PTS)
self.rating_file = os.path.join(ratings_folder, movielens.RATINGS_FILE)
self.seen_pairs = set()
self.holdout = {}
with tf.gfile.Open(self.rating_file, "w") as f:
f.write("user_id,item_id,rating,timestamp\n")
for usr, itm, scr, ts in zip(users, items, scores, times):
pair = (usr, itm)
if pair in self.seen_pairs:
continue
self.seen_pairs.add(pair)
if usr not in self.holdout or (ts, itm) > self.holdout[usr]:
self.holdout[usr] = (ts, itm)
f.write("{},{},{},{}\n".format(usr, itm, scr, ts))
movielens.download = mock_download
movielens.NUM_RATINGS[DATASET] = NUM_PTS
data_preprocessing.DATASET_TO_NUM_USERS_AND_ITEMS[DATASET] = (NUM_USERS,
NUM_ITEMS)
def make_params(self, train_epochs=1):
return {
"train_epochs": train_epochs,
"batches_per_step": 1,
"use_seed": False,
"batch_size": BATCH_SIZE,
"eval_batch_size": EVAL_BATCH_SIZE,
"num_neg": NUM_NEG,
"match_mlperf": True,
"use_tpu": False,
"use_xla_for_gpu": False,
}
def test_preprocessing(self):
# For the most part the necessary checks are performed within
# _filter_index_sort()
cache_path = os.path.join(self.temp_data_dir, "test_cache.pickle")
data, valid_cache = data_preprocessing._filter_index_sort(
self.rating_file, cache_path=cache_path)
assert len(data[rconst.USER_MAP]) == NUM_USERS
assert len(data[rconst.ITEM_MAP]) == NUM_ITEMS
def drain_dataset(self, dataset, g):
# type: (tf.data.Dataset, tf.Graph) -> list
with self.test_session(graph=g) as sess:
with g.as_default():
batch = dataset.make_one_shot_iterator().get_next()
output = []
while True:
try:
output.append(sess.run(batch))
except tf.errors.OutOfRangeError:
break
return output
def _test_end_to_end(self, constructor_type):
params = self.make_params(train_epochs=1)
_, _, producer = data_preprocessing.instantiate_pipeline(
dataset=DATASET, data_dir=self.temp_data_dir, params=params,
constructor_type=constructor_type, deterministic=True)
producer.start()
producer.join()
assert producer._fatal_exception is None
user_inv_map = {v: k for k, v in producer.user_map.items()}
item_inv_map = {v: k for k, v in producer.item_map.items()}
# ==========================================================================
# == Training Data =========================================================
# ==========================================================================
g = tf.Graph()
with g.as_default():
input_fn = producer.make_input_fn(is_training=True)
dataset = input_fn(params)
first_epoch = self.drain_dataset(dataset=dataset, g=g)
counts = defaultdict(int)
train_examples = {
True: set(),
False: set(),
}
md5 = hashlib.md5()
for features, labels in first_epoch:
data_list = [
features[movielens.USER_COLUMN], features[movielens.ITEM_COLUMN],
features[rconst.VALID_POINT_MASK], labels]
for i in data_list:
md5.update(i.tobytes())
for u, i, v, l in zip(*data_list):
if not v:
continue # ignore padding
u_raw = user_inv_map[u]
i_raw = item_inv_map[i]
if ((u_raw, i_raw) in self.seen_pairs) != l:
# The evaluation item is not considered during false negative
# generation, so it will occasionally appear as a negative example
# during training.
assert not l
self.assertEqual(i_raw, self.holdout[u_raw][1])
train_examples[l].add((u_raw, i_raw))
counts[(u_raw, i_raw)] += 1
self.assertRegexpMatches(md5.hexdigest(), END_TO_END_TRAIN_MD5)
num_positives_seen = len(train_examples[True])
self.assertEqual(producer._train_pos_users.shape[0], num_positives_seen)
# This check is more heuristic because negatives are sampled with
# replacement. It only checks that negative generation is reasonably random.
self.assertGreater(
len(train_examples[False]) / NUM_NEG / num_positives_seen, 0.9)
# This checks that the samples produced are independent by checking the
# number of duplicate entries. If workers are not properly independent there
# will be lots of repeated pairs.
self.assertLess(np.mean(list(counts.values())), 1.1)
# ==========================================================================
# == Eval Data =============================================================
# ==========================================================================
with g.as_default():
input_fn = producer.make_input_fn(is_training=False)
dataset = input_fn(params)
eval_data = self.drain_dataset(dataset=dataset, g=g)
current_user = None
md5 = hashlib.md5()
for features in eval_data:
data_list = [
features[movielens.USER_COLUMN], features[movielens.ITEM_COLUMN],
features[rconst.DUPLICATE_MASK]]
for i in data_list:
md5.update(i.tobytes())
for idx, (u, i, d) in enumerate(zip(*data_list)):
u_raw = user_inv_map[u]
i_raw = item_inv_map[i]
if current_user is None:
current_user = u
# Ensure that users appear in blocks, as the evaluation logic expects
# this structure.
self.assertEqual(u, current_user)
# The structure of evaluation data is 999 negative examples followed
# by the holdout positive.
if not (idx + 1) % (rconst.NUM_EVAL_NEGATIVES + 1):
# Check that the last element in each chunk is the holdout item.
self.assertEqual(i_raw, self.holdout[u_raw][1])
current_user = None
elif i_raw == self.holdout[u_raw][1]:
# Because the holdout item is not given to the negative generation
# process, it can appear as a negative. In that case, it should be
# masked out as a duplicate. (Since the true positive is placed at
# the end and would therefore lose the tie.)
assert d
else:
# Otherwise check that the other 999 points for a user are selected
# from the negatives.
assert (u_raw, i_raw) not in self.seen_pairs
self.assertRegexpMatches(md5.hexdigest(), END_TO_END_EVAL_MD5)
def _test_fresh_randomness(self, constructor_type):
train_epochs = 5
params = self.make_params(train_epochs=train_epochs)
_, _, producer = data_preprocessing.instantiate_pipeline(
dataset=DATASET, data_dir=self.temp_data_dir, params=params,
constructor_type=constructor_type, deterministic=True)
producer.start()
results = []
g = tf.Graph()
with g.as_default():
for _ in range(train_epochs):
input_fn = producer.make_input_fn(is_training=True)
dataset = input_fn(params)
results.extend(self.drain_dataset(dataset=dataset, g=g))
producer.join()
assert producer._fatal_exception is None
positive_counts, negative_counts = defaultdict(int), defaultdict(int)
md5 = hashlib.md5()
for features, labels in results:
data_list = [
features[movielens.USER_COLUMN], features[movielens.ITEM_COLUMN],
features[rconst.VALID_POINT_MASK], labels]
for i in data_list:
md5.update(i.tobytes())
for u, i, v, l in zip(*data_list):
if not v:
continue # ignore padding
if l:
positive_counts[(u, i)] += 1
else:
negative_counts[(u, i)] += 1
self.assertRegexpMatches(md5.hexdigest(), FRESH_RANDOMNESS_MD5)
# The positive examples should appear exactly once each epoch
self.assertAllEqual(list(positive_counts.values()),
[train_epochs for _ in positive_counts])
# The threshold for the negatives is heuristic: repeats are expected in
# general, but should not appear too frequently.
pair_cardinality = NUM_USERS * NUM_ITEMS
neg_pair_cardinality = pair_cardinality - len(self.seen_pairs)
# Approximation of the expected number of times that a particular negative
# will appear in a given epoch. Implicit in this calculation is the
# treatment of all negative pairs as equally likely. Formally this is not
# necessarily reasonable; however the generation in self.setUp() will
# approximate this behavior sufficiently for heuristic testing.
e_sample = len(self.seen_pairs) * NUM_NEG / neg_pair_cardinality
# The frequency of occurrence of a given negative pair should follow an
# approximately binomial distribution in the limit that the cardinality of
# the negative pair set >> number of samples per epoch.
approx_pdf = scipy.stats.binom.pmf(k=np.arange(train_epochs+1),
n=train_epochs, p=e_sample)
# Tally the actual observed counts.
count_distribution = [0 for _ in range(train_epochs + 1)]
for i in negative_counts.values():
i = min([i, train_epochs]) # round down tail for simplicity.
count_distribution[i] += 1
count_distribution[0] = neg_pair_cardinality - sum(count_distribution[1:])
# Check that the frequency of negative pairs is approximately binomial.
for i in range(train_epochs + 1):
if approx_pdf[i] < 0.05:
continue # Variance will be high at the tails.
observed_fraction = count_distribution[i] / neg_pair_cardinality
deviation = (2 * abs(observed_fraction - approx_pdf[i]) /
(observed_fraction + approx_pdf[i]))
self.assertLess(deviation, 0.2)
def test_end_to_end_materialized(self):
self._test_end_to_end("materialized")
def test_end_to_end_bisection(self):
self._test_end_to_end("bisection")
def test_fresh_randomness_materialized(self):
self._test_fresh_randomness("materialized")
def test_fresh_randomness_bisection(self):
self._test_fresh_randomness("bisection")
if __name__ == "__main__":
tf.logging.set_verbosity(tf.logging.INFO)
tf.test.main()
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""NCF framework to train and evaluate the NeuMF model.
The NeuMF model assembles both MF and MLP models under the NCF framework. Check
`neumf_model.py` for more details about the models.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import contextlib
import heapq
import json
import logging
import math
import multiprocessing
import os
import signal
import typing
# pylint: disable=g-bad-import-order
import numpy as np
from absl import app as absl_app
from absl import flags
import tensorflow as tf
# pylint: enable=g-bad-import-order
from tensorflow.contrib.compiler import xla
from official.datasets import movielens
from official.recommendation import constants as rconst
from official.recommendation import data_pipeline
from official.recommendation import data_preprocessing
from official.recommendation import neumf_model
from official.utils.flags import core as flags_core
from official.utils.logs import hooks_helper
from official.utils.logs import logger
from official.utils.logs import mlperf_helper
from official.utils.misc import distribution_utils
from official.utils.misc import model_helpers
FLAGS = flags.FLAGS
def construct_estimator(model_dir, params):
"""Construct either an Estimator or TPUEstimator for NCF.
Args:
model_dir: The model directory for the estimator
params: The params dict for the estimator
Returns:
An Estimator or TPUEstimator.
"""
if params["use_tpu"]:
# Some of the networking libraries are quite chatty.
for name in ["googleapiclient.discovery", "googleapiclient.discovery_cache",
"oauth2client.transport"]:
logging.getLogger(name).setLevel(logging.ERROR)
tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(
tpu=params["tpu"],
zone=params["tpu_zone"],
project=params["tpu_gcp_project"],
coordinator_name="coordinator"
)
tf.logging.info("Issuing reset command to TPU to ensure a clean state.")
tf.Session.reset(tpu_cluster_resolver.get_master())
# Estimator looks at the master it connects to for MonitoredTrainingSession
# by reading the `TF_CONFIG` environment variable, and the coordinator
# is used by StreamingFilesDataset.
tf_config_env = {
"session_master": tpu_cluster_resolver.get_master(),
"eval_session_master": tpu_cluster_resolver.get_master(),
"coordinator": tpu_cluster_resolver.cluster_spec()
.as_dict()["coordinator"]
}
os.environ['TF_CONFIG'] = json.dumps(tf_config_env)
distribution = tf.contrib.distribute.TPUStrategy(
tpu_cluster_resolver, steps_per_run=100)
else:
distribution = distribution_utils.get_distribution_strategy(
num_gpus=params["num_gpus"])
run_config = tf.estimator.RunConfig(train_distribute=distribution,
eval_distribute=distribution)
model_fn = neumf_model.neumf_model_fn
if params["use_xla_for_gpu"]:
tf.logging.info("Using XLA for GPU for training and evaluation.")
model_fn = xla.estimator_model_fn(model_fn)
estimator = tf.estimator.Estimator(model_fn=model_fn, model_dir=model_dir,
config=run_config, params=params)
return estimator
def log_and_get_hooks(eval_batch_size):
"""Convenience function for hook and logger creation."""
# Create hooks that log information about the training and metric values
train_hooks = hooks_helper.get_train_hooks(
FLAGS.hooks,
model_dir=FLAGS.model_dir,
batch_size=FLAGS.batch_size, # for ExamplesPerSecondHook
tensors_to_log={"cross_entropy": "cross_entropy"}
)
run_params = {
"batch_size": FLAGS.batch_size,
"eval_batch_size": eval_batch_size,
"number_factors": FLAGS.num_factors,
"hr_threshold": FLAGS.hr_threshold,
"train_epochs": FLAGS.train_epochs,
}
benchmark_logger = logger.get_benchmark_logger()
benchmark_logger.log_run_info(
model_name="recommendation",
dataset_name=FLAGS.dataset,
run_params=run_params,
test_id=FLAGS.benchmark_test_id)
return benchmark_logger, train_hooks
def parse_flags(flags_obj):
"""Convenience function to turn flags into params."""
num_gpus = flags_core.get_num_gpus(flags_obj)
num_devices = FLAGS.num_tpu_shards if FLAGS.tpu else num_gpus or 1
batch_size = (flags_obj.batch_size + num_devices - 1) // num_devices
eval_divisor = (rconst.NUM_EVAL_NEGATIVES + 1) * num_devices
eval_batch_size = flags_obj.eval_batch_size or flags_obj.batch_size
eval_batch_size = ((eval_batch_size + eval_divisor - 1) //
eval_divisor * eval_divisor // num_devices)
return {
"train_epochs": flags_obj.train_epochs,
"batches_per_step": num_devices,
"use_seed": flags_obj.seed is not None,
"batch_size": batch_size,
"eval_batch_size": eval_batch_size,
"learning_rate": flags_obj.learning_rate,
"mf_dim": flags_obj.num_factors,
"model_layers": [int(layer) for layer in flags_obj.layers],
"mf_regularization": flags_obj.mf_regularization,
"mlp_reg_layers": [float(reg) for reg in flags_obj.mlp_regularization],
"num_neg": flags_obj.num_neg,
"num_gpus": num_gpus,
"use_tpu": flags_obj.tpu is not None,
"tpu": flags_obj.tpu,
"tpu_zone": flags_obj.tpu_zone,
"tpu_gcp_project": flags_obj.tpu_gcp_project,
"beta1": flags_obj.beta1,
"beta2": flags_obj.beta2,
"epsilon": flags_obj.epsilon,
"match_mlperf": flags_obj.ml_perf,
"use_xla_for_gpu": flags_obj.use_xla_for_gpu,
"epochs_between_evals": FLAGS.epochs_between_evals,
}
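# Worked example (illustrative numbers): with rconst.NUM_EVAL_NEGATIVES = 999
# and num_devices = 2, eval_divisor is 2000. A requested eval batch size of
# 4096 is rounded up to 6000 globally and becomes 3000 per device, so each
# device batch holds whole (1 positive + 999 negatives) user blocks:
# ((4096 + 2000 - 1) // 2000) * 2000 // 2 == 3000.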
def main(_):
with logger.benchmark_context(FLAGS), \
mlperf_helper.LOGGER(FLAGS.output_ml_perf_compliance_logging):
mlperf_helper.set_ncf_root(os.path.split(os.path.abspath(__file__))[0])
run_ncf(FLAGS)
def run_ncf(_):
"""Run NCF training and eval loop."""
if FLAGS.download_if_missing and not FLAGS.use_synthetic_data:
movielens.download(FLAGS.dataset, FLAGS.data_dir)
if FLAGS.seed is not None:
np.random.seed(FLAGS.seed)
params = parse_flags(FLAGS)
total_training_cycle = FLAGS.train_epochs // FLAGS.epochs_between_evals
if FLAGS.use_synthetic_data:
producer = data_pipeline.DummyConstructor()
num_users, num_items = data_preprocessing.DATASET_TO_NUM_USERS_AND_ITEMS[
FLAGS.dataset]
num_train_steps = rconst.SYNTHETIC_BATCHES_PER_EPOCH
num_eval_steps = rconst.SYNTHETIC_BATCHES_PER_EPOCH
else:
num_users, num_items, producer = data_preprocessing.instantiate_pipeline(
dataset=FLAGS.dataset, data_dir=FLAGS.data_dir, params=params,
constructor_type=FLAGS.constructor_type,
deterministic=FLAGS.seed is not None)
num_train_steps = (producer.train_batches_per_epoch //
params["batches_per_step"])
num_eval_steps = (producer.eval_batches_per_epoch //
params["batches_per_step"])
assert not producer.train_batches_per_epoch % params["batches_per_step"]
assert not producer.eval_batches_per_epoch % params["batches_per_step"]
producer.start()
params["num_users"], params["num_items"] = num_users, num_items
model_helpers.apply_clean(flags.FLAGS)
estimator = construct_estimator(model_dir=FLAGS.model_dir, params=params)
benchmark_logger, train_hooks = log_and_get_hooks(params["eval_batch_size"])
target_reached = False
mlperf_helper.ncf_print(key=mlperf_helper.TAGS.TRAIN_LOOP)
for cycle_index in range(total_training_cycle):
assert FLAGS.epochs_between_evals == 1 or not mlperf_helper.LOGGER.enabled
tf.logging.info("Starting a training cycle: {}/{}".format(
cycle_index + 1, total_training_cycle))
mlperf_helper.ncf_print(key=mlperf_helper.TAGS.TRAIN_EPOCH,
value=cycle_index)
train_input_fn = producer.make_input_fn(is_training=True)
estimator.train(input_fn=train_input_fn, hooks=train_hooks,
steps=num_train_steps)
tf.logging.info("Beginning evaluation.")
eval_input_fn = producer.make_input_fn(is_training=False)
mlperf_helper.ncf_print(key=mlperf_helper.TAGS.EVAL_START,
value=cycle_index)
eval_results = estimator.evaluate(eval_input_fn, steps=num_eval_steps)
tf.logging.info("Evaluation complete.")
hr = float(eval_results[rconst.HR_KEY])
ndcg = float(eval_results[rconst.NDCG_KEY])
loss = float(eval_results["loss"])
mlperf_helper.ncf_print(
key=mlperf_helper.TAGS.EVAL_TARGET,
value={"epoch": cycle_index, "value": FLAGS.hr_threshold})
mlperf_helper.ncf_print(key=mlperf_helper.TAGS.EVAL_ACCURACY,
value={"epoch": cycle_index, "value": hr})
mlperf_helper.ncf_print(
key=mlperf_helper.TAGS.EVAL_HP_NUM_NEG,
value={"epoch": cycle_index, "value": rconst.NUM_EVAL_NEGATIVES})
mlperf_helper.ncf_print(key=mlperf_helper.TAGS.EVAL_STOP, value=cycle_index)
# Benchmark the evaluation results
benchmark_logger.log_evaluation_result(eval_results)
# Log the HR and NDCG results.
tf.logging.info(
"Iteration {}: HR = {:.4f}, NDCG = {:.4f}, Loss = {:.4f}".format(
cycle_index + 1, hr, ndcg, loss))
# Stop training early if the evaluation threshold is met.
if model_helpers.past_stop_threshold(FLAGS.hr_threshold, hr):
target_reached = True
break
mlperf_helper.ncf_print(key=mlperf_helper.TAGS.RUN_STOP,
value={"success": target_reached})
producer.stop_loop()
producer.join()
# Clear the session explicitly to avoid a session deletion error.
tf.keras.backend.clear_session()
mlperf_helper.ncf_print(key=mlperf_helper.TAGS.RUN_FINAL)
def define_ncf_flags():
"""Add flags for running ncf_main."""
# Add common flags
flags_core.define_base(export_dir=False)
flags_core.define_performance(
num_parallel_calls=False,
inter_op=False,
intra_op=False,
synthetic_data=True,
max_train_steps=False,
dtype=False,
all_reduce_alg=False
)
flags_core.define_device(tpu=True)
flags_core.define_benchmark()
flags.adopt_module_key_flags(flags_core)
flags_core.set_defaults(
model_dir="/tmp/ncf/",
data_dir="/tmp/movielens-data/",
train_epochs=2,
batch_size=256,
hooks="ProfilerHook",
tpu=None
)
# Add ncf-specific flags
flags.DEFINE_enum(
name="dataset", default="ml-1m",
enum_values=["ml-1m", "ml-20m"], case_sensitive=False,
help=flags_core.help_wrap(
"Dataset to be trained and evaluated."))
flags.DEFINE_boolean(
name="download_if_missing", default=True, help=flags_core.help_wrap(
"Download data to data_dir if it is not already present."))
flags.DEFINE_integer(
name="eval_batch_size", default=None, help=flags_core.help_wrap(
"The batch size used for evaluation. This should generally be larger"
"than the training batch size as the lack of back propagation during"
"evaluation can allow for larger batch sizes to fit in memory. If not"
"specified, the training batch size (--batch_size) will be used."))
flags.DEFINE_integer(
name="num_factors", default=8,
help=flags_core.help_wrap("The embedding size of the MF model."))
# Set the default as a list of strings to be consistent with input arguments
flags.DEFINE_list(
name="layers", default=["64", "32", "16", "8"],
help=flags_core.help_wrap(
"The sizes of hidden layers for MLP. Example "
"to specify different sizes of MLP layers: --layers=32,16,8,4"))
flags.DEFINE_float(
name="mf_regularization", default=0.,
help=flags_core.help_wrap(
"The regularization factor for MF embeddings. The factor is used by "
"regularizer which allows to apply penalties on layer parameters or "
"layer activity during optimization."))
flags.DEFINE_list(
name="mlp_regularization", default=["0.", "0.", "0.", "0."],
help=flags_core.help_wrap(
"The regularization factor for each MLP layer. See mf_regularization "
"help for more info about regularization factor."))
flags.DEFINE_integer(
name="num_neg", default=4,
help=flags_core.help_wrap(
"The Number of negative instances to pair with a positive instance."))
flags.DEFINE_float(
name="learning_rate", default=0.001,
help=flags_core.help_wrap("The learning rate."))
flags.DEFINE_float(
name="beta1", default=0.9,
help=flags_core.help_wrap("beta1 hyperparameter for the Adam optimizer."))
flags.DEFINE_float(
name="beta2", default=0.999,
help=flags_core.help_wrap("beta2 hyperparameter for the Adam optimizer."))
flags.DEFINE_float(
name="epsilon", default=1e-8,
help=flags_core.help_wrap("epsilon hyperparameter for the Adam "
"optimizer."))
flags.DEFINE_float(
name="hr_threshold", default=None,
help=flags_core.help_wrap(
"If passed, training will stop when the evaluation metric HR is "
"greater than or equal to hr_threshold. For dataset ml-1m, the "
"desired hr_threshold is 0.68 which is the result from the paper; "
"For dataset ml-20m, the threshold can be set as 0.95 which is "
"achieved by MLPerf implementation."))
flags.DEFINE_enum(
name="constructor_type", default="bisection",
enum_values=["bisection", "materialized"], case_sensitive=False,
help=flags_core.help_wrap(
"Strategy to use for generating false negatives. materialized has a"
"precompute that scales badly, but a faster per-epoch construction"
"time and can be faster on very large systems."))
flags.DEFINE_bool(
name="ml_perf", default=False,
help=flags_core.help_wrap(
"If set, changes the behavior of the model slightly to match the "
"MLPerf reference implementations here: \n"
"https://github.com/mlperf/reference/tree/master/recommendation/"
"pytorch\n"
"The two changes are:\n"
"1. When computing the HR and NDCG during evaluation, remove "
"duplicate user-item pairs before the computation. This results in "
"better HRs and NDCGs.\n"
"2. Use a different soring algorithm when sorting the input data, "
"which performs better due to the fact the sorting algorithms are "
"not stable."))
flags.DEFINE_bool(
name="output_ml_perf_compliance_logging", default=False,
help=flags_core.help_wrap(
"If set, output the MLPerf compliance logging. This is only useful "
"if one is running the model for MLPerf. See "
"https://github.com/mlperf/policies/blob/master/training_rules.adoc"
"#submission-compliance-logs for details. This uses sudo and so may "
"ask for your password, as root access is needed to clear the system "
"caches, which is required for MLPerf compliance."
)
)
flags.DEFINE_integer(
name="seed", default=None, help=flags_core.help_wrap(
"This value will be used to seed both NumPy and TensorFlow."))
@flags.validator("eval_batch_size", "eval_batch_size must be at least {}"
.format(rconst.NUM_EVAL_NEGATIVES + 1))
def eval_size_check(eval_batch_size):
return (eval_batch_size is None or
int(eval_batch_size) > rconst.NUM_EVAL_NEGATIVES)
flags.DEFINE_bool(
name="use_xla_for_gpu", default=False, help=flags_core.help_wrap(
"If True, use XLA for the model function. Only works when using a "
"GPU. On TPUs, XLA is always used"))
xla_message = "--use_xla_for_gpu is incompatible with --tpu"
@flags.multi_flags_validator(["use_xla_for_gpu", "tpu"], message=xla_message)
def xla_validator(flag_dict):
return not flag_dict["use_xla_for_gpu"] or not flag_dict["tpu"]
if __name__ == "__main__":
tf.logging.set_verbosity(tf.logging.INFO)
define_ncf_flags()
absl_app.run(main)
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests NCF."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import math
import mock
import numpy as np
import tensorflow as tf
from absl.testing import flagsaver
from official.recommendation import constants as rconst
from official.recommendation import data_pipeline
from official.recommendation import neumf_model
from official.recommendation import ncf_main
NUM_TRAIN_NEG = 4
class NcfTest(tf.test.TestCase):
@classmethod
def setUpClass(cls): # pylint: disable=invalid-name
super(NcfTest, cls).setUpClass()
ncf_main.define_ncf_flags()
def setUp(self):
self.top_k_old = rconst.TOP_K
self.num_eval_negatives_old = rconst.NUM_EVAL_NEGATIVES
rconst.NUM_EVAL_NEGATIVES = 2
def tearDown(self):
rconst.NUM_EVAL_NEGATIVES = self.num_eval_negatives_old
rconst.TOP_K = self.top_k_old
def get_hit_rate_and_ndcg(self, predicted_scores_by_user, items_by_user,
top_k=rconst.TOP_K, match_mlperf=False):
rconst.TOP_K = top_k
rconst.NUM_EVAL_NEGATIVES = predicted_scores_by_user.shape[1] - 1
batch_size = items_by_user.shape[0]
users = np.repeat(np.arange(batch_size)[:, np.newaxis],
rconst.NUM_EVAL_NEGATIVES + 1, axis=1)
users, items, duplicate_mask = \
data_pipeline.BaseDataConstructor._assemble_eval_batch(
users, items_by_user[:, -1:], items_by_user[:, :-1], batch_size)
g = tf.Graph()
with g.as_default():
logits = tf.convert_to_tensor(
predicted_scores_by_user.reshape((-1, 1)), tf.float32)
softmax_logits = tf.concat([tf.zeros(logits.shape, dtype=logits.dtype),
logits], axis=1)
duplicate_mask = tf.convert_to_tensor(duplicate_mask, tf.float32)
metric_ops = neumf_model.compute_eval_loss_and_metrics(
logits=logits, softmax_logits=softmax_logits,
duplicate_mask=duplicate_mask, num_training_neg=NUM_TRAIN_NEG,
match_mlperf=match_mlperf).eval_metric_ops
hr = metric_ops[rconst.HR_KEY]
ndcg = metric_ops[rconst.NDCG_KEY]
init = [tf.global_variables_initializer(),
tf.local_variables_initializer()]
with self.test_session(graph=g) as sess:
sess.run(init)
return sess.run([hr[1], ndcg[1]])
def test_hit_rate_and_ndcg(self):
# Test with no duplicate items
predictions = np.array([
[2., 0., 1.], # In top 2
[1., 0., 2.], # In top 1
[2., 1., 0.], # In top 3
[3., 4., 2.] # In top 3
])
items = np.array([
[2, 3, 1],
[3, 1, 2],
[2, 1, 3],
[1, 3, 2],
])
hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 1)
self.assertAlmostEqual(hr, 1 / 4)
self.assertAlmostEqual(ndcg, 1 / 4)
hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 2)
self.assertAlmostEqual(hr, 2 / 4)
self.assertAlmostEqual(ndcg, (1 + math.log(2) / math.log(3)) / 4)
hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 3)
self.assertAlmostEqual(hr, 4 / 4)
self.assertAlmostEqual(ndcg, (1 + math.log(2) / math.log(3) +
2 * math.log(2) / math.log(4)) / 4)
hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 1,
match_mlperf=True)
self.assertAlmostEqual(hr, 1 / 4)
self.assertAlmostEqual(ndcg, 1 / 4)
hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 2,
match_mlperf=True)
self.assertAlmostEqual(hr, 2 / 4)
self.assertAlmostEqual(ndcg, (1 + math.log(2) / math.log(3)) / 4)
hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 3,
match_mlperf=True)
self.assertAlmostEqual(hr, 4 / 4)
self.assertAlmostEqual(ndcg, (1 + math.log(2) / math.log(3) +
2 * math.log(2) / math.log(4)) / 4)
# Test with duplicate items. In the MLPerf case, we treat the duplicates as
# a single item. Otherwise, we treat the duplicates as separate items.
predictions = np.array([
[2., 2., 3., 1.], # In top 4. MLPerf: In top 3
[1., 0., 2., 3.], # In top 1. MLPerf: In top 1
[2., 3., 2., 0.], # In top 4. MLPerf: In top 3
[2., 4., 2., 3.] # In top 2. MLPerf: In top 2
])
items = np.array([
[2, 2, 3, 1],
[2, 3, 4, 1],
[2, 3, 2, 1],
[3, 2, 1, 4],
])
hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 1)
self.assertAlmostEqual(hr, 1 / 4)
self.assertAlmostEqual(ndcg, 1 / 4)
hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 2)
self.assertAlmostEqual(hr, 2 / 4)
self.assertAlmostEqual(ndcg, (1 + math.log(2) / math.log(3)) / 4)
hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 3)
self.assertAlmostEqual(hr, 2 / 4)
self.assertAlmostEqual(ndcg, (1 + math.log(2) / math.log(3)) / 4)
hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 4)
self.assertAlmostEqual(hr, 4 / 4)
self.assertAlmostEqual(ndcg, (1 + math.log(2) / math.log(3) +
2 * math.log(2) / math.log(5)) / 4)
hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 1,
match_mlperf=True)
self.assertAlmostEqual(hr, 1 / 4)
self.assertAlmostEqual(ndcg, 1 / 4)
hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 2,
match_mlperf=True)
self.assertAlmostEqual(hr, 2 / 4)
self.assertAlmostEqual(ndcg, (1 + math.log(2) / math.log(3)) / 4)
hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 3,
match_mlperf=True)
self.assertAlmostEqual(hr, 4 / 4)
self.assertAlmostEqual(ndcg, (1 + math.log(2) / math.log(3) +
2 * math.log(2) / math.log(4)) / 4)
hr, ndcg = self.get_hit_rate_and_ndcg(predictions, items, 4,
match_mlperf=True)
self.assertAlmostEqual(hr, 4 / 4)
self.assertAlmostEqual(ndcg, (1 + math.log(2) / math.log(3) +
2 * math.log(2) / math.log(4)) / 4)
_BASE_END_TO_END_FLAGS = {
"batch_size": 1024,
"train_epochs": 1,
"use_synthetic_data": True
}
@flagsaver.flagsaver(**_BASE_END_TO_END_FLAGS)
@mock.patch.object(rconst, "SYNTHETIC_BATCHES_PER_EPOCH", 100)
def test_end_to_end(self):
ncf_main.main(None)
@flagsaver.flagsaver(ml_perf=True, **_BASE_END_TO_END_FLAGS)
@mock.patch.object(rconst, "SYNTHETIC_BATCHES_PER_EPOCH", 100)
def test_end_to_end_mlperf(self):
ncf_main.main(None)
if __name__ == "__main__":
tf.logging.set_verbosity(tf.logging.INFO)
tf.test.main()
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Defines NeuMF model for NCF framework.
Some abbreviations used in the code base:
NeuMF: Neural Matrix Factorization
NCF: Neural Collaborative Filtering
GMF: Generalized Matrix Factorization
MLP: Multi-Layer Perceptron
GMF applies a linear kernel to model the latent feature interactions, and MLP
uses a nonlinear kernel to learn the interaction function from data. NeuMF is
a fused model of GMF and MLP that better captures the complex user-item
interactions, unifying the strengths of the linearity of MF and the
non-linearity of MLP for modeling the user-item latent structures.
The NeuMF model allows GMF and MLP to learn separate embeddings, and combines
the two models by concatenating their last hidden layers.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import sys
import typing
from six.moves import xrange # pylint: disable=redefined-builtin
import tensorflow as tf
from official.datasets import movielens # pylint: disable=g-bad-import-order
from official.recommendation import constants as rconst
from official.recommendation import stat_utils
from official.utils.logs import mlperf_helper
def _sparse_to_dense_grads(grads_and_vars):
"""Convert sparse gradients to dense gradients.
All sparse gradients, which are represented as instances of tf.IndexedSlices,
are converted to dense Tensors. Dense gradients, which are represented as
Tensors, are unchanged.
The purpose of this conversion is that for small embeddings, which are used by
this model, applying dense gradients with the AdamOptimizer is faster than
applying sparse gradients.
Args:
grads_and_vars: A list of (gradient, variable) tuples. Each gradient can
be a Tensor or an IndexedSlices. Tensors are unchanged, and IndexedSlices
are converted to dense Tensors.
Returns:
The same list of (gradient, variable) as `grads_and_vars`, except each
IndexedSlices gradient is converted to a Tensor.
"""
# Calling convert_to_tensor changes IndexedSlices into Tensors, and leaves
# Tensors unchanged.
return [(tf.convert_to_tensor(g), v) for g, v in grads_and_vars]
def neumf_model_fn(features, labels, mode, params):
"""Model Function for NeuMF estimator."""
if params.get("use_seed"):
tf.set_random_seed(stat_utils.random_int32())
users = features[movielens.USER_COLUMN]
items = features[movielens.ITEM_COLUMN]
logits = construct_model(users, items, params).output
# Softmax with the first column of zeros is equivalent to sigmoid.
softmax_logits = tf.concat([tf.zeros(logits.shape, dtype=logits.dtype),
logits], axis=1)
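# Concretely, softmax([0, x]) = [1, exp(x)] / (1 + exp(x)), so the probability
# assigned to the second (logit) column is exp(x) / (1 + exp(x)) = sigmoid(x).
# This lets the sparse softmax cross entropy path serve as a numerically
# stable binary cross entropy.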
if mode == tf.estimator.ModeKeys.EVAL:
duplicate_mask = tf.cast(features[rconst.DUPLICATE_MASK], tf.float32)
return compute_eval_loss_and_metrics(
logits, softmax_logits, duplicate_mask, params["num_neg"],
params["match_mlperf"],
use_tpu_spec=params["use_xla_for_gpu"])
elif mode == tf.estimator.ModeKeys.TRAIN:
labels = tf.cast(labels, tf.int32)
valid_pt_mask = features[rconst.VALID_POINT_MASK]
mlperf_helper.ncf_print(key=mlperf_helper.TAGS.OPT_NAME, value="adam")
mlperf_helper.ncf_print(key=mlperf_helper.TAGS.OPT_LR,
value=params["learning_rate"])
mlperf_helper.ncf_print(key=mlperf_helper.TAGS.OPT_HP_ADAM_BETA1,
value=params["beta1"])
mlperf_helper.ncf_print(key=mlperf_helper.TAGS.OPT_HP_ADAM_BETA2,
value=params["beta2"])
mlperf_helper.ncf_print(key=mlperf_helper.TAGS.OPT_HP_ADAM_EPSILON,
value=params["epsilon"])
optimizer = tf.train.AdamOptimizer(
learning_rate=params["learning_rate"], beta1=params["beta1"],
beta2=params["beta2"], epsilon=params["epsilon"])
if params["use_tpu"]:
optimizer = tf.contrib.tpu.CrossShardOptimizer(optimizer)
mlperf_helper.ncf_print(key=mlperf_helper.TAGS.MODEL_HP_LOSS_FN,
value=mlperf_helper.TAGS.BCE)
loss = tf.losses.sparse_softmax_cross_entropy(
labels=labels,
logits=softmax_logits,
weights=tf.cast(valid_pt_mask, tf.float32)
)
# This tensor is used by logging hooks.
tf.identity(loss, name="cross_entropy")
global_step = tf.train.get_global_step()
tvars = tf.trainable_variables()
gradients = optimizer.compute_gradients(
loss, tvars, colocate_gradients_with_ops=True)
gradients = _sparse_to_dense_grads(gradients)
minimize_op = optimizer.apply_gradients(
gradients, global_step=global_step, name="train")
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
train_op = tf.group(minimize_op, update_ops)
return tf.estimator.EstimatorSpec(mode=mode, loss=loss, train_op=train_op)
else:
raise NotImplementedError
def construct_model(users, items, params):
# type: (tf.Tensor, tf.Tensor, dict) -> tf.keras.Model
"""Initialize NeuMF model.
Args:
users: Tensor of user ids.
items: Tensor of item ids.
params: Dict of hyperparameters.
Raises:
ValueError: if the first model layer is not even.
Returns:
model: A Keras model for computing the logits.
"""
num_users = params["num_users"]
num_items = params["num_items"]
model_layers = params["model_layers"]
mf_regularization = params["mf_regularization"]
mlp_reg_layers = params["mlp_reg_layers"]
mf_dim = params["mf_dim"]
mlperf_helper.ncf_print(key=mlperf_helper.TAGS.MODEL_HP_MF_DIM, value=mf_dim)
mlperf_helper.ncf_print(key=mlperf_helper.TAGS.MODEL_HP_MLP_LAYER_SIZES,
value=model_layers)
if model_layers[0] % 2 != 0:
raise ValueError("The first layer size should be multiple of 2!")
# Input variables
user_input = tf.keras.layers.Input(tensor=users, name="user_input")
item_input = tf.keras.layers.Input(tensor=items, name="item_input")
# Initializer for embedding layers
embedding_initializer = "glorot_uniform"
# It turns out to be significantly more efficient to store the MF and MLP
# embedding portions in the same table, and then slice as needed.
mf_slice_fn = lambda x: x[:, :mf_dim]
mlp_slice_fn = lambda x: x[:, mf_dim:]
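# For example, with the default flags mf_dim=8 and model_layers[0]=64, each
# embedding row has 8 + 64 // 2 = 40 columns: the first 8 feed the GMF branch
# and the remaining 32 feed the MLP branch, so concatenating the user and item
# MLP slices yields the 64-wide vector that matches model_layers[0].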
embedding_user = tf.keras.layers.Embedding(
num_users, mf_dim + model_layers[0] // 2,
embeddings_initializer=embedding_initializer,
embeddings_regularizer=tf.keras.regularizers.l2(mf_regularization),
input_length=1, name="embedding_user")(user_input)
embedding_item = tf.keras.layers.Embedding(
num_items, mf_dim + model_layers[0] // 2,
embeddings_initializer=embedding_initializer,
embeddings_regularizer=tf.keras.regularizers.l2(mf_regularization),
input_length=1, name="embedding_item")(item_input)
# GMF part
mf_user_latent = tf.keras.layers.Lambda(
mf_slice_fn, name="embedding_user_mf")(embedding_user)
mf_item_latent = tf.keras.layers.Lambda(
mf_slice_fn, name="embedding_item_mf")(embedding_item)
# MLP part
mlp_user_latent = tf.keras.layers.Lambda(
mlp_slice_fn, name="embedding_user_mlp")(embedding_user)
mlp_item_latent = tf.keras.layers.Lambda(
mlp_slice_fn, name="embedding_item_mlp")(embedding_item)
# Element-wise multiply
mf_vector = tf.keras.layers.multiply([mf_user_latent, mf_item_latent])
# Concatenation of two latent features
mlp_vector = tf.keras.layers.concatenate([mlp_user_latent, mlp_item_latent])
num_layer = len(model_layers) # Number of layers in the MLP
for layer in xrange(1, num_layer):
model_layer = tf.keras.layers.Dense(
model_layers[layer],
kernel_regularizer=tf.keras.regularizers.l2(mlp_reg_layers[layer]),
activation="relu")
mlp_vector = model_layer(mlp_vector)
# Concatenate GMF and MLP parts
predict_vector = tf.keras.layers.concatenate([mf_vector, mlp_vector])
# Final prediction layer
logits = tf.keras.layers.Dense(
1, activation=None, kernel_initializer="lecun_uniform",
name=movielens.RATING_COLUMN)(predict_vector)
# Print model topology.
model = tf.keras.models.Model([user_input, item_input], logits)
model.summary()
sys.stdout.flush()
return model
def compute_eval_loss_and_metrics(logits, # type: tf.Tensor
softmax_logits, # type: tf.Tensor
duplicate_mask, # type: tf.Tensor
num_training_neg, # type: int
match_mlperf=False, # type: bool
use_tpu_spec=False # type: bool
):
# type: (...) -> tf.estimator.EstimatorSpec
"""Model evaluation with HR and NDCG metrics.
The evaluation protocol ranks the test interacted item (the true item) among
the randomly chosen 999 items that the user has not interacted with.
The performance of the ranked list is judged by Hit Ratio (HR) and Normalized
Discounted Cumulative Gain (NDCG).
For evaluation, the ranked list is truncated at 10 for both metrics. As such,
the HR intuitively measures whether the test item is present on the top-10
list, and the NDCG accounts for the position of the hit by assigning higher
scores to hits at top ranks. Both metrics are calculated for each test user,
and the average scores are reported.
If `match_mlperf` is True, then the HR and NDCG computations are done in a
slightly unusual way to match the MLPerf reference implementation.
Specifically, if the evaluation negatives contain duplicate items, they will
be treated as if each item only appeared once. Effectively, for duplicate
items in a row, the predicted score for all but one of the items will be set
to -infinity.
For example, suppose we have the following inputs:
logits_by_user: [[ 2, 3, 3],
[ 5, 4, 4]]
items_by_user: [[10, 20, 20],
[30, 40, 40]]
# Note: items_by_user is not explicitly present. Instead, the relevant
# information is contained within `duplicate_mask`.
top_k: 2
Then with match_mlperf=True, the HR would be 2/2 = 1.0. With
match_mlperf=False, the HR would be 1/2 = 0.5. This is because each user has
predicted scores for only 2 unique items: 10 and 20 for the first user, and 30
and 40 for the second. Therefore, with match_mlperf=True, the first item's
score is guaranteed to be in the top 2. With match_mlperf=False, this function
would determine that the first user's first item is not in the top 2, because
item 20 has a higher score and occurs twice.
Args:
logits: A tensor containing the predicted logits for each user. The shape
of logits is (num_users_per_batch * (1 + NUM_EVAL_NEGATIVES),). Logits
for a user are grouped, and the last element of the group is the true
element.
softmax_logits: The same tensor, but with a column of zeros prepended.
duplicate_mask: A vector with the same shape as logits, with a value of 1
if the item corresponding to the logit at that position has already
appeared for that user.
num_training_neg: The number of negatives per positive during training.
match_mlperf: Use the MLPerf reference convention for computing rank.
use_tpu_spec: Whether to return a TPUEstimatorSpec instead of an
EstimatorSpec. Required for TPUs and when XLA is used on a GPU. Despite
its name, a TPUEstimatorSpec also works with GPUs.
Returns:
An EstimatorSpec for evaluation.
"""
in_top_k, ndcg, metric_weights, logits_by_user = compute_top_k_and_ndcg(
logits, duplicate_mask, match_mlperf)
# Examples are provided by the eval Dataset in a structured format, so eval
# labels can be reconstructed on the fly.
eval_labels = tf.reshape(shape=(-1,), tensor=tf.one_hot(
tf.zeros(shape=(logits_by_user.shape[0],), dtype=tf.int32) +
rconst.NUM_EVAL_NEGATIVES, logits_by_user.shape[1], dtype=tf.int32))
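# Each user's label row is one-hot at index NUM_EVAL_NEGATIVES (the true item
# is the last element of the group), flattened back to match the logit shape.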
eval_labels_float = tf.cast(eval_labels, tf.float32)
# During evaluation, the ratio of negatives to positives is much higher
# than during training. (Typically 999 to 1 vs. 4 to 1.) By adjusting the
# weights of the negative examples we compute a loss which is consistent with
# the training data and provides an apples-to-apples comparison.
negative_scale_factor = num_training_neg / rconst.NUM_EVAL_NEGATIVES
example_weights = (
(eval_labels_float + (1 - eval_labels_float) * negative_scale_factor) *
(1 + rconst.NUM_EVAL_NEGATIVES) / (1 + num_training_neg))
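# For instance, with num_training_neg=4 and NUM_EVAL_NEGATIVES=999, each
# positive receives weight 1000/5 = 200 and each negative (4/999) * (1000/5)
# ~= 0.80, so positives and negatives contribute in the same 1:4 ratio as in
# training while the weights still average to 1 across each user's row.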
# Tile metric weights back to logit dimensions
expanded_metric_weights = tf.reshape(tf.tile(
metric_weights[:, tf.newaxis], (1, rconst.NUM_EVAL_NEGATIVES + 1)), (-1,))
# Ignore padded examples.
example_weights *= tf.cast(expanded_metric_weights, tf.float32)
cross_entropy = tf.losses.sparse_softmax_cross_entropy(
logits=softmax_logits, labels=eval_labels, weights=example_weights)
def metric_fn(top_k_tensor, ndcg_tensor, weight_tensor):
return {
rconst.HR_KEY: tf.metrics.mean(top_k_tensor, weights=weight_tensor,
name=rconst.HR_METRIC_NAME),
rconst.NDCG_KEY: tf.metrics.mean(ndcg_tensor, weights=weight_tensor,
name=rconst.NDCG_METRIC_NAME),
}
if use_tpu_spec:
return tf.contrib.tpu.TPUEstimatorSpec(
mode=tf.estimator.ModeKeys.EVAL, loss=cross_entropy,
eval_metrics=(metric_fn, [in_top_k, ndcg, metric_weights]))
return tf.estimator.EstimatorSpec(
mode=tf.estimator.ModeKeys.EVAL,
loss=cross_entropy,
eval_metric_ops=metric_fn(in_top_k, ndcg, metric_weights)
)
def compute_top_k_and_ndcg(logits, # type: tf.Tensor
duplicate_mask, # type: tf.Tensor
match_mlperf=False # type: bool
):
"""Compute inputs of metric calculation.
Args:
logits: A tensor containing the predicted logits for each user. The shape
of logits is (num_users_per_batch * (1 + NUM_EVAL_NEGATIVES),). Logits
for a user are grouped, and the last element of the group is the true
element.
duplicate_mask: A vector with the same shape as logits, with a value of 1
if the item corresponding to the logit at that position has already
appeared for that user.
match_mlperf: Use the MLPerf reference convention for computing rank.
Returns:
in_top_k, ndcg and weights, all of which have size (num_users_in_batch,), and
logits_by_user, which has size
(num_users_in_batch, (rconst.NUM_EVAL_NEGATIVES + 1)).
"""
logits_by_user = tf.reshape(logits, (-1, rconst.NUM_EVAL_NEGATIVES + 1))
duplicate_mask_by_user = tf.reshape(duplicate_mask,
(-1, rconst.NUM_EVAL_NEGATIVES + 1))
if match_mlperf:
# Set duplicate logits to the min value for that dtype. The MLPerf
# reference dedupes during evaluation.
logits_by_user *= (1 - duplicate_mask_by_user)
logits_by_user += duplicate_mask_by_user * logits_by_user.dtype.min
# Determine the location of the true item (the last element of each group)
# after the elements are sorted.
sort_indices = tf.contrib.framework.argsort(
logits_by_user, axis=1, direction="DESCENDING")
# Use matrix multiplication to extract the position of the true item from the
# tensor of sorted indices. This approach is chosen because both GPUs and TPUs
# perform matrix multiplications very quickly. This is similar to np.argwhere.
# However this is a special case because the target will only appear in
# sort_indices once.
one_hot_position = tf.cast(tf.equal(sort_indices, rconst.NUM_EVAL_NEGATIVES),
tf.int32)
sparse_positions = tf.multiply(
one_hot_position, tf.range(logits_by_user.shape[1])[tf.newaxis, :])
position_vector = tf.reduce_sum(sparse_positions, axis=1)
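# For example, if a row of sort_indices is [3, 0, 2, 1] with
# NUM_EVAL_NEGATIVES = 3, then one_hot_position is [1, 0, 0, 0] and
# position_vector picks out 0: the true item was sorted into first place.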
in_top_k = tf.cast(tf.less(position_vector, rconst.TOP_K), tf.float32)
ndcg = tf.log(2.) / tf.log(tf.cast(position_vector, tf.float32) + 2)
ndcg *= in_top_k
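# With the true item at 0-based sorted position p, log(2) / log(p + 2) equals
# 1 / log2(p + 2), the standard NDCG for a single relevant item at 1-based
# rank p + 1: a hit at rank 1 scores 1.0, rank 2 scores ~0.63, and positions
# outside the top K are zeroed by the in_top_k mask.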
# If a row is a padded row, all but the first element will be a duplicate.
metric_weights = tf.not_equal(tf.reduce_sum(duplicate_mask_by_user, axis=1),
rconst.NUM_EVAL_NEGATIVES)
return in_top_k, ndcg, metric_weights, logits_by_user
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Helper file for running the async data generation process in OSS."""
import contextlib
import multiprocessing
import multiprocessing.pool
def get_forkpool(num_workers, init_worker=None, closing=True):
pool = multiprocessing.Pool(processes=num_workers, initializer=init_worker)
return contextlib.closing(pool) if closing else pool
def get_threadpool(num_workers, init_worker=None, closing=True):
pool = multiprocessing.pool.ThreadPool(processes=num_workers,
initializer=init_worker)
return contextlib.closing(pool) if closing else pool
class FauxPool(object):
"""Mimic a pool using for loops.
This class is used in place of proper pools when true determinism is desired
for testing or debugging.
"""
def __init__(self, *args, **kwargs):
pass
def map(self, func, iterable, chunksize=None):
return [func(i) for i in iterable]
def imap(self, func, iterable, chunksize=1):
for i in iterable:
yield func(i)
def close(self):
pass
def terminate(self):
pass
def join(self):
pass
def get_fauxpool(num_workers, init_worker=None, closing=True):
pool = FauxPool(processes=num_workers, initializer=init_worker)
return contextlib.closing(pool) if closing else pool
#!/bin/bash
set -e
if [ `id -u` != 0 ]; then
echo "Calling sudo to gain root for this shell. (Needed to clear caches.)"
sudo echo "Success"
fi
SCRIPT_DIR=`dirname "$BASH_SOURCE"`
export PYTHONPATH="${SCRIPT_DIR}/../../"
DATASET="ml-20m"
BUCKET=${BUCKET:-""}
ROOT_DIR="${BUCKET:-/tmp}/MLPerf_NCF"
echo "Root directory: ${ROOT_DIR}"
if [[ -z ${BUCKET} ]]; then
LOCAL_ROOT=${ROOT_DIR}
else
LOCAL_ROOT="/tmp/MLPerf_NCF"
mkdir -p ${LOCAL_ROOT}
echo "Local root (for files which cannot use GCS): ${LOCAL_ROOT}"
fi
DATE=$(date '+%Y-%m-%d_%H:%M:%S')
TEST_DIR="${ROOT_DIR}/${DATE}"
LOCAL_TEST_DIR="${LOCAL_ROOT}/${DATE}"
mkdir -p ${LOCAL_TEST_DIR}
TPU=${TPU:-""}
if [[ -z ${TPU} ]]; then
DEVICE_FLAG="--num_gpus -1"  # Optionally add: --use_xla_for_gpu
else
DEVICE_FLAG="--tpu ${TPU} --num_gpus 0"
fi
DATA_DIR="${ROOT_DIR}/movielens_data"
python "${SCRIPT_DIR}/../datasets/movielens.py" --data_dir ${DATA_DIR} --dataset ${DATASET}
{
for i in `seq 0 4`;
do
START_TIME=$(date +%s)
MODEL_DIR="${TEST_DIR}/model_dir_${i}"
RUN_LOG="${LOCAL_TEST_DIR}/run_${i}.log"
export COMPLIANCE_FILE="${LOCAL_TEST_DIR}/run_${i}_compliance_raw.log"
export STITCHED_COMPLIANCE_FILE="${LOCAL_TEST_DIR}/run_${i}_compliance_submission.log"
echo ""
echo "Beginning run ${i}"
echo " Complete output logs are in ${RUN_LOG}"
echo " Compliance logs: (submission log is created after run.)"
echo " ${COMPLIANCE_FILE}"
echo " ${STITCHED_COMPLIANCE_FILE}"
# To reduce variation set the seed flag:
# --seed ${i}
python -u "${SCRIPT_DIR}/ncf_main.py" \
--model_dir ${MODEL_DIR} \
--data_dir ${DATA_DIR} \
--dataset ${DATASET} --hooks "" \
${DEVICE_FLAG} \
--clean \
--train_epochs 14 \
--batch_size 98304 \
--eval_batch_size 160000 \
--learning_rate 0.00382059 \
--beta1 0.783529 \
--beta2 0.909003 \
--epsilon 1.45439e-07 \
--layers 256,256,128,64 --num_factors 64 \
--hr_threshold 0.635 \
--ml_perf \
|& tee ${RUN_LOG} \
| grep --line-buffered -E --regexp="(Iteration [0-9]+: HR = [0-9\.]+, NDCG = [0-9\.]+, Loss = [0-9\.]+)|(pipeline_hash)|(MLPerf time:)"
END_TIME=$(date +%s)
echo "Run ${i} complete: $(( $END_TIME - $START_TIME )) seconds."
# Don't fill up the local hard drive.
if [[ -z ${BUCKET} ]]; then
echo "Removing model directory to save space."
rm -r ${MODEL_DIR}
fi
done
} |& tee "${LOCAL_TEST_DIR}/summary.log"
#!/bin/bash
set -e
# Example settings:
# export TPU="taylorrobie-tpu-0"
# export BUCKET="gs://taylorrobie-tpu-test-bucket-2"
# Remove IDE "not assigned" warning highlights.
TPU=${TPU:-""}
BUCKET=${BUCKET:-""}
if [[ -z ${TPU} ]]; then
echo "Please set 'TPU' to the name of the TPU to be used."
exit 1
fi
if [[ -z ${BUCKET} ]]; then
echo "Please set 'BUCKET' to the GCS bucket to be used."
exit 1
fi
./run.sh
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Statistics utility functions of NCF."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import numpy as np
def random_int32():
return np.random.randint(low=0, high=np.iinfo(np.int32).max, dtype=np.int32)
def permutation(args):
"""Fork safe permutation function.
This function can be called within a multiprocessing worker and give
appropriately random results.
Args:
args: A size-two tuple that will be unpacked into the size of the
permutation and the random seed. This form is used because starmap is
not universally available.
Returns:
A NumPy array containing a random permutation.
"""
x, seed = args
# If seed is None NumPy will seed randomly.
state = np.random.RandomState(seed=seed) # pylint: disable=no-member
output = np.arange(x, dtype=np.int32)
state.shuffle(output)
return output
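# Illustrative usage (hypothetical sizes and seeds), e.g. from a worker pool
# created with popen_helper.get_forkpool:
#   with popen_helper.get_forkpool(4) as pool:
#     perms = pool.map(permutation, [(1000, seed) for seed in range(8)])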
def very_slightly_biased_randint(max_val_vector):
sample_dtype = np.uint64
out_dtype = max_val_vector.dtype
samples = np.random.randint(low=0, high=np.iinfo(sample_dtype).max,
size=max_val_vector.shape, dtype=sample_dtype)
return np.mod(samples, max_val_vector.astype(sample_dtype)).astype(out_dtype)
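# "Very slightly biased" because of the modulo: the sampling range is not in
# general an exact multiple of max_val, so the smallest residues can each be
# produced by one extra sample value. The per-value skew is on the order of
# max_val / 2**64, which is negligible for realistic item counts.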
def mask_duplicates(x, axis=1): # type: (np.ndarray, int) -> np.ndarray
"""Identify duplicates from sampling with replacement.
Args:
x: A 2D NumPy array of samples
axis: The axis along which to de-dupe.
Returns:
A NumPy array with the same shape as x, containing ones where an element
appeared previously along axis 1, and zeros elsewhere.
"""
if axis != 1:
raise NotImplementedError
x_sort_ind = np.argsort(x, axis=1, kind="mergesort")
sorted_x = x[np.arange(x.shape[0])[:, np.newaxis], x_sort_ind]
# Compute the indices needed to map values back to their original position.
inv_x_sort_ind = np.argsort(x_sort_ind, axis=1, kind="mergesort")
# Compute the difference of adjacent sorted elements.
diffs = sorted_x[:, :-1] - sorted_x[:, 1:]
# We are only interested in whether an element is zero. Therefore left padding
# with ones to restore the original shape is sufficient.
diffs = np.concatenate(
[np.ones((diffs.shape[0], 1), dtype=diffs.dtype), diffs], axis=1)
# Duplicate values will have a difference of zero. By definition the first
# element is never a duplicate.
return np.where(diffs[np.arange(x.shape[0])[:, np.newaxis],
inv_x_sort_ind], 0, 1)
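# Illustrative example: mask_duplicates(np.array([[1, 2, 2], [3, 4, 5]]))
# returns [[0, 0, 1], [0, 0, 0]]; only the second occurrence of 2 is flagged.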
google-api-python-client>=1.6.7
google-cloud-bigquery>=0.31.0
kaggle>=1.3.9
mlperf_compliance==0.0.10
numpy>=1.15.4
oauth2client>=4.1.2
pandas>=0.22.0
psutil>=5.4.3
py-cpuinfo>=3.3.0
scipy>=0.19.1
typing
# ResNet in TensorFlow
Deep residual networks, or ResNets for short, provided the breakthrough idea of
identity mappings in order to enable training of very deep convolutional neural
networks. This folder contains an implementation of ResNet for the ImageNet
dataset written in TensorFlow.
See the following papers for more background:
[1] [Deep Residual Learning for Image Recognition](https://arxiv.org/pdf/1512.03385.pdf) by Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun, Dec 2015.
[2] [Identity Mappings in Deep Residual Networks](https://arxiv.org/pdf/1603.05027.pdf) by Kaiming He, Xiangyu Zhang, Shaoqing Ren, and Jian Sun, Jul 2016.
In code, v1 refers to the ResNet defined in [1], but with a stride of 2 on
the 3x3 convolution rather than on the first 1x1 convolution of the
bottleneck. This change yields higher and more stable accuracy in fewer
epochs than the original v1, and has been shown to scale to higher batch
sizes with minimal degradation in accuracy. There is no originating paper;
the first mention we are aware of was in the Torch version of
[ResNetv1](https://github.com/facebook/fb.resnet.torch). Most popular v1
implementations actually follow this variant, which we call ResNetv1.5.
In testing we found v1.5 requires ~12% more compute to train and has 6% lower
inference throughput than ResNetv1. The CIFAR-10 ResNet does not use the
bottleneck, and is thus the same for v1 and v1.5.
v2 refers to [2]. The principal difference between the two versions is that v1
applies batch normalization and activation after convolution, while v2 applies
batch normalization, then activation, and finally convolution. A schematic
comparison is presented in Figure 1 (left) of [2].
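The ordering difference can be sketched with two toy layer functions (a minimal illustration using `tf.layers`, not the building blocks in `resnet_model.py` themselves):

```python
import tensorflow as tf

def v1_unit(inputs, filters, training):
  """v1 ordering: convolution, then batch normalization, then activation."""
  x = tf.layers.conv2d(inputs, filters, 3, padding="same", use_bias=False)
  x = tf.layers.batch_normalization(x, training=training)
  return tf.nn.relu(x)

def v2_unit(inputs, filters, training):
  """v2 ("pre-activation") ordering: batch norm, then activation, then conv."""
  x = tf.layers.batch_normalization(inputs, training=training)
  x = tf.nn.relu(x)
  return tf.layers.conv2d(x, filters, 3, padding="same", use_bias=False)
```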
Please proceed according to which dataset you would like to train/evaluate on:
## CIFAR-10
### Setup
You simply need to have the latest version of TensorFlow installed.
First make sure you've [added the models folder to your Python path](/official/#running-the-models); otherwise you may encounter an error like `ImportError: No module named official.resnet`.
Then download and extract the CIFAR-10 data from Alex Krizhevsky's website, specifying the location with the `--data_dir` flag. Run the following:
```bash
python cifar10_download_and_extract.py
# Then to train the model, run the following:
python cifar10_main.py
```
Use `--data_dir` to specify the location of the CIFAR-10 data used in the previous step. There are more flag options as described in `cifar10_main.py`.
## ImageNet
### Setup
To begin, you will need to download the ImageNet dataset and convert it to
TFRecord format. The following [script](https://github.com/tensorflow/tpu/blob/master/tools/datasets/imagenet_to_gcs.py)
and [README](https://github.com/tensorflow/tpu/tree/master/tools/datasets#imagenet_to_gcspy)
provide a few options.
Once your dataset is ready, you can begin training the model as follows:
```bash
python imagenet_main.py --data_dir=/path/to/imagenet
```
The model will begin training and will automatically evaluate itself on the
validation data roughly once per epoch.
Note that there are a number of other options you can specify, including
`--model_dir` to choose where to store the model and `--resnet_size` to choose
the model size (options include ResNet-18 through ResNet-200). See
[`resnet_run_loop.py`](resnet_run_loop.py) for the full list of options.
## Compute Devices
Training is accomplished using the
[DistributionStrategies API](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/contrib/distribute/README.md).
The appropriate distribution strategy is chosen based on the `--num_gpus` flag.
By default this flag is one if TensorFlow is compiled with CUDA, and zero
otherwise.
num_gpus:
+ 0: Use OneDeviceStrategy and train on CPU.
+ 1: Use OneDeviceStrategy and train on GPU.
+ 2+: Use MirroredStrategy (data parallelism) to distribute a batch between devices.
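For example, to distribute each batch across two GPUs (assuming the ImageNet data from the setup step above):

```bash
python imagenet_main.py --data_dir=/path/to/imagenet --num_gpus=2
```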
### Pre-trained model
You can download pre-trained versions of ResNet-50. Reported accuracies are top-1 single-crop accuracy for the ImageNet validation set.
Models are provided as both checkpoints produced by Estimator during training, and as SavedModels, which are more portable. Checkpoints are fragile,
and are not guaranteed to work with future versions of the code. Both ResNet v1
and ResNet v2 have been trained in both fp16 and fp32 precision. (Here v1 refers to "v1.5". See the note above.) Furthermore, SavedModels
are generated to accept either tensor or JPG inputs, and with channels_first (NCHW) and channels_last (NHWC) convolutions. NCHW is generally
better for GPUs, while NHWC is generally better for CPUs. See the TensorFlow [performance guide](https://www.tensorflow.org/performance/performance_guide#data_formats)
for more details.
ResNet-50 v2 (fp32, Accuracy 76.47%):
* [Checkpoint](http://download.tensorflow.org/models/official/20181001_resnet/checkpoints/resnet_imagenet_v2_fp32_20181001.tar.gz)
* SavedModel [(NCHW)](http://download.tensorflow.org/models/official/20181001_resnet/savedmodels/resnet_v2_fp32_savedmodel_NCHW.tar.gz),
[(NCHW, JPG)](http://download.tensorflow.org/models/official/20181001_resnet/savedmodels/resnet_v2_fp32_savedmodel_NCHW_jpg.tar.gz),
[(NHWC)](http://download.tensorflow.org/models/official/20181001_resnet/savedmodels/resnet_v2_fp32_savedmodel_NHWC.tar.gz),
[(NHWC, JPG)](http://download.tensorflow.org/models/official/20181001_resnet/savedmodels/resnet_v2_fp32_savedmodel_NHWC_jpg.tar.gz)
ResNet-50 v2 (fp16, Accuracy 76.56%):
* [Checkpoint](http://download.tensorflow.org/models/official/20181001_resnet/checkpoints/resnet_imagenet_v2_fp16_20180928.tar.gz)
* SavedModel [(NCHW)](http://download.tensorflow.org/models/official/20181001_resnet/savedmodels/resnet_v2_fp16_savedmodel_NCHW.tar.gz),
[(NCHW, JPG)](http://download.tensorflow.org/models/official/20181001_resnet/savedmodels/resnet_v2_fp16_savedmodel_NCHW_jpg.tar.gz),
[(NHWC)](http://download.tensorflow.org/models/official/20181001_resnet/savedmodels/resnet_v2_fp16_savedmodel_NHWC.tar.gz),
[(NHWC, JPG)](http://download.tensorflow.org/models/official/20181001_resnet/savedmodels/resnet_v2_fp16_savedmodel_NHWC_jpg.tar.gz)
ResNet-50 v1 (fp32, Accuracy 76.53%):
* [Checkpoint](http://download.tensorflow.org/models/official/20181001_resnet/checkpoints/resnet_imagenet_v1_fp32_20181001.tar.gz)
* SavedModel [(NCHW)](http://download.tensorflow.org/models/official/20181001_resnet/savedmodels/resnet_v1_fp32_savedmodel_NCHW.tar.gz),
[(NCHW, JPG)](http://download.tensorflow.org/models/official/20181001_resnet/savedmodels/resnet_v1_fp32_savedmodel_NCHW_jpg.tar.gz),
[(NHWC)](http://download.tensorflow.org/models/official/20181001_resnet/savedmodels/resnet_v1_fp32_savedmodel_NHWC.tar.gz),
[(NHWC, JPG)](http://download.tensorflow.org/models/official/20181001_resnet/savedmodels/resnet_v1_fp32_savedmodel_NHWC_jpg.tar.gz)
ResNet-50 v1 (fp16, Accuracy 76.18%):
* [Checkpoint](http://download.tensorflow.org/models/official/20181001_resnet/checkpoints/resnet_imagenet_v1_fp16_20181001.tar.gz)
* SavedModel [(NCHW)](http://download.tensorflow.org/models/official/20181001_resnet/savedmodels/resnet_v1_fp16_savedmodel_NCHW.tar.gz),
[(NCHW, JPG)](http://download.tensorflow.org/models/official/20181001_resnet/savedmodels/resnet_v1_fp16_savedmodel_NCHW_jpg.tar.gz),
[(NHWC)](http://download.tensorflow.org/models/official/20181001_resnet/savedmodels/resnet_v1_fp16_savedmodel_NHWC.tar.gz),
[(NHWC, JPG)](http://download.tensorflow.org/models/official/20181001_resnet/savedmodels/resnet_v1_fp16_savedmodel_NHWC_jpg.tar.gz)
### Transfer Learning
You can use a pretrained model to initialize a training process. In addition, you can freeze all but the final fully connected layers to fine-tune your model. Transfer learning is useful when training on your own small datasets. For a brief look at transfer learning in the context of convolutional neural networks, we recommend reading these [short notes](http://cs231n.github.io/transfer-learning/).
To fine-tune a pretrained ResNet you must make three changes to your training procedure:
1) Build the exact same model as before, except changing the number of labels in the final classification layer.
2) Restore all weights from the pre-trained ResNet except for the final classification layer, which will be randomly initialized instead.
3) Freeze earlier layers of the network.
We can perform these three operations by specifying two flags: ```--pretrained_model_checkpoint_path``` and ```--fine_tune```. The first flag is a string that points to the path of a pre-trained ResNet model. If this flag is specified, it will load all but the final classification layer. A key thing to note: if both ```--pretrained_model_checkpoint_path``` and a non-empty ```model_dir``` directory are passed, the TensorFlow estimator will load only the ```model_dir```. For more on this please see [WarmStartSettings](https://www.tensorflow.org/versions/master/api_docs/python/tf/estimator/WarmStartSettings) and [Estimators](https://www.tensorflow.org/guide/estimators).
The second flag ```--fine_tune``` is a boolean that indicates whether earlier layers of the network should be frozen. You may set this flag to false if you wish to continue training a pre-trained model from a checkpoint, or to true to train a new classification layer from scratch.
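As a sketch, a fine-tuning run combining the two flags might look like the following (the checkpoint path is a placeholder for your own pre-trained model):

```bash
python imagenet_main.py \
  --data_dir=/path/to/imagenet \
  --model_dir=/tmp/resnet_finetune \
  --pretrained_model_checkpoint_path=/path/to/pretrained/resnet \
  --fine_tune
```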
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Runs a ResNet model on the CIFAR-10 dataset."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
from absl import app as absl_app
from absl import flags
import tensorflow as tf # pylint: disable=g-bad-import-order
from official.resnet import resnet_model
from official.resnet import resnet_run_loop
from official.utils.flags import core as flags_core
from official.utils.logs import logger
HEIGHT = 32
WIDTH = 32
NUM_CHANNELS = 3
_DEFAULT_IMAGE_BYTES = HEIGHT * WIDTH * NUM_CHANNELS
# The record is the image plus a one-byte label
_RECORD_BYTES = _DEFAULT_IMAGE_BYTES + 1
NUM_CLASSES = 10
_NUM_DATA_FILES = 5
# TODO(tobyboyd): Change to best practice 45K(train)/5K(val)/10K(test) splits.
NUM_IMAGES = {
'train': 50000,
'validation': 10000,
}
DATASET_NAME = 'CIFAR-10'
###############################################################################
# Data processing
###############################################################################
def get_filenames(is_training, data_dir):
"""Returns a list of filenames."""
assert tf.gfile.Exists(data_dir), (
'Run cifar10_download_and_extract.py first to download and extract the '
'CIFAR-10 data.')
if is_training:
return [
os.path.join(data_dir, 'data_batch_%d.bin' % i)
for i in range(1, _NUM_DATA_FILES + 1)
]
else:
return [os.path.join(data_dir, 'test_batch.bin')]
def parse_record(raw_record, is_training, dtype):
"""Parse CIFAR-10 image and label from a raw record."""
# Convert bytes to a vector of uint8 that is record_bytes long.
record_vector = tf.decode_raw(raw_record, tf.uint8)
# The first byte represents the label, which we convert from uint8 to int32
# and then to one-hot.
label = tf.cast(record_vector[0], tf.int32)
# The remaining bytes after the label represent the image, which we reshape
# from [depth * height * width] to [depth, height, width].
depth_major = tf.reshape(record_vector[1:_RECORD_BYTES],
[NUM_CHANNELS, HEIGHT, WIDTH])
# Convert from [depth, height, width] to [height, width, depth], and cast as
# float32.
image = tf.cast(tf.transpose(depth_major, [1, 2, 0]), tf.float32)
image = preprocess_image(image, is_training)
image = tf.cast(image, dtype)
return image, label
def preprocess_image(image, is_training):
"""Preprocess a single image of layout [height, width, depth]."""
if is_training:
# Resize the image to add four extra pixels on each side.
image = tf.image.resize_image_with_crop_or_pad(
image, HEIGHT + 8, WIDTH + 8)
# Randomly crop a [HEIGHT, WIDTH] section of the image.
image = tf.random_crop(image, [HEIGHT, WIDTH, NUM_CHANNELS])
# Randomly flip the image horizontally.
image = tf.image.random_flip_left_right(image)
# Subtract off the mean and divide by the variance of the pixels.
image = tf.image.per_image_standardization(image)
return image
def input_fn(is_training, data_dir, batch_size, num_epochs=1,
dtype=tf.float32, datasets_num_private_threads=None,
num_parallel_batches=1, parse_record_fn=parse_record):
"""Input function which provides batches for train or eval.
Args:
is_training: A boolean denoting whether the input is for training.
data_dir: The directory containing the input data.
batch_size: The number of samples per batch.
num_epochs: The number of epochs to repeat the dataset.
dtype: Data type to use for images/features
datasets_num_private_threads: Number of private threads for tf.data.
num_parallel_batches: Number of parallel batches for tf.data.
parse_record_fn: Function to use for parsing the records.
Returns:
A dataset that can be used for iteration.
"""
filenames = get_filenames(is_training, data_dir)
dataset = tf.data.FixedLengthRecordDataset(filenames, _RECORD_BYTES)
return resnet_run_loop.process_record_dataset(
dataset=dataset,
is_training=is_training,
batch_size=batch_size,
shuffle_buffer=NUM_IMAGES['train'],
parse_record_fn=parse_record_fn,
num_epochs=num_epochs,
dtype=dtype,
datasets_num_private_threads=datasets_num_private_threads,
num_parallel_batches=num_parallel_batches
)
def get_synth_input_fn(dtype):
return resnet_run_loop.get_synth_input_fn(
HEIGHT, WIDTH, NUM_CHANNELS, NUM_CLASSES, dtype=dtype)
###############################################################################
# Running the model
###############################################################################
class Cifar10Model(resnet_model.Model):
"""Model class with appropriate defaults for CIFAR-10 data."""
def __init__(self, resnet_size, data_format=None, num_classes=NUM_CLASSES,
resnet_version=resnet_model.DEFAULT_VERSION,
dtype=resnet_model.DEFAULT_DTYPE):
"""These are the parameters that work for CIFAR-10 data.
Args:
resnet_size: The number of convolutional layers needed in the model.
data_format: Either 'channels_first' or 'channels_last', specifying which
data format to use when setting up the model.
num_classes: The number of output classes needed from the model. This
enables users to extend the same model to their own datasets.
resnet_version: Integer representing which version of the ResNet network
to use. See README for details. Valid values: [1, 2]
dtype: The TensorFlow dtype to use for calculations.
Raises:
ValueError: if invalid resnet_size is chosen
"""
if resnet_size % 6 != 2:
raise ValueError('resnet_size must be 6n + 2:', resnet_size)
num_blocks = (resnet_size - 2) // 6
super(Cifar10Model, self).__init__(
resnet_size=resnet_size,
bottleneck=False,
num_classes=num_classes,
num_filters=16,
kernel_size=3,
conv_stride=1,
first_pool_size=None,
first_pool_stride=None,
block_sizes=[num_blocks] * 3,
block_strides=[1, 2, 2],
resnet_version=resnet_version,
data_format=data_format,
dtype=dtype
)
def cifar10_model_fn(features, labels, mode, params):
"""Model function for CIFAR-10."""
features = tf.reshape(features, [-1, HEIGHT, WIDTH, NUM_CHANNELS])
# Learning rate schedule follows arXiv:1512.03385 for ResNet-56 and under.
learning_rate_fn = resnet_run_loop.learning_rate_with_decay(
batch_size=params['batch_size'], batch_denom=128,
num_images=NUM_IMAGES['train'], boundary_epochs=[91, 136, 182],
decay_rates=[1, 0.1, 0.01, 0.001])
# Weight decay of 2e-4 diverges from 1e-4 decay used in the ResNet paper
# and seems more stable in testing. The difference was nominal for ResNet-56.
weight_decay = 2e-4
# Empirical testing showed that including batch_normalization variables
# in the calculation of regularized loss helped validation accuracy
# for the CIFAR-10 dataset, perhaps because the regularization prevents
# overfitting on the small data set. We therefore include all vars when
# regularizing and computing loss during training.
def loss_filter_fn(_):
return True
return resnet_run_loop.resnet_model_fn(
features=features,
labels=labels,
mode=mode,
model_class=Cifar10Model,
resnet_size=params['resnet_size'],
weight_decay=weight_decay,
learning_rate_fn=learning_rate_fn,
momentum=0.9,
data_format=params['data_format'],
resnet_version=params['resnet_version'],
loss_scale=params['loss_scale'],
loss_filter_fn=loss_filter_fn,
dtype=params['dtype'],
fine_tune=params['fine_tune']
)
def define_cifar_flags():
resnet_run_loop.define_resnet_flags()
flags.adopt_module_key_flags(resnet_run_loop)
flags_core.set_defaults(data_dir='/tmp/cifar10_data/cifar-10-batches-bin',
model_dir='/tmp/cifar10_model',
resnet_size='56',
train_epochs=182,
epochs_between_evals=10,
batch_size=128,
image_bytes_as_serving_input=False)
def run_cifar(flags_obj):
"""Run ResNet CIFAR-10 training and eval loop.
Args:
flags_obj: An object containing parsed flag values.
Returns:
Dictionary of results. Including final accuracy.
"""
if flags_obj.image_bytes_as_serving_input:
tf.logging.fatal('--image_bytes_as_serving_input cannot be set to True '
'for CIFAR. This flag is only applicable to ImageNet.')
return
input_function = (flags_obj.use_synthetic_data and
get_synth_input_fn(flags_core.get_tf_dtype(flags_obj)) or
input_fn)
result = resnet_run_loop.resnet_main(
flags_obj, cifar10_model_fn, input_function, DATASET_NAME,
shape=[HEIGHT, WIDTH, NUM_CHANNELS])
return result
def main(_):
with logger.benchmark_context(flags.FLAGS):
run_cifar(flags.FLAGS)
if __name__ == '__main__':
tf.logging.set_verbosity(tf.logging.INFO)
define_cifar_flags()
absl_app.run(main)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from tempfile import mkstemp
import numpy as np
import tensorflow as tf # pylint: disable=g-bad-import-order
from official.resnet import cifar10_main
from official.utils.testing import integration
tf.logging.set_verbosity(tf.logging.ERROR)
_BATCH_SIZE = 128
_HEIGHT = 32
_WIDTH = 32
_NUM_CHANNELS = 3
class BaseTest(tf.test.TestCase):
"""Tests for the Cifar10 version of Resnet.
"""
@classmethod
def setUpClass(cls): # pylint: disable=invalid-name
super(BaseTest, cls).setUpClass()
cifar10_main.define_cifar_flags()
def tearDown(self):
super(BaseTest, self).tearDown()
tf.gfile.DeleteRecursively(self.get_temp_dir())
def test_dataset_input_fn(self):
fake_data = bytearray()
fake_data.append(7)
for i in range(_NUM_CHANNELS):
for _ in range(_HEIGHT * _WIDTH):
fake_data.append(i)
_, filename = mkstemp(dir=self.get_temp_dir())
data_file = open(filename, 'wb')
data_file.write(fake_data)
data_file.close()
fake_dataset = tf.data.FixedLengthRecordDataset(
filename, cifar10_main._RECORD_BYTES) # pylint: disable=protected-access
fake_dataset = fake_dataset.map(
lambda val: cifar10_main.parse_record(val, False, tf.float32))
image, label = fake_dataset.make_one_shot_iterator().get_next()
self.assertAllEqual(label.shape, ())
self.assertAllEqual(image.shape, (_HEIGHT, _WIDTH, _NUM_CHANNELS))
with self.test_session() as sess:
image, label = sess.run([image, label])
self.assertEqual(label, 7)
for row in image:
for pixel in row:
self.assertAllClose(pixel, np.array([-1.225, 0., 1.225]), rtol=1e-3)
  def cifar10_model_fn_helper(self, mode, resnet_version, dtype):
    """Tests that the EstimatorSpec is given the appropriate arguments."""
input_fn = cifar10_main.get_synth_input_fn(dtype)
dataset = input_fn(True, '', _BATCH_SIZE)
iterator = dataset.make_initializable_iterator()
features, labels = iterator.get_next()
spec = cifar10_main.cifar10_model_fn(
features, labels, mode, {
'dtype': dtype,
'resnet_size': 32,
'data_format': 'channels_last',
'batch_size': _BATCH_SIZE,
'resnet_version': resnet_version,
'loss_scale': 128 if dtype == tf.float16 else 1,
'fine_tune': False,
})
predictions = spec.predictions
self.assertAllEqual(predictions['probabilities'].shape,
(_BATCH_SIZE, 10))
self.assertEqual(predictions['probabilities'].dtype, tf.float32)
self.assertAllEqual(predictions['classes'].shape, (_BATCH_SIZE,))
self.assertEqual(predictions['classes'].dtype, tf.int64)
if mode != tf.estimator.ModeKeys.PREDICT:
loss = spec.loss
self.assertAllEqual(loss.shape, ())
self.assertEqual(loss.dtype, tf.float32)
if mode == tf.estimator.ModeKeys.EVAL:
eval_metric_ops = spec.eval_metric_ops
self.assertAllEqual(eval_metric_ops['accuracy'][0].shape, ())
self.assertAllEqual(eval_metric_ops['accuracy'][1].shape, ())
self.assertEqual(eval_metric_ops['accuracy'][0].dtype, tf.float32)
self.assertEqual(eval_metric_ops['accuracy'][1].dtype, tf.float32)
def test_cifar10_model_fn_train_mode_v1(self):
self.cifar10_model_fn_helper(tf.estimator.ModeKeys.TRAIN, resnet_version=1,
dtype=tf.float32)
  def test_cifar10_model_fn_train_mode_v2(self):
self.cifar10_model_fn_helper(tf.estimator.ModeKeys.TRAIN, resnet_version=2,
dtype=tf.float32)
def test_cifar10_model_fn_eval_mode_v1(self):
self.cifar10_model_fn_helper(tf.estimator.ModeKeys.EVAL, resnet_version=1,
dtype=tf.float32)
def test_cifar10_model_fn_eval_mode_v2(self):
self.cifar10_model_fn_helper(tf.estimator.ModeKeys.EVAL, resnet_version=2,
dtype=tf.float32)
def test_cifar10_model_fn_predict_mode_v1(self):
self.cifar10_model_fn_helper(tf.estimator.ModeKeys.PREDICT,
resnet_version=1, dtype=tf.float32)
def test_cifar10_model_fn_predict_mode_v2(self):
self.cifar10_model_fn_helper(tf.estimator.ModeKeys.PREDICT,
resnet_version=2, dtype=tf.float32)
def _test_cifar10model_shape(self, resnet_version):
batch_size = 135
num_classes = 246
model = cifar10_main.Cifar10Model(32, data_format='channels_last',
num_classes=num_classes,
resnet_version=resnet_version)
fake_input = tf.random_uniform([batch_size, _HEIGHT, _WIDTH, _NUM_CHANNELS])
output = model(fake_input, training=True)
self.assertAllEqual(output.shape, (batch_size, num_classes))
def test_cifar10model_shape_v1(self):
self._test_cifar10model_shape(resnet_version=1)
def test_cifar10model_shape_v2(self):
self._test_cifar10model_shape(resnet_version=2)
def test_cifar10_end_to_end_synthetic_v1(self):
integration.run_synthetic(
main=cifar10_main.run_cifar, tmp_root=self.get_temp_dir(),
extra_flags=['-resnet_version', '1']
)
def test_cifar10_end_to_end_synthetic_v2(self):
integration.run_synthetic(
main=cifar10_main.run_cifar, tmp_root=self.get_temp_dir(),
extra_flags=['-resnet_version', '2']
)
if __name__ == '__main__':
tf.test.main()
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes Estimator benchmarks and accuracy tests."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import json
import os
import time
from absl import flags
from absl.testing import flagsaver
import tensorflow as tf # pylint: disable=g-bad-import-order
from official.resnet import cifar10_main as cifar_main
DATA_DIR = '/data/cifar10_data/cifar-10-batches-bin'
class EstimatorCifar10BenchmarkTests(tf.test.Benchmark):
"""Benchmarks and accuracy tests for Estimator ResNet56."""
local_flags = None
def __init__(self, output_dir=None):
self.output_dir = output_dir
def resnet56_1_gpu(self):
"""Test layers model with Estimator and distribution strategies."""
self._setup()
flags.FLAGS.num_gpus = 1
flags.FLAGS.data_dir = DATA_DIR
flags.FLAGS.batch_size = 128
flags.FLAGS.train_epochs = 182
flags.FLAGS.model_dir = self._get_model_dir('resnet56_1_gpu')
flags.FLAGS.resnet_size = 56
flags.FLAGS.dtype = 'fp32'
self._run_and_report_benchmark()
def resnet56_fp16_1_gpu(self):
"""Test layers FP16 model with Estimator and distribution strategies."""
self._setup()
flags.FLAGS.num_gpus = 1
flags.FLAGS.data_dir = DATA_DIR
flags.FLAGS.batch_size = 128
flags.FLAGS.train_epochs = 182
flags.FLAGS.model_dir = self._get_model_dir('resnet56_fp16_1_gpu')
flags.FLAGS.resnet_size = 56
flags.FLAGS.dtype = 'fp16'
self._run_and_report_benchmark()
def resnet56_2_gpu(self):
"""Test layers model with Estimator and dist_strat. 2 GPUs."""
self._setup()
    flags.FLAGS.num_gpus = 2
flags.FLAGS.data_dir = DATA_DIR
flags.FLAGS.batch_size = 128
flags.FLAGS.train_epochs = 182
flags.FLAGS.model_dir = self._get_model_dir('resnet56_2_gpu')
flags.FLAGS.resnet_size = 56
flags.FLAGS.dtype = 'fp32'
self._run_and_report_benchmark()
def resnet56_fp16_2_gpu(self):
"""Test layers FP16 model with Estimator and dist_strat. 2 GPUs."""
self._setup()
flags.FLAGS.num_gpus = 2
flags.FLAGS.data_dir = DATA_DIR
flags.FLAGS.batch_size = 128
flags.FLAGS.train_epochs = 182
flags.FLAGS.model_dir = self._get_model_dir('resnet56_fp16_2_gpu')
flags.FLAGS.resnet_size = 56
flags.FLAGS.dtype = 'fp16'
self._run_and_report_benchmark()
def unit_test(self):
"""A lightweigth test that can finish quickly"""
self._setup()
flags.FLAGS.num_gpus = 1
flags.FLAGS.data_dir = DATA_DIR
flags.FLAGS.batch_size = 128
flags.FLAGS.train_epochs = 1
flags.FLAGS.model_dir = self._get_model_dir('resnet56_1_gpu')
flags.FLAGS.resnet_size = 8
flags.FLAGS.dtype = 'fp32'
self._run_and_report_benchmark()
def _run_and_report_benchmark(self):
start_time_sec = time.time()
stats = cifar_main.run_cifar(flags.FLAGS)
wall_time_sec = time.time() - start_time_sec
self.report_benchmark(
iters=stats['global_step'],
wall_time=wall_time_sec,
extras={
'accuracy':
self._json_description(stats['accuracy'].item(), priority=0),
'accuracy_top_5':
self._json_description(stats['accuracy_top_5'].item()),
})
def _json_description(self,
value,
priority=None,
min_value=None,
max_value=None):
"""Get a json-formatted string describing the attributes for a metric"""
attributes = {}
attributes['value'] = value
if priority:
attributes['priority'] = priority
if min_value:
attributes['min_value'] = min_value
if max_value:
attributes['max_value'] = max_value
if min_value or max_value:
succeeded = True
if min_value and value < min_value:
succeeded = False
if max_value and value > max_value:
succeeded = False
attributes['succeeded'] = succeeded
return json.dumps(attributes)
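  # For illustration, a call such as
  #   self._json_description(0.93, priority=0, min_value=0.92)
  # would (with the `is not None` checks above) produce roughly:
  #   {"value": 0.93, "priority": 0, "min_value": 0.92, "succeeded": true}
  # (key order may vary across Python versions).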
def _get_model_dir(self, folder_name):
return os.path.join(self.output_dir, folder_name)
def _setup(self):
tf.logging.set_verbosity(tf.logging.DEBUG)
if EstimatorCifar10BenchmarkTests.local_flags is None:
cifar_main.define_cifar_flags()
      # Parses the flags once (the argument list holds only a dummy program
      # name) so defaults are loaded and can then be overridden.
flags.FLAGS(['foo'])
saved_flag_values = flagsaver.save_flag_values()
EstimatorCifar10BenchmarkTests.local_flags = saved_flag_values
return
flagsaver.restore_flag_values(EstimatorCifar10BenchmarkTests.local_flags)
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Runs a ResNet model on the ImageNet dataset."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
from absl import app as absl_app
from absl import flags
import tensorflow as tf # pylint: disable=g-bad-import-order
from official.utils.flags import core as flags_core
from official.utils.logs import logger
from official.resnet import imagenet_preprocessing
from official.resnet import resnet_model
from official.resnet import resnet_run_loop
DEFAULT_IMAGE_SIZE = 224
NUM_CHANNELS = 3
NUM_CLASSES = 1001
NUM_IMAGES = {
'train': 1281167,
'validation': 50000,
}
_NUM_TRAIN_FILES = 1024
_SHUFFLE_BUFFER = 10000
DATASET_NAME = 'ImageNet'
###############################################################################
# Data processing
###############################################################################
def get_filenames(is_training, data_dir):
"""Return filenames for dataset."""
if is_training:
return [
os.path.join(data_dir, 'train-%05d-of-01024' % i)
for i in range(_NUM_TRAIN_FILES)]
else:
return [
os.path.join(data_dir, 'validation-%05d-of-00128' % i)
for i in range(128)]
def _parse_example_proto(example_serialized):
"""Parses an Example proto containing a training example of an image.
The output of the build_image_data.py image preprocessing script is a dataset
containing serialized Example protocol buffers. Each Example proto contains
the following fields (values are included as examples):
image/height: 462
image/width: 581
image/colorspace: 'RGB'
image/channels: 3
image/class/label: 615
image/class/synset: 'n03623198'
image/class/text: 'knee pad'
image/object/bbox/xmin: 0.1
image/object/bbox/xmax: 0.9
image/object/bbox/ymin: 0.2
image/object/bbox/ymax: 0.6
image/object/bbox/label: 615
image/format: 'JPEG'
image/filename: 'ILSVRC2012_val_00041207.JPEG'
image/encoded: <JPEG encoded string>
Args:
example_serialized: scalar Tensor tf.string containing a serialized
Example protocol buffer.
Returns:
image_buffer: Tensor tf.string containing the contents of a JPEG file.
label: Tensor tf.int32 containing the label.
bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
where each coordinate is [0, 1) and the coordinates are arranged as
[ymin, xmin, ymax, xmax].
"""
# Dense features in Example proto.
feature_map = {
'image/encoded': tf.FixedLenFeature([], dtype=tf.string,
default_value=''),
'image/class/label': tf.FixedLenFeature([], dtype=tf.int64,
default_value=-1),
'image/class/text': tf.FixedLenFeature([], dtype=tf.string,
default_value=''),
}
sparse_float32 = tf.VarLenFeature(dtype=tf.float32)
# Sparse features in Example proto.
feature_map.update(
{k: sparse_float32 for k in ['image/object/bbox/xmin',
'image/object/bbox/ymin',
'image/object/bbox/xmax',
'image/object/bbox/ymax']})
features = tf.parse_single_example(example_serialized, feature_map)
label = tf.cast(features['image/class/label'], dtype=tf.int32)
xmin = tf.expand_dims(features['image/object/bbox/xmin'].values, 0)
ymin = tf.expand_dims(features['image/object/bbox/ymin'].values, 0)
xmax = tf.expand_dims(features['image/object/bbox/xmax'].values, 0)
ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 0)
  # Note that the coordinates are ordered (y, x) to match the convention
  # used by tf.image.sample_distorted_bounding_box downstream.
bbox = tf.concat([ymin, xmin, ymax, xmax], 0)
# Force the variable number of bounding boxes into the shape
# [1, num_boxes, coords].
bbox = tf.expand_dims(bbox, 0)
bbox = tf.transpose(bbox, [0, 2, 1])
return features['image/encoded'], label, bbox
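# A minimal sketch of serializing a record that _parse_example_proto can
# consume, e.g. for unit tests. The `jpeg_bytes` value and the label below
# are hypothetical placeholders:
#
#   example = tf.train.Example(features=tf.train.Features(feature={
#       'image/encoded': tf.train.Feature(
#           bytes_list=tf.train.BytesList(value=[jpeg_bytes])),
#       'image/class/label': tf.train.Feature(
#           int64_list=tf.train.Int64List(value=[615])),
#       'image/object/bbox/xmin': tf.train.Feature(
#           float_list=tf.train.FloatList(value=[0.1])),
#       'image/object/bbox/ymin': tf.train.Feature(
#           float_list=tf.train.FloatList(value=[0.2])),
#       'image/object/bbox/xmax': tf.train.Feature(
#           float_list=tf.train.FloatList(value=[0.9])),
#       'image/object/bbox/ymax': tf.train.Feature(
#           float_list=tf.train.FloatList(value=[0.6])),
#   }))
#   serialized = example.SerializeToString()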
def parse_record(raw_record, is_training, dtype):
"""Parses a record containing a training example of an image.
The input record is parsed into a label and image, and the image is passed
through preprocessing steps (cropping, flipping, and so on).
Args:
raw_record: scalar Tensor tf.string containing a serialized
Example protocol buffer.
is_training: A boolean denoting whether the input is for training.
dtype: data type to use for images/features.
Returns:
Tuple with processed image tensor and one-hot-encoded label tensor.
"""
image_buffer, label, bbox = _parse_example_proto(raw_record)
image = imagenet_preprocessing.preprocess_image(
image_buffer=image_buffer,
bbox=bbox,
output_height=DEFAULT_IMAGE_SIZE,
output_width=DEFAULT_IMAGE_SIZE,
num_channels=NUM_CHANNELS,
is_training=is_training)
image = tf.cast(image, dtype)
return image, label
def input_fn(is_training, data_dir, batch_size, num_epochs=1,
dtype=tf.float32, datasets_num_private_threads=None,
num_parallel_batches=1, parse_record_fn=parse_record):
"""Input function which provides batches for train or eval.
Args:
is_training: A boolean denoting whether the input is for training.
data_dir: The directory containing the input data.
batch_size: The number of samples per batch.
num_epochs: The number of epochs to repeat the dataset.
dtype: Data type to use for images/features
datasets_num_private_threads: Number of private threads for tf.data.
num_parallel_batches: Number of parallel batches for tf.data.
parse_record_fn: Function to use for parsing the records.
Returns:
A dataset that can be used for iteration.
"""
filenames = get_filenames(is_training, data_dir)
dataset = tf.data.Dataset.from_tensor_slices(filenames)
if is_training:
# Shuffle the input files
dataset = dataset.shuffle(buffer_size=_NUM_TRAIN_FILES)
# Convert to individual records.
# cycle_length = 10 means 10 files will be read and deserialized in parallel.
# This number is low enough to not cause too much contention on small systems
# but high enough to provide the benefits of parallelization. You may want
# to increase this number if you have a large number of CPU cores.
dataset = dataset.apply(tf.contrib.data.parallel_interleave(
tf.data.TFRecordDataset, cycle_length=10))
return resnet_run_loop.process_record_dataset(
dataset=dataset,
is_training=is_training,
batch_size=batch_size,
shuffle_buffer=_SHUFFLE_BUFFER,
parse_record_fn=parse_record_fn,
num_epochs=num_epochs,
dtype=dtype,
datasets_num_private_threads=datasets_num_private_threads,
num_parallel_batches=num_parallel_batches
)
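# A minimal usage sketch (the data_dir path is a hypothetical placeholder):
#
#   train_dataset = input_fn(is_training=True, data_dir='/data/imagenet',
#                            batch_size=128, num_epochs=90)
#
# The returned tf.data.Dataset yields batched (image, label) pairs suitable
# for an Estimator input function.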
def get_synth_input_fn(dtype):
return resnet_run_loop.get_synth_input_fn(
DEFAULT_IMAGE_SIZE, DEFAULT_IMAGE_SIZE, NUM_CHANNELS, NUM_CLASSES,
dtype=dtype)
###############################################################################
# Running the model
###############################################################################
class ImagenetModel(resnet_model.Model):
"""Model class with appropriate defaults for Imagenet data."""
def __init__(self, resnet_size, data_format=None, num_classes=NUM_CLASSES,
resnet_version=resnet_model.DEFAULT_VERSION,
dtype=resnet_model.DEFAULT_DTYPE):
"""These are the parameters that work for Imagenet data.
Args:
resnet_size: The number of convolutional layers needed in the model.
data_format: Either 'channels_first' or 'channels_last', specifying which
data format to use when setting up the model.
num_classes: The number of output classes needed from the model. This
enables users to extend the same model to their own datasets.
resnet_version: Integer representing which version of the ResNet network
to use. See README for details. Valid values: [1, 2]
dtype: The TensorFlow dtype to use for calculations.
"""
    # For bigger models, we want to use "bottleneck" layers.
    bottleneck = resnet_size >= 50
super(ImagenetModel, self).__init__(
resnet_size=resnet_size,
bottleneck=bottleneck,
num_classes=num_classes,
num_filters=64,
kernel_size=7,
conv_stride=2,
first_pool_size=3,
first_pool_stride=2,
block_sizes=_get_block_sizes(resnet_size),
block_strides=[1, 2, 2, 2],
resnet_version=resnet_version,
data_format=data_format,
dtype=dtype
)
def _get_block_sizes(resnet_size):
"""Retrieve the size of each block_layer in the ResNet model.
The number of block layers used for the Resnet model varies according
to the size of the model. This helper grabs the layer set we want, throwing
an error if a non-standard size has been selected.
Args:
resnet_size: The number of convolutional layers needed in the model.
Returns:
A list of block sizes to use in building the model.
Raises:
    ValueError: if invalid resnet_size is received.
"""
choices = {
18: [2, 2, 2, 2],
34: [3, 4, 6, 3],
50: [3, 4, 6, 3],
101: [3, 4, 23, 3],
152: [3, 8, 36, 3],
200: [3, 24, 36, 3]
}
try:
return choices[resnet_size]
except KeyError:
err = ('Could not find layers for selected Resnet size.\n'
'Size received: {}; sizes allowed: {}.'.format(
resnet_size, choices.keys()))
raise ValueError(err)
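# As a sanity check on the table above, each entry reproduces the model's
# nominal depth: a building block contains 2 conv layers, a bottleneck block
# contains 3, and every model adds one initial conv and one final dense
# layer. For example:
#   ResNet-18:  2 * (2 + 2 + 2 + 2) + 2 = 18    (building blocks)
#   ResNet-50:  3 * (3 + 4 + 6 + 3) + 2 = 50    (bottleneck blocks)
#   ResNet-152: 3 * (3 + 8 + 36 + 3) + 2 = 152  (bottleneck blocks)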
def imagenet_model_fn(features, labels, mode, params):
"""Our model_fn for ResNet to be used with our Estimator."""
# Warmup and higher lr may not be valid for fine tuning with small batches
# and smaller numbers of training images.
if params['fine_tune']:
warmup = False
base_lr = .1
else:
warmup = True
base_lr = .128
learning_rate_fn = resnet_run_loop.learning_rate_with_decay(
batch_size=params['batch_size'], batch_denom=256,
num_images=NUM_IMAGES['train'], boundary_epochs=[30, 60, 80, 90],
decay_rates=[1, 0.1, 0.01, 0.001, 1e-4], warmup=warmup, base_lr=base_lr)
return resnet_run_loop.resnet_model_fn(
features=features,
labels=labels,
mode=mode,
model_class=ImagenetModel,
resnet_size=params['resnet_size'],
weight_decay=1e-4,
learning_rate_fn=learning_rate_fn,
momentum=0.9,
data_format=params['data_format'],
resnet_version=params['resnet_version'],
loss_scale=params['loss_scale'],
loss_filter_fn=None,
dtype=params['dtype'],
fine_tune=params['fine_tune']
)
def define_imagenet_flags():
resnet_run_loop.define_resnet_flags(
resnet_size_choices=['18', '34', '50', '101', '152', '200'])
flags.adopt_module_key_flags(resnet_run_loop)
flags_core.set_defaults(train_epochs=90)
def run_imagenet(flags_obj):
"""Run ResNet ImageNet training and eval loop.
Args:
flags_obj: An object containing parsed flag values.
"""
  input_function = (get_synth_input_fn(flags_core.get_tf_dtype(flags_obj))
                    if flags_obj.use_synthetic_data else input_fn)
resnet_run_loop.resnet_main(
flags_obj, imagenet_model_fn, input_function, DATASET_NAME,
shape=[DEFAULT_IMAGE_SIZE, DEFAULT_IMAGE_SIZE, NUM_CHANNELS])
def main(_):
with logger.benchmark_context(flags.FLAGS):
run_imagenet(flags.FLAGS)
if __name__ == '__main__':
tf.logging.set_verbosity(tf.logging.INFO)
define_imagenet_flags()
absl_app.run(main)
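# A minimal sketch of invoking this script from the command line; the
# data_dir path is a hypothetical placeholder:
#
#   python imagenet_main.py --data_dir=/data/imagenet \
#       --model_dir=/tmp/imagenet_model --resnet_size=50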
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides utilities to preprocess images.
Training images are sampled using the provided bounding boxes, and subsequently
cropped to the sampled bounding box. Images are additionally flipped randomly,
then resized to the target output size (without aspect-ratio preservation).
Images used during evaluation are resized (with aspect-ratio preservation) and
centrally cropped.
All images undergo mean color subtraction.
Note that these steps are colloquially referred to as "ResNet preprocessing,"
and they differ from "VGG preprocessing," which does not use bounding boxes
and instead does an aspect-preserving resize followed by random crop during
training. (These both differ from "Inception preprocessing," which introduces
color distortion steps.)
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import tensorflow as tf
_R_MEAN = 123.68
_G_MEAN = 116.78
_B_MEAN = 103.94
_CHANNEL_MEANS = [_R_MEAN, _G_MEAN, _B_MEAN]
# The lower bound for the smallest side of the image for aspect-preserving
# resizing. For example, if an image is 500 x 1000, it will be resized to
# _RESIZE_MIN x (_RESIZE_MIN * 2).
_RESIZE_MIN = 256
def _decode_crop_and_flip(image_buffer, bbox, num_channels):
"""Crops the given image to a random part of the image, and randomly flips.
We use the fused decode_and_crop op, which performs better than the two ops
used separately in series, but note that this requires that the image be
passed in as an un-decoded string Tensor.
Args:
image_buffer: scalar string Tensor representing the raw JPEG image buffer.
bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
where each coordinate is [0, 1) and the coordinates are arranged as
[ymin, xmin, ymax, xmax].
num_channels: Integer depth of the image buffer for decoding.
Returns:
3-D tensor with cropped image.
"""
# A large fraction of image datasets contain a human-annotated bounding box
# delineating the region of the image containing the object of interest. We
# choose to create a new bounding box for the object which is a randomly
# distorted version of the human-annotated bounding box that obeys an
# allowed range of aspect ratios, sizes and overlap with the human-annotated
# bounding box. If no box is supplied, then we assume the bounding box is
# the entire image.
sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box(
tf.image.extract_jpeg_shape(image_buffer),
bounding_boxes=bbox,
min_object_covered=0.1,
aspect_ratio_range=[0.75, 1.33],
area_range=[0.05, 1.0],
max_attempts=100,
use_image_if_no_bounding_boxes=True)
bbox_begin, bbox_size, _ = sample_distorted_bounding_box
# Reassemble the bounding box in the format the crop op requires.
offset_y, offset_x, _ = tf.unstack(bbox_begin)
target_height, target_width, _ = tf.unstack(bbox_size)
crop_window = tf.stack([offset_y, offset_x, target_height, target_width])
# Use the fused decode and crop op here, which is faster than each in series.
cropped = tf.image.decode_and_crop_jpeg(
image_buffer, crop_window, channels=num_channels)
# Flip to add a little more random distortion in.
cropped = tf.image.random_flip_left_right(cropped)
return cropped
def _central_crop(image, crop_height, crop_width):
"""Performs central crops of the given image list.
Args:
image: a 3-D image tensor
crop_height: the height of the image following the crop.
crop_width: the width of the image following the crop.
Returns:
3-D tensor with cropped image.
"""
shape = tf.shape(image)
height, width = shape[0], shape[1]
amount_to_be_cropped_h = (height - crop_height)
crop_top = amount_to_be_cropped_h // 2
amount_to_be_cropped_w = (width - crop_width)
crop_left = amount_to_be_cropped_w // 2
return tf.slice(
image, [crop_top, crop_left, 0], [crop_height, crop_width, -1])
def _mean_image_subtraction(image, means, num_channels):
"""Subtracts the given means from each image channel.
For example:
means = [123.68, 116.779, 103.939]
image = _mean_image_subtraction(image, means)
Note that the rank of `image` must be known.
Args:
image: a tensor of size [height, width, C].
means: a C-vector of values to subtract from each channel.
num_channels: number of color channels in the image that will be distorted.
Returns:
the centered image.
Raises:
ValueError: If the rank of `image` is unknown, if `image` has a rank other
than three or if the number of channels in `image` doesn't match the
number of values in `means`.
"""
if image.get_shape().ndims != 3:
raise ValueError('Input must be of size [height, width, C>0]')
if len(means) != num_channels:
raise ValueError('len(means) must match the number of channels')
# We have a 1-D tensor of means; convert to 3-D.
means = tf.expand_dims(tf.expand_dims(means, 0), 0)
return image - means
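# Broadcasting note: after the two expand_dims calls above, `means` has shape
# [1, 1, 3], so the subtraction removes the per-channel mean from every pixel
# of a [height, width, 3] image. For example, a pixel [130.0, 120.0, 110.0]
# becomes approximately [6.32, 3.22, 6.06] under _CHANNEL_MEANS.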
def _smallest_size_at_least(height, width, resize_min):
"""Computes new shape with the smallest side equal to `smallest_side`.
Computes new shape with the smallest side equal to `smallest_side` while
preserving the original aspect ratio.
Args:
height: an int32 scalar tensor indicating the current height.
width: an int32 scalar tensor indicating the current width.
resize_min: A python integer or scalar `Tensor` indicating the size of
the smallest side after resize.
Returns:
new_height: an int32 scalar tensor indicating the new height.
new_width: an int32 scalar tensor indicating the new width.
"""
resize_min = tf.cast(resize_min, tf.float32)
# Convert to floats to make subsequent calculations go smoothly.
height, width = tf.cast(height, tf.float32), tf.cast(width, tf.float32)
smaller_dim = tf.minimum(height, width)
scale_ratio = resize_min / smaller_dim
# Convert back to ints to make heights and widths that TF ops will accept.
new_height = tf.cast(height * scale_ratio, tf.int32)
new_width = tf.cast(width * scale_ratio, tf.int32)
return new_height, new_width
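# Worked example: for a 500 x 1000 image with resize_min=256, smaller_dim is
# 500, scale_ratio is 256 / 500 = 0.512, and the new shape is 256 x 512,
# matching the _RESIZE_MIN comment near the top of this file.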
def _aspect_preserving_resize(image, resize_min):
"""Resize images preserving the original aspect ratio.
Args:
image: A 3-D image `Tensor`.
resize_min: A python integer or scalar `Tensor` indicating the size of
the smallest side after resize.
Returns:
resized_image: A 3-D tensor containing the resized image.
"""
shape = tf.shape(image)
height, width = shape[0], shape[1]
new_height, new_width = _smallest_size_at_least(height, width, resize_min)
return _resize_image(image, new_height, new_width)
def _resize_image(image, height, width):
"""Simple wrapper around tf.resize_images.
This is primarily to make sure we use the same `ResizeMethod` and other
details each time.
Args:
image: A 3-D image `Tensor`.
height: The target height for the resized image.
width: The target width for the resized image.
Returns:
resized_image: A 3-D tensor containing the resized image. The first two
dimensions have the shape [height, width].
"""
return tf.image.resize_images(
image, [height, width], method=tf.image.ResizeMethod.BILINEAR,
align_corners=False)
def preprocess_image(image_buffer, bbox, output_height, output_width,
num_channels, is_training=False):
"""Preprocesses the given image.
Preprocessing includes decoding, cropping, and resizing for both training
and eval images. Training preprocessing, however, introduces some random
distortion of the image to improve accuracy.
Args:
image_buffer: scalar string Tensor representing the raw JPEG image buffer.
bbox: 3-D float Tensor of bounding boxes arranged [1, num_boxes, coords]
where each coordinate is [0, 1) and the coordinates are arranged as
[ymin, xmin, ymax, xmax].
output_height: The height of the image after preprocessing.
output_width: The width of the image after preprocessing.
num_channels: Integer depth of the image buffer for decoding.
is_training: `True` if we're preprocessing the image for training and
`False` otherwise.
Returns:
A preprocessed image.
"""
if is_training:
# For training, we want to randomize some of the distortions.
image = _decode_crop_and_flip(image_buffer, bbox, num_channels)
image = _resize_image(image, output_height, output_width)
else:
# For validation, we want to decode, resize, then just crop the middle.
image = tf.image.decode_jpeg(image_buffer, channels=num_channels)
image = _aspect_preserving_resize(image, _RESIZE_MIN)
image = _central_crop(image, output_height, output_width)
image.set_shape([output_height, output_width, num_channels])
return _mean_image_subtraction(image, _CHANNEL_MEANS, num_channels)
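# A minimal sketch of applying this preprocessing inside a tf.data pipeline;
# `parsed_dataset` is a hypothetical stand-in for a dataset yielding
# (image_buffer, bbox) pairs as produced by the caller's record parser:
#
#   def _map_fn(image_buffer, bbox):
#     return preprocess_image(image_buffer, bbox, output_height=224,
#                             output_width=224, num_channels=3,
#                             is_training=True)
#   images = parsed_dataset.map(_map_fn)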
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import unittest
import tensorflow as tf # pylint: disable=g-bad-import-order
from official.resnet import imagenet_main
from official.utils.testing import integration
tf.logging.set_verbosity(tf.logging.ERROR)
_BATCH_SIZE = 32
_LABEL_CLASSES = 1001
class BaseTest(tf.test.TestCase):
@classmethod
def setUpClass(cls): # pylint: disable=invalid-name
super(BaseTest, cls).setUpClass()
imagenet_main.define_imagenet_flags()
def tearDown(self):
super(BaseTest, self).tearDown()
tf.gfile.DeleteRecursively(self.get_temp_dir())
def _tensor_shapes_helper(self, resnet_size, resnet_version, dtype, with_gpu):
"""Checks the tensor shapes after each phase of the ResNet model."""
def reshape(shape):
"""Returns the expected dimensions depending on if a GPU is being used."""
# If a GPU is used for the test, the shape is returned (already in NCHW
# form). When GPU is not used, the shape is converted to NHWC.
if with_gpu:
return shape
return shape[0], shape[2], shape[3], shape[1]
graph = tf.Graph()
with graph.as_default(), self.test_session(
graph=graph, use_gpu=with_gpu, force_gpu=with_gpu):
model = imagenet_main.ImagenetModel(
resnet_size=resnet_size,
data_format='channels_first' if with_gpu else 'channels_last',
resnet_version=resnet_version,
dtype=dtype
)
inputs = tf.random_uniform([1, 224, 224, 3])
output = model(inputs, training=True)
initial_conv = graph.get_tensor_by_name('resnet_model/initial_conv:0')
max_pool = graph.get_tensor_by_name('resnet_model/initial_max_pool:0')
block_layer1 = graph.get_tensor_by_name('resnet_model/block_layer1:0')
block_layer2 = graph.get_tensor_by_name('resnet_model/block_layer2:0')
block_layer3 = graph.get_tensor_by_name('resnet_model/block_layer3:0')
block_layer4 = graph.get_tensor_by_name('resnet_model/block_layer4:0')
reduce_mean = graph.get_tensor_by_name('resnet_model/final_reduce_mean:0')
dense = graph.get_tensor_by_name('resnet_model/final_dense:0')
self.assertAllEqual(initial_conv.shape, reshape((1, 64, 112, 112)))
self.assertAllEqual(max_pool.shape, reshape((1, 64, 56, 56)))
# The number of channels after each block depends on whether we're
# using the building_block or the bottleneck_block.
if resnet_size < 50:
self.assertAllEqual(block_layer1.shape, reshape((1, 64, 56, 56)))
self.assertAllEqual(block_layer2.shape, reshape((1, 128, 28, 28)))
self.assertAllEqual(block_layer3.shape, reshape((1, 256, 14, 14)))
self.assertAllEqual(block_layer4.shape, reshape((1, 512, 7, 7)))
self.assertAllEqual(reduce_mean.shape, reshape((1, 512, 1, 1)))
else:
self.assertAllEqual(block_layer1.shape, reshape((1, 256, 56, 56)))
self.assertAllEqual(block_layer2.shape, reshape((1, 512, 28, 28)))
self.assertAllEqual(block_layer3.shape, reshape((1, 1024, 14, 14)))
self.assertAllEqual(block_layer4.shape, reshape((1, 2048, 7, 7)))
self.assertAllEqual(reduce_mean.shape, reshape((1, 2048, 1, 1)))
self.assertAllEqual(dense.shape, (1, _LABEL_CLASSES))
self.assertAllEqual(output.shape, (1, _LABEL_CLASSES))
def tensor_shapes_helper(self, resnet_size, resnet_version, with_gpu=False):
self._tensor_shapes_helper(resnet_size=resnet_size,
resnet_version=resnet_version,
dtype=tf.float32, with_gpu=with_gpu)
self._tensor_shapes_helper(resnet_size=resnet_size,
resnet_version=resnet_version,
dtype=tf.float16, with_gpu=with_gpu)
def test_tensor_shapes_resnet_18_v1(self):
self.tensor_shapes_helper(18, resnet_version=1)
def test_tensor_shapes_resnet_18_v2(self):
self.tensor_shapes_helper(18, resnet_version=2)
def test_tensor_shapes_resnet_34_v1(self):
self.tensor_shapes_helper(34, resnet_version=1)
def test_tensor_shapes_resnet_34_v2(self):
self.tensor_shapes_helper(34, resnet_version=2)
def test_tensor_shapes_resnet_50_v1(self):
self.tensor_shapes_helper(50, resnet_version=1)
def test_tensor_shapes_resnet_50_v2(self):
self.tensor_shapes_helper(50, resnet_version=2)
def test_tensor_shapes_resnet_101_v1(self):
self.tensor_shapes_helper(101, resnet_version=1)
def test_tensor_shapes_resnet_101_v2(self):
self.tensor_shapes_helper(101, resnet_version=2)
def test_tensor_shapes_resnet_152_v1(self):
self.tensor_shapes_helper(152, resnet_version=1)
def test_tensor_shapes_resnet_152_v2(self):
self.tensor_shapes_helper(152, resnet_version=2)
def test_tensor_shapes_resnet_200_v1(self):
self.tensor_shapes_helper(200, resnet_version=1)
def test_tensor_shapes_resnet_200_v2(self):
self.tensor_shapes_helper(200, resnet_version=2)
@unittest.skipUnless(tf.test.is_built_with_cuda(), 'requires GPU')
def test_tensor_shapes_resnet_18_with_gpu_v1(self):
self.tensor_shapes_helper(18, resnet_version=1, with_gpu=True)
@unittest.skipUnless(tf.test.is_built_with_cuda(), 'requires GPU')
def test_tensor_shapes_resnet_18_with_gpu_v2(self):
self.tensor_shapes_helper(18, resnet_version=2, with_gpu=True)
@unittest.skipUnless(tf.test.is_built_with_cuda(), 'requires GPU')
def test_tensor_shapes_resnet_34_with_gpu_v1(self):
self.tensor_shapes_helper(34, resnet_version=1, with_gpu=True)
@unittest.skipUnless(tf.test.is_built_with_cuda(), 'requires GPU')
def test_tensor_shapes_resnet_34_with_gpu_v2(self):
self.tensor_shapes_helper(34, resnet_version=2, with_gpu=True)
@unittest.skipUnless(tf.test.is_built_with_cuda(), 'requires GPU')
def test_tensor_shapes_resnet_50_with_gpu_v1(self):
self.tensor_shapes_helper(50, resnet_version=1, with_gpu=True)
@unittest.skipUnless(tf.test.is_built_with_cuda(), 'requires GPU')
def test_tensor_shapes_resnet_50_with_gpu_v2(self):
self.tensor_shapes_helper(50, resnet_version=2, with_gpu=True)
@unittest.skipUnless(tf.test.is_built_with_cuda(), 'requires GPU')
def test_tensor_shapes_resnet_101_with_gpu_v1(self):
self.tensor_shapes_helper(101, resnet_version=1, with_gpu=True)
@unittest.skipUnless(tf.test.is_built_with_cuda(), 'requires GPU')
def test_tensor_shapes_resnet_101_with_gpu_v2(self):
self.tensor_shapes_helper(101, resnet_version=2, with_gpu=True)
@unittest.skipUnless(tf.test.is_built_with_cuda(), 'requires GPU')
def test_tensor_shapes_resnet_152_with_gpu_v1(self):
self.tensor_shapes_helper(152, resnet_version=1, with_gpu=True)
@unittest.skipUnless(tf.test.is_built_with_cuda(), 'requires GPU')
def test_tensor_shapes_resnet_152_with_gpu_v2(self):
self.tensor_shapes_helper(152, resnet_version=2, with_gpu=True)
@unittest.skipUnless(tf.test.is_built_with_cuda(), 'requires GPU')
def test_tensor_shapes_resnet_200_with_gpu_v1(self):
self.tensor_shapes_helper(200, resnet_version=1, with_gpu=True)
@unittest.skipUnless(tf.test.is_built_with_cuda(), 'requires GPU')
def test_tensor_shapes_resnet_200_with_gpu_v2(self):
self.tensor_shapes_helper(200, resnet_version=2, with_gpu=True)
def resnet_model_fn_helper(self, mode, resnet_version, dtype):
"""Tests that the EstimatorSpec is given the appropriate arguments."""
tf.train.create_global_step()
input_fn = imagenet_main.get_synth_input_fn(dtype)
dataset = input_fn(True, '', _BATCH_SIZE)
iterator = dataset.make_initializable_iterator()
features, labels = iterator.get_next()
spec = imagenet_main.imagenet_model_fn(
features, labels, mode, {
'dtype': dtype,
'resnet_size': 50,
'data_format': 'channels_last',
'batch_size': _BATCH_SIZE,
'resnet_version': resnet_version,
'loss_scale': 128 if dtype == tf.float16 else 1,
'fine_tune': False,
})
predictions = spec.predictions
self.assertAllEqual(predictions['probabilities'].shape,
(_BATCH_SIZE, _LABEL_CLASSES))
self.assertEqual(predictions['probabilities'].dtype, tf.float32)
self.assertAllEqual(predictions['classes'].shape, (_BATCH_SIZE,))
self.assertEqual(predictions['classes'].dtype, tf.int64)
if mode != tf.estimator.ModeKeys.PREDICT:
loss = spec.loss
self.assertAllEqual(loss.shape, ())
self.assertEqual(loss.dtype, tf.float32)
if mode == tf.estimator.ModeKeys.EVAL:
eval_metric_ops = spec.eval_metric_ops
self.assertAllEqual(eval_metric_ops['accuracy'][0].shape, ())
self.assertAllEqual(eval_metric_ops['accuracy'][1].shape, ())
self.assertEqual(eval_metric_ops['accuracy'][0].dtype, tf.float32)
self.assertEqual(eval_metric_ops['accuracy'][1].dtype, tf.float32)
def test_resnet_model_fn_train_mode_v1(self):
self.resnet_model_fn_helper(tf.estimator.ModeKeys.TRAIN, resnet_version=1,
dtype=tf.float32)
def test_resnet_model_fn_train_mode_v2(self):
self.resnet_model_fn_helper(tf.estimator.ModeKeys.TRAIN, resnet_version=2,
dtype=tf.float32)
def test_resnet_model_fn_eval_mode_v1(self):
self.resnet_model_fn_helper(tf.estimator.ModeKeys.EVAL, resnet_version=1,
dtype=tf.float32)
def test_resnet_model_fn_eval_mode_v2(self):
self.resnet_model_fn_helper(tf.estimator.ModeKeys.EVAL, resnet_version=2,
dtype=tf.float32)
def test_resnet_model_fn_predict_mode_v1(self):
self.resnet_model_fn_helper(tf.estimator.ModeKeys.PREDICT, resnet_version=1,
dtype=tf.float32)
def test_resnet_model_fn_predict_mode_v2(self):
self.resnet_model_fn_helper(tf.estimator.ModeKeys.PREDICT, resnet_version=2,
dtype=tf.float32)
def _test_imagenetmodel_shape(self, resnet_version):
batch_size = 135
num_classes = 246
model = imagenet_main.ImagenetModel(
50, data_format='channels_last', num_classes=num_classes,
resnet_version=resnet_version)
fake_input = tf.random_uniform([batch_size, 224, 224, 3])
output = model(fake_input, training=True)
self.assertAllEqual(output.shape, (batch_size, num_classes))
def test_imagenetmodel_shape_v1(self):
self._test_imagenetmodel_shape(resnet_version=1)
def test_imagenetmodel_shape_v2(self):
self._test_imagenetmodel_shape(resnet_version=2)
def test_imagenet_end_to_end_synthetic_v1(self):
integration.run_synthetic(
main=imagenet_main.run_imagenet, tmp_root=self.get_temp_dir(),
        extra_flags=['-resnet_version', '1']
)
def test_imagenet_end_to_end_synthetic_v2(self):
integration.run_synthetic(
main=imagenet_main.run_imagenet, tmp_root=self.get_temp_dir(),
        extra_flags=['-resnet_version', '2']
)
def test_imagenet_end_to_end_synthetic_v1_tiny(self):
integration.run_synthetic(
main=imagenet_main.run_imagenet, tmp_root=self.get_temp_dir(),
extra_flags=['-resnet_version', '1', '-resnet_size', '18']
)
def test_imagenet_end_to_end_synthetic_v2_tiny(self):
integration.run_synthetic(
main=imagenet_main.run_imagenet, tmp_root=self.get_temp_dir(),
extra_flags=['-resnet_version', '2', '-resnet_size', '18']
)
def test_imagenet_end_to_end_synthetic_v1_huge(self):
integration.run_synthetic(
main=imagenet_main.run_imagenet, tmp_root=self.get_temp_dir(),
extra_flags=['-resnet_version', '1', '-resnet_size', '200']
)
def test_imagenet_end_to_end_synthetic_v2_huge(self):
integration.run_synthetic(
main=imagenet_main.run_imagenet, tmp_root=self.get_temp_dir(),
extra_flags=['-resnet_version', '2', '-resnet_size', '200']
)
if __name__ == '__main__':
tf.test.main()
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Executes Keras benchmarks and accuracy tests."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import json
import os
import time
from absl import flags
from absl.testing import flagsaver
import tensorflow as tf # pylint: disable=g-bad-import-order
FLAGS = flags.FLAGS
class KerasBenchmark(tf.test.Benchmark):
"""Base benchmark class with methods to simplify testing."""
local_flags = None
def __init__(self, output_dir=None, default_flags=None, flag_methods=None):
self.output_dir = output_dir
self.default_flags = default_flags or {}
self.flag_methods = flag_methods or {}
def _get_model_dir(self, folder_name):
return os.path.join(self.output_dir, folder_name)
def _setup(self):
"""Sets up and resets flags before each test."""
tf.logging.set_verbosity(tf.logging.DEBUG)
if KerasBenchmark.local_flags is None:
for flag_method in self.flag_methods:
flag_method()
      # Parses the flags once to load their defaults; the list cannot be
      # empty and holds only a dummy program name.
flags.FLAGS(['foo'])
# Overrides flag values with defaults for the class of tests.
for k, v in self.default_flags.items():
setattr(FLAGS, k, v)
saved_flag_values = flagsaver.save_flag_values()
KerasBenchmark.local_flags = saved_flag_values
else:
flagsaver.restore_flag_values(KerasBenchmark.local_flags)
def _report_benchmark(self,
stats,
wall_time_sec,
top_1_max=None,
top_1_min=None,
log_steps=None,
total_batch_size=None,
warmup=1):
"""Report benchmark results by writing to local protobuf file
Args:
stats: dict returned from keras models with known entries.
wall_time_sec: the during of the benchmark execution in seconds
top_1_max: highest passing level for top_1 accuracy.
top_1_min: lowest passing level for top_1 accuracy.
log_steps: How often the log was created for stats['step_timestamp_log'].
total_batch_size: Global batch-size.
warmup: number of entries in stats['step_timestamp_log'] to ignore.
"""
extras = {}
if 'accuracy_top_1' in stats:
extras['accuracy'] = self._json_description(
stats['accuracy_top_1'],
priority=0,
min_value=top_1_min,
max_value=top_1_max)
extras['top_1_train_accuracy'] = self._json_description(
stats['training_accuracy_top_1'], priority=1)
if (warmup and 'step_timestamp_log' in stats and
len(stats['step_timestamp_log']) > warmup):
      # The first entry in time_log marks the start of step 1; the remaining
      # entries mark the end of each recorded step.
time_log = stats['step_timestamp_log']
elapsed = time_log[-1].timestamp - time_log[warmup].timestamp
num_examples = (
total_batch_size * log_steps * (len(time_log) - warmup - 1))
examples_per_sec = num_examples / elapsed
extras['exp_per_second'] = self._json_description(
examples_per_sec, priority=2)
if 'avg_exp_per_second' in stats:
extras['avg_exp_per_second'] = self._json_description(
stats['avg_exp_per_second'], priority=3)
self.report_benchmark(iters=-1, wall_time=wall_time_sec, extras=extras)
def _json_description(self,
value,
priority=None,
min_value=None,
max_value=None):
"""Get a json-formatted string describing the attributes for a metric"""
attributes = {}
attributes['value'] = value
if priority:
attributes['priority'] = priority
if min_value:
attributes['min_value'] = min_value
if max_value:
attributes['max_value'] = max_value
if min_value or max_value:
succeeded = True
if min_value and value < min_value:
succeeded = False
if max_value and value > max_value:
succeeded = False
attributes['succeeded'] = succeeded
return json.dumps(attributes)
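# Worked example of the exp_per_second computation in _report_benchmark
# above (all numbers here are hypothetical): with total_batch_size=128,
# log_steps=100, warmup=1, and a time_log of 11 entries,
#   num_examples = 128 * 100 * (11 - 1 - 1) = 115200,
# and if the elapsed time between time_log[1] and time_log[-1] is 60
# seconds, exp_per_second is reported as 115200 / 60 = 1920.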