"examples/vscode:/vscode.git/clone" did not exist on "dfd7eafbced75029555172515cb92daed48dd21a"
Commit 7c732da7 authored by Nimit Nigania

Merge remote-tracking branch 'upstream/master'

parents cb8ce606 e36934b3
@@ -152,7 +152,7 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
     self._setup()
     self.num_gpus = 1
     FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_squad')
-    FLAGS.train_batch_size = 4
+    FLAGS.train_batch_size = 3
     self._run_and_report_benchmark()
@@ -174,7 +174,7 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
     self._setup()
     self.num_gpus = 1
     FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_no_dist_strat_squad')
-    FLAGS.train_batch_size = 4
+    FLAGS.train_batch_size = 3
     self._run_and_report_benchmark(use_ds=False)
@@ -185,7 +185,7 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
     self.num_gpus = 1
     FLAGS.model_dir = self._get_model_dir(
         'benchmark_1_gpu_eager_no_dist_strat_squad')
-    FLAGS.train_batch_size = 4
+    FLAGS.train_batch_size = 3
     self._run_and_report_benchmark(use_ds=False, run_eagerly=True)
@@ -195,7 +195,7 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
     self._setup()
     self.num_gpus = 2
     FLAGS.model_dir = self._get_model_dir('benchmark_2_gpu_squad')
-    FLAGS.train_batch_size = 8
+    FLAGS.train_batch_size = 6
     self._run_and_report_benchmark()
@@ -205,7 +205,7 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
     self._setup()
     self.num_gpus = 4
     FLAGS.model_dir = self._get_model_dir('benchmark_4_gpu_squad')
-    FLAGS.train_batch_size = 16
+    FLAGS.train_batch_size = 12
     self._run_and_report_benchmark()
@@ -215,7 +215,7 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
     self._setup()
     self.num_gpus = 8
     FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_squad')
-    FLAGS.train_batch_size = 32
+    FLAGS.train_batch_size = 24
     self._run_and_report_benchmark()
@@ -231,6 +231,19 @@ class BertSquadBenchmarkReal(BertSquadBenchmarkBase):
     self._run_and_report_benchmark()

+  def benchmark_1_gpu_xla_fp16(self):
+    """Tests BERT SQuAD model performance with 1 GPU with XLA and FP16."""
+    self._setup()
+    self.num_gpus = 1
+    FLAGS.model_dir = self._get_model_dir('benchmark_1_gpu_xla_squad_fp16')
+    FLAGS.train_batch_size = 4
+    FLAGS.enable_xla = True
+    FLAGS.dtype = 'fp16'
+    FLAGS.loss_scale = 'dynamic'
+    self._run_and_report_benchmark()
+
   def benchmark_2_gpu_fp16(self):
     """Tests BERT SQuAD model performance with 2 GPUs and FP16."""
@@ -324,7 +337,7 @@ class BertSquadAccuracy(BertSquadBenchmarkBase):
     self._setup()
     self.num_gpus = 8
     FLAGS.model_dir = self._get_model_dir('benchmark_8_gpu_squad')
-    FLAGS.train_batch_size = 32
+    FLAGS.train_batch_size = 24
     self._run_and_report_benchmark()
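Note: the new benchmark_1_gpu_xla_fp16 case combines FLAGS.enable_xla, FLAGS.dtype='fp16', and FLAGS.loss_scale='dynamic'. As a hedged sketch (my illustration, not part of this commit), these flags roughly correspond to the following TF 2.x settings; the exact wiring lives in the benchmark base class:

import tensorflow as tf

# Enable XLA JIT compilation (what FLAGS.enable_xla toggles).
tf.config.optimizer.set_jit(True)

# fp16 compute with dynamic loss scaling (experimental API circa TF 2.1).
policy = tf.keras.mixed_precision.experimental.Policy(
    'mixed_float16', loss_scale='dynamic')
tf.keras.mixed_precision.experimental.set_policy(policy)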
...
@@ -276,6 +276,7 @@ class EmbeddingPostprocessor(tf.keras.layers.Layer):
                max_position_embeddings=512,
                dropout_prob=0.0,
                initializer_range=0.02,
+               initializer=None,
                **kwargs):
     super(EmbeddingPostprocessor, self).__init__(**kwargs)
     self.use_type_embeddings = use_type_embeddings
@@ -285,6 +286,11 @@ class EmbeddingPostprocessor(tf.keras.layers.Layer):
     self.dropout_prob = dropout_prob
     self.initializer_range = initializer_range
+    if not initializer:
+      self.initializer = get_initializer(self.initializer_range)
+    else:
+      self.initializer = initializer
     if self.use_type_embeddings and not self.token_type_vocab_size:
       raise ValueError("If `use_type_embeddings` is True, then "
                        "`token_type_vocab_size` must be specified.")
@@ -723,6 +729,15 @@ class TransformerBlock(tf.keras.layers.Layer):
         name="output_layer_norm", axis=-1, epsilon=1e-12)
     super(TransformerBlock, self).build(unused_input_shapes)

+  def common_layers(self):
+    """Explicitly gets all layer objects inside a Transformer encoder block."""
+    return [
+        self.attention_layer, self.attention_output_dense,
+        self.attention_dropout, self.attention_layer_norm,
+        self.intermediate_dense, self.output_dense, self.output_dropout,
+        self.output_layer_norm
+    ]
+
   def __call__(self, input_tensor, attention_mask=None):
     inputs = pack_inputs([input_tensor, attention_mask])
     return super(TransformerBlock, self).__call__(inputs)
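A self-contained toy analog (mine, with made-up layer names) of the common_layers() accessor pattern added here, useful when callers need to enumerate a block's sub-layers, e.g. to share or audit weights:

import tensorflow as tf

class TinyBlock(tf.keras.layers.Layer):
  """Toy stand-in for TransformerBlock's new common_layers() accessor."""

  def build(self, input_shape):
    self.dense_a = tf.keras.layers.Dense(4)
    self.dense_b = tf.keras.layers.Dense(4)
    super(TinyBlock, self).build(input_shape)

  def common_layers(self):
    # Explicitly enumerate sub-layers so callers can iterate over them.
    return [self.dense_a, self.dense_b]

  def call(self, inputs):
    return self.dense_b(self.dense_a(inputs))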
...
@@ -35,8 +35,8 @@ from official.bert import model_saving_utils
 from official.bert import model_training_utils
 from official.bert import modeling
 from official.bert import optimization
-from official.bert import tpu_lib
 from official.utils.misc import keras_utils
+from official.utils.misc import tpu_lib

 flags.DEFINE_enum(
     'mode', 'train_and_eval', ['train_and_eval', 'export_only'],
...
@@ -33,7 +33,7 @@ from official.bert import model_saving_utils
 from official.bert import model_training_utils
 from official.bert import modeling
 from official.bert import optimization
-from official.bert import tpu_lib
+from official.utils.misc import tpu_lib

 flags.DEFINE_string('input_files', None,
                     'File path to retrieve training data for pre-training.')
...
@@ -36,8 +36,8 @@ from official.bert import modeling
 from official.bert import optimization
 from official.bert import squad_lib
 from official.bert import tokenization
-from official.bert import tpu_lib
 from official.utils.misc import keras_utils
+from official.utils.misc import tpu_lib

 flags.DEFINE_bool('do_train', False, 'Whether to run training.')
 flags.DEFINE_bool('do_predict', False, 'Whether to run eval on the dev set.')
...
@@ -143,37 +143,32 @@ class DatasetManager(object):
     if is_training:
       return {
           movielens.USER_COLUMN:
-              tf.io.FixedLenFeature([batch_size], dtype=tf.int64),
+              tf.io.FixedLenFeature([batch_size, 1], dtype=tf.int64),
           movielens.ITEM_COLUMN:
-              tf.io.FixedLenFeature([batch_size], dtype=tf.int64),
+              tf.io.FixedLenFeature([batch_size, 1], dtype=tf.int64),
           rconst.VALID_POINT_MASK:
-              tf.io.FixedLenFeature([batch_size], dtype=tf.int64),
+              tf.io.FixedLenFeature([batch_size, 1], dtype=tf.int64),
           "labels":
-              tf.io.FixedLenFeature([batch_size], dtype=tf.int64)
+              tf.io.FixedLenFeature([batch_size, 1], dtype=tf.int64)
       }
     else:
       return {
           movielens.USER_COLUMN:
-              tf.io.FixedLenFeature([batch_size], dtype=tf.int64),
+              tf.io.FixedLenFeature([batch_size, 1], dtype=tf.int64),
           movielens.ITEM_COLUMN:
-              tf.io.FixedLenFeature([batch_size], dtype=tf.int64),
+              tf.io.FixedLenFeature([batch_size, 1], dtype=tf.int64),
           rconst.DUPLICATE_MASK:
-              tf.io.FixedLenFeature([batch_size], dtype=tf.int64)
+              tf.io.FixedLenFeature([batch_size, 1], dtype=tf.int64)
       }
     features = tf.io.parse_single_example(
         serialized_data, _get_feature_map(batch_size, is_training=is_training))
-    users = tf.reshape(
-        tf.cast(features[movielens.USER_COLUMN], rconst.USER_DTYPE),
-        (batch_size,))
-    items = tf.reshape(
-        tf.cast(features[movielens.ITEM_COLUMN], rconst.ITEM_DTYPE),
-        (batch_size,))
+    users = tf.cast(features[movielens.USER_COLUMN], rconst.USER_DTYPE)
+    items = tf.cast(features[movielens.ITEM_COLUMN], rconst.ITEM_DTYPE)

     if is_training:
-      valid_point_mask = tf.reshape(
-          tf.cast(features[movielens.ITEM_COLUMN], tf.bool), (batch_size,))
-      fake_dup_mask = tf.zeros_like(features[movielens.USER_COLUMN])
+      valid_point_mask = tf.cast(features[rconst.VALID_POINT_MASK], tf.bool)
+      fake_dup_mask = tf.zeros_like(users)
       return {
           movielens.USER_COLUMN: users,
           movielens.ITEM_COLUMN: items,
@@ -184,20 +179,15 @@ class DatasetManager(object):
           rconst.DUPLICATE_MASK: fake_dup_mask
       }
     else:
-      labels = tf.reshape(
-          tf.cast(tf.zeros_like(features[movielens.USER_COLUMN]), tf.bool),
-          (batch_size, 1))
-      fake_valid_pt_mask = tf.cast(
-          tf.zeros_like(features[movielens.USER_COLUMN]), tf.bool)
+      labels = tf.cast(tf.zeros_like(users), tf.bool)
+      fake_valid_pt_mask = tf.cast(tf.zeros_like(users), tf.bool)
       return {
           movielens.USER_COLUMN:
               users,
           movielens.ITEM_COLUMN:
               items,
           rconst.DUPLICATE_MASK:
-              tf.reshape(
-                  tf.cast(features[rconst.DUPLICATE_MASK], tf.bool),
-                  (batch_size,)),
+              tf.cast(features[rconst.DUPLICATE_MASK], tf.bool),
           rconst.VALID_POINT_MASK:
               fake_valid_pt_mask,
           rconst.TRAIN_LABEL_KEY:
@@ -221,8 +211,8 @@ class DatasetManager(object):
     if self._is_training:
       mask_start_index = data.pop(rconst.MASK_START_INDEX)
       batch_size = data[movielens.ITEM_COLUMN].shape[0]
-      data[rconst.VALID_POINT_MASK] = np.less(
-          np.arange(batch_size), mask_start_index)
+      data[rconst.VALID_POINT_MASK] = np.expand_dims(
+          np.less(np.arange(batch_size), mask_start_index), -1)

     if self._stream_files:
       example_bytes = self.serialize(data)
@@ -313,19 +303,21 @@ class DatasetManager(object):
     else:
       types = {movielens.USER_COLUMN: rconst.USER_DTYPE,
                movielens.ITEM_COLUMN: rconst.ITEM_DTYPE}
-      shapes = {movielens.USER_COLUMN: tf.TensorShape([batch_size]),
-                movielens.ITEM_COLUMN: tf.TensorShape([batch_size])}
+      shapes = {
+          movielens.USER_COLUMN: tf.TensorShape([batch_size, 1]),
+          movielens.ITEM_COLUMN: tf.TensorShape([batch_size, 1])
+      }

       if self._is_training:
         types[rconst.VALID_POINT_MASK] = np.bool
-        shapes[rconst.VALID_POINT_MASK] = tf.TensorShape([batch_size])
+        shapes[rconst.VALID_POINT_MASK] = tf.TensorShape([batch_size, 1])
         types = (types, np.bool)
-        shapes = (shapes, tf.TensorShape([batch_size]))
+        shapes = (shapes, tf.TensorShape([batch_size, 1]))
       else:
         types[rconst.DUPLICATE_MASK] = np.bool
-        shapes[rconst.DUPLICATE_MASK] = tf.TensorShape([batch_size])
+        shapes[rconst.DUPLICATE_MASK] = tf.TensorShape([batch_size, 1])

       data_generator = functools.partial(
           self.data_generator, epochs_between_evals=epochs_between_evals)
@@ -554,12 +546,17 @@ class BaseDataConstructor(threading.Thread):
       items = np.concatenate([items, item_pad])
       labels = np.concatenate([labels, label_pad])

-    self._train_dataset.put(i, {
-        movielens.USER_COLUMN: users,
-        movielens.ITEM_COLUMN: items,
-        rconst.MASK_START_INDEX: np.array(mask_start_index, dtype=np.int32),
-        "labels": labels,
-    })
+    self._train_dataset.put(
+        i, {
+            movielens.USER_COLUMN:
+                np.reshape(users, (self.train_batch_size, 1)),
+            movielens.ITEM_COLUMN:
+                np.reshape(items, (self.train_batch_size, 1)),
+            rconst.MASK_START_INDEX:
+                np.array(mask_start_index, dtype=np.int32),
+            "labels":
+                np.reshape(labels, (self.train_batch_size, 1)),
+        })

   def _wait_to_construct_train_epoch(self):
     count = 0
@@ -649,11 +646,15 @@ class BaseDataConstructor(threading.Thread):
       users, items, duplicate_mask = self._assemble_eval_batch(
           users, positive_items, negative_items, self._eval_users_per_batch)

-      self._eval_dataset.put(i, {
-          movielens.USER_COLUMN: users.flatten(),
-          movielens.ITEM_COLUMN: items.flatten(),
-          rconst.DUPLICATE_MASK: duplicate_mask.flatten(),
-      })
+      self._eval_dataset.put(
+          i, {
+              movielens.USER_COLUMN:
+                  np.reshape(users.flatten(), (self.eval_batch_size, 1)),
+              movielens.ITEM_COLUMN:
+                  np.reshape(items.flatten(), (self.eval_batch_size, 1)),
+              rconst.DUPLICATE_MASK:
+                  np.reshape(duplicate_mask.flatten(),
+                             (self.eval_batch_size, 1)),
+          })

   def _construct_eval_epoch(self):
     """Loop to construct data for evaluation."""
@@ -720,24 +721,37 @@ class DummyConstructor(threading.Thread):
       num_users = params["num_users"]
       num_items = params["num_items"]

-      users = tf.random.uniform([batch_size], dtype=tf.int32, minval=0,
-                                maxval=num_users)
-      items = tf.random.uniform([batch_size], dtype=tf.int32, minval=0,
-                                maxval=num_items)
+      users = tf.random.uniform([batch_size, 1],
+                                dtype=tf.int32,
+                                minval=0,
+                                maxval=num_users)
+      items = tf.random.uniform([batch_size, 1],
+                                dtype=tf.int32,
+                                minval=0,
+                                maxval=num_items)

       if is_training:
-        valid_point_mask = tf.cast(tf.random.uniform(
-            [batch_size], dtype=tf.int32, minval=0, maxval=2), tf.bool)
-        labels = tf.cast(tf.random.uniform(
-            [batch_size], dtype=tf.int32, minval=0, maxval=2), tf.bool)
+        valid_point_mask = tf.cast(
+            tf.random.uniform([batch_size, 1],
+                              dtype=tf.int32,
+                              minval=0,
+                              maxval=2), tf.bool)
+        labels = tf.cast(
+            tf.random.uniform([batch_size, 1],
+                              dtype=tf.int32,
+                              minval=0,
+                              maxval=2), tf.bool)
         data = {
             movielens.USER_COLUMN: users,
             movielens.ITEM_COLUMN: items,
             rconst.VALID_POINT_MASK: valid_point_mask,
         }, labels
       else:
-        dupe_mask = tf.cast(tf.random.uniform([batch_size], dtype=tf.int32,
-                                              minval=0, maxval=2), tf.bool)
+        dupe_mask = tf.cast(
+            tf.random.uniform([batch_size, 1],
+                              dtype=tf.int32,
+                              minval=0,
+                              maxval=2), tf.bool)
         data = {
             movielens.USER_COLUMN: users,
             movielens.ITEM_COLUMN: items,
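The common thread in this file is a shape migration: every per-example feature moves from rank-1 [batch_size] to rank-2 [batch_size, 1]. A minimal illustration (mine, not from the diff):

import numpy as np

batch_size = 8
users = np.arange(batch_size)                  # old convention: shape (8,)
users_2d = np.reshape(users, (batch_size, 1))  # new convention: shape (8, 1)
assert users_2d.shape == (batch_size, 1)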
...
@@ -168,8 +168,11 @@ class BaseTest(tf.test.TestCase):
     md5 = hashlib.md5()
     for features, labels in first_epoch:
       data_list = [
-          features[movielens.USER_COLUMN], features[movielens.ITEM_COLUMN],
-          features[rconst.VALID_POINT_MASK], labels]
+          features[movielens.USER_COLUMN].flatten(),
+          features[movielens.ITEM_COLUMN].flatten(),
+          features[rconst.VALID_POINT_MASK].flatten(),
+          labels.flatten()
+      ]
       for i in data_list:
         md5.update(i.tobytes())
@@ -216,8 +219,10 @@ class BaseTest(tf.test.TestCase):
     md5 = hashlib.md5()
     for features in eval_data:
       data_list = [
-          features[movielens.USER_COLUMN], features[movielens.ITEM_COLUMN],
-          features[rconst.DUPLICATE_MASK]]
+          features[movielens.USER_COLUMN].flatten(),
+          features[movielens.ITEM_COLUMN].flatten(),
+          features[rconst.DUPLICATE_MASK].flatten()
+      ]
       for i in data_list:
         md5.update(i.tobytes())
@@ -276,8 +281,11 @@ class BaseTest(tf.test.TestCase):
     md5 = hashlib.md5()
     for features, labels in results:
       data_list = [
-          features[movielens.USER_COLUMN], features[movielens.ITEM_COLUMN],
-          features[rconst.VALID_POINT_MASK], labels]
+          features[movielens.USER_COLUMN].flatten(),
+          features[movielens.ITEM_COLUMN].flatten(),
+          features[rconst.VALID_POINT_MASK].flatten(),
+          labels.flatten()
+      ]
       for i in data_list:
         md5.update(i.tobytes())
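These tests all use the same determinism idiom: flatten each feature array and fold its raw bytes into one MD5 digest, so two pipeline runs can be compared bit-for-bit. A condensed sketch of that idiom (my extraction, hypothetical function name):

import hashlib
import numpy as np

def fingerprint(arrays):
  md5 = hashlib.md5()
  for a in arrays:
    # Flattening first makes the digest independent of the (n,) vs (n, 1)
    # layout change exercised by this commit.
    md5.update(np.ascontiguousarray(a).flatten().tobytes())
  return md5.hexdigest()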
...
@@ -37,7 +37,6 @@ from official.utils.flags import core as flags_core
 from official.utils.misc import distribution_utils
 from official.utils.misc import keras_utils

 FLAGS = flags.FLAGS
@@ -60,13 +59,8 @@ def get_inputs(params):
       dataset=FLAGS.dataset, data_dir=FLAGS.data_dir, params=params,
       constructor_type=FLAGS.constructor_type,
       deterministic=FLAGS.seed is not None)
-  num_train_steps = (producer.train_batches_per_epoch //
-                     params["batches_per_step"])
-  num_eval_steps = (producer.eval_batches_per_epoch //
-                    params["batches_per_step"])
-  assert not producer.train_batches_per_epoch % params["batches_per_step"]
-  assert not producer.eval_batches_per_epoch % params["batches_per_step"]
+  num_train_steps = producer.train_batches_per_epoch
+  num_eval_steps = producer.eval_batches_per_epoch

   return num_users, num_items, num_train_steps, num_eval_steps, producer
@@ -74,18 +68,13 @@ def get_inputs(params):
 def parse_flags(flags_obj):
   """Convenience function to turn flags into params."""
   num_gpus = flags_core.get_num_gpus(flags_obj)
-  num_devices = FLAGS.num_tpu_shards if FLAGS.tpu else num_gpus or 1
-  batch_size = (flags_obj.batch_size + num_devices - 1) // num_devices

-  eval_divisor = (rconst.NUM_EVAL_NEGATIVES + 1) * num_devices
+  batch_size = flags_obj.batch_size
   eval_batch_size = flags_obj.eval_batch_size or flags_obj.batch_size
-  eval_batch_size = ((eval_batch_size + eval_divisor - 1) //
-                     eval_divisor * eval_divisor // num_devices)

   return {
       "train_epochs": flags_obj.train_epochs,
-      "batches_per_step": num_devices,
+      "batches_per_step": 1,
       "use_seed": flags_obj.seed is not None,
       "batch_size": batch_size,
       "eval_batch_size": eval_batch_size,
@@ -95,6 +84,7 @@ def parse_flags(flags_obj):
       "mf_regularization": flags_obj.mf_regularization,
       "mlp_reg_layers": [float(reg) for reg in flags_obj.mlp_regularization],
       "num_neg": flags_obj.num_neg,
+      "distribution_strategy": flags_obj.distribution_strategy,
       "num_gpus": num_gpus,
       "use_tpu": flags_obj.tpu is not None,
       "tpu": flags_obj.tpu,
@@ -115,7 +105,7 @@ def parse_flags(flags_obj):
   }


-def get_distribution_strategy(params):
+def get_v1_distribution_strategy(params):
   """Returns the distribution strategy to use."""
   if params["use_tpu"]:
     # Some of the networking libraries are quite chatty.
...
@@ -66,7 +66,7 @@ def construct_estimator(model_dir, params):
   Returns:
     An Estimator or TPUEstimator.
   """
-  distribution = ncf_common.get_distribution_strategy(params)
+  distribution = ncf_common.get_v1_distribution_strategy(params)
   run_config = tf.estimator.RunConfig(train_distribute=distribution,
                                       eval_distribute=distribution)
@@ -82,7 +82,6 @@ def create_dataset_from_data_producer(producer, params):
     Returns:
       Processed training features.
     """
-    labels = tf.expand_dims(labels, -1)
     fake_dup_mask = tf.zeros_like(features[movielens.USER_COLUMN])
     features[rconst.DUPLICATE_MASK] = fake_dup_mask
     features[rconst.TRAIN_LABEL_KEY] = labels
@@ -106,7 +105,6 @@ def create_dataset_from_data_producer(producer, params):
     Returns:
       Processed evaluation features.
     """
     labels = tf.cast(tf.zeros_like(features[movielens.USER_COLUMN]), tf.bool)
-    labels = tf.expand_dims(labels, -1)
     fake_valid_pt_mask = tf.cast(
         tf.zeros_like(features[movielens.USER_COLUMN]), tf.bool)
     features[rconst.VALID_POINT_MASK] = fake_valid_pt_mask
@@ -134,9 +132,13 @@ def create_ncf_input_data(params, producer=None, input_meta_data=None):
   Returns:
     (training dataset, evaluation dataset, train steps per epoch,
     eval steps per epoch)
+
+  Raises:
+    ValueError: If data is being generated online when using TPUs.
   """
   if params["train_dataset_path"]:
+    assert params["eval_dataset_path"]
     train_dataset = create_dataset_from_tf_record_files(
         params["train_dataset_path"],
         input_meta_data["train_prebatch_size"],
@@ -148,34 +150,18 @@ def create_ncf_input_data(params, producer=None, input_meta_data=None):
         params["eval_batch_size"],
         is_training=False)
-    # TODO(b/259377621): Remove number of devices (i.e.
-    # params["batches_per_step"]) in input pipeline logic and only use
-    # global batch size instead.
-    num_train_steps = int(
-        np.ceil(input_meta_data["num_train_steps"] /
-                params["batches_per_step"]))
-    num_eval_steps = (
-        input_meta_data["num_eval_steps"] // params["batches_per_step"])
+    num_train_steps = int(input_meta_data["num_train_steps"])
+    num_eval_steps = int(input_meta_data["num_eval_steps"])
   else:
-    assert producer
+    if params["use_tpu"]:
+      raise ValueError("TPU training does not support data producer yet. "
+                       "Use pre-processed data.")
+
+    assert producer
     # Start retrieving data from producer.
     train_dataset, eval_dataset = create_dataset_from_data_producer(
         producer, params)
-    num_train_steps = (
-        producer.train_batches_per_epoch // params["batches_per_step"])
-    num_eval_steps = (
-        producer.eval_batches_per_epoch // params["batches_per_step"])
-    assert not producer.train_batches_per_epoch % params["batches_per_step"]
-    assert not producer.eval_batches_per_epoch % params["batches_per_step"]
-
-  # It is required that for distributed training, the dataset must call
-  # batch(). The parameter of batch() here is the number of replicas involved,
-  # such that each replica evenly gets a slice of data.
-  # drop_remainder = True, as we would like batch call to return a fixed shape
-  # vs None; this prevents an expensive broadcast during weighted_loss.
-  batches_per_step = params["batches_per_step"]
-  train_dataset = train_dataset.batch(batches_per_step, drop_remainder=True)
-  eval_dataset = eval_dataset.batch(batches_per_step, drop_remainder=True)
+    num_train_steps = producer.train_batches_per_epoch
+    num_eval_steps = producer.eval_batches_per_epoch

   return train_dataset, eval_dataset, num_train_steps, num_eval_steps
...
@@ -189,7 +189,7 @@ class NcfTest(tf.test.TestCase):
     self.assertAlmostEqual(ndcg, (1 + math.log(2) / math.log(3) +
                                   2 * math.log(2) / math.log(4)) / 4)

-  _BASE_END_TO_END_FLAGS = ['-batch_size', '1024', '-train_epochs', '1']
+  _BASE_END_TO_END_FLAGS = ['-batch_size', '1044', '-train_epochs', '1']

   @unittest.skipIf(keras_utils.is_v2_0(), "TODO(b/136018594)")
   @mock.patch.object(rconst, "SYNTHETIC_BATCHES_PER_EPOCH", 100)
...
@@ -109,7 +109,6 @@ def neumf_model_fn(features, labels, mode, params):
     mlperf_helper.ncf_print(key=mlperf_helper.TAGS.OPT_HP_ADAM_EPSILON,
                             value=params["epsilon"])
     optimizer = tf.compat.v1.train.AdamOptimizer(
         learning_rate=params["learning_rate"],
         beta1=params["beta1"],
@@ -151,7 +150,7 @@ def _strip_first_and_last_dimension(x, batch_size):
   return tf.reshape(x[0, :], (batch_size,))


-def construct_model(user_input, item_input, params, need_strip=False):
+def construct_model(user_input, item_input, params):
   # type: (tf.Tensor, tf.Tensor, dict) -> tf.keras.Model
   """Initialize NeuMF model.
@@ -184,34 +183,33 @@ def construct_model(user_input, item_input, params):
   # Initializer for embedding layers
   embedding_initializer = "glorot_uniform"

-  if need_strip:
-    batch_size = params["batch_size"]
+  def mf_slice_fn(x):
+    x = tf.squeeze(x, [1])
+    return x[:, :mf_dim]

-    user_input_reshaped = tf.keras.layers.Lambda(
-        lambda x: _strip_first_and_last_dimension(
-            x, batch_size))(user_input)
-    item_input_reshaped = tf.keras.layers.Lambda(
-        lambda x: _strip_first_and_last_dimension(
-            x, batch_size))(item_input)
+  def mlp_slice_fn(x):
+    x = tf.squeeze(x, [1])
+    return x[:, mf_dim:]

   # It turns out to be significantly more efficient to store the MF and MLP
   # embedding portions in the same table, and then slice as needed.
-  mf_slice_fn = lambda x: x[:, :mf_dim]
-  mlp_slice_fn = lambda x: x[:, mf_dim:]
   embedding_user = tf.keras.layers.Embedding(
-      num_users, mf_dim + model_layers[0] // 2,
+      num_users,
+      mf_dim + model_layers[0] // 2,
       embeddings_initializer=embedding_initializer,
       embeddings_regularizer=tf.keras.regularizers.l2(mf_regularization),
-      input_length=1, name="embedding_user")(
-          user_input_reshaped if need_strip else user_input)
+      input_length=1,
+      name="embedding_user")(user_input)
   embedding_item = tf.keras.layers.Embedding(
-      num_items, mf_dim + model_layers[0] // 2,
+      num_items,
+      mf_dim + model_layers[0] // 2,
       embeddings_initializer=embedding_initializer,
       embeddings_regularizer=tf.keras.regularizers.l2(mf_regularization),
-      input_length=1, name="embedding_item")(
-          item_input_reshaped if need_strip else item_input)
+      input_length=1,
+      name="embedding_item")(item_input)

   # GMF part
   mf_user_latent = tf.keras.layers.Lambda(
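A self-contained sketch (assumed dimensions, mine) of the shared-embedding slicing that mf_slice_fn and mlp_slice_fn now perform: one [batch, 1, mf_dim + mlp_dim/2] lookup is squeezed to rank 2 and split into its GMF and MLP halves.

import tensorflow as tf

mf_dim = 8
embedding = tf.zeros([32, 1, mf_dim + 16])  # batch of 32, input_length=1
x = tf.squeeze(embedding, [1])              # -> [32, 24]
mf_part = x[:, :mf_dim]                     # GMF latent: [32, 8]
mlp_part = x[:, mf_dim:]                    # MLP latent: [32, 16]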
...
@@ -24,6 +24,8 @@ import random
 import string

 import tensorflow as tf

+from official.utils.misc import tpu_lib
+

 def _collective_communication(all_reduce_alg):
   """Return a CollectiveCommunication based on all_reduce_alg.
@@ -83,16 +85,18 @@ def get_distribution_strategy(distribution_strategy="default",
                               num_gpus=0,
                               num_workers=1,
                               all_reduce_alg=None,
-                              num_packs=1):
+                              num_packs=1,
+                              tpu_address=None):
   """Return a DistributionStrategy for running the model.

   Args:
     distribution_strategy: a string specifying which distribution strategy to
-      use. Accepted values are 'off', 'default', 'one_device', 'mirrored',
-      'parameter_server', 'multi_worker_mirrored', case insensitive. 'off'
-      means not to use Distribution Strategy; 'default' means to choose from
-      `MirroredStrategy`, `MultiWorkerMirroredStrategy`, or `OneDeviceStrategy`
-      according to the number of GPUs and number of workers.
+      use. Accepted values are 'off', 'default', 'one_device', 'mirrored',
+      'parameter_server', 'multi_worker_mirrored', and 'tpu' -- case
+      insensitive. 'off' means not to use Distribution Strategy; 'default'
+      means to choose from `MirroredStrategy`, `MultiWorkerMirroredStrategy`,
+      or `OneDeviceStrategy` according to the number of GPUs and number of
+      workers. 'tpu' means to use TPUStrategy using `tpu_address`.
     num_gpus: Number of GPUs to run this model.
     num_workers: Number of workers to run this model.
     all_reduce_alg: Optional. Specifies which algorithm to use when performing
@@ -102,12 +106,14 @@ def get_distribution_strategy(distribution_strategy="default",
       device topology.
     num_packs: Optional. Sets the `num_packs` in `tf.distribute.NcclAllReduce`
       or `tf.distribute.HierarchicalCopyAllReduce` for `MirroredStrategy`.
+    tpu_address: Optional. String that represents TPU to connect to. Must not
+      be None if `distribution_strategy` is set to `tpu`.

   Returns:
     tf.distribute.DistributionStrategy object.
   Raises:
-    ValueError: if `distribution_strategy` is 'off' or 'one_device' and
-      `num_gpus` is larger than 1; or `num_gpus` is negative.
+    ValueError: if `distribution_strategy` is 'off' or 'one_device' and
+      `num_gpus` is larger than 1; or `num_gpus` is negative or if
+      `distribution_strategy` is `tpu` but `tpu_address` is not specified.
   """
   if num_gpus < 0:
     raise ValueError("`num_gpus` can not be negative.")
@@ -120,6 +126,15 @@ def get_distribution_strategy(distribution_strategy="default",
         "flag cannot be set to 'off'.".format(num_gpus, num_workers))
     return None

+  if distribution_strategy == "tpu":
+    if not tpu_address:
+      raise ValueError("`tpu_address` must be specified when using "
+                       "TPUStrategy.")
+    # Initialize TPU System.
+    cluster_resolver = tpu_lib.tpu_initialize(tpu_address)
+    return tf.distribute.experimental.TPUStrategy(cluster_resolver)
+
   if distribution_strategy == "multi_worker_mirrored":
     return tf.distribute.experimental.MultiWorkerMirroredStrategy(
         communication=_collective_communication(all_reduce_alg))
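A condensed, hedged analog (mine) of the dispatch order the patched helper now follows, with the new 'tpu' branch short-circuiting before the GPU paths; pick_strategy is a hypothetical name:

import tensorflow as tf

def pick_strategy(tpu_address=None, num_gpus=0):
  if tpu_address:
    # Mirrors the new branch: resolve, connect, initialize, wrap.
    resolver = tf.distribute.cluster_resolver.TPUClusterResolver(tpu_address)
    tf.config.experimental_connect_to_host(resolver.master())
    tf.tpu.experimental.initialize_tpu_system(resolver)
    return tf.distribute.experimental.TPUStrategy(resolver)
  if num_gpus > 1:
    return tf.distribute.MirroredStrategy()
  return tf.distribute.OneDeviceStrategy(
      "device:GPU:0" if num_gpus else "device:CPU:0")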
...
@@ -31,3 +31,8 @@ def tpu_initialize(tpu_address):
   tf.config.experimental_connect_to_host(cluster_resolver.master())
   tf.tpu.experimental.initialize_tpu_system(cluster_resolver)
   return cluster_resolver
+
+
+def get_primary_cpu_task(use_remote_tpu=False):
+  """Returns remote TPU worker address. No-op for GPU/CPU training."""
+  return "/job:worker" if use_remote_tpu else ""
@@ -32,3 +32,8 @@ https://scholar.googleusercontent.com/scholar.bib?q=info:rLqvkztmWYgJ:scholar.go
 * yinxiao@google.com
 * menglong@google.com
 * yongzhe@google.com
+
+## Table of Contents
+
+* <a href='g3doc/exporting_models.md'>Exporting a trained model</a>
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
# For training on Imagenet Video with LSTM Interleaved Mobilenet V2
[lstm_object_detection.protos.lstm_model] {
train_unroll_length: 4
eval_unroll_length: 4
lstm_state_depth: 320
depth_multipliers: 1.4
depth_multipliers: 0.35
pre_bottleneck: true
low_res: true
train_interleave_method: 'RANDOM_SKIP_SMALL'
eval_interleave_method: 'SKIP3'
}
model {
ssd {
    num_classes: 30  # Number of classes for the ImageNet VID dataset.
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
}
}
similarity_calculator {
iou_similarity {
}
}
anchor_generator {
ssd_anchor_generator {
num_layers: 5
min_scale: 0.2
max_scale: 0.95
aspect_ratios: 1.0
aspect_ratios: 2.0
aspect_ratios: 0.5
aspect_ratios: 3.0
aspect_ratios: 0.3333
}
}
image_resizer {
fixed_shape_resizer {
height: 320
width: 320
}
}
box_predictor {
convolutional_box_predictor {
min_depth: 0
max_depth: 0
num_layers_before_predictor: 3
use_dropout: false
dropout_keep_probability: 0.8
kernel_size: 3
box_code_size: 4
apply_sigmoid_to_scores: false
use_depthwise: true
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
train: true,
scale: true,
center: true,
decay: 0.9997,
epsilon: 0.001,
}
}
}
}
feature_extractor {
type: 'lstm_ssd_interleaved_mobilenet_v2'
conv_hyperparams {
activation: RELU_6,
regularizer {
l2_regularizer {
weight: 0.00004
}
}
initializer {
truncated_normal_initializer {
stddev: 0.03
mean: 0.0
}
}
batch_norm {
train: true,
scale: true,
center: true,
decay: 0.9997,
epsilon: 0.001,
}
}
}
loss {
classification_loss {
weighted_sigmoid {
}
}
localization_loss {
weighted_smooth_l1 {
}
}
hard_example_miner {
num_hard_examples: 3000
iou_threshold: 0.99
loss_type: CLASSIFICATION
max_negatives_per_positive: 3
min_negatives_per_image: 0
}
classification_weight: 1.0
localization_weight: 4.0
}
normalize_loss_by_num_matches: true
post_processing {
batch_non_max_suppression {
score_threshold: -20.0
iou_threshold: 0.5
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SIGMOID
}
}
}
train_config: {
batch_size: 8
optimizer {
use_moving_average: false
rms_prop_optimizer: {
learning_rate: {
exponential_decay_learning_rate {
initial_learning_rate: 0.002
decay_steps: 200000
decay_factor: 0.95
}
}
momentum_optimizer_value: 0.9
decay: 0.9
epsilon: 1.0
}
}
gradient_clipping_by_norm: 10.0
batch_queue_capacity: 12
prefetch_queue_capacity: 4
}
train_input_reader: {
shuffle_buffer_size: 32
queue_capacity: 12
prefetch_size: 12
min_after_dequeue: 4
label_map_path: "path/to/label_map"
external_input_reader {
[lstm_object_detection.protos.GoogleInputReader.google_input_reader] {
tf_record_video_input_reader: {
input_path: '/data/lstm_detection/tfrecords/test.tfrecord'
data_type: TF_SEQUENCE_EXAMPLE
video_length: 4
}
}
}
}
eval_config: {
metrics_set: "coco_evaluation_all_frames"
use_moving_averages: true
min_score_threshold: 0.5
max_num_boxes_to_visualize: 300
visualize_groundtruth_boxes: true
groundtruth_box_visualization_color: "red"
}
eval_input_reader {
label_map_path: "path/to/label_map"
shuffle: true
num_epochs: 1
num_parallel_batches: 1
num_readers: 1
external_input_reader {
[lstm_object_detection.protos.GoogleInputReader.google_input_reader] {
tf_record_video_input_reader: {
input_path: "path/to/sequence_example/data"
data_type: TF_SEQUENCE_EXAMPLE
video_length: 10
}
}
}
}
eval_input_reader: {
label_map_path: "path/to/label_map"
external_input_reader {
[lstm_object_detection.protos.GoogleInputReader.google_input_reader] {
tf_record_video_input_reader: {
input_path: "path/to/sequence_example/data"
data_type: TF_SEQUENCE_EXAMPLE
video_length: 4
}
}
}
shuffle: true
num_readers: 1
}
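For reference, a hedged sketch of how a pipeline config like the one above is consumed, mirroring the export script added later in this commit ('path/to/lstm_pipeline.config' is a placeholder path):

from lstm_object_detection.utils import config_util

configs = config_util.get_configs_from_pipeline_file(
    'path/to/lstm_pipeline.config')
model_config = configs['model']   # keys as used by export_tflite_lstd_graph_lib
lstm_config = configs['lstm_model']
eval_config = configs['eval_config']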
@@ -27,8 +27,6 @@ import functools
 import os

 import tensorflow as tf
 from google.protobuf import text_format
-from google3.pyglib import app
-from google3.pyglib import flags
 from lstm_object_detection import evaluator
 from lstm_object_detection import model_builder
 from lstm_object_detection.inputs import seq_dataset_builder
@@ -107,4 +105,4 @@ def main(unused_argv):
       FLAGS.checkpoint_dir, FLAGS.eval_dir)

 if __name__ == '__main__':
-  app.run()
+  tf.app.run()
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Exports an LSTM detection model to use with tf-lite.
Outputs file:
* A tflite compatible frozen graph - $output_directory/tflite_graph.pb
The exported graph has the following input and output nodes.
Inputs:
'input_video_tensor': a float32 tensor of shape
[unroll_length, height, width, 3] containing the normalized input image.
Note that the height and width must be compatible with the height and
width configured in the fixed_shape_image resizer options in the pipeline
config proto.
Outputs:
If add_postprocessing_op is true, the frozen graph adds a
TFLite_Detection_PostProcess custom op node that has four outputs:
detection_boxes: a float32 tensor of shape [1, num_boxes, 4] with box
locations
detection_classes: a float32 tensor of shape [1, num_boxes]
with class indices
detection_scores: a float32 tensor of shape [1, num_boxes]
with class scores
num_boxes: a float32 tensor of size 1 containing the number of detected boxes
else:
the graph has three outputs:
'raw_outputs/box_encodings': a float32 tensor of shape [1, num_anchors, 4]
containing the encoded box predictions.
'raw_outputs/class_predictions': a float32 tensor of shape
[1, num_anchors, num_classes] containing the class scores for each anchor
after applying score conversion.
'anchors': a float32 constant tensor of shape [num_anchors, 4]
containing the anchor boxes.
Example Usage:
--------------
python lstm_object_detection/export_tflite_lstd_graph.py \
--pipeline_config_path path/to/lstm_pipeline.config \
--trained_checkpoint_prefix path/to/model.ckpt \
--output_directory path/to/exported_model_directory
The expected output would be in the directory
path/to/exported_model_directory (which is created if it does not exist)
with contents:
- tflite_graph.pbtxt
- tflite_graph.pb
Config overrides (see the `config_override` flag) are text protobufs
(also of type pipeline_pb2.TrainEvalPipelineConfig) which are used to override
certain fields in the provided pipeline_config_path. These are useful for
making small changes to the inference graph that differ from the training or
eval config.
Example Usage (in which we change the NMS iou_threshold to be 0.5 and
NMS score_threshold to be 0.0):
python lstm_object_detection/export_tflite_lstd_graph.py \
--pipeline_config_path path/to/lstm_pipeline.config \
--trained_checkpoint_prefix path/to/model.ckpt \
--output_directory path/to/exported_model_directory
--config_override " \
model{ \
ssd{ \
post_processing { \
batch_non_max_suppression { \
score_threshold: 0.0 \
iou_threshold: 0.5 \
} \
} \
} \
} \
"
"""
import tensorflow as tf
from lstm_object_detection.utils import config_util
from lstm_object_detection import export_tflite_lstd_graph_lib
flags = tf.app.flags
flags.DEFINE_string('output_directory', None, 'Path to write outputs.')
flags.DEFINE_string(
'pipeline_config_path', None,
'Path to a pipeline_pb2.TrainEvalPipelineConfig config '
'file.')
flags.DEFINE_string('trained_checkpoint_prefix', None, 'Checkpoint prefix.')
flags.DEFINE_integer('max_detections', 10,
'Maximum number of detections (boxes) to show.')
flags.DEFINE_integer('max_classes_per_detection', 1,
'Maximum number of classes to output per detection box.')
flags.DEFINE_integer(
'detections_per_class', 100,
'Number of anchors used per class in Regular Non-Max-Suppression.')
flags.DEFINE_bool('add_postprocessing_op', True,
'Add TFLite custom op for postprocessing to the graph.')
flags.DEFINE_bool(
'use_regular_nms', False,
'Flag to set postprocessing op to use Regular NMS instead of Fast NMS.')
flags.DEFINE_string(
'config_override', '', 'pipeline_pb2.TrainEvalPipelineConfig '
'text proto to override pipeline_config_path.')
FLAGS = flags.FLAGS
def main(argv):
del argv # Unused.
flags.mark_flag_as_required('output_directory')
flags.mark_flag_as_required('pipeline_config_path')
flags.mark_flag_as_required('trained_checkpoint_prefix')
pipeline_config = config_util.get_configs_from_pipeline_file(
FLAGS.pipeline_config_path)
export_tflite_lstd_graph_lib.export_tflite_graph(
pipeline_config, FLAGS.trained_checkpoint_prefix, FLAGS.output_directory,
FLAGS.add_postprocessing_op, FLAGS.max_detections,
FLAGS.max_classes_per_detection, use_regular_nms=FLAGS.use_regular_nms)
if __name__ == '__main__':
tf.app.run(main)
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Exports detection models to use with tf-lite.
See export_tflite_lstd_graph.py for usage.
"""
import os
import tempfile
import numpy as np
import tensorflow as tf
from tensorflow.core.framework import attr_value_pb2
from tensorflow.core.framework import types_pb2
from tensorflow.core.protobuf import saver_pb2
from tensorflow.tools.graph_transforms import TransformGraph
from object_detection import exporter
from object_detection.builders import graph_rewriter_builder
from object_detection.builders import post_processing_builder
from object_detection.core import box_list
from lstm_object_detection import model_builder
_DEFAULT_NUM_CHANNELS = 3
_DEFAULT_NUM_COORD_BOX = 4
def get_const_center_size_encoded_anchors(anchors):
"""Exports center-size encoded anchors as a constant tensor.
Args:
anchors: a float32 tensor of shape [num_anchors, 4] containing the anchor
boxes
Returns:
encoded_anchors: a float32 constant tensor of shape [num_anchors, 4]
containing the anchor boxes.
"""
anchor_boxlist = box_list.BoxList(anchors)
y, x, h, w = anchor_boxlist.get_center_coordinates_and_sizes()
num_anchors = y.get_shape().as_list()
with tf.Session() as sess:
y_out, x_out, h_out, w_out = sess.run([y, x, h, w])
encoded_anchors = tf.constant(
np.transpose(np.stack((y_out, x_out, h_out, w_out))),
dtype=tf.float32,
shape=[num_anchors[0], _DEFAULT_NUM_COORD_BOX],
name='anchors')
return encoded_anchors
def append_postprocessing_op(frozen_graph_def,
max_detections,
max_classes_per_detection,
nms_score_threshold,
nms_iou_threshold,
num_classes,
scale_values,
detections_per_class=100,
use_regular_nms=False):
"""Appends postprocessing custom op.
Args:
frozen_graph_def: Frozen GraphDef for SSD model after freezing the
checkpoint
max_detections: Maximum number of detections (boxes) to show
max_classes_per_detection: Number of classes to display per detection
nms_score_threshold: Score threshold used in Non-maximal suppression in
post-processing
nms_iou_threshold: Intersection-over-union threshold used in Non-maximal
suppression in post-processing
num_classes: number of classes in SSD detector
    scale_values: a dict with the following key-value pairs
      {y_scale: 10, x_scale: 10, h_scale: 5, w_scale: 5} that are used to
      decode center-size boxes
detections_per_class: In regular NonMaxSuppression, number of anchors used
for NonMaxSuppression per class
use_regular_nms: Flag to set postprocessing op to use Regular NMS instead
of Fast NMS.
Returns:
transformed_graph_def: Frozen GraphDef with postprocessing custom op
appended
TFLite_Detection_PostProcess custom op node has four outputs:
detection_boxes: a float32 tensor of shape [1, num_boxes, 4] with box
locations
detection_classes: a float32 tensor of shape [1, num_boxes]
with class indices
detection_scores: a float32 tensor of shape [1, num_boxes]
with class scores
num_boxes: a float32 tensor of size 1 containing the number of detected
boxes
"""
new_output = frozen_graph_def.node.add()
new_output.op = 'TFLite_Detection_PostProcess'
new_output.name = 'TFLite_Detection_PostProcess'
new_output.attr['_output_quantized'].CopyFrom(
attr_value_pb2.AttrValue(b=True))
new_output.attr['_output_types'].list.type.extend([
types_pb2.DT_FLOAT, types_pb2.DT_FLOAT, types_pb2.DT_FLOAT,
types_pb2.DT_FLOAT
])
new_output.attr['_support_output_type_float_in_quantized_op'].CopyFrom(
attr_value_pb2.AttrValue(b=True))
new_output.attr['max_detections'].CopyFrom(
attr_value_pb2.AttrValue(i=max_detections))
new_output.attr['max_classes_per_detection'].CopyFrom(
attr_value_pb2.AttrValue(i=max_classes_per_detection))
new_output.attr['nms_score_threshold'].CopyFrom(
attr_value_pb2.AttrValue(f=nms_score_threshold.pop()))
new_output.attr['nms_iou_threshold'].CopyFrom(
attr_value_pb2.AttrValue(f=nms_iou_threshold.pop()))
new_output.attr['num_classes'].CopyFrom(
attr_value_pb2.AttrValue(i=num_classes))
new_output.attr['y_scale'].CopyFrom(
attr_value_pb2.AttrValue(f=scale_values['y_scale'].pop()))
new_output.attr['x_scale'].CopyFrom(
attr_value_pb2.AttrValue(f=scale_values['x_scale'].pop()))
new_output.attr['h_scale'].CopyFrom(
attr_value_pb2.AttrValue(f=scale_values['h_scale'].pop()))
new_output.attr['w_scale'].CopyFrom(
attr_value_pb2.AttrValue(f=scale_values['w_scale'].pop()))
new_output.attr['detections_per_class'].CopyFrom(
attr_value_pb2.AttrValue(i=detections_per_class))
new_output.attr['use_regular_nms'].CopyFrom(
attr_value_pb2.AttrValue(b=use_regular_nms))
new_output.input.extend(
['raw_outputs/box_encodings', 'raw_outputs/class_predictions', 'anchors'])
# Transform the graph to append new postprocessing op
input_names = []
output_names = ['TFLite_Detection_PostProcess']
transforms = ['strip_unused_nodes']
transformed_graph_def = TransformGraph(frozen_graph_def, input_names,
output_names, transforms)
return transformed_graph_def
def export_tflite_graph(pipeline_config,
trained_checkpoint_prefix,
output_dir,
add_postprocessing_op,
max_detections,
max_classes_per_detection,
detections_per_class=100,
use_regular_nms=False,
binary_graph_name='tflite_graph.pb',
txt_graph_name='tflite_graph.pbtxt'):
"""Exports a tflite compatible graph and anchors for ssd detection model.
Anchors are written to a tensor and tflite compatible graph
is written to output_dir/tflite_graph.pb.
Args:
    pipeline_config: Dictionary of configuration objects. Keys are `model`,
      `train_config`, `train_input_config`, `eval_config`, `eval_input_config`,
      `lstm_model`. Values are the corresponding config objects.
trained_checkpoint_prefix: a file prefix for the checkpoint containing the
trained parameters of the SSD model.
output_dir: A directory to write the tflite graph and anchor file to.
    add_postprocessing_op: If true, adds a TFLite_Detection_PostProcess custom
      op to the frozen graph
max_detections: Maximum number of detections (boxes) to show
max_classes_per_detection: Number of classes to display per detection
detections_per_class: In regular NonMaxSuppression, number of anchors used
for NonMaxSuppression per class
use_regular_nms: Flag to set postprocessing op to use Regular NMS instead
of Fast NMS.
binary_graph_name: Name of the exported graph file in binary format.
txt_graph_name: Name of the exported graph file in text format.
Raises:
    ValueError: if the pipeline config contains models other than ssd or uses a
      fixed_shape_resizer and provides a shape as well.
"""
model_config = pipeline_config['model']
lstm_config = pipeline_config['lstm_model']
eval_config = pipeline_config['eval_config']
tf.gfile.MakeDirs(output_dir)
if model_config.WhichOneof('model') != 'ssd':
raise ValueError('Only ssd models are supported in tflite. '
'Found {} in config'.format(
model_config.WhichOneof('model')))
num_classes = model_config.ssd.num_classes
nms_score_threshold = {
model_config.ssd.post_processing.batch_non_max_suppression.
score_threshold
}
nms_iou_threshold = {
model_config.ssd.post_processing.batch_non_max_suppression.
iou_threshold
}
scale_values = {}
scale_values['y_scale'] = {
model_config.ssd.box_coder.faster_rcnn_box_coder.y_scale
}
scale_values['x_scale'] = {
model_config.ssd.box_coder.faster_rcnn_box_coder.x_scale
}
scale_values['h_scale'] = {
model_config.ssd.box_coder.faster_rcnn_box_coder.height_scale
}
scale_values['w_scale'] = {
model_config.ssd.box_coder.faster_rcnn_box_coder.width_scale
}
image_resizer_config = model_config.ssd.image_resizer
image_resizer = image_resizer_config.WhichOneof('image_resizer_oneof')
num_channels = _DEFAULT_NUM_CHANNELS
if image_resizer == 'fixed_shape_resizer':
height = image_resizer_config.fixed_shape_resizer.height
width = image_resizer_config.fixed_shape_resizer.width
if image_resizer_config.fixed_shape_resizer.convert_to_grayscale:
num_channels = 1
    # TODO(richardbrks): Figure out how to support a batch size of None.
shape = [lstm_config.eval_unroll_length, height, width, num_channels]
else:
raise ValueError(
        'Only fixed_shape_resizer '
'is supported with tflite. Found {}'.format(
image_resizer_config.WhichOneof('image_resizer_oneof')))
video_tensor = tf.placeholder(
tf.float32, shape=shape, name='input_video_tensor')
detection_model = model_builder.build(model_config, lstm_config,
is_training=False)
preprocessed_video, true_image_shapes = detection_model.preprocess(
tf.to_float(video_tensor))
predicted_tensors = detection_model.predict(preprocessed_video,
true_image_shapes)
# predicted_tensors = detection_model.postprocess(predicted_tensors,
# true_image_shapes)
# The score conversion occurs before the post-processing custom op
_, score_conversion_fn = post_processing_builder.build(
model_config.ssd.post_processing)
class_predictions = score_conversion_fn(
predicted_tensors['class_predictions_with_background'])
with tf.name_scope('raw_outputs'):
# 'raw_outputs/box_encodings': a float32 tensor of shape [1, num_anchors, 4]
# containing the encoded box predictions. Note that these are raw
# predictions and no Non-Max suppression is applied on them and
# no decode center size boxes is applied to them.
tf.identity(predicted_tensors['box_encodings'], name='box_encodings')
# 'raw_outputs/class_predictions': a float32 tensor of shape
# [1, num_anchors, num_classes] containing the class scores for each anchor
# after applying score conversion.
tf.identity(class_predictions, name='class_predictions')
# 'anchors': a float32 tensor of shape
# [4, num_anchors] containing the anchors as a constant node.
tf.identity(
get_const_center_size_encoded_anchors(predicted_tensors['anchors']),
name='anchors')
# Add global step to the graph, so we know the training step number when we
# evaluate the model.
tf.train.get_or_create_global_step()
# graph rewriter
is_quantized = ('graph_rewriter' in pipeline_config)
if is_quantized:
graph_rewriter_config = pipeline_config['graph_rewriter']
graph_rewriter_fn = graph_rewriter_builder.build(
graph_rewriter_config, is_training=False, is_export=True)
graph_rewriter_fn()
if model_config.ssd.feature_extractor.HasField('fpn'):
exporter.rewrite_nn_resize_op(is_quantized)
# freeze the graph
saver_kwargs = {}
if eval_config.use_moving_averages:
saver_kwargs['write_version'] = saver_pb2.SaverDef.V1
moving_average_checkpoint = tempfile.NamedTemporaryFile()
exporter.replace_variable_values_with_moving_averages(
tf.get_default_graph(), trained_checkpoint_prefix,
moving_average_checkpoint.name)
checkpoint_to_use = moving_average_checkpoint.name
else:
checkpoint_to_use = trained_checkpoint_prefix
saver = tf.train.Saver(**saver_kwargs)
input_saver_def = saver.as_saver_def()
frozen_graph_def = exporter.freeze_graph_with_def_protos(
input_graph_def=tf.get_default_graph().as_graph_def(),
input_saver_def=input_saver_def,
input_checkpoint=checkpoint_to_use,
output_node_names=','.join([
'raw_outputs/box_encodings', 'raw_outputs/class_predictions',
'anchors'
]),
restore_op_name='save/restore_all',
filename_tensor_name='save/Const:0',
clear_devices=True,
output_graph='',
initializer_nodes='')
# Add new operation to do post processing in a custom op (TF Lite only)
  # TODO(richardbrks): Should this use detection_model.postprocess instead?
if add_postprocessing_op:
transformed_graph_def = append_postprocessing_op(
frozen_graph_def, max_detections, max_classes_per_detection,
nms_score_threshold, nms_iou_threshold, num_classes, scale_values,
detections_per_class, use_regular_nms)
else:
# Return frozen without adding post-processing custom op
transformed_graph_def = frozen_graph_def
binary_graph = os.path.join(output_dir, binary_graph_name)
with tf.gfile.GFile(binary_graph, 'wb') as f:
f.write(transformed_graph_def.SerializeToString())
txt_graph = os.path.join(output_dir, txt_graph_name)
with tf.gfile.GFile(txt_graph, 'w') as f:
f.write(str(transformed_graph_def))