Commit afd5579f authored by Kaushik Shivakumar

Merge remote-tracking branch 'upstream/master' into context_tf2

parents dcd96e02 567bd18d
@@ -36,7 +36,7 @@ def normalize_image(image, original_minval, original_maxval, target_minval,
   Returns:
     image: image which is the same shape as input image.
   """
-  with tf.name_scope('NormalizeImage', values=[image]):
+  with tf.compat.v1.name_scope('NormalizeImage', values=[image]):
     original_minval = float(original_minval)
     original_maxval = float(original_maxval)
     target_minval = float(target_minval)
@@ -68,16 +68,17 @@ def generate_tfexample_image(input_example_strings,
     A tensor with shape [batch_size, height, width, channels] of type float32
     with values in the range [0..1]
   """
-  batch_size = tf.shape(input_example_strings)[0]
+  batch_size = tf.shape(input=input_example_strings)[0]
   images_shape = tf.stack(
       [batch_size, image_height, image_width, image_channels])
   tf_example_image_key = 'image/encoded'
   feature_configs = {
       tf_example_image_key:
-          tf.FixedLenFeature(
+          tf.io.FixedLenFeature(
               image_height * image_width * image_channels, dtype=tf.float32)
   }
-  feature_tensors = tf.parse_example(input_example_strings, feature_configs)
+  feature_tensors = tf.io.parse_example(
+      serialized=input_example_strings, features=feature_configs)
   float_images = tf.reshape(
       normalize_image(
           feature_tensors[tf_example_image_key],
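For readers unfamiliar with the `tf.io` parsing path used above, here is a minimal, self-contained sketch (not part of the commit) of how a serialized `tf.train.Example` carrying flattened float pixels under `'image/encoded'` round-trips through `tf.io.FixedLenFeature` and `tf.io.parse_example`; the sizes are made-up placeholders:

```python
import tensorflow as tf

# Hypothetical tiny image; the real sizes come from the dataset definition.
height, width, channels = 2, 2, 1

example = tf.train.Example(features=tf.train.Features(feature={
    'image/encoded': tf.train.Feature(
        float_list=tf.train.FloatList(value=[0.0, 0.25, 0.5, 1.0]))
}))
serialized = tf.constant([example.SerializeToString()])

feature_configs = {
    'image/encoded': tf.io.FixedLenFeature(
        [height * width * channels], dtype=tf.float32)
}
parsed = tf.io.parse_example(serialized=serialized, features=feature_configs)
images = tf.reshape(parsed['image/encoded'], [-1, height, width, channels])
print(images.shape)  # (1, 2, 2, 1)
```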
@@ -97,11 +98,11 @@ def attention_ocr_attention_masks(num_characters):
   names = ['%s/Softmax:0' % (prefix)]
   for i in range(1, num_characters):
     names += ['%s_%d/Softmax:0' % (prefix, i)]
-  return [tf.get_default_graph().get_tensor_by_name(n) for n in names]
+  return [tf.compat.v1.get_default_graph().get_tensor_by_name(n) for n in names]

 def build_tensor_info(tensor_dict):
   return {
-      k: tf.saved_model.utils.build_tensor_info(t)
+      k: tf.compat.v1.saved_model.utils.build_tensor_info(t)
       for k, t in tensor_dict.items()
   }
@@ -29,7 +29,7 @@ _CHECKPOINT_URL = (

 def _clean_up():
-  tf.gfile.DeleteRecursively(tf.test.get_temp_dir())
+  tf.io.gfile.rmtree(tf.compat.v1.test.get_temp_dir())

 def _create_tf_example_string(image):
@@ -47,7 +47,7 @@ class AttentionOcrExportTest(tf.test.TestCase):
     for suffix in ['.meta', '.index', '.data-00000-of-00001']:
       filename = _CHECKPOINT + suffix
       self.assertTrue(
-          tf.gfile.Exists(filename),
+          tf.io.gfile.exists(filename),
           msg='Missing checkpoint file %s. '
           'Please download and extract it from %s' %
           (filename, _CHECKPOINT_URL))
@@ -57,7 +57,8 @@ class AttentionOcrExportTest(tf.test.TestCase):
         os.path.dirname(__file__), 'datasets/testdata/fsns')
     tf.test.TestCase.setUp(self)
     _clean_up()
-    self.export_dir = os.path.join(tf.test.get_temp_dir(), 'exported_model')
+    self.export_dir = os.path.join(
+        tf.compat.v1.test.get_temp_dir(), 'exported_model')
     self.minimal_output_signature = {
         'predictions': 'AttentionOcr_v1/predicted_chars:0',
         'scores': 'AttentionOcr_v1/predicted_scores:0',
@@ -93,10 +94,10 @@ class AttentionOcrExportTest(tf.test.TestCase):
             size=self.dataset.image_shape).astype('uint8'),
     }
     signature_def = graph_def.signature_def[
-        tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
+        tf.saved_model.DEFAULT_SERVING_SIGNATURE_DEF_KEY]
     if serving:
       input_name = signature_def.inputs[
-          tf.saved_model.signature_constants.CLASSIFY_INPUTS].name
+          tf.saved_model.CLASSIFY_INPUTS].name
       # Model for serving takes input: inputs['inputs'] = 'tf_example:0'
       feed_dict = {
           input_name: [
@@ -126,11 +127,11 @@ class AttentionOcrExportTest(tf.test.TestCase):
       export_for_serving: True if the model was exported for Serving. This
         affects how input is fed into the model.
     """
-    tf.reset_default_graph()
-    sess = tf.Session()
-    graph_def = tf.saved_model.loader.load(
+    tf.compat.v1.reset_default_graph()
+    sess = tf.compat.v1.Session()
+    graph_def = tf.compat.v1.saved_model.loader.load(
         sess=sess,
-        tags=[tf.saved_model.tag_constants.SERVING],
+        tags=[tf.saved_model.SERVING],
         export_dir=self.export_dir)
     feed_dict = self.create_input_feed(graph_def, export_for_serving)
     results = sess.run(self.minimal_output_signature, feed_dict=feed_dict)
......
@@ -52,7 +52,7 @@ class ModelTest(tf.test.TestCase):
                                     self.num_char_classes)
     self.length_logit_shape = (self.batch_size, self.seq_length + 1)
     # Placeholder knows image dimensions, but not batch size.
-    self.input_images = tf.placeholder(
+    self.input_images = tf.compat.v1.placeholder(
         tf.float32,
         shape=(None, self.image_height, self.image_width, 3),
         name='input_node')
@@ -89,8 +89,8 @@ class ModelTest(tf.test.TestCase):
     with self.test_session() as sess:
       endpoints_tf = ocr_model.create_base(
           images=self.input_images, labels_one_hot=None)
-      sess.run(tf.global_variables_initializer())
-      tf.tables_initializer().run()
+      sess.run(tf.compat.v1.global_variables_initializer())
+      tf.compat.v1.tables_initializer().run()
       endpoints = sess.run(
           endpoints_tf, feed_dict={self.input_images: self.fake_images})
@@ -127,7 +127,7 @@ class ModelTest(tf.test.TestCase):
       ocr_model = self.create_model()
       conv_tower = ocr_model.conv_tower_fn(self.input_images)
-      sess.run(tf.global_variables_initializer())
+      sess.run(tf.compat.v1.global_variables_initializer())
       conv_tower_np = sess.run(
           conv_tower, feed_dict={self.input_images: self.fake_images})
@@ -141,9 +141,9 @@ class ModelTest(tf.test.TestCase):
     ocr_model = self.create_model()
     ocr_model.create_base(images=self.input_images, labels_one_hot=None)
     with self.test_session() as sess:
-      tfprof_root = tf.profiler.profile(
+      tfprof_root = tf.compat.v1.profiler.profile(
           sess.graph,
-          options=tf.profiler.ProfileOptionBuilder
+          options=tf.compat.v1.profiler.ProfileOptionBuilder
           .trainable_variables_parameter())
     model_size_bytes = 4 * tfprof_root.total_parameters
@@ -163,9 +163,9 @@ class ModelTest(tf.test.TestCase):
     summaries = ocr_model.create_summaries(
         data, endpoints, charset, is_training=False)
     with self.test_session() as sess:
-      sess.run(tf.global_variables_initializer())
-      sess.run(tf.local_variables_initializer())
-      tf.tables_initializer().run()
+      sess.run(tf.compat.v1.global_variables_initializer())
+      sess.run(tf.compat.v1.local_variables_initializer())
+      tf.compat.v1.tables_initializer().run()
       sess.run(summaries)  # just check it is runnable

   def test_sequence_loss_function_without_label_smoothing(self):
@@ -188,7 +188,7 @@ class ModelTest(tf.test.TestCase):
     Returns:
       a list of tensors with encoded image coordinates in them.
     """
-    batch_size = tf.shape(net)[0]
+    batch_size = tf.shape(input=net)[0]
     _, h, w, _ = net.shape.as_list()
     h_loc = [
         tf.tile(
@@ -200,7 +200,8 @@ class ModelTest(tf.test.TestCase):
     h_loc = tf.concat([tf.expand_dims(t, 2) for t in h_loc], 2)
     w_loc = [
         tf.tile(
-            tf.contrib.layers.one_hot_encoding(tf.constant([i]), num_classes=w),
+            tf.contrib.layers.one_hot_encoding(
+                tf.constant([i]), num_classes=w),
             [h, 1]) for i in range(w)
     ]
     w_loc = tf.concat([tf.expand_dims(t, 2) for t in w_loc], 2)
@@ -272,8 +273,8 @@ class ModelTest(tf.test.TestCase):
       endpoints_tf = ocr_model.create_base(
           images=self.fake_images, labels_one_hot=None)
-      sess.run(tf.global_variables_initializer())
-      tf.tables_initializer().run()
+      sess.run(tf.compat.v1.global_variables_initializer())
+      tf.compat.v1.tables_initializer().run()
       endpoints = sess.run(endpoints_tf)
     self.assertEqual(endpoints.predicted_text.shape, (self.batch_size,))
@@ -289,7 +290,7 @@ class CharsetMapperTest(tf.test.TestCase):
     charset_mapper = model.CharsetMapper(charset)
     with self.test_session() as sess:
-      tf.tables_initializer().run()
+      tf.compat.v1.tables_initializer().run()
       text = sess.run(charset_mapper.get_text(ids))
     self.assertAllEqual(text, [b'hello', b'world'])
......
@@ -111,12 +111,12 @@ class SequenceLayerBase(object):
     self._mparams = method_params
     self._net = net
     self._labels_one_hot = labels_one_hot
-    self._batch_size = tf.shape(net)[0]
+    self._batch_size = tf.shape(input=net)[0]
     # Initialize parameters for char logits which will be computed on the fly
     # inside an LSTM decoder.
     self._char_logits = {}
-    regularizer = slim.l2_regularizer(self._mparams.weight_decay)
+    regularizer = tf.keras.regularizers.l2(0.5 * (self._mparams.weight_decay))
     self._softmax_w = slim.model_variable(
         'softmax_w',
         [self._mparams.num_lstm_units, self._params.num_char_classes],
@@ -124,7 +124,7 @@ class SequenceLayerBase(object):
         regularizer=regularizer)
     self._softmax_b = slim.model_variable(
         'softmax_b', [self._params.num_char_classes],
-        initializer=tf.zeros_initializer(),
+        initializer=tf.compat.v1.zeros_initializer(),
         regularizer=regularizer)

   @abc.abstractmethod
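The regularizer swap above is not a pure renaming: `slim.l2_regularizer(scale)` multiplies `scale` by `tf.nn.l2_loss(w)`, which already contains a factor of 1/2, whereas `tf.keras.regularizers.l2(l)` computes `l * sum(w**2)` with no 1/2. The extra `0.5 *` therefore keeps the penalty value unchanged. A small sketch (illustrative values only, not from the commit) checking the equivalence:

```python
import tensorflow as tf

weight_decay = 0.004  # example value only
w = tf.constant([[1.0, -2.0], [3.0, 0.5]])

# New style, as in the diff above.
keras_penalty = tf.keras.regularizers.l2(0.5 * weight_decay)(w)

# What slim.l2_regularizer(weight_decay) used to compute.
slim_equivalent = weight_decay * tf.nn.l2_loss(w)

tf.debugging.assert_near(keras_penalty, slim_equivalent)  # both == 0.0285
```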
@@ -203,8 +203,8 @@ class SequenceLayerBase(object):
       A tensor with shape [batch_size, num_char_classes]
     """
     if char_index not in self._char_logits:
-      self._char_logits[char_index] = tf.nn.xw_plus_b(inputs, self._softmax_w,
-                                                      self._softmax_b)
+      self._char_logits[char_index] = tf.compat.v1.nn.xw_plus_b(inputs, self._softmax_w,
+                                                                self._softmax_b)
     return self._char_logits[char_index]

   def char_one_hot(self, logit):
@@ -216,7 +216,7 @@ class SequenceLayerBase(object):
     Returns:
       A tensor with shape [batch_size, num_char_classes]
     """
-    prediction = tf.argmax(logit, axis=1)
+    prediction = tf.argmax(input=logit, axis=1)
     return slim.one_hot_encoding(prediction, self._params.num_char_classes)

   def get_input(self, prev, i):
@@ -244,10 +244,10 @@ class SequenceLayerBase(object):
     Returns:
       A tensor with shape [batch_size, seq_length, num_char_classes].
     """
-    with tf.variable_scope('LSTM'):
+    with tf.compat.v1.variable_scope('LSTM'):
       first_label = self.get_input(prev=None, i=0)
       decoder_inputs = [first_label] + [None] * (self._params.seq_length - 1)
-      lstm_cell = tf.contrib.rnn.LSTMCell(
+      lstm_cell = tf.compat.v1.nn.rnn_cell.LSTMCell(
           self._mparams.num_lstm_units,
           use_peepholes=False,
           cell_clip=self._mparams.lstm_state_clip_value,
@@ -259,9 +259,9 @@ class SequenceLayerBase(object):
           loop_function=self.get_input,
           cell=lstm_cell)
-    with tf.variable_scope('logits'):
+    with tf.compat.v1.variable_scope('logits'):
       logits_list = [
-          tf.expand_dims(self.char_logit(logit, i), dim=1)
+          tf.expand_dims(self.char_logit(logit, i), axis=1)
          for i, logit in enumerate(lstm_outputs)
      ]
......
@@ -29,13 +29,13 @@ import sequence_layers

 def fake_net(batch_size, num_features, feature_size):
   return tf.convert_to_tensor(
-      np.random.uniform(size=(batch_size, num_features, feature_size)),
+      value=np.random.uniform(size=(batch_size, num_features, feature_size)),
       dtype=tf.float32)

 def fake_labels(batch_size, seq_length, num_char_classes):
   labels_np = tf.convert_to_tensor(
-      np.random.randint(
+      value=np.random.randint(
           low=0, high=num_char_classes, size=(batch_size, seq_length)))
   return slim.one_hot_encoding(labels_np, num_classes=num_char_classes)
......
@@ -96,16 +96,16 @@ def get_training_hparams():

 def create_optimizer(hparams):
   """Creates optimized based on the specified flags."""
   if hparams.optimizer == 'momentum':
-    optimizer = tf.train.MomentumOptimizer(
+    optimizer = tf.compat.v1.train.MomentumOptimizer(
         hparams.learning_rate, momentum=hparams.momentum)
   elif hparams.optimizer == 'adam':
-    optimizer = tf.train.AdamOptimizer(hparams.learning_rate)
+    optimizer = tf.compat.v1.train.AdamOptimizer(hparams.learning_rate)
   elif hparams.optimizer == 'adadelta':
-    optimizer = tf.train.AdadeltaOptimizer(hparams.learning_rate)
+    optimizer = tf.compat.v1.train.AdadeltaOptimizer(hparams.learning_rate)
   elif hparams.optimizer == 'adagrad':
-    optimizer = tf.train.AdagradOptimizer(hparams.learning_rate)
+    optimizer = tf.compat.v1.train.AdagradOptimizer(hparams.learning_rate)
   elif hparams.optimizer == 'rmsprop':
-    optimizer = tf.train.RMSPropOptimizer(
+    optimizer = tf.compat.v1.train.RMSPropOptimizer(
         hparams.learning_rate, momentum=hparams.momentum)
   return optimizer
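The hunk above keeps the graph-mode optimizers, now addressed through `tf.compat.v1.train`. Purely for comparison, and not what this commit does, here is a hedged sketch of the same selection written against the native TF2 Keras optimizers, assuming the same hparams fields (`optimizer`, `learning_rate`, `momentum`):

```python
import tensorflow as tf

def create_keras_optimizer(hparams):
  """Illustrative TF2-native counterpart of create_optimizer (not from the commit)."""
  if hparams.optimizer == 'momentum':
    return tf.keras.optimizers.SGD(
        learning_rate=hparams.learning_rate, momentum=hparams.momentum)
  elif hparams.optimizer == 'adam':
    return tf.keras.optimizers.Adam(learning_rate=hparams.learning_rate)
  elif hparams.optimizer == 'adadelta':
    return tf.keras.optimizers.Adadelta(learning_rate=hparams.learning_rate)
  elif hparams.optimizer == 'adagrad':
    return tf.keras.optimizers.Adagrad(learning_rate=hparams.learning_rate)
  elif hparams.optimizer == 'rmsprop':
    return tf.keras.optimizers.RMSprop(
        learning_rate=hparams.learning_rate, momentum=hparams.momentum)
```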
@@ -154,14 +154,14 @@ def train(loss, init_fn, hparams):

 def prepare_training_dir():
-  if not tf.gfile.Exists(FLAGS.train_log_dir):
+  if not tf.io.gfile.exists(FLAGS.train_log_dir):
     logging.info('Create a new training directory %s', FLAGS.train_log_dir)
-    tf.gfile.MakeDirs(FLAGS.train_log_dir)
+    tf.io.gfile.makedirs(FLAGS.train_log_dir)
   else:
     if FLAGS.reset_train_dir:
       logging.info('Reset the training directory %s', FLAGS.train_log_dir)
-      tf.gfile.DeleteRecursively(FLAGS.train_log_dir)
-      tf.gfile.MakeDirs(FLAGS.train_log_dir)
+      tf.io.gfile.rmtree(FLAGS.train_log_dir)
+      tf.io.gfile.makedirs(FLAGS.train_log_dir)
     else:
       logging.info('Use already existing training directory %s',
                    FLAGS.train_log_dir)
@@ -169,7 +169,7 @@ def prepare_training_dir():

 def calculate_graph_metrics():
   param_stats = model_analyzer.print_model_analysis(
-      tf.get_default_graph(),
+      tf.compat.v1.get_default_graph(),
       tfprof_options=model_analyzer.TRAINABLE_VARS_PARAMS_STAT_OPTIONS)
   return param_stats.total_parameters
@@ -186,7 +186,7 @@ def main(_):
   # If ps_tasks is zero, the local device is used. When using multiple
   # (non-local) replicas, the ReplicaDeviceSetter distributes the variables
   # across the different devices.
-  device_setter = tf.train.replica_device_setter(
+  device_setter = tf.compat.v1.train.replica_device_setter(
       FLAGS.ps_tasks, merge_devices=True)
   with tf.device(device_setter):
     data = data_provider.get_data(
......
@@ -37,16 +37,16 @@ def logits_to_log_prob(logits):
     probabilities.
   """
-  with tf.variable_scope('log_probabilities'):
+  with tf.compat.v1.variable_scope('log_probabilities'):
     reduction_indices = len(logits.shape.as_list()) - 1
     max_logits = tf.reduce_max(
-        logits, reduction_indices=reduction_indices, keep_dims=True)
+        input_tensor=logits, axis=reduction_indices, keepdims=True)
     safe_logits = tf.subtract(logits, max_logits)
     sum_exp = tf.reduce_sum(
-        tf.exp(safe_logits),
-        reduction_indices=reduction_indices,
-        keep_dims=True)
+        input_tensor=tf.exp(safe_logits),
+        axis=reduction_indices,
+        keepdims=True)
-    log_probs = tf.subtract(safe_logits, tf.log(sum_exp))
+    log_probs = tf.subtract(safe_logits, tf.math.log(sum_exp))
     return log_probs
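`logits_to_log_prob` is a numerically stable log-softmax: subtracting the per-row maximum before exponentiating avoids overflow, and the result is unchanged because the subtracted max cancels in the normalization. A quick NumPy check of the same steps (a sketch only, assuming 2-D logits):

```python
import numpy as np

logits = np.array([[2.0, 1.0, 0.1],
                   [1000.0, 999.0, 998.0]])  # naive exp() would overflow here

# Same steps as logits_to_log_prob: subtract the max, then log-sum-exp.
max_logits = logits.max(axis=-1, keepdims=True)
safe_logits = logits - max_logits
log_probs = safe_logits - np.log(np.exp(safe_logits).sum(axis=-1, keepdims=True))

# Each row of exp(log_probs) sums to one, even for the huge-logit row.
print(np.exp(log_probs).sum(axis=-1))  # -> [1. 1.]
```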
@@ -91,7 +91,7 @@ def ConvertAllInputsToTensors(func):
   """
   def FuncWrapper(*args):
-    tensors = [tf.convert_to_tensor(a) for a in args]
+    tensors = [tf.convert_to_tensor(value=a) for a in args]
     return func(*tensors)
   return FuncWrapper
@@ -109,6 +109,18 @@ Sergi Caelles Prat, Shan Yang, Sudheendra Vijayanarasimhan, Tina Tian, Tomer
Kaftan, Vighnesh Birodkar, Vishnu Banna, Vivek Rathod, Yanhui Liang, Yiming Shi,
Yixin Shi, Yu-hui Chen, Zhichao Lu.
### MobileDet GPU
We have released SSDLite with MobileDet GPU backbone, which achieves 17% mAP
higher than the MobileNetV2 SSDLite (27.5 mAP vs 23.5 mAP) on a NVIDIA Jetson
Xavier at comparable latency (3.2ms vs 3.3ms).
Along with the model definition, we are also releasing model checkpoints trained
on the COCO dataset.
<b>Thanks to contributors</b>: Yongzhe Wang, Bo Chen, Hanxiao Liu, Le An
(NVIDIA), Yu-Te Cheng (NVIDIA), Oliver Knieps (NVIDIA), and Josh Park (NVIDIA).
### Context R-CNN

We have released [Context R-CNN](https://arxiv.org/abs/1912.03538), a model that
......
@@ -39,6 +39,7 @@ from object_detection.protos import losses_pb2
 from object_detection.protos import model_pb2
 from object_detection.utils import label_map_util
 from object_detection.utils import ops
+from object_detection.utils import spatial_transform_ops as spatial_ops
 from object_detection.utils import tf_version

 ## Feature Extractors for TF
@@ -48,6 +49,7 @@ from object_detection.utils import tf_version
 # pylint: disable=g-import-not-at-top
 if tf_version.is_tf2():
   from object_detection.models import center_net_hourglass_feature_extractor
+  from object_detection.models import center_net_mobilenet_v2_feature_extractor
   from object_detection.models import center_net_resnet_feature_extractor
   from object_detection.models import center_net_resnet_v1_fpn_feature_extractor
   from object_detection.models import faster_rcnn_inception_resnet_v2_keras_feature_extractor as frcnn_inc_res_keras
@@ -140,11 +142,18 @@ if tf_version.is_tf2():
   CENTER_NET_EXTRACTOR_FUNCTION_MAP = {
       'resnet_v2_50': center_net_resnet_feature_extractor.resnet_v2_50,
       'resnet_v2_101': center_net_resnet_feature_extractor.resnet_v2_101,
+      'resnet_v1_18_fpn':
+          center_net_resnet_v1_fpn_feature_extractor.resnet_v1_18_fpn,
+      'resnet_v1_34_fpn':
+          center_net_resnet_v1_fpn_feature_extractor.resnet_v1_34_fpn,
       'resnet_v1_50_fpn':
           center_net_resnet_v1_fpn_feature_extractor.resnet_v1_50_fpn,
       'resnet_v1_101_fpn':
           center_net_resnet_v1_fpn_feature_extractor.resnet_v1_101_fpn,
-      'hourglass_104': center_net_hourglass_feature_extractor.hourglass_104,
+      'hourglass_104':
+          center_net_hourglass_feature_extractor.hourglass_104,
+      'mobilenet_v2':
+          center_net_mobilenet_v2_feature_extractor.mobilenet_v2,
   }

   FEATURE_EXTRACTOR_MAPS = [
@@ -648,8 +657,9 @@ def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
       second_stage_localization_loss_weight)

   crop_and_resize_fn = (
-      ops.matmul_crop_and_resize if frcnn_config.use_matmul_crop_and_resize
-      else ops.native_crop_and_resize)
+      spatial_ops.multilevel_matmul_crop_and_resize
+      if frcnn_config.use_matmul_crop_and_resize
+      else spatial_ops.multilevel_native_crop_and_resize)
   clip_anchors_to_image = (
       frcnn_config.clip_anchors_to_image)
......
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "inference_from_saved_model_tf2_colab.ipynb",
"provenance": [],
"collapsed_sections": []
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "cT5cdSLPX0ui"
},
"source": [
"# Intro to Object Detection Colab\n",
"\n",
"Welcome to the object detection colab! This demo will take you through the steps of running an \"out-of-the-box\" detection model in SavedModel format on a collection of images.\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"colab_type": "text",
"id": "vPs64QA1Zdov"
},
"source": [
"Imports"
]
},
{
"cell_type": "code",
"metadata": {
"id": "OBzb04bdNGM8",
"colab_type": "code",
"colab": {}
},
"source": [
"!pip install -U --pre tensorflow==\"2.2.0\""
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "NgSXyvKSNHIl",
"colab_type": "code",
"colab": {}
},
"source": [
"import os\n",
"import pathlib\n",
"\n",
"# Clone the tensorflow models repository if it doesn't already exist\n",
"if \"models\" in pathlib.Path.cwd().parts:\n",
" while \"models\" in pathlib.Path.cwd().parts:\n",
" os.chdir('..')\n",
"elif not pathlib.Path('models').exists():\n",
" !git clone --depth 1 https://github.com/tensorflow/models"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "rhpPgW7TNLs6",
"colab_type": "code",
"colab": {}
},
"source": [
"# Install the Object Detection API\n",
"%%bash\n",
"cd models/research/\n",
"protoc object_detection/protos/*.proto --python_out=.\n",
"cp object_detection/packages/tf2/setup.py .\n",
"python -m pip install ."
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab_type": "code",
"id": "yn5_uV1HLvaz",
"colab": {}
},
"source": [
"import io\n",
"import os\n",
"import scipy.misc\n",
"import numpy as np\n",
"import six\n",
"import time\n",
"\n",
"from six import BytesIO\n",
"\n",
"import matplotlib\n",
"import matplotlib.pyplot as plt\n",
"from PIL import Image, ImageDraw, ImageFont\n",
"\n",
"import tensorflow as tf\n",
"from object_detection.utils import visualization_utils as viz_utils\n",
"\n",
"%matplotlib inline"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab_type": "code",
"id": "-y9R0Xllefec",
"colab": {}
},
"source": [
"def load_image_into_numpy_array(path):\n",
" \"\"\"Load an image from file into a numpy array.\n",
"\n",
" Puts image into numpy array to feed into tensorflow graph.\n",
" Note that by convention we put it into a numpy array with shape\n",
" (height, width, channels), where channels=3 for RGB.\n",
"\n",
" Args:\n",
" path: a file path (this can be local or on colossus)\n",
"\n",
" Returns:\n",
" uint8 numpy array with shape (img_height, img_width, 3)\n",
" \"\"\"\n",
" img_data = tf.io.gfile.GFile(path, 'rb').read()\n",
" image = Image.open(BytesIO(img_data))\n",
" (im_width, im_height) = image.size\n",
" return np.array(image.getdata()).reshape(\n",
" (im_height, im_width, 3)).astype(np.uint8)\n",
"\n",
"# Load the COCO Label Map\n",
"category_index = {\n",
" 1: {'id': 1, 'name': 'person'},\n",
" 2: {'id': 2, 'name': 'bicycle'},\n",
" 3: {'id': 3, 'name': 'car'},\n",
" 4: {'id': 4, 'name': 'motorcycle'},\n",
" 5: {'id': 5, 'name': 'airplane'},\n",
" 6: {'id': 6, 'name': 'bus'},\n",
" 7: {'id': 7, 'name': 'train'},\n",
" 8: {'id': 8, 'name': 'truck'},\n",
" 9: {'id': 9, 'name': 'boat'},\n",
" 10: {'id': 10, 'name': 'traffic light'},\n",
" 11: {'id': 11, 'name': 'fire hydrant'},\n",
" 13: {'id': 13, 'name': 'stop sign'},\n",
" 14: {'id': 14, 'name': 'parking meter'},\n",
" 15: {'id': 15, 'name': 'bench'},\n",
" 16: {'id': 16, 'name': 'bird'},\n",
" 17: {'id': 17, 'name': 'cat'},\n",
" 18: {'id': 18, 'name': 'dog'},\n",
" 19: {'id': 19, 'name': 'horse'},\n",
" 20: {'id': 20, 'name': 'sheep'},\n",
" 21: {'id': 21, 'name': 'cow'},\n",
" 22: {'id': 22, 'name': 'elephant'},\n",
" 23: {'id': 23, 'name': 'bear'},\n",
" 24: {'id': 24, 'name': 'zebra'},\n",
" 25: {'id': 25, 'name': 'giraffe'},\n",
" 27: {'id': 27, 'name': 'backpack'},\n",
" 28: {'id': 28, 'name': 'umbrella'},\n",
" 31: {'id': 31, 'name': 'handbag'},\n",
" 32: {'id': 32, 'name': 'tie'},\n",
" 33: {'id': 33, 'name': 'suitcase'},\n",
" 34: {'id': 34, 'name': 'frisbee'},\n",
" 35: {'id': 35, 'name': 'skis'},\n",
" 36: {'id': 36, 'name': 'snowboard'},\n",
" 37: {'id': 37, 'name': 'sports ball'},\n",
" 38: {'id': 38, 'name': 'kite'},\n",
" 39: {'id': 39, 'name': 'baseball bat'},\n",
" 40: {'id': 40, 'name': 'baseball glove'},\n",
" 41: {'id': 41, 'name': 'skateboard'},\n",
" 42: {'id': 42, 'name': 'surfboard'},\n",
" 43: {'id': 43, 'name': 'tennis racket'},\n",
" 44: {'id': 44, 'name': 'bottle'},\n",
" 46: {'id': 46, 'name': 'wine glass'},\n",
" 47: {'id': 47, 'name': 'cup'},\n",
" 48: {'id': 48, 'name': 'fork'},\n",
" 49: {'id': 49, 'name': 'knife'},\n",
" 50: {'id': 50, 'name': 'spoon'},\n",
" 51: {'id': 51, 'name': 'bowl'},\n",
" 52: {'id': 52, 'name': 'banana'},\n",
" 53: {'id': 53, 'name': 'apple'},\n",
" 54: {'id': 54, 'name': 'sandwich'},\n",
" 55: {'id': 55, 'name': 'orange'},\n",
" 56: {'id': 56, 'name': 'broccoli'},\n",
" 57: {'id': 57, 'name': 'carrot'},\n",
" 58: {'id': 58, 'name': 'hot dog'},\n",
" 59: {'id': 59, 'name': 'pizza'},\n",
" 60: {'id': 60, 'name': 'donut'},\n",
" 61: {'id': 61, 'name': 'cake'},\n",
" 62: {'id': 62, 'name': 'chair'},\n",
" 63: {'id': 63, 'name': 'couch'},\n",
" 64: {'id': 64, 'name': 'potted plant'},\n",
" 65: {'id': 65, 'name': 'bed'},\n",
" 67: {'id': 67, 'name': 'dining table'},\n",
" 70: {'id': 70, 'name': 'toilet'},\n",
" 72: {'id': 72, 'name': 'tv'},\n",
" 73: {'id': 73, 'name': 'laptop'},\n",
" 74: {'id': 74, 'name': 'mouse'},\n",
" 75: {'id': 75, 'name': 'remote'},\n",
" 76: {'id': 76, 'name': 'keyboard'},\n",
" 77: {'id': 77, 'name': 'cell phone'},\n",
" 78: {'id': 78, 'name': 'microwave'},\n",
" 79: {'id': 79, 'name': 'oven'},\n",
" 80: {'id': 80, 'name': 'toaster'},\n",
" 81: {'id': 81, 'name': 'sink'},\n",
" 82: {'id': 82, 'name': 'refrigerator'},\n",
" 84: {'id': 84, 'name': 'book'},\n",
" 85: {'id': 85, 'name': 'clock'},\n",
" 86: {'id': 86, 'name': 'vase'},\n",
" 87: {'id': 87, 'name': 'scissors'},\n",
" 88: {'id': 88, 'name': 'teddy bear'},\n",
" 89: {'id': 89, 'name': 'hair drier'},\n",
" 90: {'id': 90, 'name': 'toothbrush'},\n",
"}"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "QwcBC2TlPSwg",
"colab_type": "code",
"colab": {}
},
"source": [
"# Download the saved model and put it into models/research/object_detection/test_data/\n",
"!wget http://download.tensorflow.org/models/object_detection/tf2/20200711/efficientdet_d5_coco17_tpu-32.tar.gz\n",
"!tar -xf efficientdet_d5_coco17_tpu-32.tar.gz\n",
"!mv efficientdet_d5_coco17_tpu-32/ models/research/object_detection/test_data/"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab_type": "code",
"id": "Z2p-PmKLYCVU",
"colab": {}
},
"source": [
"start_time = time.time()\n",
"tf.keras.backend.clear_session()\n",
"detect_fn = tf.saved_model.load('models/research/object_detection/test_data/efficientdet_d5_coco17_tpu-32/saved_model/')\n",
"end_time = time.time()\n",
"elapsed_time = end_time - start_time\n",
"print('Elapsed time: ' + str(elapsed_time) + 's')"
],
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab_type": "code",
"id": "vukkhd5-9NSL",
"colab": {}
},
"source": [
"import time\n",
"\n",
"image_dir = 'models/research/object_detection/test_images'\n",
"\n",
"elapsed = []\n",
"for i in range(2):\n",
" image_path = os.path.join(image_dir, 'image' + str(i + 1) + '.jpg')\n",
" image_np = load_image_into_numpy_array(image_path)\n",
" input_tensor = np.expand_dims(image_np, 0)\n",
" start_time = time.time()\n",
" detections = detect_fn(input_tensor)\n",
" end_time = time.time()\n",
" elapsed.append(end_time - start_time)\n",
"\n",
" plt.rcParams['figure.figsize'] = [42, 21]\n",
" label_id_offset = 1\n",
" image_np_with_detections = image_np.copy()\n",
" viz_utils.visualize_boxes_and_labels_on_image_array(\n",
" image_np_with_detections,\n",
" detections['detection_boxes'][0].numpy(),\n",
" detections['detection_classes'][0].numpy().astype(np.int32),\n",
" detections['detection_scores'][0].numpy(),\n",
" category_index,\n",
" use_normalized_coordinates=True,\n",
" max_boxes_to_draw=200,\n",
" min_score_thresh=.40,\n",
" agnostic_mode=False)\n",
" plt.subplot(2, 1, i+1)\n",
" plt.imshow(image_np_with_detections)\n",
"\n",
"mean_elapsed = sum(elapsed) / float(len(elapsed))\n",
"print('Elapsed time: ' + str(mean_elapsed) + ' second per image')"
],
"execution_count": null,
"outputs": []
}
]
}
\ No newline at end of file
@@ -1600,6 +1600,17 @@ class CenterNetKeypointTargetAssigner(object):
     return (batch_indices, batch_offsets, batch_weights)

+def _resize_masks(masks, height, width, method):
+  # Resize segmentation masks to conform to output dimensions. Use TF2
+  # image resize because TF1's version is buggy:
+  # https://yaqs.corp.google.com/eng/q/4970450458378240
+  masks = tf2.image.resize(
+      masks[:, :, :, tf.newaxis],
+      size=(height, width),
+      method=method)
+  return masks[:, :, :, 0]
 class CenterNetMaskTargetAssigner(object):
   """Wrapper to compute targets for segmentation masks."""
@@ -1641,13 +1652,9 @@ class CenterNetMaskTargetAssigner(object):
     segmentation_targets_list = []
     for gt_masks, gt_classes in zip(gt_masks_list, gt_classes_list):
-      # Resize segmentation masks to conform to output dimensions. Use TF2
-      # image resize because TF1's version is buggy:
-      # https://yaqs.corp.google.com/eng/q/4970450458378240
-      gt_masks = tf2.image.resize(
-          gt_masks[:, :, :, tf.newaxis],
-          size=(output_height, output_width),
-          method=mask_resize_method)
+      gt_masks = _resize_masks(gt_masks, output_height, output_width,
+                               mask_resize_method)
+      gt_masks = gt_masks[:, :, :, tf.newaxis]
       gt_classes_reshaped = tf.reshape(gt_classes, [-1, 1, 1, num_classes])
       # Shape: [h, w, num_classes].
       segmentations_for_image = tf.reduce_max(
@@ -1771,3 +1778,120 @@ class CenterNetDensePoseTargetAssigner(object):
     batch_surface_coords = tf.concat(batch_surface_coords, axis=0)
     batch_weights = tf.concat(batch_weights, axis=0)
     return batch_indices, batch_part_ids, batch_surface_coords, batch_weights
def filter_mask_overlap_min_area(masks):
"""If a pixel belongs to 2 instances, remove it from the larger instance."""
num_instances = tf.shape(masks)[0]
def _filter_min_area():
"""Helper function to filter non empty masks."""
areas = tf.reduce_sum(masks, axis=[1, 2], keepdims=True)
per_pixel_area = masks * areas
# Make sure background is ignored in argmin.
per_pixel_area = (masks * per_pixel_area +
(1 - masks) * per_pixel_area.dtype.max)
min_index = tf.cast(tf.argmin(per_pixel_area, axis=0), tf.int32)
filtered_masks = (
tf.range(num_instances)[:, tf.newaxis, tf.newaxis]
==
min_index[tf.newaxis, :, :]
)
return tf.cast(filtered_masks, tf.float32) * masks
return tf.cond(num_instances > 0, _filter_min_area,
lambda: masks)
def filter_mask_overlap(masks, method='min_area'):
if method == 'min_area':
return filter_mask_overlap_min_area(masks)
else:
raise ValueError('Unknown mask overlap filter type - {}'.format(method))
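`filter_mask_overlap_min_area` resolves contested pixels by handing them to the instance with the smallest area, which keeps small objects from being swallowed by larger overlapping ones. A NumPy sketch of the same rule on a toy 4x4 example (illustrative only, mirroring the unit test further down):

```python
import numpy as np

# Two overlapping instance masks on a 4x4 grid; instance 0 is smaller.
masks = np.zeros((2, 4, 4), dtype=np.float32)
masks[0, :2, :2] = 1.0   # area 4
masks[1, :3, :3] = 1.0   # area 9, overlaps the top-left 2x2 block

areas = masks.sum(axis=(1, 2), keepdims=True)
per_pixel_area = masks * areas
# Ignore background by pushing empty pixels to +inf before the argmin.
per_pixel_area[masks == 0] = np.inf
owner = per_pixel_area.argmin(axis=0)

filtered = (np.arange(masks.shape[0])[:, None, None] == owner) * masks
print(filtered[1])  # the overlapping 2x2 block now belongs only to instance 0
```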
class CenterNetCornerOffsetTargetAssigner(object):
"""Wrapper to compute corner offsets for boxes using masks."""
def __init__(self, stride, overlap_resolution='min_area'):
"""Initializes the corner offset target assigner.
Args:
stride: int, the stride of the network in output pixels.
overlap_resolution: string, specifies how we handle overlapping
instance masks. Currently only 'min_area' is supported which assigns
overlapping pixels to the instance with the minimum area.
"""
self._stride = stride
self._overlap_resolution = overlap_resolution
def assign_corner_offset_targets(
self, gt_boxes_list, gt_masks_list):
"""Computes the corner offset targets and foreground map.
For each pixel that is part of any object's foreground, this function
computes the relative offsets to the top-left and bottom-right corners of
that instance's bounding box. It also returns a foreground map to indicate
which pixels contain valid corner offsets.
Args:
gt_boxes_list: A list of float tensors with shape [num_boxes, 4]
representing the groundtruth detection bounding boxes for each sample in
the batch. The coordinates are expected in normalized coordinates.
gt_masks_list: A list of float tensors with shape [num_boxes,
input_height, input_width] with values in {0, 1} representing instance
masks for each object.
Returns:
corner_offsets: A float tensor of shape [batch_size, height, width, 4]
containing, in order, the (y, x) offsets to the top left corner and
the (y, x) offsets to the bottom right corner for each foreground pixel.
foreground: A float tensor of shape [batch_size, height, width] in which
each pixel is set to 1 if it is a part of any instance's foreground
(and thus contains valid corner offsets) and 0 otherwise.
"""
_, input_height, input_width = (
shape_utils.combined_static_and_dynamic_shape(gt_masks_list[0]))
output_height = input_height // self._stride
output_width = input_width // self._stride
y_grid, x_grid = tf.meshgrid(
tf.range(output_height), tf.range(output_width),
indexing='ij')
y_grid, x_grid = tf.cast(y_grid, tf.float32), tf.cast(x_grid, tf.float32)
corner_targets = []
foreground_targets = []
for gt_masks, gt_boxes in zip(gt_masks_list, gt_boxes_list):
gt_masks = _resize_masks(gt_masks, output_height, output_width,
method=ResizeMethod.NEAREST_NEIGHBOR)
gt_masks = filter_mask_overlap(gt_masks, self._overlap_resolution)
ymin, xmin, ymax, xmax = tf.unstack(gt_boxes, axis=1)
ymin, ymax = ymin * output_height, ymax * output_height
xmin, xmax = xmin * output_width, xmax * output_width
top_y = ymin[:, tf.newaxis, tf.newaxis] - y_grid[tf.newaxis]
left_x = xmin[:, tf.newaxis, tf.newaxis] - x_grid[tf.newaxis]
bottom_y = ymax[:, tf.newaxis, tf.newaxis] - y_grid[tf.newaxis]
right_x = xmax[:, tf.newaxis, tf.newaxis] - x_grid[tf.newaxis]
foreground_target = tf.cast(tf.reduce_sum(gt_masks, axis=0) > 0.5,
tf.float32)
foreground_targets.append(foreground_target)
corner_target = tf.stack([
tf.reduce_sum(top_y * gt_masks, axis=0),
tf.reduce_sum(left_x * gt_masks, axis=0),
tf.reduce_sum(bottom_y * gt_masks, axis=0),
tf.reduce_sum(right_x * gt_masks, axis=0),
], axis=2)
corner_targets.append(corner_target)
return (tf.stack(corner_targets, axis=0),
tf.stack(foreground_targets, axis=0))
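In other words, for every pixel covered by an instance mask the assigner records the (y, x) distances to that instance's top-left and bottom-right box corners, and the foreground map marks where those offsets are valid. A minimal NumPy sketch of that computation for one box and one mask at stride 1 (hypothetical values, matching the single-object unit test below):

```python
import numpy as np

out_h, out_w = 4, 4
y_grid, x_grid = np.meshgrid(np.arange(out_h, dtype=np.float32),
                             np.arange(out_w, dtype=np.float32), indexing='ij')

# One box covering the whole image (normalized coords), scaled to output pixels.
ymin, xmin, ymax, xmax = 0.0 * out_h, 0.0 * out_w, 1.0 * out_h, 1.0 * out_w

# One instance mask occupying the central 2x2 block.
mask = np.zeros((out_h, out_w), dtype=np.float32)
mask[1:3, 1:3] = 1.0

corner_offsets = np.stack([
    (ymin - y_grid) * mask,   # offset to the top edge
    (xmin - x_grid) * mask,   # offset to the left edge
    (ymax - y_grid) * mask,   # offset to the bottom edge
    (xmax - x_grid) * mask,   # offset to the right edge
], axis=-1)

print(corner_offsets[1, 1])  # [-1. -1.  3.  3.] for the foreground pixel at (1, 1)
```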
@@ -1999,6 +1999,181 @@ class CenterNetDensePoseTargetAssignerTest(test_case.TestCase):
     self.assertAllClose(expected_batch_weights, batch_weights)
class CornerOffsetTargetAssignerTest(test_case.TestCase):
def test_filter_overlap_min_area_empty(self):
"""Test that empty masks work on CPU."""
def graph_fn(masks):
return targetassigner.filter_mask_overlap_min_area(masks)
masks = self.execute_cpu(graph_fn, [np.zeros((0, 5, 5), dtype=np.float32)])
self.assertEqual(masks.shape, (0, 5, 5))
def test_filter_overlap_min_area(self):
"""Test the object with min. area is selected instead of overlap."""
def graph_fn(masks):
return targetassigner.filter_mask_overlap_min_area(masks)
masks = np.zeros((3, 4, 4), dtype=np.float32)
masks[0, :2, :2] = 1.0
masks[1, :3, :3] = 1.0
masks[2, 3, 3] = 1.0
masks = self.execute(graph_fn, [masks])
self.assertAllClose(masks[0],
[[1, 1, 0, 0],
[1, 1, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]])
self.assertAllClose(masks[1],
[[0, 0, 1, 0],
[0, 0, 1, 0],
[1, 1, 1, 0],
[0, 0, 0, 0]])
self.assertAllClose(masks[2],
[[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 1]])
def test_assign_corner_offset_single_object(self):
"""Test that corner offsets are correct with a single object."""
assigner = targetassigner.CenterNetCornerOffsetTargetAssigner(stride=1)
def graph_fn():
boxes = [
tf.constant([[0., 0., 1., 1.]])
]
mask = np.zeros((1, 4, 4), dtype=np.float32)
mask[0, 1:3, 1:3] = 1.0
masks = [tf.constant(mask)]
return assigner.assign_corner_offset_targets(boxes, masks)
corner_offsets, foreground = self.execute(graph_fn, [])
self.assertAllClose(foreground[0],
[[0, 0, 0, 0],
[0, 1, 1, 0],
[0, 1, 1, 0],
[0, 0, 0, 0]])
self.assertAllClose(corner_offsets[0, :, :, 0],
[[0, 0, 0, 0],
[0, -1, -1, 0],
[0, -2, -2, 0],
[0, 0, 0, 0]])
self.assertAllClose(corner_offsets[0, :, :, 1],
[[0, 0, 0, 0],
[0, -1, -2, 0],
[0, -1, -2, 0],
[0, 0, 0, 0]])
self.assertAllClose(corner_offsets[0, :, :, 2],
[[0, 0, 0, 0],
[0, 3, 3, 0],
[0, 2, 2, 0],
[0, 0, 0, 0]])
self.assertAllClose(corner_offsets[0, :, :, 3],
[[0, 0, 0, 0],
[0, 3, 2, 0],
[0, 3, 2, 0],
[0, 0, 0, 0]])
def test_assign_corner_offset_multiple_objects(self):
"""Test corner offsets are correct with multiple objects."""
assigner = targetassigner.CenterNetCornerOffsetTargetAssigner(stride=1)
def graph_fn():
boxes = [
tf.constant([[0., 0., 1., 1.], [0., 0., 0., 0.]]),
tf.constant([[0., 0., .25, .25], [.25, .25, 1., 1.]])
]
mask1 = np.zeros((2, 4, 4), dtype=np.float32)
mask1[0, 0, 0] = 1.0
mask1[0, 3, 3] = 1.0
mask2 = np.zeros((2, 4, 4), dtype=np.float32)
mask2[0, :2, :2] = 1.0
mask2[1, 1:, 1:] = 1.0
masks = [tf.constant(mask1), tf.constant(mask2)]
return assigner.assign_corner_offset_targets(boxes, masks)
corner_offsets, foreground = self.execute(graph_fn, [])
self.assertEqual(corner_offsets.shape, (2, 4, 4, 4))
self.assertEqual(foreground.shape, (2, 4, 4))
self.assertAllClose(foreground[0],
[[1, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 1]])
self.assertAllClose(corner_offsets[0, :, :, 0],
[[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, -3]])
self.assertAllClose(corner_offsets[0, :, :, 1],
[[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, -3]])
self.assertAllClose(corner_offsets[0, :, :, 2],
[[4, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 1]])
self.assertAllClose(corner_offsets[0, :, :, 3],
[[4, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 1]])
self.assertAllClose(foreground[1],
[[1, 1, 0, 0],
[1, 1, 1, 1],
[0, 1, 1, 1],
[0, 1, 1, 1]])
self.assertAllClose(corner_offsets[1, :, :, 0],
[[0, 0, 0, 0],
[-1, -1, 0, 0],
[0, -1, -1, -1],
[0, -2, -2, -2]])
self.assertAllClose(corner_offsets[1, :, :, 1],
[[0, -1, 0, 0],
[0, -1, -1, -2],
[0, 0, -1, -2],
[0, 0, -1, -2]])
self.assertAllClose(corner_offsets[1, :, :, 2],
[[1, 1, 0, 0],
[0, 0, 3, 3],
[0, 2, 2, 2],
[0, 1, 1, 1]])
self.assertAllClose(corner_offsets[1, :, :, 3],
[[1, 0, 0, 0],
[1, 0, 2, 1],
[0, 3, 2, 1],
[0, 3, 2, 1]])
def test_assign_corner_offsets_no_objects(self):
"""Test assignment works with empty input on cpu."""
assigner = targetassigner.CenterNetCornerOffsetTargetAssigner(stride=1)
def graph_fn():
boxes = [
tf.zeros((0, 4), dtype=tf.float32)
]
masks = [tf.zeros((0, 5, 5), dtype=tf.float32)]
return assigner.assign_corner_offset_targets(boxes, masks)
corner_offsets, foreground = self.execute_cpu(graph_fn, [])
self.assertAllClose(corner_offsets, np.zeros((1, 5, 5, 4)))
self.assertAllClose(foreground, np.zeros((1, 5, 5)))
 if __name__ == '__main__':
   tf.enable_v2_behavior()
   tf.test.main()
@@ -40,6 +40,18 @@ Sergi Caelles Prat, Shan Yang, Sudheendra Vijayanarasimhan, Tina Tian, Tomer
Kaftan, Vighnesh Birodkar, Vishnu Banna, Vivek Rathod, Yanhui Liang, Yiming Shi,
Yixin Shi, Yu-hui Chen, Zhichao Lu.
### June 26th, 2020
We have released SSDLite with MobileDet GPU backbone, which achieves 17% mAP
higher than the MobileNetV2 SSDLite (27.5 mAP vs 23.5 mAP) on a NVIDIA Jetson
Xavier at comparable latency (3.2ms vs 3.3ms).
Along with the model definition, we are also releasing model checkpoints trained
on the COCO dataset.
<b>Thanks to contributors</b>: Yongzhe Wang, Bo Chen, Hanxiao Liu, Le An
(NVIDIA), Yu-Te Cheng (NVIDIA), Oliver Knieps (NVIDIA), and Josh Park (NVIDIA).
### June 17th, 2020

We have released [Context R-CNN](https://arxiv.org/abs/1912.03538), a model that
......
@@ -1094,8 +1094,12 @@ def get_reduce_to_frame_fn(input_reader_config, is_training):
       num_frames = tf.cast(
           tf.shape(tensor_dict[fields.InputDataFields.source_id])[0],
           dtype=tf.int32)
-      frame_index = tf.random.uniform((), minval=0, maxval=num_frames,
-                                      dtype=tf.int32)
+      if input_reader_config.frame_index == -1:
+        frame_index = tf.random.uniform((), minval=0, maxval=num_frames,
+                                        dtype=tf.int32)
+      else:
+        frame_index = tf.constant(input_reader_config.frame_index,
+                                  dtype=tf.int32)
       out_tensor_dict = {}
       for key in tensor_dict:
         if key in fields.SEQUENCE_FIELDS:
......
@@ -61,7 +61,7 @@ def _get_configs_for_model(model_name):
       configs, kwargs_dict=override_dict)

-def _get_configs_for_model_sequence_example(model_name):
+def _get_configs_for_model_sequence_example(model_name, frame_index=-1):
   """Returns configurations for model."""
   fname = os.path.join(tf.resource_loader.get_data_files_path(),
                        'test_data/' + model_name + '.config')
@@ -74,7 +74,8 @@ def _get_configs_for_model_sequence_example(model_name):
   override_dict = {
       'train_input_path': data_path,
       'eval_input_path': data_path,
-      'label_map_path': label_map_path
+      'label_map_path': label_map_path,
+      'frame_index': frame_index
   }
   return config_util.merge_external_params_with_configs(
       configs, kwargs_dict=override_dict)
@@ -312,6 +313,46 @@ class InputFnTest(test_case.TestCase, parameterized.TestCase):
         tf.float32,
         labels[fields.InputDataFields.groundtruth_weights].dtype)
def test_context_rcnn_resnet50_train_input_with_sequence_example_frame_index(
self, train_batch_size=8):
"""Tests the training input function for FasterRcnnResnet50."""
configs = _get_configs_for_model_sequence_example(
'context_rcnn_camera_trap', frame_index=2)
model_config = configs['model']
train_config = configs['train_config']
train_config.batch_size = train_batch_size
train_input_fn = inputs.create_train_input_fn(
train_config, configs['train_input_config'], model_config)
features, labels = _make_initializable_iterator(train_input_fn()).get_next()
self.assertAllEqual([train_batch_size, 640, 640, 3],
features[fields.InputDataFields.image].shape.as_list())
self.assertEqual(tf.float32, features[fields.InputDataFields.image].dtype)
self.assertAllEqual([train_batch_size],
features[inputs.HASH_KEY].shape.as_list())
self.assertEqual(tf.int32, features[inputs.HASH_KEY].dtype)
self.assertAllEqual(
[train_batch_size, 100, 4],
labels[fields.InputDataFields.groundtruth_boxes].shape.as_list())
self.assertEqual(tf.float32,
labels[fields.InputDataFields.groundtruth_boxes].dtype)
self.assertAllEqual(
[train_batch_size, 100, model_config.faster_rcnn.num_classes],
labels[fields.InputDataFields.groundtruth_classes].shape.as_list())
self.assertEqual(tf.float32,
labels[fields.InputDataFields.groundtruth_classes].dtype)
self.assertAllEqual(
[train_batch_size, 100],
labels[fields.InputDataFields.groundtruth_weights].shape.as_list())
self.assertEqual(tf.float32,
labels[fields.InputDataFields.groundtruth_weights].dtype)
self.assertAllEqual(
[train_batch_size, 100, model_config.faster_rcnn.num_classes],
labels[fields.InputDataFields.groundtruth_confidences].shape.as_list())
self.assertEqual(
tf.float32,
labels[fields.InputDataFields.groundtruth_confidences].dtype)
   def test_ssd_inceptionV2_train_input(self):
     """Tests the training input function for SSDInceptionV2."""
     configs = _get_configs_for_model('ssd_inception_v2_pets')
......
@@ -332,7 +332,7 @@ class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
     """
     box_features = self._crop_and_resize_fn(
-        features_to_crop, proposal_boxes_normalized,
+        [features_to_crop], proposal_boxes_normalized, None,
         [self._initial_crop_size, self._initial_crop_size])
     attention_features = self._context_feature_extract_fn(
......
@@ -41,7 +41,7 @@ from object_detection.meta_architectures import faster_rcnn_meta_arch
 from object_detection.protos import box_predictor_pb2
 from object_detection.protos import hyperparams_pb2
 from object_detection.protos import post_processing_pb2
-from object_detection.utils import ops
+from object_detection.utils import spatial_transform_ops as spatial_ops
 from object_detection.utils import test_case
 from object_detection.utils import test_utils
 from object_detection.utils import tf_version
@@ -362,8 +362,9 @@ class ContextRCNNMetaArchTest(test_case.TestCase, parameterized.TestCase):
         max_negatives_per_positive=None)

     crop_and_resize_fn = (
-        ops.matmul_crop_and_resize
-        if use_matmul_crop_and_resize else ops.native_crop_and_resize)
+        spatial_ops.multilevel_matmul_crop_and_resize
+        if use_matmul_crop_and_resize
+        else spatial_ops.multilevel_native_crop_and_resize)
     common_kwargs = {
         'is_training':
             is_training,
......
@@ -1948,9 +1948,16 @@ class FasterRCNNMetaArch(model.DetectionModel):
     Returns:
       A float32 tensor with shape [K, new_height, new_width, depth].
     """
+    features_to_crop = [features_to_crop]
+    num_levels = len(features_to_crop)
+    box_levels = None
+    if num_levels != 1:
+      # If there are multiple levels to select, get the box levels
+      box_levels = ops.fpn_feature_levels(num_levels, num_levels - 1,
+                                          1.0/224, proposal_boxes_normalized)
     cropped_regions = self._flatten_first_two_dimensions(
         self._crop_and_resize_fn(
-            features_to_crop, proposal_boxes_normalized,
+            features_to_crop, proposal_boxes_normalized, box_levels,
             [self._initial_crop_size, self._initial_crop_size]))
     return self._maxpool_layer(cropped_regions)
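The new multilevel path must decide which feature level each proposal is pooled from; with a single level the `box_levels` argument simply stays `None`. Purely for orientation, a hedged sketch of the standard FPN level-assignment heuristic that `ops.fpn_feature_levels` follows in spirit (the library's exact clipping and scale handling may differ, so treat this as an illustration, not its implementation):

```python
import numpy as np

def assign_fpn_level(box_height, box_width, min_level=2, max_level=5,
                     canonical_scale=224.0, canonical_level=4):
  """FPN paper heuristic: k = floor(k0 + log2(sqrt(w * h) / 224))."""
  scale = np.sqrt(box_height * box_width)
  level = np.floor(canonical_level + np.log2(scale / canonical_scale))
  return int(np.clip(level, min_level, max_level))

print(assign_fpn_level(224.0, 224.0))  # -> 4, the canonical box size
print(assign_fpn_level(56.0, 56.0))    # -> 2, small boxes pool from finer levels
```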
@@ -2517,8 +2524,8 @@ class FasterRCNNMetaArch(model.DetectionModel):
         image_shape[1], image_shape[2], check_range=False).get()
     flat_cropped_gt_mask = self._crop_and_resize_fn(
-        tf.expand_dims(flat_gt_masks, -1),
-        tf.expand_dims(flat_normalized_proposals, axis=1),
+        [tf.expand_dims(flat_gt_masks, -1)],
+        tf.expand_dims(flat_normalized_proposals, axis=1), None,
         [mask_height, mask_width])
     # Without stopping gradients into cropped groundtruth masks the
     # performance with 100-padded groundtruth masks when batch size > 1 is
......
@@ -34,7 +34,7 @@ from object_detection.meta_architectures import faster_rcnn_meta_arch
 from object_detection.protos import box_predictor_pb2
 from object_detection.protos import hyperparams_pb2
 from object_detection.protos import post_processing_pb2
-from object_detection.utils import ops
+from object_detection.utils import spatial_transform_ops as spatial_ops
 from object_detection.utils import test_case
 from object_detection.utils import test_utils
 from object_detection.utils import tf_version
@@ -377,8 +377,9 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
         max_negatives_per_positive=None)

     crop_and_resize_fn = (
-        ops.matmul_crop_and_resize
-        if use_matmul_crop_and_resize else ops.native_crop_and_resize)
+        spatial_ops.multilevel_matmul_crop_and_resize
+        if use_matmul_crop_and_resize
+        else spatial_ops.multilevel_native_crop_and_resize)
     common_kwargs = {
         'is_training':
             is_training,
......
@@ -414,7 +414,7 @@ def train_loop(
     train_steps=None,
     use_tpu=False,
     save_final_config=False,
-    checkpoint_every_n=1000,
+    checkpoint_every_n=5000,
     checkpoint_max_to_keep=7,
     **kwargs):
   """Trains a model using eager + functions.
@@ -855,6 +855,7 @@ def eval_continuously(
     checkpoint_dir=None,
     wait_interval=180,
     timeout=3600,
+    eval_index=None,
     **kwargs):
   """Run continuous evaluation of a detection model eagerly.
@@ -884,6 +885,8 @@ def eval_continuously(
       new checkpoint.
     timeout: The maximum number of seconds to wait for a checkpoint. Execution
       will terminate if no new checkpoints are found after these many seconds.
+    eval_index: int, optional. If given, only evaluate the dataset at the given
+      index.
     **kwargs: Additional keyword arguments for configuration override.
   """
@@ -937,6 +940,11 @@ def eval_continuously(
         model=detection_model)
     eval_inputs.append((eval_input_config.name, next_eval_input))

+  if eval_index is not None:
+    eval_inputs = [eval_inputs[eval_index]]
+    tf.logging.info('eval_index selected - {}'.format(
+        eval_inputs))
+
   global_step = tf.compat.v2.Variable(
       0, trainable=False, dtype=tf.compat.v2.dtypes.int64)