Unverified commit ed4e22b8 authored by pkulzc, committed by GitHub

Merge pull request #3973 from pkulzc/master

Object detection internal changes
parents cac90a0e 13b89b93
......@@ -49,12 +49,12 @@ def build(argscope_fn, box_predictor_config, is_training, num_classes):
if box_predictor_oneof == 'convolutional_box_predictor':
conv_box_predictor = box_predictor_config.convolutional_box_predictor
conv_hyperparams = argscope_fn(conv_box_predictor.conv_hyperparams,
is_training)
conv_hyperparams_fn = argscope_fn(conv_box_predictor.conv_hyperparams,
is_training)
box_predictor_object = box_predictor.ConvolutionalBoxPredictor(
is_training=is_training,
num_classes=num_classes,
conv_hyperparams=conv_hyperparams,
conv_hyperparams_fn=conv_hyperparams_fn,
min_depth=conv_box_predictor.min_depth,
max_depth=conv_box_predictor.max_depth,
num_layers_before_predictor=(conv_box_predictor.
......@@ -73,12 +73,12 @@ def build(argscope_fn, box_predictor_config, is_training, num_classes):
if box_predictor_oneof == 'weight_shared_convolutional_box_predictor':
conv_box_predictor = (box_predictor_config.
weight_shared_convolutional_box_predictor)
conv_hyperparams = argscope_fn(conv_box_predictor.conv_hyperparams,
is_training)
conv_hyperparams_fn = argscope_fn(conv_box_predictor.conv_hyperparams,
is_training)
box_predictor_object = box_predictor.WeightSharedConvolutionalBoxPredictor(
is_training=is_training,
num_classes=num_classes,
conv_hyperparams=conv_hyperparams,
conv_hyperparams_fn=conv_hyperparams_fn,
depth=conv_box_predictor.depth,
num_layers_before_predictor=(conv_box_predictor.
num_layers_before_predictor),
......@@ -90,20 +90,20 @@ def build(argscope_fn, box_predictor_config, is_training, num_classes):
if box_predictor_oneof == 'mask_rcnn_box_predictor':
mask_rcnn_box_predictor = box_predictor_config.mask_rcnn_box_predictor
fc_hyperparams = argscope_fn(mask_rcnn_box_predictor.fc_hyperparams,
is_training)
conv_hyperparams = None
fc_hyperparams_fn = argscope_fn(mask_rcnn_box_predictor.fc_hyperparams,
is_training)
conv_hyperparams_fn = None
if mask_rcnn_box_predictor.HasField('conv_hyperparams'):
conv_hyperparams = argscope_fn(mask_rcnn_box_predictor.conv_hyperparams,
is_training)
conv_hyperparams_fn = argscope_fn(
mask_rcnn_box_predictor.conv_hyperparams, is_training)
box_predictor_object = box_predictor.MaskRCNNBoxPredictor(
is_training=is_training,
num_classes=num_classes,
fc_hyperparams=fc_hyperparams,
fc_hyperparams_fn=fc_hyperparams_fn,
use_dropout=mask_rcnn_box_predictor.use_dropout,
dropout_keep_prob=mask_rcnn_box_predictor.dropout_keep_probability,
box_code_size=mask_rcnn_box_predictor.box_code_size,
conv_hyperparams=conv_hyperparams,
conv_hyperparams_fn=conv_hyperparams_fn,
predict_instance_masks=mask_rcnn_box_predictor.predict_instance_masks,
mask_height=mask_rcnn_box_predictor.mask_height,
mask_width=mask_rcnn_box_predictor.mask_width,
......@@ -111,17 +111,19 @@ def build(argscope_fn, box_predictor_config, is_training, num_classes):
mask_rcnn_box_predictor.mask_prediction_num_conv_layers),
mask_prediction_conv_depth=(
mask_rcnn_box_predictor.mask_prediction_conv_depth),
masks_are_class_agnostic=(
mask_rcnn_box_predictor.masks_are_class_agnostic),
predict_keypoints=mask_rcnn_box_predictor.predict_keypoints)
return box_predictor_object
if box_predictor_oneof == 'rfcn_box_predictor':
rfcn_box_predictor = box_predictor_config.rfcn_box_predictor
conv_hyperparams = argscope_fn(rfcn_box_predictor.conv_hyperparams,
is_training)
conv_hyperparams_fn = argscope_fn(rfcn_box_predictor.conv_hyperparams,
is_training)
box_predictor_object = box_predictor.RfcnBoxPredictor(
is_training=is_training,
num_classes=num_classes,
conv_hyperparams=conv_hyperparams,
conv_hyperparams_fn=conv_hyperparams_fn,
crop_size=[rfcn_box_predictor.crop_height,
rfcn_box_predictor.crop_width],
num_spatial_bins=[rfcn_box_predictor.num_spatial_bins_height,
......
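To make the new builder contract concrete, here is a minimal usage sketch (hedged: `box_predictor_proto` is an assumed, already-parsed box_predictor_pb2.BoxPredictor message). The builder takes `hyperparams_builder.build` as its `argscope_fn`; since that function now returns a scope-constructing closure rather than an entered arg_scope, the predictor simply stores the closure and invokes it later inside predict():

from object_detection.builders import box_predictor_builder
from object_detection.builders import hyperparams_builder

# `box_predictor_proto` is a hypothetical, caller-supplied parsed proto.
predictor = box_predictor_builder.build(
    argscope_fn=hyperparams_builder.build,  # returns a function under the new API
    box_predictor_config=box_predictor_proto,
    is_training=True,
    num_classes=90)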
......@@ -54,7 +54,7 @@ class ConvolutionalBoxPredictorBuilderTest(tf.test.TestCase):
box_predictor_config=box_predictor_proto,
is_training=False,
num_classes=10)
(conv_hyperparams_actual, is_training) = box_predictor._conv_hyperparams
(conv_hyperparams_actual, is_training) = box_predictor._conv_hyperparams_fn
self.assertAlmostEqual((hyperparams_proto.regularizer.
l1_regularizer.weight),
(conv_hyperparams_actual.regularizer.l1_regularizer.
......@@ -183,7 +183,7 @@ class WeightSharedConvolutionalBoxPredictorBuilderTest(tf.test.TestCase):
box_predictor_config=box_predictor_proto,
is_training=False,
num_classes=10)
(conv_hyperparams_actual, is_training) = box_predictor._conv_hyperparams
(conv_hyperparams_actual, is_training) = box_predictor._conv_hyperparams_fn
self.assertAlmostEqual((hyperparams_proto.regularizer.
l1_regularizer.weight),
(conv_hyperparams_actual.regularizer.l1_regularizer.
......@@ -297,7 +297,7 @@ class MaskRCNNBoxPredictorBuilderTest(tf.test.TestCase):
is_training=False,
num_classes=10)
mock_argscope_fn.assert_called_with(hyperparams_proto, False)
self.assertEqual(box_predictor._fc_hyperparams, 'arg_scope')
self.assertEqual(box_predictor._fc_hyperparams_fn, 'arg_scope')
def test_non_default_mask_rcnn_box_predictor(self):
fc_hyperparams_text_proto = """
......@@ -417,7 +417,7 @@ class RfcnBoxPredictorBuilderTest(tf.test.TestCase):
box_predictor_config=box_predictor_proto,
is_training=False,
num_classes=10)
(conv_hyperparams_actual, is_training) = box_predictor._conv_hyperparams
(conv_hyperparams_actual, is_training) = box_predictor._conv_hyperparams_fn
self.assertAlmostEqual((hyperparams_proto.regularizer.
l1_regularizer.weight),
(conv_hyperparams_actual.regularizer.l1_regularizer.
......
......@@ -72,7 +72,9 @@ def _get_padding_shapes(dataset, max_num_boxes=None, num_classes=None,
fields.InputDataFields.num_groundtruth_boxes: [],
fields.InputDataFields.groundtruth_label_types: [max_num_boxes],
fields.InputDataFields.groundtruth_label_scores: [max_num_boxes],
fields.InputDataFields.true_image_shape: [3]
fields.InputDataFields.true_image_shape: [3],
fields.InputDataFields.multiclass_scores: [
max_num_boxes, num_classes + 1 if num_classes is not None else None],
}
# Determine whether groundtruth_classes are integers or one-hot encodings, and
# apply batching appropriately.
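A quick worked instance of the entry added above (a hedged sketch; the helper name is hypothetical): with max_num_boxes=100 and num_classes=90, multiclass_scores is padded to [100, 91], the extra column being the implicit background class; with num_classes=None the second dimension is left dynamic.

def multiclass_scores_padding_shape(max_num_boxes, num_classes=None):
  # One score column per class, plus one for the implicit background class.
  return [max_num_boxes,
          num_classes + 1 if num_classes is not None else None]

assert multiclass_scores_padding_shape(100, 90) == [100, 91]
assert multiclass_scores_padding_shape(100) == [100, None]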
......
......@@ -17,6 +17,7 @@
import tensorflow as tf
from object_detection.protos import hyperparams_pb2
from object_detection.utils import context_manager
slim = tf.contrib.slim
......@@ -43,7 +44,8 @@ def build(hyperparams_config, is_training):
is_training: Whether the network is in training mode.
Returns:
arg_scope: tf-slim arg_scope containing hyperparameters for ops.
arg_scope_fn: A function to construct tf-slim arg_scope containing
hyperparameters for ops.
Raises:
ValueError: if hyperparams_config is not of type hyperparams.Hyperparams.
......@@ -64,16 +66,21 @@ def build(hyperparams_config, is_training):
if hyperparams_config.HasField('op') and (
hyperparams_config.op == hyperparams_pb2.Hyperparams.FC):
affected_ops = [slim.fully_connected]
with slim.arg_scope(
affected_ops,
weights_regularizer=_build_regularizer(
hyperparams_config.regularizer),
weights_initializer=_build_initializer(
hyperparams_config.initializer),
activation_fn=_build_activation_fn(hyperparams_config.activation),
normalizer_fn=batch_norm,
normalizer_params=batch_norm_params) as sc:
return sc
def scope_fn():
with (slim.arg_scope([slim.batch_norm], **batch_norm_params)
if batch_norm_params is not None else
context_manager.IdentityContextManager()):
with slim.arg_scope(
affected_ops,
weights_regularizer=_build_regularizer(
hyperparams_config.regularizer),
weights_initializer=_build_initializer(
hyperparams_config.initializer),
activation_fn=_build_activation_fn(hyperparams_config.activation),
normalizer_fn=batch_norm) as sc:
return sc
return scope_fn
def _build_activation_fn(activation_fn):
......@@ -167,6 +174,9 @@ def _build_batch_norm_params(batch_norm, is_training):
'center': batch_norm.center,
'scale': batch_norm.scale,
'epsilon': batch_norm.epsilon,
# Remove is_training parameter from here and deprecate it in the proto
# once we refactor Faster RCNN models to set is_training through an outer
# arg_scope in the meta architecture.
'is_training': is_training and batch_norm.train,
}
return batch_norm_params
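The net effect of the hyperparams_builder change, as a hedged usage sketch (`conv_hyperparams_proto` and `images` are assumed to be supplied by the caller): build() now returns a closure, so the arg_scope is constructed lazily at the point of use, and batch-norm parameters enter through a nested arg_scope on slim.batch_norm rather than through normalizer_params:

scope_fn = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
with slim.arg_scope(scope_fn()):
  # Picks up the regularizer, initializer, activation and batch-norm settings.
  net = slim.conv2d(images, 64, [3, 3], scope='example_conv')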
......@@ -26,11 +26,11 @@ from object_detection.protos import hyperparams_pb2
slim = tf.contrib.slim
class HyperparamsBuilderTest(tf.test.TestCase):
def _get_scope_key(self, op):
return getattr(op, '_key_op', str(op))
# TODO(rathodv): Make this a public api in slim arg_scope.py.
def _get_scope_key(op):
return getattr(op, '_key_op', str(op))
class HyperparamsBuilderTest(tf.test.TestCase):
def test_default_arg_scope_has_conv2d_op(self):
conv_hyperparams_text_proto = """
......@@ -45,8 +45,10 @@ class HyperparamsBuilderTest(tf.test.TestCase):
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
self.assertTrue(self._get_scope_key(slim.conv2d) in scope)
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
self.assertTrue(_get_scope_key(slim.conv2d) in scope)
def test_default_arg_scope_has_separable_conv2d_op(self):
conv_hyperparams_text_proto = """
......@@ -61,8 +63,10 @@ class HyperparamsBuilderTest(tf.test.TestCase):
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
self.assertTrue(self._get_scope_key(slim.separable_conv2d) in scope)
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
self.assertTrue(_get_scope_key(slim.separable_conv2d) in scope)
def test_default_arg_scope_has_conv2d_transpose_op(self):
conv_hyperparams_text_proto = """
......@@ -77,8 +81,10 @@ class HyperparamsBuilderTest(tf.test.TestCase):
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
self.assertTrue(self._get_scope_key(slim.conv2d_transpose) in scope)
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
self.assertTrue(_get_scope_key(slim.conv2d_transpose) in scope)
def test_explicit_fc_op_arg_scope_has_fully_connected_op(self):
conv_hyperparams_text_proto = """
......@@ -94,8 +100,10 @@ class HyperparamsBuilderTest(tf.test.TestCase):
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
self.assertTrue(self._get_scope_key(slim.fully_connected) in scope)
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
self.assertTrue(_get_scope_key(slim.fully_connected) in scope)
def test_separable_conv2d_and_conv2d_and_transpose_have_same_parameters(self):
conv_hyperparams_text_proto = """
......@@ -110,7 +118,9 @@ class HyperparamsBuilderTest(tf.test.TestCase):
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
kwargs_1, kwargs_2, kwargs_3 = scope.values()
self.assertDictEqual(kwargs_1, kwargs_2)
self.assertDictEqual(kwargs_1, kwargs_3)
......@@ -129,7 +139,9 @@ class HyperparamsBuilderTest(tf.test.TestCase):
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope.values()[0]
regularizer = conv_scope_arguments['weights_regularizer']
weights = np.array([1., -1, 4., 2.])
......@@ -151,8 +163,10 @@ class HyperparamsBuilderTest(tf.test.TestCase):
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
conv_scope_arguments = scope.values()[0]
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
regularizer = conv_scope_arguments['weights_regularizer']
weights = np.array([1., -1, 4., 2.])
......@@ -180,10 +194,12 @@ class HyperparamsBuilderTest(tf.test.TestCase):
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
conv_scope_arguments = scope.values()[0]
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
self.assertEqual(conv_scope_arguments['normalizer_fn'], slim.batch_norm)
batch_norm_params = conv_scope_arguments['normalizer_params']
batch_norm_params = scope[_get_scope_key(slim.batch_norm)]
self.assertAlmostEqual(batch_norm_params['decay'], 0.7)
self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03)
self.assertFalse(batch_norm_params['center'])
......@@ -210,10 +226,12 @@ class HyperparamsBuilderTest(tf.test.TestCase):
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=False)
conv_scope_arguments = scope.values()[0]
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=False)
scope = scope_fn()
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
self.assertEqual(conv_scope_arguments['normalizer_fn'], slim.batch_norm)
batch_norm_params = conv_scope_arguments['normalizer_params']
batch_norm_params = scope[_get_scope_key(slim.batch_norm)]
self.assertAlmostEqual(batch_norm_params['decay'], 0.7)
self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03)
self.assertFalse(batch_norm_params['center'])
......@@ -240,10 +258,12 @@ class HyperparamsBuilderTest(tf.test.TestCase):
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
conv_scope_arguments = scope.values()[0]
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
self.assertEqual(conv_scope_arguments['normalizer_fn'], slim.batch_norm)
batch_norm_params = conv_scope_arguments['normalizer_params']
batch_norm_params = scope[_get_scope_key(slim.batch_norm)]
self.assertAlmostEqual(batch_norm_params['decay'], 0.7)
self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03)
self.assertFalse(batch_norm_params['center'])
......@@ -263,10 +283,11 @@ class HyperparamsBuilderTest(tf.test.TestCase):
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
conv_scope_arguments = scope.values()[0]
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
self.assertEqual(conv_scope_arguments['normalizer_fn'], None)
self.assertEqual(conv_scope_arguments['normalizer_params'], None)
def test_use_none_activation(self):
conv_hyperparams_text_proto = """
......@@ -282,8 +303,10 @@ class HyperparamsBuilderTest(tf.test.TestCase):
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
conv_scope_arguments = scope.values()[0]
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
self.assertEqual(conv_scope_arguments['activation_fn'], None)
def test_use_relu_activation(self):
......@@ -300,8 +323,10 @@ class HyperparamsBuilderTest(tf.test.TestCase):
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
conv_scope_arguments = scope.values()[0]
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
self.assertEqual(conv_scope_arguments['activation_fn'], tf.nn.relu)
def test_use_relu_6_activation(self):
......@@ -318,8 +343,10 @@ class HyperparamsBuilderTest(tf.test.TestCase):
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
conv_scope_arguments = scope.values()[0]
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
self.assertEqual(conv_scope_arguments['activation_fn'], tf.nn.relu6)
def _assert_variance_in_range(self, initializer, shape, variance,
......@@ -351,8 +378,10 @@ class HyperparamsBuilderTest(tf.test.TestCase):
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
conv_scope_arguments = scope.values()[0]
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
initializer = conv_scope_arguments['weights_initializer']
self._assert_variance_in_range(initializer, shape=[100, 40],
variance=2. / 100.)
......@@ -373,8 +402,10 @@ class HyperparamsBuilderTest(tf.test.TestCase):
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
conv_scope_arguments = scope.values()[0]
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
initializer = conv_scope_arguments['weights_initializer']
self._assert_variance_in_range(initializer, shape=[100, 40],
variance=2. / 40.)
......@@ -395,8 +426,10 @@ class HyperparamsBuilderTest(tf.test.TestCase):
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
conv_scope_arguments = scope.values()[0]
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
initializer = conv_scope_arguments['weights_initializer']
self._assert_variance_in_range(initializer, shape=[100, 40],
variance=4. / (100. + 40.))
......@@ -417,8 +450,10 @@ class HyperparamsBuilderTest(tf.test.TestCase):
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
conv_scope_arguments = scope.values()[0]
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
initializer = conv_scope_arguments['weights_initializer']
self._assert_variance_in_range(initializer, shape=[100, 40],
variance=2. / 100.)
......@@ -438,8 +473,10 @@ class HyperparamsBuilderTest(tf.test.TestCase):
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
conv_scope_arguments = scope.values()[0]
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
initializer = conv_scope_arguments['weights_initializer']
self._assert_variance_in_range(initializer, shape=[100, 40],
variance=0.49, tol=1e-1)
......@@ -459,8 +496,10 @@ class HyperparamsBuilderTest(tf.test.TestCase):
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
conv_scope_arguments = scope.values()[0]
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
initializer = conv_scope_arguments['weights_initializer']
self._assert_variance_in_range(initializer, shape=[100, 40],
variance=0.64, tol=1e-1)
......
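A hedged distillation of the updated test pattern above: because batch-norm hyperparameters no longer ride along in normalizer_params, the tests index the returned scope by each op's scope key, including slim.batch_norm itself:

scope = scope_fn()  # scope_fn from hyperparams_builder.build, as above
conv_args = scope[_get_scope_key(slim.conv2d)]
bn_args = scope[_get_scope_key(slim.batch_norm)]
assert conv_args['normalizer_fn'] == slim.batch_norm
assert 'decay' in bn_args and 'epsilon' in bn_args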
......@@ -71,7 +71,8 @@ FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP = {
}
def build(model_config, is_training, add_summaries=True):
def build(model_config, is_training, add_summaries=True,
add_background_class=True):
"""Builds a DetectionModel based on the model config.
Args:
......@@ -79,7 +80,10 @@ def build(model_config, is_training, add_summaries=True):
DetectionModel.
is_training: True if this model is being built for training purposes.
add_summaries: Whether to add tensorflow summaries in the model graph.
add_background_class: Whether to add an implicit background class to one-hot
encodings of groundtruth labels. Set to false if using groundtruth labels
with an explicit background class or using multiclass scores instead of
truth in the case of distillation. Ignored in the case of faster_rcnn.
Returns:
DetectionModel based on the config.
......@@ -90,7 +94,8 @@ def build(model_config, is_training, add_summaries=True):
raise ValueError('model_config not of type model_pb2.DetectionModel.')
meta_architecture = model_config.WhichOneof('model')
if meta_architecture == 'ssd':
return _build_ssd_model(model_config.ssd, is_training, add_summaries)
return _build_ssd_model(model_config.ssd, is_training, add_summaries,
add_background_class)
if meta_architecture == 'faster_rcnn':
return _build_faster_rcnn_model(model_config.faster_rcnn, is_training,
add_summaries)
......@@ -98,19 +103,13 @@ def build(model_config, is_training, add_summaries=True):
def _build_ssd_feature_extractor(feature_extractor_config, is_training,
reuse_weights=None,
inplace_batchnorm_update=False):
reuse_weights=None):
"""Builds a ssd_meta_arch.SSDFeatureExtractor based on config.
Args:
feature_extractor_config: A SSDFeatureExtractor proto config from ssd.proto.
is_training: True if this feature extractor is being built for training.
reuse_weights: if the feature extractor should reuse weights.
inplace_batchnorm_update: Whether to update batch_norm inplace during
training. This is required for batch norm to work correctly on TPUs. When
this is false, user must add a control dependency on
tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
norm moving average parameters.
Returns:
ssd_meta_arch.SSDFeatureExtractor based on config.
......@@ -122,24 +121,25 @@ def _build_ssd_feature_extractor(feature_extractor_config, is_training,
depth_multiplier = feature_extractor_config.depth_multiplier
min_depth = feature_extractor_config.min_depth
pad_to_multiple = feature_extractor_config.pad_to_multiple
batch_norm_trainable = feature_extractor_config.batch_norm_trainable
use_explicit_padding = feature_extractor_config.use_explicit_padding
use_depthwise = feature_extractor_config.use_depthwise
conv_hyperparams = hyperparams_builder.build(
feature_extractor_config.conv_hyperparams, is_training)
override_base_feature_extractor_hyperparams = (
feature_extractor_config.override_base_feature_extractor_hyperparams)
if feature_type not in SSD_FEATURE_EXTRACTOR_CLASS_MAP:
raise ValueError('Unknown ssd feature_extractor: {}'.format(feature_type))
feature_extractor_class = SSD_FEATURE_EXTRACTOR_CLASS_MAP[feature_type]
return feature_extractor_class(is_training, depth_multiplier, min_depth,
pad_to_multiple, conv_hyperparams,
batch_norm_trainable, reuse_weights,
use_explicit_padding, use_depthwise,
inplace_batchnorm_update)
return feature_extractor_class(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, reuse_weights, use_explicit_padding, use_depthwise,
override_base_feature_extractor_hyperparams)
def _build_ssd_model(ssd_config, is_training, add_summaries):
def _build_ssd_model(ssd_config, is_training, add_summaries,
add_background_class=True):
"""Builds an SSD detection model based on the model config.
Args:
......@@ -147,7 +147,10 @@ def _build_ssd_model(ssd_config, is_training, add_summaries):
SSDMetaArch.
is_training: True if this model is being built for training purposes.
add_summaries: Whether to add tf summaries in the model.
add_background_class: Whether to add an implicit background class to one-hot
encodings of groundtruth labels. Set to false if using groundtruth labels
with an explicit background class or using multiclass scores instead of
truth in the case of distillation.
Returns:
SSDMetaArch based on the config.
......@@ -160,8 +163,7 @@ def _build_ssd_model(ssd_config, is_training, add_summaries):
# Feature extractor
feature_extractor = _build_ssd_feature_extractor(
feature_extractor_config=ssd_config.feature_extractor,
is_training=is_training,
inplace_batchnorm_update=ssd_config.inplace_batchnorm_update)
is_training=is_training)
box_coder = box_coder_builder.build(ssd_config.box_coder)
matcher = matcher_builder.build(ssd_config.matcher)
......@@ -203,7 +205,10 @@ def _build_ssd_model(ssd_config, is_training, add_summaries):
normalize_loss_by_num_matches,
hard_example_miner,
add_summaries=add_summaries,
normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize)
normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize,
freeze_batchnorm=ssd_config.freeze_batchnorm,
inplace_batchnorm_update=ssd_config.inplace_batchnorm_update,
add_background_class=add_background_class)
def _build_faster_rcnn_feature_extractor(
......@@ -276,7 +281,7 @@ def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
frcnn_config.first_stage_anchor_generator)
first_stage_atrous_rate = frcnn_config.first_stage_atrous_rate
first_stage_box_predictor_arg_scope = hyperparams_builder.build(
first_stage_box_predictor_arg_scope_fn = hyperparams_builder.build(
frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training)
first_stage_box_predictor_kernel_size = (
frcnn_config.first_stage_box_predictor_kernel_size)
......@@ -329,8 +334,8 @@ def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
'number_of_stages': number_of_stages,
'first_stage_anchor_generator': first_stage_anchor_generator,
'first_stage_atrous_rate': first_stage_atrous_rate,
'first_stage_box_predictor_arg_scope':
first_stage_box_predictor_arg_scope,
'first_stage_box_predictor_arg_scope_fn':
first_stage_box_predictor_arg_scope_fn,
'first_stage_box_predictor_kernel_size':
first_stage_box_predictor_kernel_size,
'first_stage_box_predictor_depth': first_stage_box_predictor_depth,
......
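A hedged sketch of the new model_builder flag (`model_config` is an assumed, already-parsed model_pb2.DetectionModel): disabling the implicit background class is intended for groundtruth that already carries an explicit background column, or for distillation setups that train on multiclass scores:

model = model_builder.build(
    model_config,
    is_training=True,
    add_summaries=True,
    add_background_class=False)  # ignored for faster_rcnn configs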
......@@ -83,6 +83,7 @@ class ModelBuilderTest(tf.test.TestCase):
}
}
}
override_base_feature_extractor_hyperparams: true
}
box_coder {
faster_rcnn_box_coder {
......@@ -154,6 +155,7 @@ class ModelBuilderTest(tf.test.TestCase):
}
}
}
override_base_feature_extractor_hyperparams: true
}
box_coder {
faster_rcnn_box_coder {
......@@ -225,7 +227,6 @@ class ModelBuilderTest(tf.test.TestCase):
}
}
}
batch_norm_trainable: true
}
box_coder {
faster_rcnn_box_coder {
......@@ -298,6 +299,7 @@ class ModelBuilderTest(tf.test.TestCase):
def test_create_ssd_mobilenet_v1_model_from_config(self):
model_text_proto = """
ssd {
freeze_batchnorm: true
inplace_batchnorm_update: true
feature_extractor {
type: 'ssd_mobilenet_v1'
......@@ -311,7 +313,6 @@ class ModelBuilderTest(tf.test.TestCase):
}
}
}
batch_norm_trainable: true
}
box_coder {
faster_rcnn_box_coder {
......@@ -368,8 +369,9 @@ class ModelBuilderTest(tf.test.TestCase):
self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch)
self.assertIsInstance(model._feature_extractor,
SSDMobileNetV1FeatureExtractor)
self.assertTrue(model._feature_extractor._batch_norm_trainable)
self.assertTrue(model._normalize_loc_loss_by_codesize)
self.assertTrue(model._freeze_batchnorm)
self.assertTrue(model._inplace_batchnorm_update)
def test_create_ssd_mobilenet_v2_model_from_config(self):
model_text_proto = """
......@@ -386,7 +388,6 @@ class ModelBuilderTest(tf.test.TestCase):
}
}
}
batch_norm_trainable: true
}
box_coder {
faster_rcnn_box_coder {
......@@ -443,7 +444,6 @@ class ModelBuilderTest(tf.test.TestCase):
self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch)
self.assertIsInstance(model._feature_extractor,
SSDMobileNetV2FeatureExtractor)
self.assertTrue(model._feature_extractor._batch_norm_trainable)
self.assertTrue(model._normalize_loc_loss_by_codesize)
def test_create_embedded_ssd_mobilenet_v1_model_from_config(self):
......@@ -461,7 +461,6 @@ class ModelBuilderTest(tf.test.TestCase):
}
}
}
batch_norm_trainable: true
}
box_coder {
faster_rcnn_box_coder {
......
......@@ -147,7 +147,7 @@ class RfcnBoxPredictor(BoxPredictor):
def __init__(self,
is_training,
num_classes,
conv_hyperparams,
conv_hyperparams_fn,
num_spatial_bins,
depth,
crop_size,
......@@ -160,8 +160,8 @@ class RfcnBoxPredictor(BoxPredictor):
include the background category, so if groundtruth labels take values
in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
assigned classification targets can range from {0,... K}).
conv_hyperparams: Slim arg_scope with hyperparameters for conolutional
layers.
conv_hyperparams_fn: A function to construct tf-slim arg_scope with
hyperparameters for convolutional layers.
num_spatial_bins: A list of two integers `[spatial_bins_y,
spatial_bins_x]`.
depth: Target depth to reduce the input feature maps to.
......@@ -169,7 +169,7 @@ class RfcnBoxPredictor(BoxPredictor):
box_code_size: Size of encoding for each box.
"""
super(RfcnBoxPredictor, self).__init__(is_training, num_classes)
self._conv_hyperparams = conv_hyperparams
self._conv_hyperparams_fn = conv_hyperparams_fn
self._num_spatial_bins = num_spatial_bins
self._depth = depth
self._crop_size = crop_size
......@@ -227,7 +227,7 @@ class RfcnBoxPredictor(BoxPredictor):
return tf.reshape(ones_mat * multiplier, [-1])
net = image_feature
with slim.arg_scope(self._conv_hyperparams):
with slim.arg_scope(self._conv_hyperparams_fn()):
net = slim.conv2d(net, self._depth, [1, 1], scope='reduce_depth')
# Location predictions.
location_feature_map_depth = (self._num_spatial_bins[0] *
......@@ -297,16 +297,17 @@ class MaskRCNNBoxPredictor(BoxPredictor):
def __init__(self,
is_training,
num_classes,
fc_hyperparams,
fc_hyperparams_fn,
use_dropout,
dropout_keep_prob,
box_code_size,
conv_hyperparams=None,
conv_hyperparams_fn=None,
predict_instance_masks=False,
mask_height=14,
mask_width=14,
mask_prediction_num_conv_layers=2,
mask_prediction_conv_depth=256,
masks_are_class_agnostic=False,
predict_keypoints=False):
"""Constructor.
......@@ -316,16 +317,16 @@ class MaskRCNNBoxPredictor(BoxPredictor):
include the background category, so if groundtruth labels take values
in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
assigned classification targets can range from {0,... K}).
fc_hyperparams: Slim arg_scope with hyperparameters for fully
connected ops.
fc_hyperparams_fn: A function to generate tf-slim arg_scope with
hyperparameters for fully connected ops.
use_dropout: Option to use dropout or not. Note that a single dropout
op is applied here prior to both box and class predictions, which stands
in contrast to the ConvolutionalBoxPredictor below.
dropout_keep_prob: Keep probability for dropout.
This is only used if use_dropout is True.
box_code_size: Size of encoding for each box.
conv_hyperparams: Slim arg_scope with hyperparameters for convolution
ops.
conv_hyperparams_fn: A function to generate tf-slim arg_scope with
hyperparameters for convolution ops.
predict_instance_masks: Whether to predict object masks inside detection
boxes.
mask_height: Desired output mask height. The default value is 14.
......@@ -337,6 +338,8 @@ class MaskRCNNBoxPredictor(BoxPredictor):
to 0, the depth of the convolution layers will be automatically chosen
based on the number of object classes and the number of channels in the
image features.
masks_are_class_agnostic: Boolean determining if the mask-head is
class-agnostic or not.
predict_keypoints: Whether to predict keypoints inside detection boxes.
......@@ -347,21 +350,22 @@ class MaskRCNNBoxPredictor(BoxPredictor):
ValueError: If mask_prediction_num_conv_layers is smaller than two.
"""
super(MaskRCNNBoxPredictor, self).__init__(is_training, num_classes)
self._fc_hyperparams = fc_hyperparams
self._fc_hyperparams_fn = fc_hyperparams_fn
self._use_dropout = use_dropout
self._box_code_size = box_code_size
self._dropout_keep_prob = dropout_keep_prob
self._conv_hyperparams = conv_hyperparams
self._conv_hyperparams_fn = conv_hyperparams_fn
self._predict_instance_masks = predict_instance_masks
self._mask_height = mask_height
self._mask_width = mask_width
self._mask_prediction_num_conv_layers = mask_prediction_num_conv_layers
self._mask_prediction_conv_depth = mask_prediction_conv_depth
self._masks_are_class_agnostic = masks_are_class_agnostic
self._predict_keypoints = predict_keypoints
if self._predict_keypoints:
raise ValueError('Keypoint prediction is unimplemented.')
if ((self._predict_instance_masks or self._predict_keypoints) and
self._conv_hyperparams is None):
self._conv_hyperparams_fn is None):
raise ValueError('`conv_hyperparams` must be provided when predicting '
'masks.')
if self._mask_prediction_num_conv_layers < 2:
......@@ -399,7 +403,7 @@ class MaskRCNNBoxPredictor(BoxPredictor):
flattened_image_features = slim.dropout(flattened_image_features,
keep_prob=self._dropout_keep_prob,
is_training=self._is_training)
with slim.arg_scope(self._fc_hyperparams):
with slim.arg_scope(self._fc_hyperparams_fn()):
box_encodings = slim.fully_connected(
flattened_image_features,
self._num_classes * self._box_code_size,
......@@ -463,7 +467,7 @@ class MaskRCNNBoxPredictor(BoxPredictor):
num_feature_channels = image_features.get_shape().as_list()[3]
num_conv_channels = self._get_mask_predictor_conv_depth(
num_feature_channels, self.num_classes)
with slim.arg_scope(self._conv_hyperparams):
with slim.arg_scope(self._conv_hyperparams_fn()):
upsampled_features = tf.image.resize_bilinear(
image_features,
[self._mask_height, self._mask_width],
......@@ -473,8 +477,9 @@ class MaskRCNNBoxPredictor(BoxPredictor):
upsampled_features,
num_outputs=num_conv_channels,
kernel_size=[3, 3])
num_masks = 1 if self._masks_are_class_agnostic else self.num_classes
mask_predictions = slim.conv2d(upsampled_features,
num_outputs=self.num_classes,
num_outputs=num_masks,
activation_fn=None,
kernel_size=[3, 3])
return tf.expand_dims(
......@@ -578,7 +583,7 @@ class ConvolutionalBoxPredictor(BoxPredictor):
def __init__(self,
is_training,
num_classes,
conv_hyperparams,
conv_hyperparams_fn,
min_depth,
max_depth,
num_layers_before_predictor,
......@@ -597,8 +602,9 @@ class ConvolutionalBoxPredictor(BoxPredictor):
include the background category, so if groundtruth labels take values
in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
assigned classification targets can range from {0,... K}).
conv_hyperparams: Slim arg_scope with hyperparameters for convolution ops.
min_depth: Minumum feature depth prior to predicting box encodings
conv_hyperparams_fn: A function to generate tf-slim arg_scope with
hyperparameters for convolution ops.
min_depth: Minimum feature depth prior to predicting box encodings
and class predictions.
max_depth: Maximum feature depth prior to predicting box encodings
and class predictions. If max_depth is set to 0, no additional
......@@ -626,7 +632,7 @@ class ConvolutionalBoxPredictor(BoxPredictor):
super(ConvolutionalBoxPredictor, self).__init__(is_training, num_classes)
if min_depth > max_depth:
raise ValueError('min_depth should be less than or equal to max_depth')
self._conv_hyperparams = conv_hyperparams
self._conv_hyperparams_fn = conv_hyperparams_fn
self._min_depth = min_depth
self._max_depth = max_depth
self._num_layers_before_predictor = num_layers_before_predictor
......@@ -679,7 +685,7 @@ class ConvolutionalBoxPredictor(BoxPredictor):
# Add a slot for the background class.
num_class_slots = self.num_classes + 1
net = image_feature
with slim.arg_scope(self._conv_hyperparams), \
with slim.arg_scope(self._conv_hyperparams_fn()), \
slim.arg_scope([slim.dropout], is_training=self._is_training):
# Add additional conv layers before the class predictor.
features_depth = static_shape.get_depth(image_feature.get_shape())
......@@ -767,7 +773,7 @@ class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
def __init__(self,
is_training,
num_classes,
conv_hyperparams,
conv_hyperparams_fn,
depth,
num_layers_before_predictor,
box_code_size,
......@@ -781,7 +787,8 @@ class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
include the background category, so if groundtruth labels take values
in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
assigned classification targets can range from {0,... K}).
conv_hyperparams: Slim arg_scope with hyperparameters for convolution ops.
conv_hyperparams_fn: A function to generate tf-slim arg_scope with
hyperparameters for convolution ops.
depth: depth of conv layers.
num_layers_before_predictor: Number of the additional conv layers before
the predictor.
......@@ -792,7 +799,7 @@ class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
"""
super(WeightSharedConvolutionalBoxPredictor, self).__init__(is_training,
num_classes)
self._conv_hyperparams = conv_hyperparams
self._conv_hyperparams_fn = conv_hyperparams_fn
self._depth = depth
self._num_layers_before_predictor = num_layers_before_predictor
self._box_code_size = box_code_size
......@@ -846,7 +853,7 @@ class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
num_class_slots = self.num_classes + 1
box_encodings_net = image_feature
class_predictions_net = image_feature
with slim.arg_scope(self._conv_hyperparams):
with slim.arg_scope(self._conv_hyperparams_fn()):
for i in range(self._num_layers_before_predictor):
box_encodings_net = slim.conv2d(
box_encodings_net,
......
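To illustrate the new mask-head option, a hedged construction sketch (fc_scope_fn and conv_scope_fn are assumed to be functions returned by hyperparams_builder.build): with masks_are_class_agnostic=True the head emits a single mask per box (num_masks = 1) instead of one per class:

mask_predictor = box_predictor.MaskRCNNBoxPredictor(
    is_training=True,
    num_classes=5,
    fc_hyperparams_fn=fc_scope_fn,
    use_dropout=False,
    dropout_keep_prob=1.0,
    box_code_size=4,
    conv_hyperparams_fn=conv_scope_fn,  # required when predicting masks
    predict_instance_masks=True,
    masks_are_class_agnostic=True)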
......@@ -49,7 +49,7 @@ class MaskRCNNBoxPredictorTest(tf.test.TestCase):
mask_box_predictor = box_predictor.MaskRCNNBoxPredictor(
is_training=False,
num_classes=5,
fc_hyperparams=self._build_arg_scope_with_hyperparams(),
fc_hyperparams_fn=self._build_arg_scope_with_hyperparams(),
use_dropout=False,
dropout_keep_prob=0.5,
box_code_size=4,
......@@ -75,7 +75,7 @@ class MaskRCNNBoxPredictorTest(tf.test.TestCase):
box_predictor.MaskRCNNBoxPredictor(
is_training=False,
num_classes=5,
fc_hyperparams=self._build_arg_scope_with_hyperparams(),
fc_hyperparams_fn=self._build_arg_scope_with_hyperparams(),
use_dropout=False,
dropout_keep_prob=0.5,
box_code_size=4,
......@@ -86,11 +86,11 @@ class MaskRCNNBoxPredictorTest(tf.test.TestCase):
mask_box_predictor = box_predictor.MaskRCNNBoxPredictor(
is_training=False,
num_classes=5,
fc_hyperparams=self._build_arg_scope_with_hyperparams(),
fc_hyperparams_fn=self._build_arg_scope_with_hyperparams(),
use_dropout=False,
dropout_keep_prob=0.5,
box_code_size=4,
conv_hyperparams=self._build_arg_scope_with_hyperparams(
conv_hyperparams_fn=self._build_arg_scope_with_hyperparams(
op_type=hyperparams_pb2.Hyperparams.CONV),
predict_instance_masks=True)
box_predictions = mask_box_predictor.predict(
......@@ -108,7 +108,7 @@ class MaskRCNNBoxPredictorTest(tf.test.TestCase):
mask_box_predictor = box_predictor.MaskRCNNBoxPredictor(
is_training=False,
num_classes=5,
fc_hyperparams=self._build_arg_scope_with_hyperparams(),
fc_hyperparams_fn=self._build_arg_scope_with_hyperparams(),
use_dropout=False,
dropout_keep_prob=0.5,
box_code_size=4)
......@@ -125,7 +125,7 @@ class MaskRCNNBoxPredictorTest(tf.test.TestCase):
box_predictor.MaskRCNNBoxPredictor(
is_training=False,
num_classes=5,
fc_hyperparams=self._build_arg_scope_with_hyperparams(),
fc_hyperparams_fn=self._build_arg_scope_with_hyperparams(),
use_dropout=False,
dropout_keep_prob=0.5,
box_code_size=4,
......@@ -155,7 +155,7 @@ class RfcnBoxPredictorTest(tf.test.TestCase):
rfcn_box_predictor = box_predictor.RfcnBoxPredictor(
is_training=False,
num_classes=2,
conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(),
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
num_spatial_bins=[3, 3],
depth=4,
crop_size=[12, 12],
......@@ -205,7 +205,7 @@ class ConvolutionalBoxPredictorTest(test_case.TestCase):
conv_box_predictor = box_predictor.ConvolutionalBoxPredictor(
is_training=False,
num_classes=0,
conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(),
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
min_depth=0,
max_depth=32,
num_layers_before_predictor=1,
......@@ -234,7 +234,7 @@ class ConvolutionalBoxPredictorTest(test_case.TestCase):
conv_box_predictor = box_predictor.ConvolutionalBoxPredictor(
is_training=False,
num_classes=0,
conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(),
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
min_depth=0,
max_depth=32,
num_layers_before_predictor=1,
......@@ -265,7 +265,7 @@ class ConvolutionalBoxPredictorTest(test_case.TestCase):
conv_box_predictor = box_predictor.ConvolutionalBoxPredictor(
is_training=False,
num_classes=num_classes_without_background,
conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(),
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
min_depth=0,
max_depth=32,
num_layers_before_predictor=1,
......@@ -297,7 +297,7 @@ class ConvolutionalBoxPredictorTest(test_case.TestCase):
conv_box_predictor = box_predictor.ConvolutionalBoxPredictor(
is_training=False,
num_classes=0,
conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(),
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
min_depth=0,
max_depth=32,
num_layers_before_predictor=1,
......@@ -344,7 +344,7 @@ class ConvolutionalBoxPredictorTest(test_case.TestCase):
conv_box_predictor = box_predictor.ConvolutionalBoxPredictor(
is_training=False,
num_classes=0,
conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(),
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
min_depth=0,
max_depth=32,
num_layers_before_predictor=1,
......@@ -416,7 +416,7 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor(
is_training=False,
num_classes=0,
conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(),
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
depth=32,
num_layers_before_predictor=1,
box_code_size=4)
......@@ -442,7 +442,7 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor(
is_training=False,
num_classes=num_classes_without_background,
conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(),
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
depth=32,
num_layers_before_predictor=1,
box_code_size=4)
......@@ -471,7 +471,7 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor(
is_training=False,
num_classes=num_classes_without_background,
conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(),
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
depth=32,
num_layers_before_predictor=1,
box_code_size=4)
......@@ -500,7 +500,7 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor(
is_training=False,
num_classes=num_classes_without_background,
conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(),
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
depth=32,
num_layers_before_predictor=2,
box_code_size=4)
......@@ -553,7 +553,7 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor(
is_training=False,
num_classes=0,
conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(),
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
depth=32,
num_layers_before_predictor=1,
box_code_size=4)
......
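The `_build_arg_scope_with_hyperparams` / `_build_arg_scope_with_conv_hyperparams` helpers used throughout these tests now hand the predictors a function rather than an arg_scope. A hedged sketch of that helper pattern (`hyperparams_text_proto` is an assumed proto text string):

def _build_arg_scope_with_hyperparams(
    op_type=hyperparams_pb2.Hyperparams.FC):
  hyperparams = hyperparams_pb2.Hyperparams()
  text_format.Merge(hyperparams_text_proto, hyperparams)  # assumed text proto
  hyperparams.op = op_type
  # Returns the scope *function* produced by the new builder API.
  return hyperparams_builder.build(hyperparams, is_training=True)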
......@@ -69,7 +69,7 @@ class DetectionModel(object):
Args:
num_classes: number of classes. Note that num_classes *does not* include
background categories that might be implicitly be predicted in various
background categories that might be implicitly predicted in various
implementations.
"""
self._num_classes = num_classes
......
......@@ -119,6 +119,9 @@ class PreprocessorTest(tf.test.TestCase):
[[-0.1, 0.25, 0.75, 1], [0.25, 0.5, 0.75, 1.1]], dtype=tf.float32)
return boxes
def createTestMultiClassScores(self):
return tf.constant([[1.0, 0.0], [0.5, 0.5]], dtype=tf.float32)
def expectedImagesAfterNormalization(self):
images_r = tf.constant([[[0, 0, 0, 0], [-1, -1, 0, 0],
[-1, 0, 0, 0], [0.5, 0.5, 0, 0]]],
......@@ -269,6 +272,9 @@ class PreprocessorTest(tf.test.TestCase):
def expectedLabelsAfterThresholding(self):
return tf.constant([1], dtype=tf.float32)
def expectedMultiClassScoresAfterThresholding(self):
return tf.constant([[1.0, 0.0]], dtype=tf.float32)
def expectedMasksAfterThresholding(self):
mask = np.array([
[[255.0, 0.0, 0.0],
......@@ -345,6 +351,28 @@ class PreprocessorTest(tf.test.TestCase):
self.assertAllClose(
retained_label_scores_, expected_retained_label_scores_)
def testRetainBoxesAboveThresholdWithMultiClassScores(self):
boxes = self.createTestBoxes()
labels = self.createTestLabels()
label_scores = self.createTestLabelScores()
multiclass_scores = self.createTestMultiClassScores()
(_, _, _,
retained_multiclass_scores) = preprocessor.retain_boxes_above_threshold(
boxes,
labels,
label_scores,
multiclass_scores=multiclass_scores,
threshold=0.6)
with self.test_session() as sess:
(retained_multiclass_scores_,
expected_retained_multiclass_scores_) = sess.run([
retained_multiclass_scores,
self.expectedMultiClassScoresAfterThresholding()
])
self.assertAllClose(retained_multiclass_scores_,
expected_retained_multiclass_scores_)
def testRetainBoxesAboveThresholdWithMasks(self):
boxes = self.createTestBoxes()
labels = self.createTestLabels()
......@@ -1264,6 +1292,56 @@ class PreprocessorTest(tf.test.TestCase):
self.assertAllClose(distorted_boxes_, expected_boxes_)
self.assertAllEqual(distorted_labels_, expected_labels_)
def testRandomCropImageWithMultiClassScores(self):
preprocessing_options = []
preprocessing_options.append((preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}))
preprocessing_options.append((preprocessor.random_crop_image, {}))
images = self.createTestImages()
boxes = self.createTestBoxes()
labels = self.createTestLabels()
multiclass_scores = self.createTestMultiClassScores()
tensor_dict = {
fields.InputDataFields.image: images,
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels,
fields.InputDataFields.multiclass_scores: multiclass_scores
}
distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
preprocessing_options)
distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
distorted_boxes = distorted_tensor_dict[
fields.InputDataFields.groundtruth_boxes]
distorted_multiclass_scores = distorted_tensor_dict[
fields.InputDataFields.multiclass_scores]
boxes_rank = tf.rank(boxes)
distorted_boxes_rank = tf.rank(distorted_boxes)
images_rank = tf.rank(images)
distorted_images_rank = tf.rank(distorted_images)
multiclass_scores_rank = tf.rank(multiclass_scores)
distorted_multiclass_scores_rank = tf.rank(distorted_multiclass_scores)
with self.test_session() as sess:
(boxes_rank_, distorted_boxes_, distorted_boxes_rank_, images_rank_,
distorted_images_rank_, multiclass_scores_rank_,
distorted_multiclass_scores_rank_,
distorted_multiclass_scores_) = sess.run([
boxes_rank, distorted_boxes, distorted_boxes_rank, images_rank,
distorted_images_rank, multiclass_scores_rank,
distorted_multiclass_scores_rank, distorted_multiclass_scores
])
self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
self.assertAllEqual(images_rank_, distorted_images_rank_)
self.assertAllEqual(multiclass_scores_rank_,
distorted_multiclass_scores_rank_)
self.assertAllEqual(distorted_boxes_.shape[0],
distorted_multiclass_scores_.shape[0])
def testStrictRandomCropImageWithLabelScores(self):
image = self.createColorfulTestImage()[0]
boxes = self.createTestBoxes()
......@@ -2510,6 +2588,49 @@ class PreprocessorTest(tf.test.TestCase):
self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
self.assertAllEqual(images_rank_, distorted_images_rank_)
def testSSDRandomCropWithMultiClassScores(self):
preprocessing_options = [(preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}), (preprocessor.ssd_random_crop, {})]
images = self.createTestImages()
boxes = self.createTestBoxes()
labels = self.createTestLabels()
multiclass_scores = self.createTestMultiClassScores()
tensor_dict = {
fields.InputDataFields.image: images,
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels,
fields.InputDataFields.multiclass_scores: multiclass_scores,
}
preprocessor_arg_map = preprocessor.get_default_func_arg_map(
include_multiclass_scores=True)
distorted_tensor_dict = preprocessor.preprocess(
tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
distorted_boxes = distorted_tensor_dict[
fields.InputDataFields.groundtruth_boxes]
distorted_multiclass_scores = distorted_tensor_dict[
fields.InputDataFields.multiclass_scores]
images_rank = tf.rank(images)
distorted_images_rank = tf.rank(distorted_images)
boxes_rank = tf.rank(boxes)
distorted_boxes_rank = tf.rank(distorted_boxes)
with self.test_session() as sess:
(boxes_rank_, distorted_boxes_rank_, images_rank_, distorted_images_rank_,
multiclass_scores_, distorted_multiclass_scores_) = sess.run([
boxes_rank, distorted_boxes_rank, images_rank, distorted_images_rank,
multiclass_scores, distorted_multiclass_scores
])
self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
self.assertAllEqual(images_rank_, distorted_images_rank_)
self.assertAllEqual(multiclass_scores_, distorted_multiclass_scores_)
def testSSDRandomCropPad(self):
images = self.createTestImages()
boxes = self.createTestBoxes()
......@@ -2562,28 +2683,31 @@ class PreprocessorTest(tf.test.TestCase):
def _testSSDRandomCropFixedAspectRatio(self,
include_label_scores,
include_multiclass_scores,
include_instance_masks,
include_keypoints):
images = self.createTestImages()
boxes = self.createTestBoxes()
labels = self.createTestLabels()
preprocessing_options = [
(preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}),
(preprocessor.ssd_random_crop_fixed_aspect_ratio, {})]
preprocessing_options = [(preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}), (preprocessor.ssd_random_crop_fixed_aspect_ratio, {})]
tensor_dict = {
fields.InputDataFields.image: images,
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels
fields.InputDataFields.groundtruth_classes: labels,
}
if include_label_scores:
label_scores = self.createTestLabelScores()
tensor_dict[fields.InputDataFields.groundtruth_label_scores] = (
label_scores)
if include_multiclass_scores:
multiclass_scores = self.createTestMultiClassScores()
tensor_dict[fields.InputDataFields.multiclass_scores] = (
multiclass_scores)
if include_instance_masks:
masks = self.createTestMasks()
tensor_dict[fields.InputDataFields.groundtruth_instance_masks] = masks
......@@ -2593,6 +2717,7 @@ class PreprocessorTest(tf.test.TestCase):
preprocessor_arg_map = preprocessor.get_default_func_arg_map(
include_label_scores=include_label_scores,
include_multiclass_scores=include_multiclass_scores,
include_instance_masks=include_instance_masks,
include_keypoints=include_keypoints)
distorted_tensor_dict = preprocessor.preprocess(
......@@ -2615,16 +2740,25 @@ class PreprocessorTest(tf.test.TestCase):
def testSSDRandomCropFixedAspectRatio(self):
self._testSSDRandomCropFixedAspectRatio(include_label_scores=False,
include_multiclass_scores=False,
include_instance_masks=False,
include_keypoints=False)
def testSSDRandomCropFixedAspectRatioWithMultiClassScores(self):
self._testSSDRandomCropFixedAspectRatio(include_label_scores=False,
include_multiclass_scores=True,
include_instance_masks=False,
include_keypoints=False)
def testSSDRandomCropFixedAspectRatioWithMasksAndKeypoints(self):
self._testSSDRandomCropFixedAspectRatio(include_label_scores=False,
include_multiclass_scores=False,
include_instance_masks=True,
include_keypoints=True)
def testSSDRandomCropFixedAspectRatioWithLabelScoresMasksAndKeypoints(self):
self._testSSDRandomCropFixedAspectRatio(include_label_scores=True,
include_multiclass_scores=False,
include_instance_masks=True,
include_keypoints=True)
......
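Distilled from the tests above, the opt-in path for threading multiclass_scores through preprocessing (a hedged sketch; tensor_dict and preprocessing_options as constructed in the tests):

preprocessor_arg_map = preprocessor.get_default_func_arg_map(
    include_multiclass_scores=True)
distorted = preprocessor.preprocess(
    tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
scores = distorted[fields.InputDataFields.multiclass_scores]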
......@@ -61,6 +61,9 @@ class InputDataFields(object):
num_groundtruth_boxes: number of groundtruth boxes.
true_image_shapes: true shapes of images in the resized images, as resized
images can be padded with zeros.
verified_labels: list of human-verified image-level labels (note that a
label can be verified both as positive and negative).
multiclass_scores: the label score per class for each box.
"""
image = 'image'
original_image = 'original_image'
......@@ -86,6 +89,8 @@ class InputDataFields(object):
groundtruth_weights = 'groundtruth_weights'
num_groundtruth_boxes = 'num_groundtruth_boxes'
true_image_shape = 'true_image_shape'
verified_labels = 'verified_labels'
multiclass_scores = 'multiclass_scores'
class DetectionResultFields(object):
......
......@@ -104,8 +104,7 @@ def dict_to_tf_example(data,
truncated = []
poses = []
difficult_obj = []
if data.has_key('object'):
if 'object' in data:
for obj in data['object']:
difficult = bool(int(obj['difficult']))
if ignore_difficult_instances and difficult:
......
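The dict-membership rewrite above is a Python 3 compatibility fix: dict.has_key was removed in Python 3, while the `in` test behaves identically in both versions. A one-line illustration:

data = {'filename': 'img1.jpg'}  # hypothetical parsed-XML dict with no objects
if 'object' in data:  # works on Python 2 and 3; data.has_key('object') is 2-only
  pass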
......@@ -136,35 +136,36 @@ def dict_to_tf_example(data,
poses = []
difficult_obj = []
masks = []
for obj in data['object']:
difficult = bool(int(obj['difficult']))
if ignore_difficult_instances and difficult:
continue
difficult_obj.append(int(difficult))
if faces_only:
xmin = float(obj['bndbox']['xmin'])
xmax = float(obj['bndbox']['xmax'])
ymin = float(obj['bndbox']['ymin'])
ymax = float(obj['bndbox']['ymax'])
else:
xmin = float(np.min(nonzero_x_indices))
xmax = float(np.max(nonzero_x_indices))
ymin = float(np.min(nonzero_y_indices))
ymax = float(np.max(nonzero_y_indices))
xmins.append(xmin / width)
ymins.append(ymin / height)
xmaxs.append(xmax / width)
ymaxs.append(ymax / height)
class_name = get_class_name_from_filename(data['filename'])
classes_text.append(class_name.encode('utf8'))
classes.append(label_map_dict[class_name])
truncated.append(int(obj['truncated']))
poses.append(obj['pose'].encode('utf8'))
if not faces_only:
mask_remapped = (mask_np != 2).astype(np.uint8)
masks.append(mask_remapped)
if 'object' in data:
for obj in data['object']:
difficult = bool(int(obj['difficult']))
if ignore_difficult_instances and difficult:
continue
difficult_obj.append(int(difficult))
if faces_only:
xmin = float(obj['bndbox']['xmin'])
xmax = float(obj['bndbox']['xmax'])
ymin = float(obj['bndbox']['ymin'])
ymax = float(obj['bndbox']['ymax'])
else:
xmin = float(np.min(nonzero_x_indices))
xmax = float(np.max(nonzero_x_indices))
ymin = float(np.min(nonzero_y_indices))
ymax = float(np.max(nonzero_y_indices))
xmins.append(xmin / width)
ymins.append(ymin / height)
xmaxs.append(xmax / width)
ymaxs.append(ymax / height)
class_name = get_class_name_from_filename(data['filename'])
classes_text.append(class_name.encode('utf8'))
classes.append(label_map_dict[class_name])
truncated.append(int(obj['truncated']))
poses.append(obj['pose'].encode('utf8'))
if not faces_only:
mask_remapped = (mask_np != 2).astype(np.uint8)
masks.append(mask_remapped)
feature_dict = {
'image/height': dataset_util.int64_feature(height),
......
......@@ -31,7 +31,7 @@ while for the Weighted PASCAL VOC metric the final mAP value will be influenced
Similar to the PASCAL VOC 2007 detection metric, but computes the intersection over
union based on object masks instead of object boxes.
## Weighted PASCAL VOC instance segmentation metric
`EvalConfig.metrics_set='weighted_pascal_voc_instance_segmentation_metrics'`
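As a usage sketch, the metric is selected through the `metrics_set` field of the eval config; a minimal Python snippet, assuming the `eval_pb2` proto module lives under object_detection/protos as usual:

```python
from google.protobuf import text_format
from object_detection.protos import eval_pb2  # assumed path

# metrics_set is a repeated string field; merging appends one entry.
eval_config = eval_pb2.EvalConfig()
text_format.Merge(
    "metrics_set: 'weighted_pascal_voc_instance_segmentation_metrics'",
    eval_config)
```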
......
......@@ -229,7 +229,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
number_of_stages,
first_stage_anchor_generator,
first_stage_atrous_rate,
first_stage_box_predictor_arg_scope,
first_stage_box_predictor_arg_scope_fn,
first_stage_box_predictor_kernel_size,
first_stage_box_predictor_depth,
first_stage_minibatch_size,
......@@ -291,8 +291,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
denser resolutions. The atrous rate is used to compensate for the
denser feature maps by using an effectively larger receptive field.
(This should typically be set to 1).
first_stage_box_predictor_arg_scope: Slim arg_scope for conv2d,
separable_conv2d and fully_connected ops for the RPN box predictor.
first_stage_box_predictor_arg_scope_fn: A function to construct tf-slim
arg_scope for conv2d, separable_conv2d and fully_connected ops for the
RPN box predictor.
first_stage_box_predictor_kernel_size: Kernel size to use for the
convolution op just prior to RPN box predictions.
first_stage_box_predictor_depth: Output depth for the convolution op
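To make the new contract concrete, here is a hedged sketch of a function satisfying `first_stage_box_predictor_arg_scope_fn`: it is called at graph-construction time and returns the arg_scope, rather than being an arg_scope itself. The function name and hyperparameter values are placeholders, not defaults of this codebase.

```python
import tensorflow as tf

slim = tf.contrib.slim

def rpn_box_predictor_arg_scope_fn():  # hypothetical example fn
  # Returns the arg_scope when invoked, instead of being one.
  with slim.arg_scope(
      [slim.conv2d, slim.separable_conv2d, slim.fully_connected],
      activation_fn=tf.nn.relu6,
      weights_regularizer=slim.l2_regularizer(1e-4)) as sc:
    return sc

# The meta-architecture enters the scope by calling the function, e.g.
#   with slim.arg_scope(rpn_box_predictor_arg_scope_fn()):
#     ...
```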
......@@ -396,8 +397,8 @@ class FasterRCNNMetaArch(model.DetectionModel):
# (First stage) Region proposal network parameters
self._first_stage_anchor_generator = first_stage_anchor_generator
self._first_stage_atrous_rate = first_stage_atrous_rate
self._first_stage_box_predictor_arg_scope = (
first_stage_box_predictor_arg_scope)
self._first_stage_box_predictor_arg_scope_fn = (
first_stage_box_predictor_arg_scope_fn)
self._first_stage_box_predictor_kernel_size = (
first_stage_box_predictor_kernel_size)
self._first_stage_box_predictor_depth = first_stage_box_predictor_depth
......@@ -406,7 +407,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
positive_fraction=first_stage_positive_balance_fraction)
self._first_stage_box_predictor = box_predictor.ConvolutionalBoxPredictor(
self._is_training, num_classes=1,
conv_hyperparams=self._first_stage_box_predictor_arg_scope,
conv_hyperparams_fn=self._first_stage_box_predictor_arg_scope_fn,
min_depth=0, max_depth=0, num_layers_before_predictor=0,
use_dropout=False, dropout_keep_prob=1.0, kernel_size=1,
box_code_size=self._box_coder.code_size)
......@@ -450,8 +451,6 @@ class FasterRCNNMetaArch(model.DetectionModel):
if self._number_of_stages <= 0 or self._number_of_stages > 3:
raise ValueError('Number of stages should be a value in {1, 2, 3}.')
if self._is_training and self._number_of_stages == 3:
  # During training, masks are predicted directly in the second stage, so
  # an explicit third stage is redundant; fall back to two stages.
  self._number_of_stages = 2
@property
def first_stage_feature_extractor_scope(self):
......@@ -738,9 +737,6 @@ class FasterRCNNMetaArch(model.DetectionModel):
of the image.
6) box_classifier_features: a 4-D float32 tensor representing the
features for each proposal.
7) mask_predictions: (optional) a 4-D tensor with shape
[total_num_padded_proposals, num_classes, mask_height, mask_width]
containing instance mask predictions.
"""
image_shape_2d = self._image_batch_shape_2d(image_shape)
proposal_boxes_normalized, _, num_proposals = self._postprocess_rpn(
......@@ -756,20 +752,18 @@ class FasterRCNNMetaArch(model.DetectionModel):
flattened_proposal_feature_maps,
scope=self.second_stage_feature_extractor_scope))
predict_auxiliary_outputs = False
if self._number_of_stages == 2:
predict_auxiliary_outputs = True
box_predictions = self._mask_rcnn_box_predictor.predict(
[box_classifier_features],
num_predictions_per_location=[1],
scope=self.second_stage_box_predictor_scope,
predict_boxes_and_classes=True,
predict_auxiliary_outputs=predict_auxiliary_outputs)
predict_boxes_and_classes=True)
refined_box_encodings = tf.squeeze(
box_predictions[box_predictor.BOX_ENCODINGS], axis=1)
class_predictions_with_background = tf.squeeze(box_predictions[
box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1)
box_predictions[box_predictor.BOX_ENCODINGS],
axis=1, name='all_refined_box_encodings')
class_predictions_with_background = tf.squeeze(
box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND],
axis=1, name='all_class_predictions_with_background')
absolute_proposal_boxes = ops.normalized_to_image_coordinates(
proposal_boxes_normalized, image_shape, self._parallel_iterations)
......@@ -783,16 +777,17 @@ class FasterRCNNMetaArch(model.DetectionModel):
'box_classifier_features': box_classifier_features,
'proposal_boxes_normalized': proposal_boxes_normalized,
}
if box_predictor.MASK_PREDICTIONS in box_predictions:
mask_predictions = tf.squeeze(box_predictions[
box_predictor.MASK_PREDICTIONS], axis=1)
prediction_dict['mask_predictions'] = mask_predictions
return prediction_dict
def _predict_third_stage(self, prediction_dict, image_shapes):
"""Predicts non-box, non-class outputs using refined detections.
For training, masks are predicted directly on the box_classifier_features,
which are region features computed from the initial anchor boxes.
For inference, this happens after the post-processing stage, so that masks
are only calculated for the top-scored boxes.
Args:
prediction_dict: a dictionary holding "raw" prediction tensors:
1) refined_box_encodings: a 3-D tensor with shape
......@@ -813,53 +808,73 @@ class FasterRCNNMetaArch(model.DetectionModel):
4) proposal_boxes: A float32 tensor of shape
[batch_size, self.max_num_proposals, 4] representing
decoded proposal bounding boxes in absolute coordinates.
5) box_classifier_features: a 4-D float32 tensor representing the
features for each proposal.
image_shapes: A 2-D int32 tensor of shape [batch_size, 3] containing the
shapes of images in the batch.
Returns:
prediction_dict: a dictionary that, in addition to the input predictions,
also holds the following:
1) mask_predictions: (optional) a 4-D tensor with shape
1) mask_predictions: a 4-D tensor with shape
[batch_size, max_detection, mask_height, mask_width] containing
instance mask predictions.
"""
detections_dict = self._postprocess_box_classifier(
prediction_dict['refined_box_encodings'],
prediction_dict['class_predictions_with_background'],
prediction_dict['proposal_boxes'],
prediction_dict['num_proposals'],
image_shapes)
prediction_dict.update(detections_dict)
detection_boxes = detections_dict[
fields.DetectionResultFields.detection_boxes]
detection_classes = detections_dict[
fields.DetectionResultFields.detection_classes]
rpn_features_to_crop = prediction_dict['rpn_features_to_crop']
batch_size = tf.shape(detection_boxes)[0]
max_detection = tf.shape(detection_boxes)[1]
flattened_detected_feature_maps = (
self._compute_second_stage_input_feature_maps(
rpn_features_to_crop, detection_boxes))
detected_box_classifier_features = (
self._feature_extractor.extract_box_classifier_features(
flattened_detected_feature_maps,
scope=self.second_stage_feature_extractor_scope))
box_predictions = self._mask_rcnn_box_predictor.predict(
[detected_box_classifier_features],
num_predictions_per_location=[1],
scope=self.second_stage_box_predictor_scope,
predict_boxes_and_classes=False,
predict_auxiliary_outputs=True)
if box_predictor.MASK_PREDICTIONS in box_predictions:
if self._is_training:
curr_box_classifier_features = prediction_dict['box_classifier_features']
detection_classes = prediction_dict['class_predictions_with_background']
box_predictions = self._mask_rcnn_box_predictor.predict(
[curr_box_classifier_features],
num_predictions_per_location=[1],
scope=self.second_stage_box_predictor_scope,
predict_boxes_and_classes=False,
predict_auxiliary_outputs=True)
prediction_dict['mask_predictions'] = tf.squeeze(box_predictions[
box_predictor.MASK_PREDICTIONS], axis=1)
else:
detections_dict = self._postprocess_box_classifier(
prediction_dict['refined_box_encodings'],
prediction_dict['class_predictions_with_background'],
prediction_dict['proposal_boxes'],
prediction_dict['num_proposals'],
image_shapes)
prediction_dict.update(detections_dict)
detection_boxes = detections_dict[
fields.DetectionResultFields.detection_boxes]
detection_classes = detections_dict[
fields.DetectionResultFields.detection_classes]
rpn_features_to_crop = prediction_dict['rpn_features_to_crop']
batch_size = tf.shape(detection_boxes)[0]
max_detection = tf.shape(detection_boxes)[1]
flattened_detected_feature_maps = (
self._compute_second_stage_input_feature_maps(
rpn_features_to_crop, detection_boxes))
curr_box_classifier_features = (
self._feature_extractor.extract_box_classifier_features(
flattened_detected_feature_maps,
scope=self.second_stage_feature_extractor_scope))
box_predictions = self._mask_rcnn_box_predictor.predict(
[curr_box_classifier_features],
num_predictions_per_location=[1],
scope=self.second_stage_box_predictor_scope,
predict_boxes_and_classes=False,
predict_auxiliary_outputs=True)
detection_masks = tf.squeeze(box_predictions[
box_predictor.MASK_PREDICTIONS], axis=1)
detection_masks = self._gather_instance_masks(detection_masks,
detection_classes)
mask_height = tf.shape(detection_masks)[1]
mask_width = tf.shape(detection_masks)[2]
_, num_classes, mask_height, mask_width = (
detection_masks.get_shape().as_list())
_, max_detection = detection_classes.get_shape().as_list()
if num_classes > 1:
detection_masks = self._gather_instance_masks(
detection_masks, detection_classes)
prediction_dict[fields.DetectionResultFields.detection_masks] = (
tf.reshape(detection_masks,
[batch_size, max_detection, mask_height, mask_width]))
return prediction_dict
def _gather_instance_masks(self, instance_masks, classes):
......@@ -873,16 +888,12 @@ class FasterRCNNMetaArch(model.DetectionModel):
Returns:
masks: a 3-D float32 tensor with shape [K, mask_height, mask_width].
"""
_, num_classes, height, width = instance_masks.get_shape().as_list()
k = tf.shape(instance_masks)[0]
num_mask_classes = tf.shape(instance_masks)[1]
instance_mask_height = tf.shape(instance_masks)[2]
instance_mask_width = tf.shape(instance_masks)[3]
classes = tf.reshape(classes, [-1])
instance_masks = tf.reshape(instance_masks, [
-1, instance_mask_height, instance_mask_width
])
return tf.gather(instance_masks,
tf.range(k) * num_mask_classes + tf.to_int32(classes))
instance_masks = tf.reshape(instance_masks, [-1, height, width])
classes = tf.to_int32(tf.reshape(classes, [-1]))
gather_idx = tf.range(k) * num_classes + classes
return tf.gather(instance_masks, gather_idx)
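The flat-index arithmetic in the rewritten `_gather_instance_masks` is easiest to verify with concrete numbers; a numpy re-enactment of the same gather, with shapes invented for the example:

```python
import numpy as np

num_boxes, num_classes, h, w = 3, 4, 2, 2
instance_masks = np.arange(num_boxes * num_classes * h * w,
                           dtype=np.float32).reshape(num_boxes, num_classes,
                                                     h, w)
classes = np.array([2, 0, 3])  # predicted class for each box

# After flattening [K, num_classes, h, w] -> [K * num_classes, h, w],
# the mask for box i and class c sits at flat index i * num_classes + c.
flat_masks = instance_masks.reshape(-1, h, w)
gather_idx = np.arange(num_boxes) * num_classes + classes
selected = flat_masks[gather_idx]  # shape [3, 2, 2]: one mask per box

assert np.array_equal(selected[0], instance_masks[0, 2])
```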
def _extract_rpn_feature_maps(self, preprocessed_inputs):
"""Extracts RPN features.
......@@ -914,7 +925,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
anchors = box_list_ops.concatenate(
self._first_stage_anchor_generator.generate([(feature_map_shape[1],
feature_map_shape[2])]))
with slim.arg_scope(self._first_stage_box_predictor_arg_scope):
with slim.arg_scope(self._first_stage_box_predictor_arg_scope_fn()):
kernel_size = self._first_stage_box_predictor_kernel_size
rpn_box_predictor_features = slim.conv2d(
rpn_features_to_crop,
......@@ -1814,11 +1825,18 @@ class FasterRCNNMetaArch(model.DetectionModel):
# Pad the prediction_masks to add zeros for the background class, to be
# consistent with the class predictions.
prediction_masks_with_background = tf.pad(
prediction_masks, [[0, 0], [1, 0], [0, 0], [0, 0]])
prediction_masks_masked_by_class_targets = tf.boolean_mask(
prediction_masks_with_background,
tf.greater(one_hot_flat_cls_targets_with_background, 0))
if prediction_masks.get_shape().as_list()[1] == 1:
# Class agnostic masks or masks for one-class prediction. Logic for
# both cases is the same since background predictions are ignored
# through the batch_mask_target_weights.
prediction_masks_masked_by_class_targets = prediction_masks
else:
prediction_masks_with_background = tf.pad(
prediction_masks, [[0, 0], [1, 0], [0, 0], [0, 0]])
prediction_masks_masked_by_class_targets = tf.boolean_mask(
prediction_masks_with_background,
tf.greater(one_hot_flat_cls_targets_with_background, 0))
mask_height = prediction_masks.shape[2].value
mask_width = prediction_masks.shape[3].value
reshaped_prediction_masks = tf.reshape(
......
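The pad-then-boolean-mask trick in the hunk above selects, for each proposal, only the mask of its target class; a small numpy sketch of the same selection, with invented shapes:

```python
import numpy as np

num_proposals, num_classes, h, w = 2, 3, 1, 1
prediction_masks = np.random.rand(num_proposals, num_classes, h, w)

# One-hot class targets including the background column at index 0.
one_hot = np.zeros((num_proposals, num_classes + 1), np.float32)
one_hot[0, 2] = 1.0  # proposal 0 -> column 2 (= class index 1 pre-pad)
one_hot[1, 1] = 1.0  # proposal 1 -> column 1 (= class index 0 pre-pad)

# Pad a zero "background" mask in front so columns line up with one_hot.
padded = np.pad(prediction_masks,
                [(0, 0), (1, 0), (0, 0), (0, 0)], mode='constant')

# Boolean masking over the first two axes keeps exactly one mask per row.
selected = padded[one_hot > 0]  # shape [num_proposals, h, w]
assert np.array_equal(selected[0], prediction_masks[0, 1])
assert np.array_equal(selected[1], prediction_masks[1, 0])
```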
......@@ -15,6 +15,7 @@
"""Tests for object_detection.meta_architectures.faster_rcnn_meta_arch."""
from absl.testing import parameterized
import numpy as np
import tensorflow as tf
......@@ -22,7 +23,8 @@ from object_detection.meta_architectures import faster_rcnn_meta_arch_test_lib
class FasterRCNNMetaArchTest(
faster_rcnn_meta_arch_test_lib.FasterRCNNMetaArchTestBase):
faster_rcnn_meta_arch_test_lib.FasterRCNNMetaArchTestBase,
parameterized.TestCase):
def test_postprocess_second_stage_only_inference_mode_with_masks(self):
model = self._build_model(
......@@ -83,8 +85,12 @@ class FasterRCNNMetaArchTest(
self.assertTrue(np.amax(detections_out['detection_masks']) <= 1.0)
self.assertTrue(np.amin(detections_out['detection_masks']) >= 0.0)
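The hunks below convert these tests to absl's parameterized test cases, where each argument dict produces a separate test invocation; a self-contained sketch of the mechanism (example names are illustrative):

```python
from absl.testing import parameterized


class ExampleTest(parameterized.TestCase):

  @parameterized.parameters(
      {'masks_are_class_agnostic': False},
      {'masks_are_class_agnostic': True},
  )
  def test_runs_for_each_flag_value(self, masks_are_class_agnostic):
    # Invoked twice, once per parameter dict, as keyword arguments.
    self.assertIn(masks_are_class_agnostic, (False, True))
```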
@parameterized.parameters(
{'masks_are_class_agnostic': False},
{'masks_are_class_agnostic': True},
)
def test_predict_correct_shapes_in_inference_mode_three_stages_with_masks(
self):
self, masks_are_class_agnostic):
batch_size = 2
image_size = 10
max_num_proposals = 8
......@@ -126,7 +132,8 @@ class FasterRCNNMetaArchTest(
is_training=False,
number_of_stages=3,
second_stage_batch_size=2,
predict_masks=True)
predict_masks=True,
masks_are_class_agnostic=masks_are_class_agnostic)
preprocessed_inputs = tf.placeholder(tf.float32, shape=input_shape)
_, true_image_shapes = model.preprocess(preprocessed_inputs)
result_tensor_dict = model.predict(preprocessed_inputs,
......@@ -153,16 +160,20 @@ class FasterRCNNMetaArchTest(
self.assertAllEqual(tensor_dict_out['detection_scores'].shape, [2, 5])
self.assertAllEqual(tensor_dict_out['num_detections'].shape, [2])
@parameterized.parameters(
{'masks_are_class_agnostic': False},
{'masks_are_class_agnostic': True},
)
def test_predict_gives_correct_shapes_in_train_mode_both_stages_with_masks(
self):
self, masks_are_class_agnostic):
test_graph = tf.Graph()
with test_graph.as_default():
model = self._build_model(
is_training=True,
number_of_stages=2,
number_of_stages=3,
second_stage_batch_size=7,
predict_masks=True)
predict_masks=True,
masks_are_class_agnostic=masks_are_class_agnostic)
batch_size = 2
image_size = 10
max_num_proposals = 7
......@@ -184,6 +195,7 @@ class FasterRCNNMetaArchTest(
groundtruth_classes_list)
result_tensor_dict = model.predict(preprocessed_inputs, true_image_shapes)
mask_shape_1 = 1 if masks_are_class_agnostic else model._num_classes
expected_shapes = {
'rpn_box_predictor_features': (2, image_size, image_size, 512),
'rpn_features_to_crop': (2, image_size, image_size, 3),
......@@ -197,7 +209,7 @@ class FasterRCNNMetaArchTest(
self._get_box_classifier_features_shape(
image_size, batch_size, max_num_proposals, initial_crop_size,
maxpool_stride, 3),
'mask_predictions': (2 * max_num_proposals, 2, 14, 14)
'mask_predictions': (2 * max_num_proposals, mask_shape_1, 14, 14)
}
init_op = tf.global_variables_initializer()
......
......@@ -90,10 +90,13 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
"""
return box_predictor_text_proto
def _add_mask_to_second_stage_box_predictor_text_proto(self):
def _add_mask_to_second_stage_box_predictor_text_proto(
self, masks_are_class_agnostic=False):
agnostic = 'true' if masks_are_class_agnostic else 'false'
box_predictor_text_proto = """
mask_rcnn_box_predictor {
predict_instance_masks: true
masks_are_class_agnostic: """ + agnostic + """
mask_height: 14
mask_width: 14
conv_hyperparams {
......@@ -114,13 +117,14 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
return box_predictor_text_proto
def _get_second_stage_box_predictor(self, num_classes, is_training,
predict_masks):
predict_masks, masks_are_class_agnostic):
box_predictor_proto = box_predictor_pb2.BoxPredictor()
text_format.Merge(self._get_second_stage_box_predictor_text_proto(),
box_predictor_proto)
if predict_masks:
text_format.Merge(
self._add_mask_to_second_stage_box_predictor_text_proto(),
self._add_mask_to_second_stage_box_predictor_text_proto(
masks_are_class_agnostic),
box_predictor_proto)
return box_predictor_builder.build(
......@@ -146,7 +150,8 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
hard_mining=False,
softmax_second_stage_classification_loss=True,
predict_masks=False,
pad_to_max_dimension=None):
pad_to_max_dimension=None,
masks_are_class_agnostic=False):
def image_resizer_fn(image, masks=None):
"""Fake image resizer function."""
......@@ -196,7 +201,7 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
}
}
"""
first_stage_box_predictor_arg_scope = (
first_stage_box_predictor_arg_scope_fn = (
self._build_arg_scope_with_hyperparams(
first_stage_box_predictor_hyperparams_text_proto, is_training))
......@@ -255,8 +260,8 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
'number_of_stages': number_of_stages,
'first_stage_anchor_generator': first_stage_anchor_generator,
'first_stage_atrous_rate': first_stage_atrous_rate,
'first_stage_box_predictor_arg_scope':
first_stage_box_predictor_arg_scope,
'first_stage_box_predictor_arg_scope_fn':
first_stage_box_predictor_arg_scope_fn,
'first_stage_box_predictor_kernel_size':
first_stage_box_predictor_kernel_size,
'first_stage_box_predictor_depth': first_stage_box_predictor_depth,
......@@ -287,7 +292,8 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
self._get_second_stage_box_predictor(
num_classes=num_classes,
is_training=is_training,
predict_masks=predict_masks), **common_kwargs)
predict_masks=predict_masks,
masks_are_class_agnostic=masks_are_class_agnostic), **common_kwargs)
def test_predict_gives_correct_shapes_in_inference_mode_first_stage_only(
self):
......
......@@ -56,7 +56,7 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
number_of_stages,
first_stage_anchor_generator,
first_stage_atrous_rate,
first_stage_box_predictor_arg_scope,
first_stage_box_predictor_arg_scope_fn,
first_stage_box_predictor_kernel_size,
first_stage_box_predictor_depth,
first_stage_minibatch_size,
......@@ -103,8 +103,9 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
denser resolutions. The atrous rate is used to compensate for the
denser feature maps by using an effectively larger receptive field.
(This should typically be set to 1).
first_stage_box_predictor_arg_scope: Slim arg_scope for conv2d,
separable_conv2d and fully_connected ops for the RPN box predictor.
first_stage_box_predictor_arg_scope_fn: A function to generate tf-slim
arg_scope for conv2d, separable_conv2d and fully_connected ops for the
RPN box predictor.
first_stage_box_predictor_kernel_size: Kernel size to use for the
convolution op just prior to RPN box predictions.
first_stage_box_predictor_depth: Output depth for the convolution op
......@@ -174,7 +175,7 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
number_of_stages,
first_stage_anchor_generator,
first_stage_atrous_rate,
first_stage_box_predictor_arg_scope,
first_stage_box_predictor_arg_scope_fn,
first_stage_box_predictor_kernel_size,
first_stage_box_predictor_depth,
first_stage_minibatch_size,
......