Commit 6b72b5cd authored by Zhichao Lu, committed by pkulzc
Browse files

Merged commit includes the following changes:

191649512  by Zhichao Lu:

    Introduce two parameters in ssd.proto - freeze_batchnorm and inplace_batchnorm_update - and set up slim arg_scopes in ssd_meta_arch.py that apply them to all batchnorm ops in the predict() method.

    This centralizes the control of freezing and doing inplace batchnorm updates.

--
191620303  by Zhichao Lu:

    Modifications to the preprocessor to support multiclass scores

--
191610773  by Zhichao Lu:

    Adding multiclass_scores to InputDataFields and adding padding for multiclass_scores.

--
191595011  by Zhichao Lu:

    Contains implementation of the detection metric for the Open Images Challenge.

--
191449408  by Zhichao Lu:

    Change hyperparams_builder to return a callable so that users can inherit values from outer arg_scopes. This allows us to easily set batch_norm parameters like "is_training" and "inplace_batchnorm_update" for all feature extractors from the base class and propagate them correctly to the nested scopes.

--
191437008  by Zhichao Lu:

    Contains implementation of the Recall@N and MedianRank@N metrics.

--
191385254  by Zhichao Lu:

    Add config rewrite flag to eval.py

--
191382500  by Zhichao Lu:

    Fix bug for config_util.

--

PiperOrigin-RevId: 191649512
parent 143464d2
......@@ -49,12 +49,12 @@ def build(argscope_fn, box_predictor_config, is_training, num_classes):
if box_predictor_oneof == 'convolutional_box_predictor':
conv_box_predictor = box_predictor_config.convolutional_box_predictor
conv_hyperparams = argscope_fn(conv_box_predictor.conv_hyperparams,
is_training)
conv_hyperparams_fn = argscope_fn(conv_box_predictor.conv_hyperparams,
is_training)
box_predictor_object = box_predictor.ConvolutionalBoxPredictor(
is_training=is_training,
num_classes=num_classes,
conv_hyperparams=conv_hyperparams,
conv_hyperparams_fn=conv_hyperparams_fn,
min_depth=conv_box_predictor.min_depth,
max_depth=conv_box_predictor.max_depth,
num_layers_before_predictor=(conv_box_predictor.
......@@ -73,12 +73,12 @@ def build(argscope_fn, box_predictor_config, is_training, num_classes):
if box_predictor_oneof == 'weight_shared_convolutional_box_predictor':
conv_box_predictor = (box_predictor_config.
weight_shared_convolutional_box_predictor)
conv_hyperparams = argscope_fn(conv_box_predictor.conv_hyperparams,
is_training)
conv_hyperparams_fn = argscope_fn(conv_box_predictor.conv_hyperparams,
is_training)
box_predictor_object = box_predictor.WeightSharedConvolutionalBoxPredictor(
is_training=is_training,
num_classes=num_classes,
conv_hyperparams=conv_hyperparams,
conv_hyperparams_fn=conv_hyperparams_fn,
depth=conv_box_predictor.depth,
num_layers_before_predictor=(conv_box_predictor.
num_layers_before_predictor),
......@@ -90,20 +90,20 @@ def build(argscope_fn, box_predictor_config, is_training, num_classes):
if box_predictor_oneof == 'mask_rcnn_box_predictor':
mask_rcnn_box_predictor = box_predictor_config.mask_rcnn_box_predictor
fc_hyperparams = argscope_fn(mask_rcnn_box_predictor.fc_hyperparams,
is_training)
conv_hyperparams = None
fc_hyperparams_fn = argscope_fn(mask_rcnn_box_predictor.fc_hyperparams,
is_training)
conv_hyperparams_fn = None
if mask_rcnn_box_predictor.HasField('conv_hyperparams'):
conv_hyperparams = argscope_fn(mask_rcnn_box_predictor.conv_hyperparams,
is_training)
conv_hyperparams_fn = argscope_fn(
mask_rcnn_box_predictor.conv_hyperparams, is_training)
box_predictor_object = box_predictor.MaskRCNNBoxPredictor(
is_training=is_training,
num_classes=num_classes,
fc_hyperparams=fc_hyperparams,
fc_hyperparams_fn=fc_hyperparams_fn,
use_dropout=mask_rcnn_box_predictor.use_dropout,
dropout_keep_prob=mask_rcnn_box_predictor.dropout_keep_probability,
box_code_size=mask_rcnn_box_predictor.box_code_size,
conv_hyperparams=conv_hyperparams,
conv_hyperparams_fn=conv_hyperparams_fn,
predict_instance_masks=mask_rcnn_box_predictor.predict_instance_masks,
mask_height=mask_rcnn_box_predictor.mask_height,
mask_width=mask_rcnn_box_predictor.mask_width,
......@@ -116,12 +116,12 @@ def build(argscope_fn, box_predictor_config, is_training, num_classes):
if box_predictor_oneof == 'rfcn_box_predictor':
rfcn_box_predictor = box_predictor_config.rfcn_box_predictor
conv_hyperparams = argscope_fn(rfcn_box_predictor.conv_hyperparams,
is_training)
conv_hyperparams_fn = argscope_fn(rfcn_box_predictor.conv_hyperparams,
is_training)
box_predictor_object = box_predictor.RfcnBoxPredictor(
is_training=is_training,
num_classes=num_classes,
conv_hyperparams=conv_hyperparams,
conv_hyperparams_fn=conv_hyperparams_fn,
crop_size=[rfcn_box_predictor.crop_height,
rfcn_box_predictor.crop_width],
num_spatial_bins=[rfcn_box_predictor.num_spatial_bins_height,
......
......@@ -54,7 +54,7 @@ class ConvolutionalBoxPredictorBuilderTest(tf.test.TestCase):
box_predictor_config=box_predictor_proto,
is_training=False,
num_classes=10)
(conv_hyperparams_actual, is_training) = box_predictor._conv_hyperparams
(conv_hyperparams_actual, is_training) = box_predictor._conv_hyperparams_fn
self.assertAlmostEqual((hyperparams_proto.regularizer.
l1_regularizer.weight),
(conv_hyperparams_actual.regularizer.l1_regularizer.
......@@ -183,7 +183,7 @@ class WeightSharedConvolutionalBoxPredictorBuilderTest(tf.test.TestCase):
box_predictor_config=box_predictor_proto,
is_training=False,
num_classes=10)
(conv_hyperparams_actual, is_training) = box_predictor._conv_hyperparams
(conv_hyperparams_actual, is_training) = box_predictor._conv_hyperparams_fn
self.assertAlmostEqual((hyperparams_proto.regularizer.
l1_regularizer.weight),
(conv_hyperparams_actual.regularizer.l1_regularizer.
......@@ -297,7 +297,7 @@ class MaskRCNNBoxPredictorBuilderTest(tf.test.TestCase):
is_training=False,
num_classes=10)
mock_argscope_fn.assert_called_with(hyperparams_proto, False)
self.assertEqual(box_predictor._fc_hyperparams, 'arg_scope')
self.assertEqual(box_predictor._fc_hyperparams_fn, 'arg_scope')
def test_non_default_mask_rcnn_box_predictor(self):
fc_hyperparams_text_proto = """
......@@ -417,7 +417,7 @@ class RfcnBoxPredictorBuilderTest(tf.test.TestCase):
box_predictor_config=box_predictor_proto,
is_training=False,
num_classes=10)
(conv_hyperparams_actual, is_training) = box_predictor._conv_hyperparams
(conv_hyperparams_actual, is_training) = box_predictor._conv_hyperparams_fn
self.assertAlmostEqual((hyperparams_proto.regularizer.
l1_regularizer.weight),
(conv_hyperparams_actual.regularizer.l1_regularizer.
......
......@@ -72,7 +72,9 @@ def _get_padding_shapes(dataset, max_num_boxes=None, num_classes=None,
fields.InputDataFields.num_groundtruth_boxes: [],
fields.InputDataFields.groundtruth_label_types: [max_num_boxes],
fields.InputDataFields.groundtruth_label_scores: [max_num_boxes],
fields.InputDataFields.true_image_shape: [3]
fields.InputDataFields.true_image_shape: [3],
fields.InputDataFields.multiclass_scores: [
max_num_boxes, num_classes + 1 if num_classes is not None else None],
}
# Determine whether groundtruth_classes are integers or one-hot encodings, and
# apply batching appropriately.
......
......@@ -43,7 +43,8 @@ def build(hyperparams_config, is_training):
is_training: Whether the network is in training mode.
Returns:
arg_scope: tf-slim arg_scope containing hyperparameters for ops.
arg_scope_fn: A function to construct tf-slim arg_scope containing
hyperparameters for ops.
Raises:
ValueError: if hyperparams_config is not of type hyperparams.Hyperparams.
......@@ -64,16 +65,18 @@ def build(hyperparams_config, is_training):
if hyperparams_config.HasField('op') and (
hyperparams_config.op == hyperparams_pb2.Hyperparams.FC):
affected_ops = [slim.fully_connected]
with slim.arg_scope(
affected_ops,
weights_regularizer=_build_regularizer(
hyperparams_config.regularizer),
weights_initializer=_build_initializer(
hyperparams_config.initializer),
activation_fn=_build_activation_fn(hyperparams_config.activation),
normalizer_fn=batch_norm,
normalizer_params=batch_norm_params) as sc:
return sc
def scope_fn():
with slim.arg_scope(
affected_ops,
weights_regularizer=_build_regularizer(
hyperparams_config.regularizer),
weights_initializer=_build_initializer(
hyperparams_config.initializer),
activation_fn=_build_activation_fn(hyperparams_config.activation),
normalizer_fn=batch_norm,
normalizer_params=batch_norm_params) as sc:
return sc
return scope_fn
def _build_activation_fn(activation_fn):
......
......@@ -45,7 +45,9 @@ class HyperparamsBuilderTest(tf.test.TestCase):
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
self.assertTrue(self._get_scope_key(slim.conv2d) in scope)
def test_default_arg_scope_has_separable_conv2d_op(self):
......@@ -61,7 +63,9 @@ class HyperparamsBuilderTest(tf.test.TestCase):
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
self.assertTrue(self._get_scope_key(slim.separable_conv2d) in scope)
def test_default_arg_scope_has_conv2d_transpose_op(self):
......@@ -77,7 +81,9 @@ class HyperparamsBuilderTest(tf.test.TestCase):
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
self.assertTrue(self._get_scope_key(slim.conv2d_transpose) in scope)
def test_explicit_fc_op_arg_scope_has_fully_connected_op(self):
......@@ -94,7 +100,9 @@ class HyperparamsBuilderTest(tf.test.TestCase):
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
self.assertTrue(self._get_scope_key(slim.fully_connected) in scope)
def test_separable_conv2d_and_conv2d_and_transpose_have_same_parameters(self):
......@@ -110,7 +118,9 @@ class HyperparamsBuilderTest(tf.test.TestCase):
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
kwargs_1, kwargs_2, kwargs_3 = scope.values()
self.assertDictEqual(kwargs_1, kwargs_2)
self.assertDictEqual(kwargs_1, kwargs_3)
......@@ -129,7 +139,9 @@ class HyperparamsBuilderTest(tf.test.TestCase):
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope.values()[0]
regularizer = conv_scope_arguments['weights_regularizer']
weights = np.array([1., -1, 4., 2.])
......@@ -151,7 +163,9 @@ class HyperparamsBuilderTest(tf.test.TestCase):
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope.values()[0]
regularizer = conv_scope_arguments['weights_regularizer']
......@@ -180,7 +194,9 @@ class HyperparamsBuilderTest(tf.test.TestCase):
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope.values()[0]
self.assertEqual(conv_scope_arguments['normalizer_fn'], slim.batch_norm)
batch_norm_params = conv_scope_arguments['normalizer_params']
......@@ -210,7 +226,9 @@ class HyperparamsBuilderTest(tf.test.TestCase):
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=False)
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=False)
scope = scope_fn()
conv_scope_arguments = scope.values()[0]
self.assertEqual(conv_scope_arguments['normalizer_fn'], slim.batch_norm)
batch_norm_params = conv_scope_arguments['normalizer_params']
......@@ -240,7 +258,9 @@ class HyperparamsBuilderTest(tf.test.TestCase):
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope.values()[0]
self.assertEqual(conv_scope_arguments['normalizer_fn'], slim.batch_norm)
batch_norm_params = conv_scope_arguments['normalizer_params']
......@@ -263,7 +283,9 @@ class HyperparamsBuilderTest(tf.test.TestCase):
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope.values()[0]
self.assertEqual(conv_scope_arguments['normalizer_fn'], None)
self.assertEqual(conv_scope_arguments['normalizer_params'], None)
......@@ -282,7 +304,9 @@ class HyperparamsBuilderTest(tf.test.TestCase):
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope.values()[0]
self.assertEqual(conv_scope_arguments['activation_fn'], None)
......@@ -300,7 +324,9 @@ class HyperparamsBuilderTest(tf.test.TestCase):
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope.values()[0]
self.assertEqual(conv_scope_arguments['activation_fn'], tf.nn.relu)
......@@ -318,7 +344,9 @@ class HyperparamsBuilderTest(tf.test.TestCase):
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope.values()[0]
self.assertEqual(conv_scope_arguments['activation_fn'], tf.nn.relu6)
......@@ -351,7 +379,9 @@ class HyperparamsBuilderTest(tf.test.TestCase):
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope.values()[0]
initializer = conv_scope_arguments['weights_initializer']
self._assert_variance_in_range(initializer, shape=[100, 40],
......@@ -373,7 +403,9 @@ class HyperparamsBuilderTest(tf.test.TestCase):
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope.values()[0]
initializer = conv_scope_arguments['weights_initializer']
self._assert_variance_in_range(initializer, shape=[100, 40],
......@@ -395,7 +427,9 @@ class HyperparamsBuilderTest(tf.test.TestCase):
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope.values()[0]
initializer = conv_scope_arguments['weights_initializer']
self._assert_variance_in_range(initializer, shape=[100, 40],
......@@ -417,7 +451,9 @@ class HyperparamsBuilderTest(tf.test.TestCase):
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope.values()[0]
initializer = conv_scope_arguments['weights_initializer']
self._assert_variance_in_range(initializer, shape=[100, 40],
......@@ -438,7 +474,9 @@ class HyperparamsBuilderTest(tf.test.TestCase):
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope.values()[0]
initializer = conv_scope_arguments['weights_initializer']
self._assert_variance_in_range(initializer, shape=[100, 40],
......@@ -459,7 +497,9 @@ class HyperparamsBuilderTest(tf.test.TestCase):
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
scope = hyperparams_builder.build(conv_hyperparams_proto, is_training=True)
scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
is_training=True)
scope = scope_fn()
conv_scope_arguments = scope.values()[0]
initializer = conv_scope_arguments['weights_initializer']
self._assert_variance_in_range(initializer, shape=[100, 40],
......
......@@ -98,19 +98,13 @@ def build(model_config, is_training, add_summaries=True):
def _build_ssd_feature_extractor(feature_extractor_config, is_training,
reuse_weights=None,
inplace_batchnorm_update=False):
reuse_weights=None):
"""Builds a ssd_meta_arch.SSDFeatureExtractor based on config.
Args:
feature_extractor_config: A SSDFeatureExtractor proto config from ssd.proto.
is_training: True if this feature extractor is being built for training.
reuse_weights: if the feature extractor should reuse weights.
inplace_batchnorm_update: Whether to update batch_norm inplace during
training. This is required for batch norm to work correctly on TPUs. When
this is false, user must add a control dependency on
tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
norm moving average parameters.
Returns:
ssd_meta_arch.SSDFeatureExtractor based on config.
......@@ -122,7 +116,6 @@ def _build_ssd_feature_extractor(feature_extractor_config, is_training,
depth_multiplier = feature_extractor_config.depth_multiplier
min_depth = feature_extractor_config.min_depth
pad_to_multiple = feature_extractor_config.pad_to_multiple
batch_norm_trainable = feature_extractor_config.batch_norm_trainable
use_explicit_padding = feature_extractor_config.use_explicit_padding
use_depthwise = feature_extractor_config.use_depthwise
conv_hyperparams = hyperparams_builder.build(
......@@ -132,11 +125,9 @@ def _build_ssd_feature_extractor(feature_extractor_config, is_training,
raise ValueError('Unknown ssd feature_extractor: {}'.format(feature_type))
feature_extractor_class = SSD_FEATURE_EXTRACTOR_CLASS_MAP[feature_type]
return feature_extractor_class(is_training, depth_multiplier, min_depth,
pad_to_multiple, conv_hyperparams,
batch_norm_trainable, reuse_weights,
use_explicit_padding, use_depthwise,
inplace_batchnorm_update)
return feature_extractor_class(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, reuse_weights, use_explicit_padding, use_depthwise)
def _build_ssd_model(ssd_config, is_training, add_summaries):
......@@ -160,8 +151,7 @@ def _build_ssd_model(ssd_config, is_training, add_summaries):
# Feature extractor
feature_extractor = _build_ssd_feature_extractor(
feature_extractor_config=ssd_config.feature_extractor,
is_training=is_training,
inplace_batchnorm_update=ssd_config.inplace_batchnorm_update)
is_training=is_training)
box_coder = box_coder_builder.build(ssd_config.box_coder)
matcher = matcher_builder.build(ssd_config.matcher)
......@@ -203,7 +193,9 @@ def _build_ssd_model(ssd_config, is_training, add_summaries):
normalize_loss_by_num_matches,
hard_example_miner,
add_summaries=add_summaries,
normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize)
normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize,
freeze_batchnorm=ssd_config.freeze_batchnorm,
inplace_batchnorm_update=ssd_config.inplace_batchnorm_update)
def _build_faster_rcnn_feature_extractor(
......@@ -276,7 +268,7 @@ def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
frcnn_config.first_stage_anchor_generator)
first_stage_atrous_rate = frcnn_config.first_stage_atrous_rate
first_stage_box_predictor_arg_scope = hyperparams_builder.build(
first_stage_box_predictor_arg_scope_fn = hyperparams_builder.build(
frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training)
first_stage_box_predictor_kernel_size = (
frcnn_config.first_stage_box_predictor_kernel_size)
......@@ -329,8 +321,8 @@ def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
'number_of_stages': number_of_stages,
'first_stage_anchor_generator': first_stage_anchor_generator,
'first_stage_atrous_rate': first_stage_atrous_rate,
'first_stage_box_predictor_arg_scope':
first_stage_box_predictor_arg_scope,
'first_stage_box_predictor_arg_scope_fn':
first_stage_box_predictor_arg_scope_fn,
'first_stage_box_predictor_kernel_size':
first_stage_box_predictor_kernel_size,
'first_stage_box_predictor_depth': first_stage_box_predictor_depth,
......
......@@ -225,7 +225,6 @@ class ModelBuilderTest(tf.test.TestCase):
}
}
}
batch_norm_trainable: true
}
box_coder {
faster_rcnn_box_coder {
......@@ -298,6 +297,7 @@ class ModelBuilderTest(tf.test.TestCase):
def test_create_ssd_mobilenet_v1_model_from_config(self):
model_text_proto = """
ssd {
freeze_batchnorm: true
inplace_batchnorm_update: true
feature_extractor {
type: 'ssd_mobilenet_v1'
......@@ -311,7 +311,6 @@ class ModelBuilderTest(tf.test.TestCase):
}
}
}
batch_norm_trainable: true
}
box_coder {
faster_rcnn_box_coder {
......@@ -368,8 +367,9 @@ class ModelBuilderTest(tf.test.TestCase):
self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch)
self.assertIsInstance(model._feature_extractor,
SSDMobileNetV1FeatureExtractor)
self.assertTrue(model._feature_extractor._batch_norm_trainable)
self.assertTrue(model._normalize_loc_loss_by_codesize)
self.assertTrue(model._freeze_batchnorm)
self.assertTrue(model._inplace_batchnorm_update)
def test_create_ssd_mobilenet_v2_model_from_config(self):
model_text_proto = """
......@@ -386,7 +386,6 @@ class ModelBuilderTest(tf.test.TestCase):
}
}
}
batch_norm_trainable: true
}
box_coder {
faster_rcnn_box_coder {
......@@ -443,7 +442,6 @@ class ModelBuilderTest(tf.test.TestCase):
self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch)
self.assertIsInstance(model._feature_extractor,
SSDMobileNetV2FeatureExtractor)
self.assertTrue(model._feature_extractor._batch_norm_trainable)
self.assertTrue(model._normalize_loc_loss_by_codesize)
def test_create_embedded_ssd_mobilenet_v1_model_from_config(self):
......@@ -461,7 +459,6 @@ class ModelBuilderTest(tf.test.TestCase):
}
}
}
batch_norm_trainable: true
}
box_coder {
faster_rcnn_box_coder {
......
......@@ -147,7 +147,7 @@ class RfcnBoxPredictor(BoxPredictor):
def __init__(self,
is_training,
num_classes,
conv_hyperparams,
conv_hyperparams_fn,
num_spatial_bins,
depth,
crop_size,
......@@ -160,8 +160,8 @@ class RfcnBoxPredictor(BoxPredictor):
include the background category, so if groundtruth labels take values
in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
assigned classification targets can range from {0,... K}).
conv_hyperparams: Slim arg_scope with hyperparameters for conolutional
layers.
conv_hyperparams_fn: A function to construct tf-slim arg_scope with
hyperparameters for convolutional layers.
num_spatial_bins: A list of two integers `[spatial_bins_y,
spatial_bins_x]`.
depth: Target depth to reduce the input feature maps to.
......@@ -169,7 +169,7 @@ class RfcnBoxPredictor(BoxPredictor):
box_code_size: Size of encoding for each box.
"""
super(RfcnBoxPredictor, self).__init__(is_training, num_classes)
self._conv_hyperparams = conv_hyperparams
self._conv_hyperparams_fn = conv_hyperparams_fn
self._num_spatial_bins = num_spatial_bins
self._depth = depth
self._crop_size = crop_size
......@@ -227,7 +227,7 @@ class RfcnBoxPredictor(BoxPredictor):
return tf.reshape(ones_mat * multiplier, [-1])
net = image_feature
with slim.arg_scope(self._conv_hyperparams):
with slim.arg_scope(self._conv_hyperparams_fn()):
net = slim.conv2d(net, self._depth, [1, 1], scope='reduce_depth')
# Location predictions.
location_feature_map_depth = (self._num_spatial_bins[0] *
......@@ -297,11 +297,11 @@ class MaskRCNNBoxPredictor(BoxPredictor):
def __init__(self,
is_training,
num_classes,
fc_hyperparams,
fc_hyperparams_fn,
use_dropout,
dropout_keep_prob,
box_code_size,
conv_hyperparams=None,
conv_hyperparams_fn=None,
predict_instance_masks=False,
mask_height=14,
mask_width=14,
......@@ -316,16 +316,16 @@ class MaskRCNNBoxPredictor(BoxPredictor):
include the background category, so if groundtruth labels take values
in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
assigned classification targets can range from {0,... K}).
fc_hyperparams: Slim arg_scope with hyperparameters for fully
connected ops.
fc_hyperparams_fn: A function to generate tf-slim arg_scope with
hyperparameters for fully connected ops.
use_dropout: Option to use dropout or not. Note that a single dropout
op is applied here prior to both box and class predictions, which stands
in contrast to the ConvolutionalBoxPredictor below.
dropout_keep_prob: Keep probability for dropout.
This is only used if use_dropout is True.
box_code_size: Size of encoding for each box.
conv_hyperparams: Slim arg_scope with hyperparameters for convolution
ops.
conv_hyperparams_fn: A function to generate tf-slim arg_scope with
hyperparameters for convolution ops.
predict_instance_masks: Whether to predict object masks inside detection
boxes.
mask_height: Desired output mask height. The default value is 14.
......@@ -347,11 +347,11 @@ class MaskRCNNBoxPredictor(BoxPredictor):
ValueError: If mask_prediction_num_conv_layers is smaller than two.
"""
super(MaskRCNNBoxPredictor, self).__init__(is_training, num_classes)
self._fc_hyperparams = fc_hyperparams
self._fc_hyperparams_fn = fc_hyperparams_fn
self._use_dropout = use_dropout
self._box_code_size = box_code_size
self._dropout_keep_prob = dropout_keep_prob
self._conv_hyperparams = conv_hyperparams
self._conv_hyperparams_fn = conv_hyperparams_fn
self._predict_instance_masks = predict_instance_masks
self._mask_height = mask_height
self._mask_width = mask_width
......@@ -361,7 +361,7 @@ class MaskRCNNBoxPredictor(BoxPredictor):
if self._predict_keypoints:
raise ValueError('Keypoint prediction is unimplemented.')
if ((self._predict_instance_masks or self._predict_keypoints) and
self._conv_hyperparams is None):
self._conv_hyperparams_fn is None):
raise ValueError('`conv_hyperparams` must be provided when predicting '
'masks.')
if self._mask_prediction_num_conv_layers < 2:
......@@ -399,7 +399,7 @@ class MaskRCNNBoxPredictor(BoxPredictor):
flattened_image_features = slim.dropout(flattened_image_features,
keep_prob=self._dropout_keep_prob,
is_training=self._is_training)
with slim.arg_scope(self._fc_hyperparams):
with slim.arg_scope(self._fc_hyperparams_fn()):
box_encodings = slim.fully_connected(
flattened_image_features,
self._num_classes * self._box_code_size,
......@@ -463,7 +463,7 @@ class MaskRCNNBoxPredictor(BoxPredictor):
num_feature_channels = image_features.get_shape().as_list()[3]
num_conv_channels = self._get_mask_predictor_conv_depth(
num_feature_channels, self.num_classes)
with slim.arg_scope(self._conv_hyperparams):
with slim.arg_scope(self._conv_hyperparams_fn()):
upsampled_features = tf.image.resize_bilinear(
image_features,
[self._mask_height, self._mask_width],
......@@ -578,7 +578,7 @@ class ConvolutionalBoxPredictor(BoxPredictor):
def __init__(self,
is_training,
num_classes,
conv_hyperparams,
conv_hyperparams_fn,
min_depth,
max_depth,
num_layers_before_predictor,
......@@ -597,8 +597,9 @@ class ConvolutionalBoxPredictor(BoxPredictor):
include the background category, so if groundtruth labels take values
in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
assigned classification targets can range from {0,... K}).
conv_hyperparams: Slim arg_scope with hyperparameters for convolution ops.
min_depth: Minumum feature depth prior to predicting box encodings
conv_hyperparams_fn: A function to generate tf-slim arg_scope with
hyperparameters for convolution ops.
min_depth: Minimum feature depth prior to predicting box encodings
and class predictions.
max_depth: Maximum feature depth prior to predicting box encodings
and class predictions. If max_depth is set to 0, no additional
......@@ -626,7 +627,7 @@ class ConvolutionalBoxPredictor(BoxPredictor):
super(ConvolutionalBoxPredictor, self).__init__(is_training, num_classes)
if min_depth > max_depth:
raise ValueError('min_depth should be less than or equal to max_depth')
self._conv_hyperparams = conv_hyperparams
self._conv_hyperparams_fn = conv_hyperparams_fn
self._min_depth = min_depth
self._max_depth = max_depth
self._num_layers_before_predictor = num_layers_before_predictor
......@@ -679,7 +680,7 @@ class ConvolutionalBoxPredictor(BoxPredictor):
# Add a slot for the background class.
num_class_slots = self.num_classes + 1
net = image_feature
with slim.arg_scope(self._conv_hyperparams), \
with slim.arg_scope(self._conv_hyperparams_fn()), \
slim.arg_scope([slim.dropout], is_training=self._is_training):
# Add additional conv layers before the class predictor.
features_depth = static_shape.get_depth(image_feature.get_shape())
......@@ -767,7 +768,7 @@ class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
def __init__(self,
is_training,
num_classes,
conv_hyperparams,
conv_hyperparams_fn,
depth,
num_layers_before_predictor,
box_code_size,
......@@ -781,7 +782,8 @@ class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
include the background category, so if groundtruth labels take values
in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
assigned classification targets can range from {0,... K}).
conv_hyperparams: Slim arg_scope with hyperparameters for convolution ops.
conv_hyperparams_fn: A function to generate tf-slim arg_scope with
hyperparameters for convolution ops.
depth: depth of conv layers.
num_layers_before_predictor: Number of the additional conv layers before
the predictor.
......@@ -792,7 +794,7 @@ class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
"""
super(WeightSharedConvolutionalBoxPredictor, self).__init__(is_training,
num_classes)
self._conv_hyperparams = conv_hyperparams
self._conv_hyperparams_fn = conv_hyperparams_fn
self._depth = depth
self._num_layers_before_predictor = num_layers_before_predictor
self._box_code_size = box_code_size
......@@ -846,7 +848,7 @@ class WeightSharedConvolutionalBoxPredictor(BoxPredictor):
num_class_slots = self.num_classes + 1
box_encodings_net = image_feature
class_predictions_net = image_feature
with slim.arg_scope(self._conv_hyperparams):
with slim.arg_scope(self._conv_hyperparams_fn()):
for i in range(self._num_layers_before_predictor):
box_encodings_net = slim.conv2d(
box_encodings_net,
......
......@@ -49,7 +49,7 @@ class MaskRCNNBoxPredictorTest(tf.test.TestCase):
mask_box_predictor = box_predictor.MaskRCNNBoxPredictor(
is_training=False,
num_classes=5,
fc_hyperparams=self._build_arg_scope_with_hyperparams(),
fc_hyperparams_fn=self._build_arg_scope_with_hyperparams(),
use_dropout=False,
dropout_keep_prob=0.5,
box_code_size=4,
......@@ -75,7 +75,7 @@ class MaskRCNNBoxPredictorTest(tf.test.TestCase):
box_predictor.MaskRCNNBoxPredictor(
is_training=False,
num_classes=5,
fc_hyperparams=self._build_arg_scope_with_hyperparams(),
fc_hyperparams_fn=self._build_arg_scope_with_hyperparams(),
use_dropout=False,
dropout_keep_prob=0.5,
box_code_size=4,
......@@ -86,11 +86,11 @@ class MaskRCNNBoxPredictorTest(tf.test.TestCase):
mask_box_predictor = box_predictor.MaskRCNNBoxPredictor(
is_training=False,
num_classes=5,
fc_hyperparams=self._build_arg_scope_with_hyperparams(),
fc_hyperparams_fn=self._build_arg_scope_with_hyperparams(),
use_dropout=False,
dropout_keep_prob=0.5,
box_code_size=4,
conv_hyperparams=self._build_arg_scope_with_hyperparams(
conv_hyperparams_fn=self._build_arg_scope_with_hyperparams(
op_type=hyperparams_pb2.Hyperparams.CONV),
predict_instance_masks=True)
box_predictions = mask_box_predictor.predict(
......@@ -108,7 +108,7 @@ class MaskRCNNBoxPredictorTest(tf.test.TestCase):
mask_box_predictor = box_predictor.MaskRCNNBoxPredictor(
is_training=False,
num_classes=5,
fc_hyperparams=self._build_arg_scope_with_hyperparams(),
fc_hyperparams_fn=self._build_arg_scope_with_hyperparams(),
use_dropout=False,
dropout_keep_prob=0.5,
box_code_size=4)
......@@ -125,7 +125,7 @@ class MaskRCNNBoxPredictorTest(tf.test.TestCase):
box_predictor.MaskRCNNBoxPredictor(
is_training=False,
num_classes=5,
fc_hyperparams=self._build_arg_scope_with_hyperparams(),
fc_hyperparams_fn=self._build_arg_scope_with_hyperparams(),
use_dropout=False,
dropout_keep_prob=0.5,
box_code_size=4,
......@@ -155,7 +155,7 @@ class RfcnBoxPredictorTest(tf.test.TestCase):
rfcn_box_predictor = box_predictor.RfcnBoxPredictor(
is_training=False,
num_classes=2,
conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(),
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
num_spatial_bins=[3, 3],
depth=4,
crop_size=[12, 12],
......@@ -205,7 +205,7 @@ class ConvolutionalBoxPredictorTest(test_case.TestCase):
conv_box_predictor = box_predictor.ConvolutionalBoxPredictor(
is_training=False,
num_classes=0,
conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(),
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
min_depth=0,
max_depth=32,
num_layers_before_predictor=1,
......@@ -234,7 +234,7 @@ class ConvolutionalBoxPredictorTest(test_case.TestCase):
conv_box_predictor = box_predictor.ConvolutionalBoxPredictor(
is_training=False,
num_classes=0,
conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(),
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
min_depth=0,
max_depth=32,
num_layers_before_predictor=1,
......@@ -265,7 +265,7 @@ class ConvolutionalBoxPredictorTest(test_case.TestCase):
conv_box_predictor = box_predictor.ConvolutionalBoxPredictor(
is_training=False,
num_classes=num_classes_without_background,
conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(),
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
min_depth=0,
max_depth=32,
num_layers_before_predictor=1,
......@@ -297,7 +297,7 @@ class ConvolutionalBoxPredictorTest(test_case.TestCase):
conv_box_predictor = box_predictor.ConvolutionalBoxPredictor(
is_training=False,
num_classes=0,
conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(),
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
min_depth=0,
max_depth=32,
num_layers_before_predictor=1,
......@@ -344,7 +344,7 @@ class ConvolutionalBoxPredictorTest(test_case.TestCase):
conv_box_predictor = box_predictor.ConvolutionalBoxPredictor(
is_training=False,
num_classes=0,
conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(),
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
min_depth=0,
max_depth=32,
num_layers_before_predictor=1,
......@@ -416,7 +416,7 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor(
is_training=False,
num_classes=0,
conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(),
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
depth=32,
num_layers_before_predictor=1,
box_code_size=4)
......@@ -442,7 +442,7 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor(
is_training=False,
num_classes=num_classes_without_background,
conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(),
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
depth=32,
num_layers_before_predictor=1,
box_code_size=4)
......@@ -471,7 +471,7 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor(
is_training=False,
num_classes=num_classes_without_background,
conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(),
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
depth=32,
num_layers_before_predictor=1,
box_code_size=4)
......@@ -500,7 +500,7 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor(
is_training=False,
num_classes=num_classes_without_background,
conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(),
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
depth=32,
num_layers_before_predictor=2,
box_code_size=4)
......@@ -553,7 +553,7 @@ class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase):
conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor(
is_training=False,
num_classes=0,
conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(),
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(),
depth=32,
num_layers_before_predictor=1,
box_code_size=4)
......
......@@ -119,6 +119,9 @@ class PreprocessorTest(tf.test.TestCase):
[[-0.1, 0.25, 0.75, 1], [0.25, 0.5, 0.75, 1.1]], dtype=tf.float32)
return boxes
def createTestMultiClassScores(self):
    """Returns a 2x2 float32 constant used as multiclass scores in tests."""
    score_rows = [[1.0, 0.0], [0.5, 0.5]]
    return tf.constant(score_rows, dtype=tf.float32)
def expectedImagesAfterNormalization(self):
images_r = tf.constant([[[0, 0, 0, 0], [-1, -1, 0, 0],
[-1, 0, 0, 0], [0.5, 0.5, 0, 0]]],
......@@ -269,6 +272,9 @@ class PreprocessorTest(tf.test.TestCase):
def expectedLabelsAfterThresholding(self):
    """Returns the float32 label constant expected after thresholding."""
    surviving_labels = [1]
    return tf.constant(surviving_labels, dtype=tf.float32)
def expectedMultiClassScoresAfterThresholding(self):
    """Returns the float32 multiclass scores expected after thresholding."""
    surviving_scores = [[1.0, 0.0]]
    return tf.constant(surviving_scores, dtype=tf.float32)
def expectedMasksAfterThresholding(self):
mask = np.array([
[[255.0, 0.0, 0.0],
......@@ -345,6 +351,28 @@ class PreprocessorTest(tf.test.TestCase):
self.assertAllClose(
retained_label_scores_, expected_retained_label_scores_)
def testRetainBoxesAboveThresholdWithMultiClassScores(self):
    """Checks the multiclass scores from retain_boxes_above_threshold.

    Calls the op with a threshold of 0.6 and compares the fourth returned
    tensor against expectedMultiClassScoresAfterThresholding().
    """
    outputs = preprocessor.retain_boxes_above_threshold(
        self.createTestBoxes(),
        self.createTestLabels(),
        self.createTestLabelScores(),
        multiclass_scores=self.createTestMultiClassScores(),
        threshold=0.6)
    # Only the fourth output (retained multiclass scores) is inspected here.
    _, _, _, retained_scores = outputs
    with self.test_session() as sess:
        fetches = [
            retained_scores,
            self.expectedMultiClassScoresAfterThresholding(),
        ]
        actual_scores, wanted_scores = sess.run(fetches)
        self.assertAllClose(actual_scores, wanted_scores)
def testRetainBoxesAboveThresholdWithMasks(self):
boxes = self.createTestBoxes()
labels = self.createTestLabels()
......@@ -1264,6 +1292,48 @@ class PreprocessorTest(tf.test.TestCase):
self.assertAllClose(distorted_boxes_, expected_boxes_)
self.assertAllEqual(distorted_labels_, expected_labels_)
def testRandomCropImageWithMultiClassScores(self):
    """Runs normalize_image + random_crop_image on a dict with multiclass scores.

    Asserts that image and box ranks are unchanged by preprocessing and that
    the multiclass scores in the output equal those in the input.

    NOTE(review): unlike testSSDRandomCropWithMultiClassScores, no
    func_arg_map is passed to preprocess() here -- confirm that the default
    map actually routes multiclass scores through random_crop_image.
    """
    normalize_kwargs = {
        'original_minval': 0,
        'original_maxval': 255,
        'target_minval': 0,
        'target_maxval': 1
    }
    pipeline = [
        (preprocessor.normalize_image, normalize_kwargs),
        (preprocessor.random_crop_image, {}),
    ]
    images = self.createTestImages()
    boxes = self.createTestBoxes()
    labels = self.createTestLabels()
    multiclass_scores = self.createTestMultiClassScores()

    inputs = {
        fields.InputDataFields.image: images,
        fields.InputDataFields.groundtruth_boxes: boxes,
        fields.InputDataFields.groundtruth_classes: labels,
        fields.InputDataFields.multiclass_scores: multiclass_scores
    }
    outputs = preprocessor.preprocess(inputs, pipeline)
    cropped_images = outputs[fields.InputDataFields.image]
    cropped_boxes = outputs[fields.InputDataFields.groundtruth_boxes]
    cropped_scores = outputs[fields.InputDataFields.multiclass_scores]

    # Rank ops are built in the same order as the fetch list below.
    fetches = [
        tf.rank(boxes),
        tf.rank(cropped_boxes),
        tf.rank(images),
        tf.rank(cropped_images),
        multiclass_scores,
        cropped_scores,
    ]
    with self.test_session() as sess:
        (rank_boxes_in, rank_boxes_out, rank_images_in, rank_images_out,
         scores_in, scores_out) = sess.run(fetches)
        self.assertAllEqual(rank_boxes_in, rank_boxes_out)
        self.assertAllEqual(rank_images_in, rank_images_out)
        self.assertAllEqual(scores_in, scores_out)
def testStrictRandomCropImageWithLabelScores(self):
image = self.createColorfulTestImage()[0]
boxes = self.createTestBoxes()
......@@ -2510,6 +2580,49 @@ class PreprocessorTest(tf.test.TestCase):
self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
self.assertAllEqual(images_rank_, distorted_images_rank_)
def testSSDRandomCropWithMultiClassScores(self):
    """Runs normalize_image + ssd_random_crop with multiclass scores enabled.

    The func_arg_map is built with include_multiclass_scores=True. The test
    asserts that image and box ranks are unchanged and that the multiclass
    scores in the output equal those in the input.
    """
    normalize_kwargs = {
        'original_minval': 0,
        'original_maxval': 255,
        'target_minval': 0,
        'target_maxval': 1
    }
    pipeline = [
        (preprocessor.normalize_image, normalize_kwargs),
        (preprocessor.ssd_random_crop, {}),
    ]
    images = self.createTestImages()
    boxes = self.createTestBoxes()
    labels = self.createTestLabels()
    multiclass_scores = self.createTestMultiClassScores()

    inputs = {
        fields.InputDataFields.image: images,
        fields.InputDataFields.groundtruth_boxes: boxes,
        fields.InputDataFields.groundtruth_classes: labels,
        fields.InputDataFields.multiclass_scores: multiclass_scores,
    }
    arg_map = preprocessor.get_default_func_arg_map(
        include_multiclass_scores=True)
    outputs = preprocessor.preprocess(
        inputs, pipeline, func_arg_map=arg_map)
    cropped_images = outputs[fields.InputDataFields.image]
    cropped_boxes = outputs[fields.InputDataFields.groundtruth_boxes]
    cropped_scores = outputs[fields.InputDataFields.multiclass_scores]

    # Rank ops are created in the original order: images first, then boxes.
    images_rank = tf.rank(images)
    cropped_images_rank = tf.rank(cropped_images)
    boxes_rank = tf.rank(boxes)
    cropped_boxes_rank = tf.rank(cropped_boxes)

    fetches = [
        boxes_rank, cropped_boxes_rank, images_rank, cropped_images_rank,
        multiclass_scores, cropped_scores,
    ]
    with self.test_session() as sess:
        (rank_boxes_in, rank_boxes_out, rank_images_in, rank_images_out,
         scores_in, scores_out) = sess.run(fetches)
        self.assertAllEqual(rank_boxes_in, rank_boxes_out)
        self.assertAllEqual(rank_images_in, rank_images_out)
        self.assertAllEqual(scores_in, scores_out)
def testSSDRandomCropPad(self):
images = self.createTestImages()
boxes = self.createTestBoxes()
......@@ -2562,28 +2675,31 @@ class PreprocessorTest(tf.test.TestCase):
def _testSSDRandomCropFixedAspectRatio(self,
include_label_scores,
include_multiclass_scores,
include_instance_masks,
include_keypoints):
images = self.createTestImages()
boxes = self.createTestBoxes()
labels = self.createTestLabels()
preprocessing_options = [
(preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}),
(preprocessor.ssd_random_crop_fixed_aspect_ratio, {})]
preprocessing_options = [(preprocessor.normalize_image, {
'original_minval': 0,
'original_maxval': 255,
'target_minval': 0,
'target_maxval': 1
}), (preprocessor.ssd_random_crop_fixed_aspect_ratio, {})]
tensor_dict = {
fields.InputDataFields.image: images,
fields.InputDataFields.groundtruth_boxes: boxes,
fields.InputDataFields.groundtruth_classes: labels
fields.InputDataFields.groundtruth_classes: labels,
}
if include_label_scores:
label_scores = self.createTestLabelScores()
tensor_dict[fields.InputDataFields.groundtruth_label_scores] = (
label_scores)
if include_multiclass_scores:
multiclass_scores = self.createTestMultiClassScores()
tensor_dict[fields.InputDataFields.multiclass_scores] = (
multiclass_scores)
if include_instance_masks:
masks = self.createTestMasks()
tensor_dict[fields.InputDataFields.groundtruth_instance_masks] = masks
......@@ -2593,6 +2709,7 @@ class PreprocessorTest(tf.test.TestCase):
preprocessor_arg_map = preprocessor.get_default_func_arg_map(
include_label_scores=include_label_scores,
include_multiclass_scores=include_multiclass_scores,
include_instance_masks=include_instance_masks,
include_keypoints=include_keypoints)
distorted_tensor_dict = preprocessor.preprocess(
......@@ -2615,16 +2732,25 @@ class PreprocessorTest(tf.test.TestCase):
def testSSDRandomCropFixedAspectRatio(self):
    """Runs the fixed-aspect-ratio crop helper with every optional field off."""
    flags = {
        'include_label_scores': False,
        'include_multiclass_scores': False,
        'include_instance_masks': False,
        'include_keypoints': False,
    }
    self._testSSDRandomCropFixedAspectRatio(**flags)
def testSSDRandomCropFixedAspectRatioWithMultiClassScores(self):
    """Runs the fixed-aspect-ratio crop helper with only multiclass scores on."""
    flags = {
        'include_label_scores': False,
        'include_multiclass_scores': True,
        'include_instance_masks': False,
        'include_keypoints': False,
    }
    self._testSSDRandomCropFixedAspectRatio(**flags)
def testSSDRandomCropFixedAspectRatioWithMasksAndKeypoints(self):
    """Runs the fixed-aspect-ratio crop helper with masks and keypoints on."""
    flags = {
        'include_label_scores': False,
        'include_multiclass_scores': False,
        'include_instance_masks': True,
        'include_keypoints': True,
    }
    self._testSSDRandomCropFixedAspectRatio(**flags)
def testSSDRandomCropFixedAspectRatioWithLabelScoresMasksAndKeypoints(self):
    """Runs the crop helper with label scores, masks and keypoints on."""
    flags = {
        'include_label_scores': True,
        'include_multiclass_scores': False,
        'include_instance_masks': True,
        'include_keypoints': True,
    }
    self._testSSDRandomCropFixedAspectRatio(**flags)
......
......@@ -61,6 +61,9 @@ class InputDataFields(object):
num_groundtruth_boxes: number of groundtruth boxes.
true_image_shape: true shapes of images in the resized images, as resized
images can be padded with zeros.
verified_labels: list of human-verified image-level labels (note that a
label can be verified both as positive and negative).
multiclass_scores: the label score per class for each box.
"""
image = 'image'
original_image = 'original_image'
......@@ -86,6 +89,8 @@ class InputDataFields(object):
groundtruth_weights = 'groundtruth_weights'
num_groundtruth_boxes = 'num_groundtruth_boxes'
true_image_shape = 'true_image_shape'
verified_labels = 'verified_labels'
multiclass_scores = 'multiclass_scores'
class DetectionResultFields(object):
......
confidential;1;confidentialit,confidentiality
dogfood;1;
fishfood;1;
catfood;1;
teamfood;1;
droidfood;1;
//go/;1;
//sites/;1;
a/google.com;1;
corp.google.com;1;
.googleplex.com;1;
sandbox.;1;wallet-web.sandbox.,sandbox.google.com/checkout, sandbox.,paymentssandbox
stupid;1;astupidi
caution:;2;
fixme:;2;
fixme(;2;
internal only;2;
internal_only;2;
backdoor;2;
STOPSHIP;2;
ridiculous;1;
notasecret;1;
@google.com;1;noreply@google.com
$RE:chmod [0-9]?777;3;chmod (0)777
mactruck;2;
seastar;2;
......@@ -229,7 +229,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
number_of_stages,
first_stage_anchor_generator,
first_stage_atrous_rate,
first_stage_box_predictor_arg_scope,
first_stage_box_predictor_arg_scope_fn,
first_stage_box_predictor_kernel_size,
first_stage_box_predictor_depth,
first_stage_minibatch_size,
......@@ -291,8 +291,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
denser resolutions. The atrous rate is used to compensate for the
denser feature maps by using an effectively larger receptive field.
(This should typically be set to 1).
first_stage_box_predictor_arg_scope: Slim arg_scope for conv2d,
separable_conv2d and fully_connected ops for the RPN box predictor.
first_stage_box_predictor_arg_scope_fn: A function to construct tf-slim
arg_scope for conv2d, separable_conv2d and fully_connected ops for the
RPN box predictor.
first_stage_box_predictor_kernel_size: Kernel size to use for the
convolution op just prior to RPN box predictions.
first_stage_box_predictor_depth: Output depth for the convolution op
......@@ -396,8 +397,8 @@ class FasterRCNNMetaArch(model.DetectionModel):
# (First stage) Region proposal network parameters
self._first_stage_anchor_generator = first_stage_anchor_generator
self._first_stage_atrous_rate = first_stage_atrous_rate
self._first_stage_box_predictor_arg_scope = (
first_stage_box_predictor_arg_scope)
self._first_stage_box_predictor_arg_scope_fn = (
first_stage_box_predictor_arg_scope_fn)
self._first_stage_box_predictor_kernel_size = (
first_stage_box_predictor_kernel_size)
self._first_stage_box_predictor_depth = first_stage_box_predictor_depth
......@@ -406,7 +407,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
positive_fraction=first_stage_positive_balance_fraction)
self._first_stage_box_predictor = box_predictor.ConvolutionalBoxPredictor(
self._is_training, num_classes=1,
conv_hyperparams=self._first_stage_box_predictor_arg_scope,
conv_hyperparams_fn=self._first_stage_box_predictor_arg_scope_fn,
min_depth=0, max_depth=0, num_layers_before_predictor=0,
use_dropout=False, dropout_keep_prob=1.0, kernel_size=1,
box_code_size=self._box_coder.code_size)
......@@ -914,7 +915,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
anchors = box_list_ops.concatenate(
self._first_stage_anchor_generator.generate([(feature_map_shape[1],
feature_map_shape[2])]))
with slim.arg_scope(self._first_stage_box_predictor_arg_scope):
with slim.arg_scope(self._first_stage_box_predictor_arg_scope_fn()):
kernel_size = self._first_stage_box_predictor_kernel_size
rpn_box_predictor_features = slim.conv2d(
rpn_features_to_crop,
......
......@@ -196,7 +196,7 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
}
}
"""
first_stage_box_predictor_arg_scope = (
first_stage_box_predictor_arg_scope_fn = (
self._build_arg_scope_with_hyperparams(
first_stage_box_predictor_hyperparams_text_proto, is_training))
......@@ -255,8 +255,8 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
'number_of_stages': number_of_stages,
'first_stage_anchor_generator': first_stage_anchor_generator,
'first_stage_atrous_rate': first_stage_atrous_rate,
'first_stage_box_predictor_arg_scope':
first_stage_box_predictor_arg_scope,
'first_stage_box_predictor_arg_scope_fn':
first_stage_box_predictor_arg_scope_fn,
'first_stage_box_predictor_kernel_size':
first_stage_box_predictor_kernel_size,
'first_stage_box_predictor_depth': first_stage_box_predictor_depth,
......
......@@ -56,7 +56,7 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
number_of_stages,
first_stage_anchor_generator,
first_stage_atrous_rate,
first_stage_box_predictor_arg_scope,
first_stage_box_predictor_arg_scope_fn,
first_stage_box_predictor_kernel_size,
first_stage_box_predictor_depth,
first_stage_minibatch_size,
......@@ -103,8 +103,9 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
denser resolutions. The atrous rate is used to compensate for the
denser feature maps by using an effectively larger receptive field.
(This should typically be set to 1).
first_stage_box_predictor_arg_scope: Slim arg_scope for conv2d,
separable_conv2d and fully_connected ops for the RPN box predictor.
first_stage_box_predictor_arg_scope_fn: A function to generate tf-slim
arg_scope for conv2d, separable_conv2d and fully_connected ops for the
RPN box predictor.
first_stage_box_predictor_kernel_size: Kernel size to use for the
convolution op just prior to RPN box predictions.
first_stage_box_predictor_depth: Output depth for the convolution op
......@@ -174,7 +175,7 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
number_of_stages,
first_stage_anchor_generator,
first_stage_atrous_rate,
first_stage_box_predictor_arg_scope,
first_stage_box_predictor_arg_scope_fn,
first_stage_box_predictor_kernel_size,
first_stage_box_predictor_depth,
first_stage_minibatch_size,
......
......@@ -42,12 +42,10 @@ class SSDFeatureExtractor(object):
depth_multiplier,
min_depth,
pad_to_multiple,
conv_hyperparams,
batch_norm_trainable=True,
conv_hyperparams_fn,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False,
inplace_batchnorm_update=False):
use_depthwise=False):
"""Constructor.
Args:
......@@ -56,27 +54,19 @@ class SSDFeatureExtractor(object):
min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
batch_norm_trainable: Whether to update batch norm parameters during
training or not. When training with a small batch size
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
and separable_conv2d ops.
reuse_weights: whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False.
inplace_batchnorm_update: Whether to update batch norm moving average
values inplace. When this is false train op must add a control
dependency on tf.graphkeys.UPDATE_OPS collection in order to update
batch norm statistics.
"""
self._is_training = is_training
self._depth_multiplier = depth_multiplier
self._min_depth = min_depth
self._pad_to_multiple = pad_to_multiple
self._conv_hyperparams = conv_hyperparams
self._batch_norm_trainable = batch_norm_trainable
self._inplace_batchnorm_update = inplace_batchnorm_update
self._conv_hyperparams_fn = conv_hyperparams_fn
self._reuse_weights = reuse_weights
self._use_explicit_padding = use_explicit_padding
self._use_depthwise = use_depthwise
......@@ -106,28 +96,6 @@ class SSDFeatureExtractor(object):
This function is responsible for extracting feature maps from preprocessed
images.
Args:
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
Returns:
feature_maps: a list of tensors where the ith tensor has shape
[batch, height_i, width_i, depth_i]
"""
batchnorm_updates_collections = (None if self._inplace_batchnorm_update
else tf.GraphKeys.UPDATE_OPS)
with slim.arg_scope([slim.batch_norm],
updates_collections=batchnorm_updates_collections):
return self._extract_features(preprocessed_inputs)
@abstractmethod
def _extract_features(self, preprocessed_inputs):
"""Extracts features from preprocessed inputs.
This function is responsible for extracting feature maps from preprocessed
images.
Args:
preprocessed_inputs: a [batch, height, width, channels] float tensor
representing a batch of images.
......@@ -162,7 +130,9 @@ class SSDMetaArch(model.DetectionModel):
normalize_loss_by_num_matches,
hard_example_miner,
add_summaries=True,
normalize_loc_loss_by_codesize=False):
normalize_loc_loss_by_codesize=False,
freeze_batchnorm=False,
inplace_batchnorm_update=False):
"""SSDMetaArch Constructor.
TODO(rathodv,jonathanhuang): group NMS parameters + score converter into
......@@ -209,9 +179,19 @@ class SSDMetaArch(model.DetectionModel):
should be added to tensorflow graph.
normalize_loc_loss_by_codesize: whether to normalize localization loss
by code size of the box encoder.
freeze_batchnorm: Whether to freeze batch norm parameters during
training or not. When training with a small batch size (e.g. 1), it is
desirable to freeze batch norm update and use pretrained batch norm
params.
inplace_batchnorm_update: Whether to update batch norm moving average
values inplace. When this is false, the train op must add a control
dependency on the tf.GraphKeys.UPDATE_OPS collection in order to update
batch norm statistics.
"""
super(SSDMetaArch, self).__init__(num_classes=box_predictor.num_classes)
self._is_training = is_training
self._freeze_batchnorm = freeze_batchnorm
self._inplace_batchnorm_update = inplace_batchnorm_update
# Needed for fine-tuning from classification checkpoints whose
# variables do not have the feature extractor scope.
......@@ -372,32 +352,40 @@ class SSDMetaArch(model.DetectionModel):
5) anchors: 2-D float tensor of shape [num_anchors, 4] containing
the generated anchors in normalized coordinates.
"""
with tf.variable_scope(None, self._extract_features_scope,
[preprocessed_inputs]):
feature_maps = self._feature_extractor.extract_features(
batchnorm_updates_collections = (None if self._inplace_batchnorm_update
else tf.GraphKeys.UPDATE_OPS)
with slim.arg_scope([slim.batch_norm],
is_training=(self._is_training and
not self._freeze_batchnorm),
updates_collections=batchnorm_updates_collections):
with tf.variable_scope(None, self._extract_features_scope,
[preprocessed_inputs]):
feature_maps = self._feature_extractor.extract_features(
preprocessed_inputs)
feature_map_spatial_dims = self._get_feature_map_spatial_dims(
feature_maps)
image_shape = shape_utils.combined_static_and_dynamic_shape(
preprocessed_inputs)
feature_map_spatial_dims = self._get_feature_map_spatial_dims(feature_maps)
image_shape = shape_utils.combined_static_and_dynamic_shape(
preprocessed_inputs)
self._anchors = box_list_ops.concatenate(
self._anchor_generator.generate(
feature_map_spatial_dims,
im_height=image_shape[1],
im_width=image_shape[2]))
prediction_dict = self._box_predictor.predict(
feature_maps, self._anchor_generator.num_anchors_per_location())
box_encodings = tf.squeeze(
tf.concat(prediction_dict['box_encodings'], axis=1), axis=2)
class_predictions_with_background = tf.concat(
prediction_dict['class_predictions_with_background'], axis=1)
predictions_dict = {
'preprocessed_inputs': preprocessed_inputs,
'box_encodings': box_encodings,
'class_predictions_with_background': class_predictions_with_background,
'feature_maps': feature_maps,
'anchors': self._anchors.get()
}
return predictions_dict
self._anchors = box_list_ops.concatenate(
self._anchor_generator.generate(
feature_map_spatial_dims,
im_height=image_shape[1],
im_width=image_shape[2]))
prediction_dict = self._box_predictor.predict(
feature_maps, self._anchor_generator.num_anchors_per_location())
box_encodings = tf.squeeze(
tf.concat(prediction_dict['box_encodings'], axis=1), axis=2)
class_predictions_with_background = tf.concat(
prediction_dict['class_predictions_with_background'], axis=1)
predictions_dict = {
'preprocessed_inputs': preprocessed_inputs,
'box_encodings': box_encodings,
'class_predictions_with_background':
class_predictions_with_background,
'feature_maps': feature_maps,
'anchors': self._anchors.get()
}
return predictions_dict
def _get_feature_map_spatial_dims(self, feature_maps):
"""Return list of spatial dimensions for each feature map in a list.
......
......@@ -38,8 +38,7 @@ class FakeSSDFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
depth_multiplier=0,
min_depth=0,
pad_to_multiple=1,
batch_norm_trainable=True,
conv_hyperparams=None)
conv_hyperparams_fn=None)
def preprocess(self, resized_inputs):
    """Passes the resized inputs through tf.identity and returns the result."""
    passthrough = tf.identity(resized_inputs)
    return passthrough
......@@ -124,7 +123,8 @@ class SsdMetaArchTest(test_case.TestCase):
non_max_suppression_fn, tf.identity, classification_loss,
localization_loss, classification_loss_weight, localization_loss_weight,
normalize_loss_by_num_matches, hard_example_miner, add_summaries=False,
normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize)
normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize,
freeze_batchnorm=False, inplace_batchnorm_update=False)
return model, num_classes, mock_anchor_generator.num_anchors(), code_size
def test_preprocess_preserves_shapes_with_dynamic_input_image(self):
......
......@@ -49,12 +49,10 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
depth_multiplier,
min_depth,
pad_to_multiple,
conv_hyperparams,
batch_norm_trainable=True,
conv_hyperparams_fn,
reuse_weights=None,
use_explicit_padding=False,
use_depthwise=False,
inplace_batchnorm_update=False):
use_depthwise=False):
"""MobileNetV1 Feature Extractor for Embedded-friendly SSD Models.
Args:
......@@ -63,20 +61,12 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to. For EmbeddedSSD it must be set to 1.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
batch_norm_trainable: Whether to update batch norm parameters during
training or not. When training with a small batch size
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
conv_hyperparams_fn: A function to construct tf slim arg_scope for conv2d
and separable_conv2d ops.
reuse_weights: Whether to reuse variables. Default is None.
use_explicit_padding: Whether to use explicit padding when extracting
features. Default is False.
use_depthwise: Whether to use depthwise convolutions. Default is False.
inplace_batchnorm_update: Whether to update batch_norm inplace during
training. This is required for batch norm to work correctly on TPUs.
When this is false, user must add a control dependency on
tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
norm moving average parameters.
Raises:
ValueError: upon invalid `pad_to_multiple` values.
......@@ -87,10 +77,9 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
super(EmbeddedSSDMobileNetV1FeatureExtractor, self).__init__(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable, reuse_weights,
use_explicit_padding, use_depthwise, inplace_batchnorm_update)
conv_hyperparams_fn, reuse_weights, use_explicit_padding, use_depthwise)
def _extract_features(self, preprocessed_inputs):
def extract_features(self, preprocessed_inputs):
"""Extract features from preprocessed inputs.
Args:
......@@ -130,7 +119,7 @@ class EmbeddedSSDMobileNetV1FeatureExtractor(
'use_depthwise': self._use_depthwise,
}
with slim.arg_scope(self._conv_hyperparams):
with slim.arg_scope(self._conv_hyperparams_fn()):
with slim.arg_scope([slim.batch_norm], fused=False):
with tf.variable_scope('MobilenetV1',
reuse=self._reuse_weights) as scope:
......
......@@ -25,7 +25,7 @@ class EmbeddedSSDMobileNetV1FeatureExtractorTest(
ssd_feature_extractor_test.SsdFeatureExtractorTestBase):
def _create_feature_extractor(self, depth_multiplier, pad_to_multiple,
is_training=True, batch_norm_trainable=True):
is_training=True):
"""Constructs a new feature extractor.
Args:
......@@ -33,18 +33,15 @@ class EmbeddedSSDMobileNetV1FeatureExtractorTest(
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
is_training: whether the network is in training mode.
batch_norm_trainable: whether to update batch norm parameters during
training.
Returns:
an ssd_meta_arch.SSDFeatureExtractor object.
"""
min_depth = 32
conv_hyperparams = {}
return (embedded_ssd_mobilenet_v1_feature_extractor.
EmbeddedSSDMobileNetV1FeatureExtractor(
is_training, depth_multiplier, min_depth, pad_to_multiple,
conv_hyperparams, batch_norm_trainable))
self.conv_hyperparams_fn))
def test_extract_features_returns_correct_shapes_256(self):
image_height = 256
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment