Unverified commit 02a9969e, authored by pkulzc and committed by GitHub.

Refactor object detection box predictors and fix some issues with model_main. (#4965)

* Merged commit includes the following changes:
206852642  by Zhichao Lu:

    Build the balanced_positive_negative_sampler in the model builder for FasterRCNN. Also adds an option to use the static implementation of the sampler.

--
206803260  by Zhichao Lu:

    Fixes a misplaced argument in resnet fpn feature extractor.

--
206682736  by Zhichao Lu:

    This CL modifies the SSD meta architecture to support both Slim-based and Keras-based box predictors, and begins preparation for Keras box predictor support in the other meta architectures.

    Concretely, this CL adds a new `KerasBoxPredictor` base class and makes the meta architectures appropriately call whichever box predictors they are using.

    We can switch the non-ssd meta architectures to fully support Keras box predictors once the Keras Convolutional Box Predictor CL is submitted.

--
206669634  by Zhichao Lu:

    Adds an alternate m...
parent d135ed9c
@@ -79,7 +79,7 @@ Extras:
    Run the evaluation for the Open Images Challenge 2018</a><br>
* <a href='g3doc/tpu_compatibility.md'>
    TPU compatible detection pipelines</a><br>
* <a href='g3doc/running_on_mobile_tensorflowlite.md'>
    Running object detection on mobile devices with TensorFlow Lite</a><br>
## Getting Help
...
@@ -157,12 +157,10 @@ class MultipleGridAnchorGenerator(anchor_generator.AnchorGenerator):
        correspond to an 8x8 layer followed by a 7x7 layer.
      im_height: the height of the image to generate the grid for. If both
        im_height and im_width are 1, the generated anchors default to
-       normalized coordinates, otherwise absolute coordinates are used for the
-       grid.
        absolute coordinates, otherwise normalized coordinates are produced.
      im_width: the width of the image to generate the grid for. If both
        im_height and im_width are 1, the generated anchors default to
-       normalized coordinates, otherwise absolute coordinates are used for the
-       grid.
        absolute coordinates, otherwise normalized coordinates are produced.
    Returns:
      boxes_list: a list of BoxLists each holding anchor boxes corresponding to
...
@@ -57,14 +57,12 @@ class MultiscaleGridAnchorGenerator(anchor_generator.AnchorGenerator):
    self._scales_per_octave = scales_per_octave
    self._normalize_coordinates = normalize_coordinates
    scales = [2**(float(scale) / scales_per_octave)
              for scale in xrange(scales_per_octave)]
    aspects = list(aspect_ratios)
    for level in range(min_level, max_level + 1):
      anchor_stride = [2**level, 2**level]
-     scales = []
-     aspects = []
-     for scale in range(scales_per_octave):
-       scales.append(2**(float(scale) / scales_per_octave))
-     for aspect_ratio in aspect_ratios:
-       aspects.append(aspect_ratio)
      base_anchor_size = [2**level * anchor_scale, 2**level * anchor_scale]
      self._anchor_grid_info.append({
          'level': level,
@@ -84,7 +82,7 @@ class MultiscaleGridAnchorGenerator(anchor_generator.AnchorGenerator):
    return len(self._anchor_grid_info) * [
        len(self._aspect_ratios) * self._scales_per_octave]
- def _generate(self, feature_map_shape_list, im_height, im_width):
  def _generate(self, feature_map_shape_list, im_height=1, im_width=1):
    """Generates a collection of bounding boxes to be used as anchors.
    Currently we require the input image shape to be statically defined. That
@@ -95,14 +93,20 @@ class MultiscaleGridAnchorGenerator(anchor_generator.AnchorGenerator):
        format [(height_0, width_0), (height_1, width_1), ...]. For example,
        setting feature_map_shape_list=[(8, 8), (7, 7)] asks for anchors that
        correspond to an 8x8 layer followed by a 7x7 layer.
-     im_height: the height of the image to generate the grid for.
-     im_width: the width of the image to generate the grid for.
      im_height: the height of the image to generate the grid for. If both
        im_height and im_width are 1, anchors can only be generated in
        absolute coordinates.
      im_width: the width of the image to generate the grid for. If both
        im_height and im_width are 1, anchors can only be generated in
        absolute coordinates.
    Returns:
      boxes_list: a list of BoxLists each holding anchor boxes corresponding to
        the input feature map shapes.
    Raises:
      ValueError: if im_height and im_width are not integers.
      ValueError: if im_height and im_width are 1, but normalized coordinates
        were requested.
    """
    if not isinstance(im_height, int) or not isinstance(im_width, int):
      raise ValueError('MultiscaleGridAnchorGenerator currently requires '
@@ -118,9 +122,9 @@ class MultiscaleGridAnchorGenerator(anchor_generator.AnchorGenerator):
      feat_h = feat_shape[0]
      feat_w = feat_shape[1]
      anchor_offset = [0, 0]
-     if im_height % 2.0**level == 0:
      if im_height % 2.0**level == 0 or im_height == 1:
        anchor_offset[0] = stride / 2.0
-     if im_width % 2.0**level == 0:
      if im_width % 2.0**level == 0 or im_width == 1:
        anchor_offset[1] = stride / 2.0
      ag = grid_anchor_generator.GridAnchorGenerator(
          scales,
@@ -131,6 +135,11 @@ class MultiscaleGridAnchorGenerator(anchor_generator.AnchorGenerator):
      (anchor_grid,) = ag.generate(feature_map_shape_list=[(feat_h, feat_w)])
      if self._normalize_coordinates:
        if im_height == 1 or im_width == 1:
          raise ValueError(
              'Normalized coordinates were requested upon construction of the '
              'MultiscaleGridAnchorGenerator, but a subsequent call to '
              'generate did not supply dimension information.')
        anchor_grid = box_list_ops.to_normalized_coordinates(
            anchor_grid, im_height, im_width, check_range=False)
      anchor_grid_list.append(anchor_grid)
...
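To make the new offset rule concrete, here is a worked example under the same assumptions as the unit-dimension test added below (level 5, anchor_scale 1.0, a 2x2 feature map, im_height = im_width = 1). Previously 1 % 2**5 != 0, so the offset stayed at 0 and the anchors started at negative corners; the new `or im_height == 1` clause forces the half-stride offset.

# Worked example for the unit-dimension case (values mirror the test below).
level = 5
stride = 2 ** level                      # 32
base_anchor_size = 1.0 * 2 ** level      # 32.0
anchor_offset = stride / 2.0             # 16.0, forced because im_height == 1
centers = [anchor_offset + stride * i for i in range(2)]   # [16.0, 48.0]
# First-row corners (ymin, xmin, ymax, xmax): [0, 0, 32, 32] and [0, 32, 32, 64],
# matching exp_anchor_corners in test_construct_single_anchor_unit_dimensions.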
@@ -47,6 +47,40 @@ class MultiscaleGridAnchorGeneratorTest(test_case.TestCase):
      anchor_corners_out = anchor_corners.eval()
    self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_single_anchor_unit_dimensions(self):
min_level = 5
max_level = 5
anchor_scale = 1.0
aspect_ratios = [1.0]
scales_per_octave = 1
im_height = 1
im_width = 1
feature_map_shape_list = [(2, 2)]
# Positive offsets are produced.
exp_anchor_corners = [[0, 0, 32, 32],
[0, 32, 32, 64],
[32, 0, 64, 32],
[32, 32, 64, 64]]
anchor_generator = mg.MultiscaleGridAnchorGenerator(
min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
normalize_coordinates=False)
anchors_list = anchor_generator.generate(
feature_map_shape_list, im_height=im_height, im_width=im_width)
anchor_corners = anchors_list[0].get()
with self.test_session():
anchor_corners_out = anchor_corners.eval()
self.assertAllClose(anchor_corners_out, exp_anchor_corners)
def test_construct_normalized_anchors_fails_with_unit_dimensions(self):
anchor_generator = mg.MultiscaleGridAnchorGenerator(
min_level=5, max_level=5, anchor_scale=1.0, aspect_ratios=[1.0],
scales_per_octave=1, normalize_coordinates=True)
with self.assertRaisesRegexp(ValueError, 'Normalized coordinates'):
anchor_generator.generate(
feature_map_shape_list=[(2, 2)], im_height=1, im_width=1)
  def test_construct_single_anchor_in_normalized_coordinates(self):
    min_level = 5
    max_level = 5
@@ -94,7 +128,7 @@ class MultiscaleGridAnchorGeneratorTest(test_case.TestCase):
    anchor_generator = mg.MultiscaleGridAnchorGenerator(
        min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
        normalize_coordinates=False)
-   with self.assertRaises(ValueError):
    with self.assertRaisesRegexp(ValueError, 'statically defined'):
      anchor_generator.generate(
          feature_map_shape_list, im_height=im_height, im_width=im_width)
...
@@ -15,7 +15,12 @@
"""Function to build box predictor from configuration."""
-from object_detection.core import box_predictor
from object_detection.predictors import convolutional_box_predictor
from object_detection.predictors import mask_rcnn_box_predictor
from object_detection.predictors import rfcn_box_predictor
from object_detection.predictors.mask_rcnn_heads import box_head
from object_detection.predictors.mask_rcnn_heads import class_head
from object_detection.predictors.mask_rcnn_heads import mask_head
from object_detection.protos import box_predictor_pb2
@@ -48,92 +53,112 @@ def build(argscope_fn, box_predictor_config, is_training, num_classes):
  box_predictor_oneof = box_predictor_config.WhichOneof('box_predictor_oneof')
  if box_predictor_oneof == 'convolutional_box_predictor':
-   conv_box_predictor = box_predictor_config.convolutional_box_predictor
-   conv_hyperparams_fn = argscope_fn(conv_box_predictor.conv_hyperparams,
    config_box_predictor = box_predictor_config.convolutional_box_predictor
    conv_hyperparams_fn = argscope_fn(config_box_predictor.conv_hyperparams,
                                      is_training)
-   box_predictor_object = box_predictor.ConvolutionalBoxPredictor(
-       is_training=is_training,
-       num_classes=num_classes,
-       conv_hyperparams_fn=conv_hyperparams_fn,
-       min_depth=conv_box_predictor.min_depth,
-       max_depth=conv_box_predictor.max_depth,
-       num_layers_before_predictor=(conv_box_predictor.
-                                    num_layers_before_predictor),
-       use_dropout=conv_box_predictor.use_dropout,
-       dropout_keep_prob=conv_box_predictor.dropout_keep_probability,
-       kernel_size=conv_box_predictor.kernel_size,
-       box_code_size=conv_box_predictor.box_code_size,
-       apply_sigmoid_to_scores=conv_box_predictor.apply_sigmoid_to_scores,
-       class_prediction_bias_init=(conv_box_predictor.
-                                   class_prediction_bias_init),
-       use_depthwise=conv_box_predictor.use_depthwise
-   )
    box_predictor_object = (
        convolutional_box_predictor.ConvolutionalBoxPredictor(
            is_training=is_training,
            num_classes=num_classes,
            conv_hyperparams_fn=conv_hyperparams_fn,
            min_depth=config_box_predictor.min_depth,
            max_depth=config_box_predictor.max_depth,
            num_layers_before_predictor=(
                config_box_predictor.num_layers_before_predictor),
            use_dropout=config_box_predictor.use_dropout,
            dropout_keep_prob=config_box_predictor.dropout_keep_probability,
            kernel_size=config_box_predictor.kernel_size,
            box_code_size=config_box_predictor.box_code_size,
            apply_sigmoid_to_scores=config_box_predictor.
            apply_sigmoid_to_scores,
            class_prediction_bias_init=(
                config_box_predictor.class_prediction_bias_init),
            use_depthwise=config_box_predictor.use_depthwise))
    return box_predictor_object
  if box_predictor_oneof == 'weight_shared_convolutional_box_predictor':
-   conv_box_predictor = (box_predictor_config.
-                         weight_shared_convolutional_box_predictor)
-   conv_hyperparams_fn = argscope_fn(conv_box_predictor.conv_hyperparams,
    config_box_predictor = (
        box_predictor_config.weight_shared_convolutional_box_predictor)
    conv_hyperparams_fn = argscope_fn(config_box_predictor.conv_hyperparams,
                                      is_training)
-   box_predictor_object = box_predictor.WeightSharedConvolutionalBoxPredictor(
-       is_training=is_training,
-       num_classes=num_classes,
-       conv_hyperparams_fn=conv_hyperparams_fn,
-       depth=conv_box_predictor.depth,
-       num_layers_before_predictor=(
-           conv_box_predictor.num_layers_before_predictor),
-       kernel_size=conv_box_predictor.kernel_size,
-       box_code_size=conv_box_predictor.box_code_size,
-       class_prediction_bias_init=conv_box_predictor.
-       class_prediction_bias_init,
-       use_dropout=conv_box_predictor.use_dropout,
-       dropout_keep_prob=conv_box_predictor.dropout_keep_probability,
-       share_prediction_tower=conv_box_predictor.share_prediction_tower)
    apply_batch_norm = config_box_predictor.conv_hyperparams.HasField(
        'batch_norm')
    box_predictor_object = (
        convolutional_box_predictor.WeightSharedConvolutionalBoxPredictor(
            is_training=is_training,
            num_classes=num_classes,
            conv_hyperparams_fn=conv_hyperparams_fn,
            depth=config_box_predictor.depth,
            num_layers_before_predictor=(
                config_box_predictor.num_layers_before_predictor),
            kernel_size=config_box_predictor.kernel_size,
            box_code_size=config_box_predictor.box_code_size,
            class_prediction_bias_init=config_box_predictor.
            class_prediction_bias_init,
            use_dropout=config_box_predictor.use_dropout,
            dropout_keep_prob=config_box_predictor.dropout_keep_probability,
            share_prediction_tower=config_box_predictor.share_prediction_tower,
            apply_batch_norm=apply_batch_norm))
    return box_predictor_object
  if box_predictor_oneof == 'mask_rcnn_box_predictor':
-   mask_rcnn_box_predictor = box_predictor_config.mask_rcnn_box_predictor
-   fc_hyperparams_fn = argscope_fn(mask_rcnn_box_predictor.fc_hyperparams,
    config_box_predictor = box_predictor_config.mask_rcnn_box_predictor
    fc_hyperparams_fn = argscope_fn(config_box_predictor.fc_hyperparams,
                                    is_training)
    conv_hyperparams_fn = None
-   if mask_rcnn_box_predictor.HasField('conv_hyperparams'):
    if config_box_predictor.HasField('conv_hyperparams'):
      conv_hyperparams_fn = argscope_fn(
-         mask_rcnn_box_predictor.conv_hyperparams, is_training)
          config_box_predictor.conv_hyperparams, is_training)
-   box_predictor_object = box_predictor.MaskRCNNBoxPredictor(
-       is_training=is_training,
-       num_classes=num_classes,
-       fc_hyperparams_fn=fc_hyperparams_fn,
-       use_dropout=mask_rcnn_box_predictor.use_dropout,
-       dropout_keep_prob=mask_rcnn_box_predictor.dropout_keep_probability,
-       box_code_size=mask_rcnn_box_predictor.box_code_size,
-       conv_hyperparams_fn=conv_hyperparams_fn,
-       predict_instance_masks=mask_rcnn_box_predictor.predict_instance_masks,
-       mask_height=mask_rcnn_box_predictor.mask_height,
-       mask_width=mask_rcnn_box_predictor.mask_width,
-       mask_prediction_num_conv_layers=(
-           mask_rcnn_box_predictor.mask_prediction_num_conv_layers),
-       mask_prediction_conv_depth=(
-           mask_rcnn_box_predictor.mask_prediction_conv_depth),
-       masks_are_class_agnostic=(
-           mask_rcnn_box_predictor.masks_are_class_agnostic),
-       predict_keypoints=mask_rcnn_box_predictor.predict_keypoints,
-       share_box_across_classes=(
-           mask_rcnn_box_predictor.share_box_across_classes))
    box_prediction_head = box_head.BoxHead(
        is_training=is_training,
        num_classes=num_classes,
        fc_hyperparams_fn=fc_hyperparams_fn,
        use_dropout=config_box_predictor.use_dropout,
        dropout_keep_prob=config_box_predictor.dropout_keep_probability,
        box_code_size=config_box_predictor.box_code_size,
        share_box_across_classes=(
            config_box_predictor.share_box_across_classes))
    class_prediction_head = class_head.ClassHead(
        is_training=is_training,
        num_classes=num_classes,
        fc_hyperparams_fn=fc_hyperparams_fn,
        use_dropout=config_box_predictor.use_dropout,
        dropout_keep_prob=config_box_predictor.dropout_keep_probability)
    third_stage_heads = {}
    if config_box_predictor.predict_instance_masks:
      third_stage_heads[
          mask_rcnn_box_predictor.MASK_PREDICTIONS] = mask_head.MaskHead(
              num_classes=num_classes,
              conv_hyperparams_fn=conv_hyperparams_fn,
              mask_height=config_box_predictor.mask_height,
              mask_width=config_box_predictor.mask_width,
              mask_prediction_num_conv_layers=(
                  config_box_predictor.mask_prediction_num_conv_layers),
              mask_prediction_conv_depth=(
                  config_box_predictor.mask_prediction_conv_depth),
              masks_are_class_agnostic=(
                  config_box_predictor.masks_are_class_agnostic))
    box_predictor_object = mask_rcnn_box_predictor.MaskRCNNBoxPredictor(
        is_training=is_training,
        num_classes=num_classes,
        box_prediction_head=box_prediction_head,
        class_prediction_head=class_prediction_head,
        third_stage_heads=third_stage_heads)
    return box_predictor_object
  if box_predictor_oneof == 'rfcn_box_predictor':
-   rfcn_box_predictor = box_predictor_config.rfcn_box_predictor
-   conv_hyperparams_fn = argscope_fn(rfcn_box_predictor.conv_hyperparams,
    config_box_predictor = box_predictor_config.rfcn_box_predictor
    conv_hyperparams_fn = argscope_fn(config_box_predictor.conv_hyperparams,
                                      is_training)
-   box_predictor_object = box_predictor.RfcnBoxPredictor(
    box_predictor_object = rfcn_box_predictor.RfcnBoxPredictor(
        is_training=is_training,
        num_classes=num_classes,
        conv_hyperparams_fn=conv_hyperparams_fn,
-       crop_size=[rfcn_box_predictor.crop_height,
-                  rfcn_box_predictor.crop_width],
-       num_spatial_bins=[rfcn_box_predictor.num_spatial_bins_height,
-                         rfcn_box_predictor.num_spatial_bins_width],
-       depth=rfcn_box_predictor.depth,
-       box_code_size=rfcn_box_predictor.box_code_size)
        crop_size=[config_box_predictor.crop_height,
                   config_box_predictor.crop_width],
        num_spatial_bins=[config_box_predictor.num_spatial_bins_height,
                          config_box_predictor.num_spatial_bins_width],
        depth=config_box_predictor.depth,
        box_code_size=config_box_predictor.box_code_size)
    return box_predictor_object
  raise ValueError('Unknown box predictor: {}'.format(box_predictor_oneof))
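As a quick orientation to the refactored builder, here is a minimal usage sketch. The text-proto values are illustrative (not taken from this commit); the keyword arguments mirror the ones used by the builder tests in the next file.

# Sketch: building a Mask R-CNN box predictor through the refactored builder.
from google.protobuf import text_format
from object_detection.builders import box_predictor_builder
from object_detection.builders import hyperparams_builder
from object_detection.protos import box_predictor_pb2

box_predictor_text_proto = """
  mask_rcnn_box_predictor {
    fc_hyperparams {
      regularizer { l2_regularizer { weight: 0.0005 } }
      initializer { truncated_normal_initializer { } }
    }
  }
"""
box_predictor_proto = box_predictor_pb2.BoxPredictor()
text_format.Merge(box_predictor_text_proto, box_predictor_proto)
predictor = box_predictor_builder.build(
    argscope_fn=hyperparams_builder.build,
    box_predictor_config=box_predictor_proto,
    is_training=True,
    num_classes=90)
# The predictor now carries separate heads rather than monolithic attributes:
# predictor._box_prediction_head, predictor._class_prediction_head, and the
# (possibly empty) predictor._third_stage_heads dict.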
@@ -20,6 +20,7 @@ import tensorflow as tf
from google.protobuf import text_format
from object_detection.builders import box_predictor_builder
from object_detection.builders import hyperparams_builder
from object_detection.predictors import mask_rcnn_box_predictor
from object_detection.protos import box_predictor_pb2
from object_detection.protos import hyperparams_pb2
@@ -239,6 +240,7 @@ class WeightSharedConvolutionalBoxPredictorBuilderTest(tf.test.TestCase):
    self.assertAlmostEqual(box_predictor._class_prediction_bias_init, 4.0)
    self.assertEqual(box_predictor.num_classes, 10)
    self.assertFalse(box_predictor._is_training)
    self.assertEqual(box_predictor._apply_batch_norm, False)

  def test_construct_default_conv_box_predictor(self):
    box_predictor_text_proto = """
@@ -265,6 +267,37 @@ class WeightSharedConvolutionalBoxPredictorBuilderTest(tf.test.TestCase):
    self.assertEqual(box_predictor._num_layers_before_predictor, 0)
    self.assertEqual(box_predictor.num_classes, 90)
    self.assertTrue(box_predictor._is_training)
self.assertEqual(box_predictor._apply_batch_norm, False)
def test_construct_default_conv_box_predictor_with_batch_norm(self):
box_predictor_text_proto = """
weight_shared_convolutional_box_predictor {
conv_hyperparams {
regularizer {
l1_regularizer {
}
}
batch_norm {
train: true
}
initializer {
truncated_normal_initializer {
}
}
}
}"""
box_predictor_proto = box_predictor_pb2.BoxPredictor()
text_format.Merge(box_predictor_text_proto, box_predictor_proto)
box_predictor = box_predictor_builder.build(
argscope_fn=hyperparams_builder.build,
box_predictor_config=box_predictor_proto,
is_training=True,
num_classes=90)
self.assertEqual(box_predictor._depth, 0)
self.assertEqual(box_predictor._num_layers_before_predictor, 0)
self.assertEqual(box_predictor.num_classes, 90)
self.assertTrue(box_predictor._is_training)
self.assertEqual(box_predictor._apply_batch_norm, True)
class MaskRCNNBoxPredictorBuilderTest(tf.test.TestCase):
@@ -297,7 +330,10 @@ class MaskRCNNBoxPredictorBuilderTest(tf.test.TestCase):
        is_training=False,
        num_classes=10)
    mock_argscope_fn.assert_called_with(hyperparams_proto, False)
-   self.assertEqual(box_predictor._fc_hyperparams_fn, 'arg_scope')
    self.assertEqual(box_predictor._box_prediction_head._fc_hyperparams_fn,
                     'arg_scope')
    self.assertEqual(box_predictor._class_prediction_head._fc_hyperparams_fn,
                     'arg_scope')
  def test_non_default_mask_rcnn_box_predictor(self):
    fc_hyperparams_text_proto = """
@@ -334,12 +370,16 @@ class MaskRCNNBoxPredictorBuilderTest(tf.test.TestCase):
        box_predictor_config=box_predictor_proto,
        is_training=True,
        num_classes=90)
-   self.assertTrue(box_predictor._use_dropout)
-   self.assertAlmostEqual(box_predictor._dropout_keep_prob, 0.8)
    box_head = box_predictor._box_prediction_head
    class_head = box_predictor._class_prediction_head
    self.assertTrue(box_head._use_dropout)
    self.assertTrue(class_head._use_dropout)
    self.assertAlmostEqual(box_head._dropout_keep_prob, 0.8)
    self.assertAlmostEqual(class_head._dropout_keep_prob, 0.8)
    self.assertEqual(box_predictor.num_classes, 90)
    self.assertTrue(box_predictor._is_training)
-   self.assertEqual(box_predictor._box_code_size, 3)
-   self.assertEqual(box_predictor._share_box_across_classes, True)
    self.assertEqual(box_head._box_code_size, 3)
    self.assertEqual(box_head._share_box_across_classes, True)
  def test_build_default_mask_rcnn_box_predictor(self):
    box_predictor_proto = box_predictor_pb2.BoxPredictor()
@@ -350,13 +390,15 @@ class MaskRCNNBoxPredictorBuilderTest(tf.test.TestCase):
        box_predictor_config=box_predictor_proto,
        is_training=True,
        num_classes=90)
-   self.assertFalse(box_predictor._use_dropout)
-   self.assertAlmostEqual(box_predictor._dropout_keep_prob, 0.5)
    box_head = box_predictor._box_prediction_head
    class_head = box_predictor._class_prediction_head
    self.assertFalse(box_head._use_dropout)
    self.assertFalse(class_head._use_dropout)
    self.assertAlmostEqual(box_head._dropout_keep_prob, 0.5)
    self.assertEqual(box_predictor.num_classes, 90)
    self.assertTrue(box_predictor._is_training)
-   self.assertEqual(box_predictor._box_code_size, 4)
-   self.assertFalse(box_predictor._predict_instance_masks)
-   self.assertFalse(box_predictor._predict_keypoints)
    self.assertEqual(box_head._box_code_size, 4)
    self.assertEqual(len(box_predictor._third_stage_heads.keys()), 0)
  def test_build_box_predictor_with_mask_branch(self):
    box_predictor_proto = box_predictor_pb2.BoxPredictor()
@@ -379,14 +421,21 @@ class MaskRCNNBoxPredictorBuilderTest(tf.test.TestCase):
                  True),
        mock.call(box_predictor_proto.mask_rcnn_box_predictor.conv_hyperparams,
                  True)], any_order=True)
-   self.assertFalse(box_predictor._use_dropout)
-   self.assertAlmostEqual(box_predictor._dropout_keep_prob, 0.5)
    box_head = box_predictor._box_prediction_head
    class_head = box_predictor._class_prediction_head
    third_stage_heads = box_predictor._third_stage_heads
    self.assertFalse(box_head._use_dropout)
    self.assertFalse(class_head._use_dropout)
    self.assertAlmostEqual(box_head._dropout_keep_prob, 0.5)
    self.assertAlmostEqual(class_head._dropout_keep_prob, 0.5)
    self.assertEqual(box_predictor.num_classes, 90)
    self.assertTrue(box_predictor._is_training)
-   self.assertEqual(box_predictor._box_code_size, 4)
-   self.assertTrue(box_predictor._predict_instance_masks)
-   self.assertEqual(box_predictor._mask_prediction_conv_depth, 512)
-   self.assertFalse(box_predictor._predict_keypoints)
    self.assertEqual(box_head._box_code_size, 4)
    self.assertTrue(
        mask_rcnn_box_predictor.MASK_PREDICTIONS in third_stage_heads)
    self.assertEqual(
        third_stage_heads[mask_rcnn_box_predictor.MASK_PREDICTIONS]
        ._mask_prediction_conv_depth, 512)

class RfcnBoxPredictorBuilderTest(tf.test.TestCase):
...
@@ -22,6 +22,95 @@ from object_detection.utils import context_manager
slim = tf.contrib.slim
class KerasLayerHyperparams(object):
"""
A hyperparameter configuration object for Keras layers used in
Object Detection models.
"""
def __init__(self, hyperparams_config):
"""Builds keras hyperparameter config for layers based on the proto config.
It automatically converts from Slim layer hyperparameter configs to
Keras layer hyperparameters. Namely, it:
- Builds Keras initializers/regularizers instead of Slim ones
- sets weights_regularizer/initializer to kernel_regularizer/initializer
- converts batchnorm decay to momentum
- converts Slim l2 regularizer weights to the equivalent Keras l2 weights
Contains a hyperparameter configuration for ops that specifies kernel
initializer, kernel regularizer, activation. Also contains parameters for
batch norm operators based on the configuration.
Note that if the batch_norm parameters are not specified in the config
(i.e. left to default) then batch norm is excluded from the config.
Args:
hyperparams_config: hyperparams.proto object containing
hyperparameters.
Raises:
ValueError: if hyperparams_config is not of type hyperparams.Hyperparams.
"""
if not isinstance(hyperparams_config,
hyperparams_pb2.Hyperparams):
raise ValueError('hyperparams_config not of type '
'hyperparams_pb.Hyperparams.')
self._batch_norm_params = None
if hyperparams_config.HasField('batch_norm'):
self._batch_norm_params = _build_keras_batch_norm_params(
hyperparams_config.batch_norm)
self._op_params = {
'kernel_regularizer': _build_keras_regularizer(
hyperparams_config.regularizer),
'kernel_initializer': _build_initializer(
hyperparams_config.initializer, build_for_keras=True),
'activation': _build_activation_fn(hyperparams_config.activation)
}
def use_batch_norm(self):
return self._batch_norm_params is not None
def batch_norm_params(self, **overrides):
"""Returns a dict containing batchnorm layer construction hyperparameters.
Optionally overrides values in the batchnorm hyperparam dict. Overrides
only apply to individual calls of this method, and do not affect
future calls.
Args:
**overrides: keyword arguments to override in the hyperparams dictionary
Returns: dict containing the layer construction keyword arguments, with
values overridden by the `overrides` keyword arguments.
"""
if self._batch_norm_params is None:
new_batch_norm_params = dict()
else:
new_batch_norm_params = self._batch_norm_params.copy()
new_batch_norm_params.update(overrides)
return new_batch_norm_params
def params(self, **overrides):
"""Returns a dict containing the layer construction hyperparameters to use.
Optionally overrides values in the returned dict. Overrides
only apply to individual calls of this method, and do not affect
future calls.
Args:
**overrides: keyword arguments to override in the hyperparams dictionary.
Returns: dict containing the layer construction keyword arguments, with
values overridden by the `overrides` keyword arguments.
"""
new_params = self._op_params.copy()
new_params.update(**overrides)
return new_params
def build(hyperparams_config, is_training):
  """Builds tf-slim arg_scope for convolution ops based on the config.
@@ -72,7 +161,7 @@ def build(hyperparams_config, is_training):
                       context_manager.IdentityContextManager()):
    with slim.arg_scope(
        affected_ops,
-       weights_regularizer=_build_regularizer(
        weights_regularizer=_build_slim_regularizer(
            hyperparams_config.regularizer),
        weights_initializer=_build_initializer(
            hyperparams_config.initializer),
@@ -104,7 +193,7 @@ def _build_activation_fn(activation_fn):
  raise ValueError('Unknown activation function: {}'.format(activation_fn))
-def _build_regularizer(regularizer):
def _build_slim_regularizer(regularizer):
  """Builds a tf-slim regularizer from config.
  Args:
@@ -124,11 +213,36 @@ def _build_regularizer(regularizer):
  raise ValueError('Unknown regularizer function: {}'.format(regularizer_oneof))
-def _build_initializer(initializer):
def _build_keras_regularizer(regularizer):
"""Builds a keras regularizer from config.
Args:
regularizer: hyperparams_pb2.Hyperparams.regularizer proto.
Returns:
Keras regularizer.
Raises:
ValueError: On unknown regularizer.
"""
regularizer_oneof = regularizer.WhichOneof('regularizer_oneof')
if regularizer_oneof == 'l1_regularizer':
return tf.keras.regularizers.l1(float(regularizer.l1_regularizer.weight))
if regularizer_oneof == 'l2_regularizer':
# The Keras L2 regularizer weight differs from the Slim L2 regularizer
# weight by a factor of 2
return tf.keras.regularizers.l2(
float(regularizer.l2_regularizer.weight * 0.5))
raise ValueError('Unknown regularizer function: {}'.format(regularizer_oneof))
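The factor-of-2 comment is worth spelling out: slim's l2_regularizer follows the tf.nn.l2_loss convention (weight * sum(x**2) / 2), while tf.keras.regularizers.l2 computes weight * sum(x**2), so halving the configured weight keeps the two penalties identical. That is exactly what the paired slim/Keras regularizer tests below assert. A tiny numerical check, illustrative only:

import numpy as np
weights = np.array([1., -1., 4., 2.])
slim_weight = 0.42
slim_penalty = slim_weight * np.power(weights, 2).sum() / 2.0
keras_penalty = (slim_weight * 0.5) * np.power(weights, 2).sum()
assert np.isclose(slim_penalty, keras_penalty)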
def _build_initializer(initializer, build_for_keras=False):
"""Build a tf initializer from config. """Build a tf initializer from config.
Args: Args:
initializer: hyperparams_pb2.Hyperparams.regularizer proto. initializer: hyperparams_pb2.Hyperparams.regularizer proto.
build_for_keras: Whether the initializers should be built for Keras
operators. If false builds for Slim.
Returns: Returns:
tf initializer. tf initializer.
@@ -151,10 +265,42 @@ def _build_initializer(initializer):
    mode = enum_descriptor.values_by_number[initializer.
                                            variance_scaling_initializer.
                                            mode].name
-   return slim.variance_scaling_initializer(
-       factor=initializer.variance_scaling_initializer.factor,
-       mode=mode,
-       uniform=initializer.variance_scaling_initializer.uniform)
    if build_for_keras:
      if initializer.variance_scaling_initializer.uniform:
        return tf.variance_scaling_initializer(
            scale=initializer.variance_scaling_initializer.factor,
mode=mode.lower(),
distribution='uniform')
else:
# In TF 1.9 release and earlier, the truncated_normal distribution was
# not supported correctly. So, in these earlier versions of tensorflow,
# the ValueError will be raised, and we manually truncate the
# distribution scale.
#
# It is insufficient to just set distribution to `normal` from the
# start, because the `normal` distribution in newer Tensorflow versions
# creates a truncated distribution, whereas it created untruncated
# distributions in older versions.
try:
return tf.variance_scaling_initializer(
scale=initializer.variance_scaling_initializer.factor,
mode=mode.lower(),
distribution='truncated_normal')
except ValueError:
truncate_constant = 0.87962566103423978
truncated_scale = initializer.variance_scaling_initializer.factor / (
truncate_constant * truncate_constant
)
return tf.variance_scaling_initializer(
scale=truncated_scale,
mode=mode.lower(),
distribution='normal')
else:
return slim.variance_scaling_initializer(
factor=initializer.variance_scaling_initializer.factor,
mode=mode,
uniform=initializer.variance_scaling_initializer.uniform)
  raise ValueError('Unknown initializer function: {}'.format(
      initializer_oneof))
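The hard-coded 0.87962566103423978 above is not arbitrary: it is the standard deviation of a unit normal truncated to two standard deviations, so dividing the configured factor by its square restores the intended variance when the fallback has to use an untruncated 'normal' distribution. A quick check (scipy is used here purely for the illustration):

from scipy.stats import truncnorm
print(truncnorm.std(-2.0, 2.0))   # ~0.8796256610342398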
@@ -180,3 +326,25 @@ def _build_batch_norm_params(batch_norm, is_training):
      'is_training': is_training and batch_norm.train,
  }
  return batch_norm_params
def _build_keras_batch_norm_params(batch_norm):
"""Build a dictionary of Keras BatchNormalization params from config.
Args:
batch_norm: hyperparams_pb2.ConvHyperparams.batch_norm proto.
Returns:
A dictionary containing Keras BatchNormalization parameters.
"""
# Note: Although decay is defined to be 1 - momentum in batch_norm,
# decay in the slim batch_norm layers was erroneously defined and is
# actually the same as momentum in the Keras batch_norm layers.
# For context, see: github.com/keras-team/keras/issues/6839
batch_norm_params = {
'momentum': batch_norm.decay,
'center': batch_norm.center,
'scale': batch_norm.scale,
'epsilon': batch_norm.epsilon,
}
return batch_norm_params
@@ -149,6 +149,29 @@ class HyperparamsBuilderTest(tf.test.TestCase):
      result = sess.run(regularizer(tf.constant(weights)))
    self.assertAllClose(np.abs(weights).sum() * 0.5, result)
def test_return_l1_regularized_weights_keras(self):
conv_hyperparams_text_proto = """
regularizer {
l1_regularizer {
weight: 0.5
}
}
initializer {
truncated_normal_initializer {
}
}
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
keras_config = hyperparams_builder.KerasLayerHyperparams(
conv_hyperparams_proto)
regularizer = keras_config.params()['kernel_regularizer']
weights = np.array([1., -1, 4., 2.])
with self.test_session() as sess:
result = sess.run(regularizer(tf.constant(weights)))
self.assertAllClose(np.abs(weights).sum() * 0.5, result)
  def test_return_l2_regularizer_weights(self):
    conv_hyperparams_text_proto = """
      regularizer {
@@ -174,6 +197,29 @@ class HyperparamsBuilderTest(tf.test.TestCase):
      result = sess.run(regularizer(tf.constant(weights)))
    self.assertAllClose(np.power(weights, 2).sum() / 2.0 * 0.42, result)
def test_return_l2_regularizer_weights_keras(self):
conv_hyperparams_text_proto = """
regularizer {
l2_regularizer {
weight: 0.42
}
}
initializer {
truncated_normal_initializer {
}
}
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
keras_config = hyperparams_builder.KerasLayerHyperparams(
conv_hyperparams_proto)
regularizer = keras_config.params()['kernel_regularizer']
weights = np.array([1., -1, 4., 2.])
with self.test_session() as sess:
result = sess.run(regularizer(tf.constant(weights)))
self.assertAllClose(np.power(weights, 2).sum() / 2.0 * 0.42, result)
  def test_return_non_default_batch_norm_params_with_train_during_train(self):
    conv_hyperparams_text_proto = """
      regularizer {
@@ -206,6 +252,66 @@ class HyperparamsBuilderTest(tf.test.TestCase):
    self.assertTrue(batch_norm_params['scale'])
    self.assertTrue(batch_norm_params['is_training'])
def test_return_non_default_batch_norm_params_keras(
self):
conv_hyperparams_text_proto = """
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
batch_norm {
decay: 0.7
center: false
scale: true
epsilon: 0.03
}
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
keras_config = hyperparams_builder.KerasLayerHyperparams(
conv_hyperparams_proto)
self.assertTrue(keras_config.use_batch_norm())
batch_norm_params = keras_config.batch_norm_params()
self.assertAlmostEqual(batch_norm_params['momentum'], 0.7)
self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03)
self.assertFalse(batch_norm_params['center'])
self.assertTrue(batch_norm_params['scale'])
def test_return_non_default_batch_norm_params_keras_override(
self):
conv_hyperparams_text_proto = """
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
batch_norm {
decay: 0.7
center: false
scale: true
epsilon: 0.03
}
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
keras_config = hyperparams_builder.KerasLayerHyperparams(
conv_hyperparams_proto)
self.assertTrue(keras_config.use_batch_norm())
batch_norm_params = keras_config.batch_norm_params(momentum=0.4)
self.assertAlmostEqual(batch_norm_params['momentum'], 0.4)
self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03)
self.assertFalse(batch_norm_params['center'])
self.assertTrue(batch_norm_params['scale'])
  def test_return_batch_norm_params_with_notrain_during_eval(self):
    conv_hyperparams_text_proto = """
      regularizer {
@@ -289,6 +395,24 @@ class HyperparamsBuilderTest(tf.test.TestCase):
    conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
    self.assertEqual(conv_scope_arguments['normalizer_fn'], None)
def test_do_not_use_batch_norm_if_default_keras(self):
conv_hyperparams_text_proto = """
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
keras_config = hyperparams_builder.KerasLayerHyperparams(
conv_hyperparams_proto)
self.assertFalse(keras_config.use_batch_norm())
self.assertEqual(keras_config.batch_norm_params(), {})
  def test_use_none_activation(self):
    conv_hyperparams_text_proto = """
      regularizer {
@@ -309,6 +433,24 @@ class HyperparamsBuilderTest(tf.test.TestCase):
    conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
    self.assertEqual(conv_scope_arguments['activation_fn'], None)
def test_use_none_activation_keras(self):
conv_hyperparams_text_proto = """
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
activation: NONE
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
keras_config = hyperparams_builder.KerasLayerHyperparams(
conv_hyperparams_proto)
self.assertEqual(keras_config.params()['activation'], None)
  def test_use_relu_activation(self):
    conv_hyperparams_text_proto = """
      regularizer {
@@ -329,6 +471,24 @@ class HyperparamsBuilderTest(tf.test.TestCase):
    conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
    self.assertEqual(conv_scope_arguments['activation_fn'], tf.nn.relu)
def test_use_relu_activation_keras(self):
conv_hyperparams_text_proto = """
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
activation: RELU
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
keras_config = hyperparams_builder.KerasLayerHyperparams(
conv_hyperparams_proto)
self.assertEqual(keras_config.params()['activation'], tf.nn.relu)
  def test_use_relu_6_activation(self):
    conv_hyperparams_text_proto = """
      regularizer {
@@ -349,6 +509,43 @@ class HyperparamsBuilderTest(tf.test.TestCase):
    conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
    self.assertEqual(conv_scope_arguments['activation_fn'], tf.nn.relu6)
def test_use_relu_6_activation_keras(self):
conv_hyperparams_text_proto = """
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
activation: RELU_6
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
keras_config = hyperparams_builder.KerasLayerHyperparams(
conv_hyperparams_proto)
self.assertEqual(keras_config.params()['activation'], tf.nn.relu6)
def test_override_activation_keras(self):
conv_hyperparams_text_proto = """
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
}
}
activation: RELU_6
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
keras_config = hyperparams_builder.KerasLayerHyperparams(
conv_hyperparams_proto)
new_params = keras_config.params(activation=tf.nn.relu)
self.assertEqual(new_params['activation'], tf.nn.relu)
  def _assert_variance_in_range(self, initializer, shape, variance,
                                tol=1e-2):
    with tf.Graph().as_default() as g:
@@ -386,6 +583,29 @@ class HyperparamsBuilderTest(tf.test.TestCase):
    self._assert_variance_in_range(initializer, shape=[100, 40],
                                   variance=2. / 100.)
def test_variance_in_range_with_variance_scaling_initializer_fan_in_keras(
self):
conv_hyperparams_text_proto = """
regularizer {
l2_regularizer {
}
}
initializer {
variance_scaling_initializer {
factor: 2.0
mode: FAN_IN
uniform: false
}
}
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
keras_config = hyperparams_builder.KerasLayerHyperparams(
conv_hyperparams_proto)
initializer = keras_config.params()['kernel_initializer']
self._assert_variance_in_range(initializer, shape=[100, 40],
variance=2. / 100.)
  def test_variance_in_range_with_variance_scaling_initializer_fan_out(self):
    conv_hyperparams_text_proto = """
      regularizer {
@@ -410,6 +630,29 @@ class HyperparamsBuilderTest(tf.test.TestCase):
    self._assert_variance_in_range(initializer, shape=[100, 40],
                                   variance=2. / 40.)
def test_variance_in_range_with_variance_scaling_initializer_fan_out_keras(
self):
conv_hyperparams_text_proto = """
regularizer {
l2_regularizer {
}
}
initializer {
variance_scaling_initializer {
factor: 2.0
mode: FAN_OUT
uniform: false
}
}
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
keras_config = hyperparams_builder.KerasLayerHyperparams(
conv_hyperparams_proto)
initializer = keras_config.params()['kernel_initializer']
self._assert_variance_in_range(initializer, shape=[100, 40],
variance=2. / 40.)
  def test_variance_in_range_with_variance_scaling_initializer_fan_avg(self):
    conv_hyperparams_text_proto = """
      regularizer {
@@ -434,6 +677,29 @@ class HyperparamsBuilderTest(tf.test.TestCase):
    self._assert_variance_in_range(initializer, shape=[100, 40],
                                   variance=4. / (100. + 40.))
def test_variance_in_range_with_variance_scaling_initializer_fan_avg_keras(
self):
conv_hyperparams_text_proto = """
regularizer {
l2_regularizer {
}
}
initializer {
variance_scaling_initializer {
factor: 2.0
mode: FAN_AVG
uniform: false
}
}
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
keras_config = hyperparams_builder.KerasLayerHyperparams(
conv_hyperparams_proto)
initializer = keras_config.params()['kernel_initializer']
self._assert_variance_in_range(initializer, shape=[100, 40],
variance=4. / (100. + 40.))
  def test_variance_in_range_with_variance_scaling_initializer_uniform(self):
    conv_hyperparams_text_proto = """
      regularizer {
@@ -458,6 +724,29 @@ class HyperparamsBuilderTest(tf.test.TestCase):
    self._assert_variance_in_range(initializer, shape=[100, 40],
                                   variance=2. / 100.)
def test_variance_in_range_with_variance_scaling_initializer_uniform_keras(
self):
conv_hyperparams_text_proto = """
regularizer {
l2_regularizer {
}
}
initializer {
variance_scaling_initializer {
factor: 2.0
mode: FAN_IN
uniform: true
}
}
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
keras_config = hyperparams_builder.KerasLayerHyperparams(
conv_hyperparams_proto)
initializer = keras_config.params()['kernel_initializer']
self._assert_variance_in_range(initializer, shape=[100, 40],
variance=2. / 100.)
  def test_variance_in_range_with_truncated_normal_initializer(self):
    conv_hyperparams_text_proto = """
      regularizer {
@@ -481,6 +770,27 @@ class HyperparamsBuilderTest(tf.test.TestCase):
    self._assert_variance_in_range(initializer, shape=[100, 40],
                                   variance=0.49, tol=1e-1)
def test_variance_in_range_with_truncated_normal_initializer_keras(self):
conv_hyperparams_text_proto = """
regularizer {
l2_regularizer {
}
}
initializer {
truncated_normal_initializer {
mean: 0.0
stddev: 0.8
}
}
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
keras_config = hyperparams_builder.KerasLayerHyperparams(
conv_hyperparams_proto)
initializer = keras_config.params()['kernel_initializer']
self._assert_variance_in_range(initializer, shape=[100, 40],
variance=0.49, tol=1e-1)
  def test_variance_in_range_with_random_normal_initializer(self):
    conv_hyperparams_text_proto = """
      regularizer {
@@ -504,6 +814,27 @@ class HyperparamsBuilderTest(tf.test.TestCase):
    self._assert_variance_in_range(initializer, shape=[100, 40],
                                   variance=0.64, tol=1e-1)
def test_variance_in_range_with_random_normal_initializer_keras(self):
conv_hyperparams_text_proto = """
regularizer {
l2_regularizer {
}
}
initializer {
random_normal_initializer {
mean: 0.0
stddev: 0.8
}
}
"""
conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
keras_config = hyperparams_builder.KerasLayerHyperparams(
conv_hyperparams_proto)
initializer = keras_config.params()['kernel_initializer']
self._assert_variance_in_range(initializer, shape=[100, 40],
variance=0.64, tol=1e-1)
if __name__ == '__main__':
  tf.test.main()
@@ -46,6 +46,20 @@ class ImageResizerBuilderTest(tf.test.TestCase):
        input_shape, image_resizer_text_proto)
    self.assertEqual(output_shape, expected_output_shape)
def test_build_keep_aspect_ratio_resizer_grayscale(self):
image_resizer_text_proto = """
keep_aspect_ratio_resizer {
min_dimension: 10
max_dimension: 20
convert_to_grayscale: true
}
"""
input_shape = (50, 25, 3)
expected_output_shape = (20, 10, 1)
output_shape = self._shape_of_resized_random_image_given_text_proto(
input_shape, image_resizer_text_proto)
self.assertEqual(output_shape, expected_output_shape)
  def test_build_keep_aspect_ratio_resizer_with_padding(self):
    image_resizer_text_proto = """
      keep_aspect_ratio_resizer {
@@ -76,6 +90,20 @@ class ImageResizerBuilderTest(tf.test.TestCase):
        input_shape, image_resizer_text_proto)
    self.assertEqual(output_shape, expected_output_shape)
def test_built_fixed_shape_resizer_grayscale(self):
image_resizer_text_proto = """
fixed_shape_resizer {
height: 10
width: 20
convert_to_grayscale: true
}
"""
input_shape = (50, 25, 3)
expected_output_shape = (10, 20, 1)
output_shape = self._shape_of_resized_random_image_given_text_proto(
input_shape, image_resizer_text_proto)
self.assertEqual(output_shape, expected_output_shape)
  def test_raises_error_on_invalid_input(self):
    invalid_input = 'invalid_input'
    with self.assertRaises(ValueError):
...
@@ -23,7 +23,8 @@ from object_detection.builders import losses_builder
from object_detection.builders import matcher_builder
from object_detection.builders import post_processing_builder
from object_detection.builders import region_similarity_calculator_builder as sim_calc
-from object_detection.core import box_predictor
from object_detection.core import balanced_positive_negative_sampler as sampler
from object_detection.core import target_assigner
from object_detection.meta_architectures import faster_rcnn_meta_arch
from object_detection.meta_architectures import rfcn_meta_arch
from object_detection.meta_architectures import ssd_meta_arch
@@ -41,6 +42,7 @@ from object_detection.models.ssd_mobilenet_v1_feature_extractor import SSDMobile
from object_detection.models.ssd_mobilenet_v1_fpn_feature_extractor import SSDMobileNetV1FpnFeatureExtractor
from object_detection.models.ssd_mobilenet_v1_ppn_feature_extractor import SSDMobileNetV1PpnFeatureExtractor
from object_detection.models.ssd_mobilenet_v2_feature_extractor import SSDMobileNetV2FeatureExtractor
from object_detection.predictors import rfcn_box_predictor
from object_detection.protos import model_pb2
# A map of names to SSD feature extractors.
@@ -142,10 +144,34 @@ def _build_ssd_feature_extractor(feature_extractor_config, is_training,
    raise ValueError('Unknown ssd feature_extractor: {}'.format(feature_type))
  feature_extractor_class = SSD_FEATURE_EXTRACTOR_CLASS_MAP[feature_type]
return feature_extractor_class( kwargs = {
is_training, depth_multiplier, min_depth, pad_to_multiple, 'is_training':
conv_hyperparams, reuse_weights, use_explicit_padding, use_depthwise, is_training,
override_base_feature_extractor_hyperparams) 'depth_multiplier':
depth_multiplier,
'min_depth':
min_depth,
'pad_to_multiple':
pad_to_multiple,
'conv_hyperparams_fn':
conv_hyperparams,
'reuse_weights':
reuse_weights,
'use_explicit_padding':
use_explicit_padding,
'use_depthwise':
use_depthwise,
'override_base_feature_extractor_hyperparams':
override_base_feature_extractor_hyperparams
}
if feature_extractor_config.HasField('fpn'):
kwargs.update({
'fpn_min_level': feature_extractor_config.fpn.min_level,
'fpn_max_level': feature_extractor_config.fpn.max_level,
})
return feature_extractor_class(**kwargs)
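The refactor above replaces positional constructor arguments with a kwargs dict so that optional proto sub-messages (here `fpn`) are forwarded only when they are set. Below is a minimal, self-contained sketch of that pattern using hypothetical stand-in classes rather than the real protos and feature extractors.

class _FakeFpn(object):
  min_level = 3
  max_level = 7

class _FakeFeatureExtractorConfig(object):
  """Hypothetical stand-in mimicking the proto interface used above."""
  depth_multiplier = 1.0
  fpn = _FakeFpn()

  def HasField(self, name):
    # Real protobuf messages expose HasField() for optional sub-messages.
    return name == 'fpn'

def build_fake_extractor_kwargs(config):
  kwargs = {'depth_multiplier': config.depth_multiplier}
  if config.HasField('fpn'):
    kwargs.update({
        'fpn_min_level': config.fpn.min_level,
        'fpn_max_level': config.fpn.max_level,
    })
  return kwargs  # the real builder passes these to the extractor class

print(build_fake_extractor_kwargs(_FakeFeatureExtractorConfig()))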
def _build_ssd_model(ssd_config, is_training, add_summaries, def _build_ssd_model(ssd_config, is_training, add_summaries,
...@@ -291,6 +317,10 @@ def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries): ...@@ -291,6 +317,10 @@ def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
first_stage_anchor_generator = anchor_generator_builder.build( first_stage_anchor_generator = anchor_generator_builder.build(
frcnn_config.first_stage_anchor_generator) frcnn_config.first_stage_anchor_generator)
first_stage_target_assigner = target_assigner.create_target_assigner(
'FasterRCNN',
'proposal',
use_matmul_gather=frcnn_config.use_matmul_gather_in_matcher)
first_stage_atrous_rate = frcnn_config.first_stage_atrous_rate first_stage_atrous_rate = frcnn_config.first_stage_atrous_rate
first_stage_box_predictor_arg_scope_fn = hyperparams_builder.build( first_stage_box_predictor_arg_scope_fn = hyperparams_builder.build(
frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training) frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training)
...@@ -298,8 +328,9 @@ def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries): ...@@ -298,8 +328,9 @@ def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
frcnn_config.first_stage_box_predictor_kernel_size) frcnn_config.first_stage_box_predictor_kernel_size)
first_stage_box_predictor_depth = frcnn_config.first_stage_box_predictor_depth first_stage_box_predictor_depth = frcnn_config.first_stage_box_predictor_depth
first_stage_minibatch_size = frcnn_config.first_stage_minibatch_size first_stage_minibatch_size = frcnn_config.first_stage_minibatch_size
first_stage_positive_balance_fraction = ( first_stage_sampler = sampler.BalancedPositiveNegativeSampler(
frcnn_config.first_stage_positive_balance_fraction) positive_fraction=frcnn_config.first_stage_positive_balance_fraction,
is_static=frcnn_config.use_static_balanced_label_sampler)
first_stage_nms_score_threshold = frcnn_config.first_stage_nms_score_threshold first_stage_nms_score_threshold = frcnn_config.first_stage_nms_score_threshold
first_stage_nms_iou_threshold = frcnn_config.first_stage_nms_iou_threshold first_stage_nms_iou_threshold = frcnn_config.first_stage_nms_iou_threshold
first_stage_max_proposals = frcnn_config.first_stage_max_proposals first_stage_max_proposals = frcnn_config.first_stage_max_proposals
...@@ -311,13 +342,19 @@ def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries): ...@@ -311,13 +342,19 @@ def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
maxpool_kernel_size = frcnn_config.maxpool_kernel_size maxpool_kernel_size = frcnn_config.maxpool_kernel_size
maxpool_stride = frcnn_config.maxpool_stride maxpool_stride = frcnn_config.maxpool_stride
second_stage_target_assigner = target_assigner.create_target_assigner(
'FasterRCNN',
'detection',
use_matmul_gather=frcnn_config.use_matmul_gather_in_matcher)
second_stage_box_predictor = box_predictor_builder.build( second_stage_box_predictor = box_predictor_builder.build(
hyperparams_builder.build, hyperparams_builder.build,
frcnn_config.second_stage_box_predictor, frcnn_config.second_stage_box_predictor,
is_training=is_training, is_training=is_training,
num_classes=num_classes) num_classes=num_classes)
second_stage_batch_size = frcnn_config.second_stage_batch_size second_stage_batch_size = frcnn_config.second_stage_batch_size
second_stage_balance_fraction = frcnn_config.second_stage_balance_fraction second_stage_sampler = sampler.BalancedPositiveNegativeSampler(
positive_fraction=frcnn_config.second_stage_balance_fraction,
is_static=frcnn_config.use_static_balanced_label_sampler)
(second_stage_non_max_suppression_fn, second_stage_score_conversion_fn (second_stage_non_max_suppression_fn, second_stage_score_conversion_fn
) = post_processing_builder.build(frcnn_config.second_stage_post_processing) ) = post_processing_builder.build(frcnn_config.second_stage_post_processing)
second_stage_localization_loss_weight = ( second_stage_localization_loss_weight = (
...@@ -338,6 +375,8 @@ def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries): ...@@ -338,6 +375,8 @@ def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
second_stage_localization_loss_weight) second_stage_localization_loss_weight)
use_matmul_crop_and_resize = (frcnn_config.use_matmul_crop_and_resize) use_matmul_crop_and_resize = (frcnn_config.use_matmul_crop_and_resize)
clip_anchors_to_image = (
frcnn_config.clip_anchors_to_image)
common_kwargs = { common_kwargs = {
'is_training': is_training, 'is_training': is_training,
...@@ -346,6 +385,7 @@ def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries): ...@@ -346,6 +385,7 @@ def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
'feature_extractor': feature_extractor, 'feature_extractor': feature_extractor,
'number_of_stages': number_of_stages, 'number_of_stages': number_of_stages,
'first_stage_anchor_generator': first_stage_anchor_generator, 'first_stage_anchor_generator': first_stage_anchor_generator,
'first_stage_target_assigner': first_stage_target_assigner,
'first_stage_atrous_rate': first_stage_atrous_rate, 'first_stage_atrous_rate': first_stage_atrous_rate,
'first_stage_box_predictor_arg_scope_fn': 'first_stage_box_predictor_arg_scope_fn':
first_stage_box_predictor_arg_scope_fn, first_stage_box_predictor_arg_scope_fn,
...@@ -353,15 +393,15 @@ def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries): ...@@ -353,15 +393,15 @@ def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
first_stage_box_predictor_kernel_size, first_stage_box_predictor_kernel_size,
'first_stage_box_predictor_depth': first_stage_box_predictor_depth, 'first_stage_box_predictor_depth': first_stage_box_predictor_depth,
'first_stage_minibatch_size': first_stage_minibatch_size, 'first_stage_minibatch_size': first_stage_minibatch_size,
'first_stage_positive_balance_fraction': 'first_stage_sampler': first_stage_sampler,
first_stage_positive_balance_fraction,
'first_stage_nms_score_threshold': first_stage_nms_score_threshold, 'first_stage_nms_score_threshold': first_stage_nms_score_threshold,
'first_stage_nms_iou_threshold': first_stage_nms_iou_threshold, 'first_stage_nms_iou_threshold': first_stage_nms_iou_threshold,
'first_stage_max_proposals': first_stage_max_proposals, 'first_stage_max_proposals': first_stage_max_proposals,
'first_stage_localization_loss_weight': first_stage_loc_loss_weight, 'first_stage_localization_loss_weight': first_stage_loc_loss_weight,
'first_stage_objectness_loss_weight': first_stage_obj_loss_weight, 'first_stage_objectness_loss_weight': first_stage_obj_loss_weight,
'second_stage_target_assigner': second_stage_target_assigner,
'second_stage_batch_size': second_stage_batch_size, 'second_stage_batch_size': second_stage_batch_size,
'second_stage_balance_fraction': second_stage_balance_fraction, 'second_stage_sampler': second_stage_sampler,
'second_stage_non_max_suppression_fn': 'second_stage_non_max_suppression_fn':
second_stage_non_max_suppression_fn, second_stage_non_max_suppression_fn,
'second_stage_score_conversion_fn': second_stage_score_conversion_fn, 'second_stage_score_conversion_fn': second_stage_score_conversion_fn,
...@@ -373,10 +413,12 @@ def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries): ...@@ -373,10 +413,12 @@ def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
second_stage_classification_loss_weight, second_stage_classification_loss_weight,
'hard_example_miner': hard_example_miner, 'hard_example_miner': hard_example_miner,
'add_summaries': add_summaries, 'add_summaries': add_summaries,
'use_matmul_crop_and_resize': use_matmul_crop_and_resize 'use_matmul_crop_and_resize': use_matmul_crop_and_resize,
'clip_anchors_to_image': clip_anchors_to_image
} }
if isinstance(second_stage_box_predictor, box_predictor.RfcnBoxPredictor): if isinstance(second_stage_box_predictor,
rfcn_box_predictor.RfcnBoxPredictor):
return rfcn_meta_arch.RFCNMetaArch( return rfcn_meta_arch.RFCNMetaArch(
second_stage_rfcn_box_predictor=second_stage_box_predictor, second_stage_rfcn_box_predictor=second_stage_box_predictor,
**common_kwargs) **common_kwargs)
......
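As the model_builder diff above shows, the Faster R-CNN builder now constructs target assigners and BalancedPositiveNegativeSampler objects up front and hands them to the meta architecture, instead of passing raw balance fractions. A condensed sketch of that wiring, with config field names taken from the diff and assuming the object_detection package is importable:

from object_detection.core import balanced_positive_negative_sampler as sampler
from object_detection.core import target_assigner

def build_first_stage_components(frcnn_config):
  # Target assigner for the RPN ('proposal') stage.
  assigner = target_assigner.create_target_assigner(
      'FasterRCNN', 'proposal',
      use_matmul_gather=frcnn_config.use_matmul_gather_in_matcher)
  # Minibatch sampler; is_static selects the static-shape implementation.
  minibatch_sampler = sampler.BalancedPositiveNegativeSampler(
      positive_fraction=frcnn_config.first_stage_positive_balance_fraction,
      is_static=frcnn_config.use_static_balanced_label_sampler)
  return assigner, minibatch_sampler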
...@@ -54,12 +54,6 @@ SSD_RESNET_V1_FPN_FEAT_MAPS = { ...@@ -54,12 +54,6 @@ SSD_RESNET_V1_FPN_FEAT_MAPS = {
ssd_resnet_v1_fpn.SSDResnet101V1FpnFeatureExtractor, ssd_resnet_v1_fpn.SSDResnet101V1FpnFeatureExtractor,
'ssd_resnet152_v1_fpn': 'ssd_resnet152_v1_fpn':
ssd_resnet_v1_fpn.SSDResnet152V1FpnFeatureExtractor, ssd_resnet_v1_fpn.SSDResnet152V1FpnFeatureExtractor,
'ssd_resnet50_v1_ppn':
ssd_resnet_v1_ppn.SSDResnet50V1PpnFeatureExtractor,
'ssd_resnet101_v1_ppn':
ssd_resnet_v1_ppn.SSDResnet101V1PpnFeatureExtractor,
'ssd_resnet152_v1_ppn':
ssd_resnet_v1_ppn.SSDResnet152V1PpnFeatureExtractor
} }
SSD_RESNET_V1_PPN_FEAT_MAPS = { SSD_RESNET_V1_PPN_FEAT_MAPS = {
...@@ -235,6 +229,10 @@ class ModelBuilderTest(tf.test.TestCase): ...@@ -235,6 +229,10 @@ class ModelBuilderTest(tf.test.TestCase):
ssd { ssd {
feature_extractor { feature_extractor {
type: 'ssd_resnet50_v1_fpn' type: 'ssd_resnet50_v1_fpn'
fpn {
min_level: 3
max_level: 7
}
conv_hyperparams { conv_hyperparams {
regularizer { regularizer {
l2_regularizer { l2_regularizer {
...@@ -479,6 +477,10 @@ class ModelBuilderTest(tf.test.TestCase): ...@@ -479,6 +477,10 @@ class ModelBuilderTest(tf.test.TestCase):
inplace_batchnorm_update: true inplace_batchnorm_update: true
feature_extractor { feature_extractor {
type: 'ssd_mobilenet_v1_fpn' type: 'ssd_mobilenet_v1_fpn'
fpn {
min_level: 3
max_level: 7
}
conv_hyperparams { conv_hyperparams {
regularizer { regularizer {
l2_regularizer { l2_regularizer {
......
...@@ -71,22 +71,38 @@ def _get_dict_from_proto(config): ...@@ -71,22 +71,38 @@ def _get_dict_from_proto(config):
# function that should be used. The PreprocessingStep proto should be parsable # function that should be used. The PreprocessingStep proto should be parsable
# with _get_dict_from_proto. # with _get_dict_from_proto.
PREPROCESSING_FUNCTION_MAP = { PREPROCESSING_FUNCTION_MAP = {
'normalize_image': preprocessor.normalize_image, 'normalize_image':
'random_pixel_value_scale': preprocessor.random_pixel_value_scale, preprocessor.normalize_image,
'random_image_scale': preprocessor.random_image_scale, 'random_pixel_value_scale':
'random_rgb_to_gray': preprocessor.random_rgb_to_gray, preprocessor.random_pixel_value_scale,
'random_adjust_brightness': preprocessor.random_adjust_brightness, 'random_image_scale':
'random_adjust_contrast': preprocessor.random_adjust_contrast, preprocessor.random_image_scale,
'random_adjust_hue': preprocessor.random_adjust_hue, 'random_rgb_to_gray':
'random_adjust_saturation': preprocessor.random_adjust_saturation, preprocessor.random_rgb_to_gray,
'random_distort_color': preprocessor.random_distort_color, 'random_adjust_brightness':
'random_jitter_boxes': preprocessor.random_jitter_boxes, preprocessor.random_adjust_brightness,
'random_crop_to_aspect_ratio': preprocessor.random_crop_to_aspect_ratio, 'random_adjust_contrast':
'random_black_patches': preprocessor.random_black_patches, preprocessor.random_adjust_contrast,
'rgb_to_gray': preprocessor.rgb_to_gray, 'random_adjust_hue':
preprocessor.random_adjust_hue,
'random_adjust_saturation':
preprocessor.random_adjust_saturation,
'random_distort_color':
preprocessor.random_distort_color,
'random_jitter_boxes':
preprocessor.random_jitter_boxes,
'random_crop_to_aspect_ratio':
preprocessor.random_crop_to_aspect_ratio,
'random_black_patches':
preprocessor.random_black_patches,
'rgb_to_gray':
preprocessor.rgb_to_gray,
'scale_boxes_to_pixel_coordinates': ( 'scale_boxes_to_pixel_coordinates': (
preprocessor.scale_boxes_to_pixel_coordinates), preprocessor.scale_boxes_to_pixel_coordinates),
'subtract_channel_mean': preprocessor.subtract_channel_mean, 'subtract_channel_mean':
preprocessor.subtract_channel_mean,
'convert_class_logits_to_softmax':
preprocessor.convert_class_logits_to_softmax,
} }
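A map like PREPROCESSING_FUNCTION_MAP lets the builder dispatch simple steps purely by the name of the proto oneof that is set. The sketch below illustrates that dispatch; it is not the actual builder code, `WhichOneof` is the standard protobuf API, and the kwargs-extraction callback stands in for the `_get_dict_from_proto` helper mentioned in the comments above.

def build_simple_step(step_config, function_map, get_kwargs_fn):
  # The proto oneof name doubles as the lookup key into the function map.
  step_type = step_config.WhichOneof('preprocessing_step')
  if step_type not in function_map:
    raise ValueError('Unknown preprocessing step: {}'.format(step_type))
  preprocessing_fn = function_map[step_type]
  # Convert the step's sub-message into keyword arguments for the function.
  kwargs = get_kwargs_fn(getattr(step_config, step_type))
  return preprocessing_fn, kwargs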
......
...@@ -561,6 +561,18 @@ class PreprocessorBuilderTest(tf.test.TestCase): ...@@ -561,6 +561,18 @@ class PreprocessorBuilderTest(tf.test.TestCase):
'min_padded_size_ratio': (1.0, 1.0), 'min_padded_size_ratio': (1.0, 1.0),
'max_padded_size_ratio': (2.0, 2.0)}) 'max_padded_size_ratio': (2.0, 2.0)})
def test_build_normalize_image_convert_class_logits_to_softmax(self):
preprocessor_text_proto = """
convert_class_logits_to_softmax {
temperature: 2
}
"""
preprocessor_proto = preprocessor_pb2.PreprocessingStep()
text_format.Merge(preprocessor_text_proto, preprocessor_proto)
function, args = preprocessor_builder.build(preprocessor_proto)
self.assertEqual(function, preprocessor.convert_class_logits_to_softmax)
self.assertEqual(args, {'temperature': 2})
if __name__ == '__main__': if __name__ == '__main__':
tf.test.main() tf.test.main()
...@@ -51,6 +51,9 @@ def build(region_similarity_calculator_config): ...@@ -51,6 +51,9 @@ def build(region_similarity_calculator_config):
return region_similarity_calculator.IoaSimilarity() return region_similarity_calculator.IoaSimilarity()
if similarity_calculator == 'neg_sq_dist_similarity': if similarity_calculator == 'neg_sq_dist_similarity':
return region_similarity_calculator.NegSqDistSimilarity() return region_similarity_calculator.NegSqDistSimilarity()
if similarity_calculator == 'thresholded_iou_similarity':
return region_similarity_calculator.ThresholdedIouSimilarity(
region_similarity_calculator_config.thresholded_iou_similarity.threshold
)
raise ValueError('Unknown region similarity calculator.') raise ValueError('Unknown region similarity calculator.')
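With the branch added above, the new calculator can be selected from a region similarity config. A hedged sketch follows; the proto and builder module paths are assumed from the imports elsewhere in this commit, and the `threshold` field name is taken from the code above.

from google.protobuf import text_format
from object_detection.builders import region_similarity_calculator_builder
from object_detection.protos import region_similarity_calculator_pb2

config = region_similarity_calculator_pb2.RegionSimilarityCalculator()
text_format.Merge("""
  thresholded_iou_similarity {
    threshold: 0.5
  }
""", config)
similarity_calc = region_similarity_calculator_builder.build(config)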
...@@ -29,17 +29,19 @@ the minibatch_sampler base class. ...@@ -29,17 +29,19 @@ the minibatch_sampler base class.
import tensorflow as tf import tensorflow as tf
from object_detection.core import minibatch_sampler from object_detection.core import minibatch_sampler
from object_detection.utils import ops
class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler): class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler):
"""Subsamples minibatches to a desired balance of positives and negatives.""" """Subsamples minibatches to a desired balance of positives and negatives."""
def __init__(self, positive_fraction=0.5): def __init__(self, positive_fraction=0.5, is_static=False):
"""Constructs a minibatch sampler. """Constructs a minibatch sampler.
Args: Args:
positive_fraction: desired fraction of positive examples (scalar in [0,1]) positive_fraction: desired fraction of positive examples (scalar in [0,1])
in the batch. in the batch.
is_static: If True, uses an implementation with static shape guarantees.
Raises: Raises:
ValueError: if positive_fraction < 0, or positive_fraction > 1 ValueError: if positive_fraction < 0, or positive_fraction > 1
...@@ -48,21 +50,159 @@ class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler): ...@@ -48,21 +50,159 @@ class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler):
raise ValueError('positive_fraction should be in range [0,1]. ' raise ValueError('positive_fraction should be in range [0,1]. '
'Received: %s.' % positive_fraction) 'Received: %s.' % positive_fraction)
self._positive_fraction = positive_fraction self._positive_fraction = positive_fraction
self._is_static = is_static
def subsample(self, indicator, batch_size, labels): def _get_num_pos_neg_samples(self, sorted_indices_tensor, sample_size):
"""Counts the number of positives and negatives numbers to be sampled.
Args:
sorted_indices_tensor: A sorted int32 tensor of shape [N] which contains
the signed indices of the examples where the sign is based on the label
value. The examples that cannot be sampled are set to 0. It samples
        at most sample_size*positive_fraction positive examples and the rest
        from negative examples.
sample_size: Size of subsamples.
Returns:
A tuple containing the number of positive and negative labels in the
subsample.
"""
input_length = tf.shape(sorted_indices_tensor)[0]
valid_positive_index = tf.greater(sorted_indices_tensor,
tf.zeros(input_length, tf.int32))
num_sampled_pos = tf.reduce_sum(tf.cast(valid_positive_index, tf.int32))
max_num_positive_samples = tf.constant(
int(sample_size * self._positive_fraction), tf.int32)
num_positive_samples = tf.minimum(max_num_positive_samples, num_sampled_pos)
num_negative_samples = tf.constant(sample_size,
tf.int32) - num_positive_samples
return num_positive_samples, num_negative_samples
def _get_values_from_start_and_end(self, input_tensor, num_start_samples,
num_end_samples, total_num_samples):
"""slices num_start_samples and last num_end_samples from input_tensor.
Args:
input_tensor: An int32 tensor of shape [N] to be sliced.
num_start_samples: Number of examples to be sliced from the beginning
of the input tensor.
num_end_samples: Number of examples to be sliced from the end of the
input tensor.
      total_num_samples: Sum of num_start_samples and num_end_samples. This
should be a scalar.
Returns:
A tensor containing the first num_start_samples and last num_end_samples
from input_tensor.
"""
input_length = tf.shape(input_tensor)[0]
start_positions = tf.less(tf.range(input_length), num_start_samples)
end_positions = tf.greater_equal(
tf.range(input_length), input_length - num_end_samples)
selected_positions = tf.logical_or(start_positions, end_positions)
selected_positions = tf.cast(selected_positions, tf.int32)
indexed_positions = tf.multiply(tf.cumsum(selected_positions),
selected_positions)
one_hot_selector = tf.one_hot(indexed_positions - 1,
total_num_samples,
dtype=tf.int32)
return tf.tensordot(input_tensor, one_hot_selector, axes=[0, 0])
def _static_subsample(self, indicator, batch_size, labels):
"""Returns subsampled minibatch.
Args:
indicator: boolean tensor of shape [N] whose True entries can be sampled.
        N should be a compile time constant.
batch_size: desired batch size. This scalar cannot be None.
labels: boolean tensor of shape [N] denoting positive(=True) and negative
        (=False) examples. N should be a compile time constant.
Returns:
sampled_idx_indicator: boolean tensor of shape [N], True for entries which
are sampled.
Raises:
ValueError: if labels and indicator are not 1D boolean tensors.
"""
# Check if indicator and labels have a static size.
if not indicator.shape.is_fully_defined():
      raise ValueError('indicator must be static in shape when is_static is '
                       'True')
if not labels.shape.is_fully_defined():
      raise ValueError('labels must be static in shape when is_static is '
                       'True')
if not isinstance(batch_size, int):
      raise ValueError('batch_size has to be an integer when is_static is '
                       'True.')
input_length = tf.shape(indicator)[0]
# Shuffle indicator and label. Need to store the permutation to restore the
# order post sampling.
permutation = tf.random_shuffle(tf.range(input_length))
indicator = ops.matmul_gather_on_zeroth_axis(
tf.cast(indicator, tf.float32), permutation)
labels = ops.matmul_gather_on_zeroth_axis(
tf.cast(labels, tf.float32), permutation)
    # index (starting from 1) when indicator is True, 0 when False
indicator_idx = tf.where(
tf.cast(indicator, tf.bool), tf.range(1, input_length + 1),
tf.zeros(input_length, tf.int32))
# Replace -1 for negative, +1 for positive labels
signed_label = tf.where(
tf.cast(labels, tf.bool), tf.ones(input_length, tf.int32),
tf.scalar_mul(-1, tf.ones(input_length, tf.int32)))
# negative of index for negative label, positive index for positive label,
# 0 when indicator is False.
signed_indicator_idx = tf.multiply(indicator_idx, signed_label)
sorted_signed_indicator_idx = tf.nn.top_k(
signed_indicator_idx, input_length, sorted=True).values
[num_positive_samples,
num_negative_samples] = self._get_num_pos_neg_samples(
sorted_signed_indicator_idx, batch_size)
sampled_idx = self._get_values_from_start_and_end(
sorted_signed_indicator_idx, num_positive_samples,
num_negative_samples, batch_size)
# Shift the indices to start from 0 and remove any samples that are set as
# False.
sampled_idx = tf.abs(sampled_idx) - tf.ones(batch_size, tf.int32)
sampled_idx = tf.multiply(
tf.cast(tf.greater_equal(sampled_idx, tf.constant(0)), tf.int32),
sampled_idx)
sampled_idx_indicator = tf.cast(tf.reduce_sum(
tf.one_hot(sampled_idx, depth=input_length),
axis=0), tf.bool)
# project back the order based on stored permutations
reprojections = tf.one_hot(permutation, depth=input_length, dtype=tf.int32)
return tf.cast(tf.tensordot(
tf.cast(sampled_idx_indicator, tf.int32),
reprojections, axes=[0, 0]), tf.bool)
def subsample(self, indicator, batch_size, labels, scope=None):
"""Returns subsampled minibatch. """Returns subsampled minibatch.
Args: Args:
indicator: boolean tensor of shape [N] whose True entries can be sampled. indicator: boolean tensor of shape [N] whose True entries can be sampled.
batch_size: desired batch size. If None, keeps all positive samples and batch_size: desired batch size. If None, keeps all positive samples and
randomly selects negative samples so that the positive sample fraction randomly selects negative samples so that the positive sample fraction
        matches self._positive_fraction. matches self._positive_fraction. It cannot be None if is_static is True.
labels: boolean tensor of shape [N] denoting positive(=True) and negative labels: boolean tensor of shape [N] denoting positive(=True) and negative
(=False) examples. (=False) examples.
scope: name scope.
Returns: Returns:
is_sampled: boolean tensor of shape [N], True for entries which are sampled_idx_indicator: boolean tensor of shape [N], True for entries which
sampled. are sampled.
Raises: Raises:
ValueError: if labels and indicator are not 1D boolean tensors. ValueError: if labels and indicator are not 1D boolean tensors.
...@@ -79,27 +219,30 @@ class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler): ...@@ -79,27 +219,30 @@ class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler):
if indicator.dtype != tf.bool: if indicator.dtype != tf.bool:
raise ValueError('indicator should be of type bool. Received: %s' % raise ValueError('indicator should be of type bool. Received: %s' %
indicator.dtype) indicator.dtype)
with tf.name_scope(scope, 'BalancedPositiveNegativeSampler'):
# Only sample from indicated samples if self._is_static:
negative_idx = tf.logical_not(labels) return self._static_subsample(indicator, batch_size, labels)
positive_idx = tf.logical_and(labels, indicator)
negative_idx = tf.logical_and(negative_idx, indicator) else:
# Only sample from indicated samples
# Sample positive and negative samples separately negative_idx = tf.logical_not(labels)
if batch_size is None: positive_idx = tf.logical_and(labels, indicator)
max_num_pos = tf.reduce_sum(tf.to_int32(positive_idx)) negative_idx = tf.logical_and(negative_idx, indicator)
else:
max_num_pos = int(self._positive_fraction * batch_size) # Sample positive and negative samples separately
sampled_pos_idx = self.subsample_indicator(positive_idx, max_num_pos) if batch_size is None:
num_sampled_pos = tf.reduce_sum(tf.cast(sampled_pos_idx, tf.int32)) max_num_pos = tf.reduce_sum(tf.to_int32(positive_idx))
if batch_size is None: else:
negative_positive_ratio = ( max_num_pos = int(self._positive_fraction * batch_size)
1 - self._positive_fraction) / self._positive_fraction sampled_pos_idx = self.subsample_indicator(positive_idx, max_num_pos)
max_num_neg = tf.to_int32( num_sampled_pos = tf.reduce_sum(tf.cast(sampled_pos_idx, tf.int32))
negative_positive_ratio * tf.to_float(num_sampled_pos)) if batch_size is None:
else: negative_positive_ratio = (
max_num_neg = batch_size - num_sampled_pos 1 - self._positive_fraction) / self._positive_fraction
sampled_neg_idx = self.subsample_indicator(negative_idx, max_num_neg) max_num_neg = tf.to_int32(
negative_positive_ratio * tf.to_float(num_sampled_pos))
sampled_idx = tf.logical_or(sampled_pos_idx, sampled_neg_idx) else:
return sampled_idx max_num_neg = batch_size - num_sampled_pos
sampled_neg_idx = self.subsample_indicator(negative_idx, max_num_neg)
return tf.logical_or(sampled_pos_idx, sampled_neg_idx)
...@@ -24,15 +24,16 @@ from object_detection.utils import test_case ...@@ -24,15 +24,16 @@ from object_detection.utils import test_case
class BalancedPositiveNegativeSamplerTest(test_case.TestCase): class BalancedPositiveNegativeSamplerTest(test_case.TestCase):
def test_subsample_all_examples(self): def _test_subsample_all_examples(self, is_static=False):
numpy_labels = np.random.permutation(300) numpy_labels = np.random.permutation(300)
indicator = tf.constant(np.ones(300) == 1) indicator = tf.constant(np.ones(300) == 1)
numpy_labels = (numpy_labels - 200) > 0 numpy_labels = (numpy_labels - 200) > 0
labels = tf.constant(numpy_labels) labels = tf.constant(numpy_labels)
sampler = (balanced_positive_negative_sampler. sampler = (
BalancedPositiveNegativeSampler()) balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
is_static=is_static))
is_sampled = sampler.subsample(indicator, 64, labels) is_sampled = sampler.subsample(indicator, 64, labels)
with self.test_session() as sess: with self.test_session() as sess:
is_sampled = sess.run(is_sampled) is_sampled = sess.run(is_sampled)
...@@ -41,7 +42,13 @@ class BalancedPositiveNegativeSamplerTest(test_case.TestCase): ...@@ -41,7 +42,13 @@ class BalancedPositiveNegativeSamplerTest(test_case.TestCase):
self.assertTrue(sum(np.logical_and( self.assertTrue(sum(np.logical_and(
np.logical_not(numpy_labels), is_sampled)) == 32) np.logical_not(numpy_labels), is_sampled)) == 32)
def test_subsample_selection(self): def test_subsample_all_examples_dynamic(self):
self._test_subsample_all_examples()
def test_subsample_all_examples_static(self):
self._test_subsample_all_examples(is_static=True)
def _test_subsample_selection(self, is_static=False):
# Test random sampling when only some examples can be sampled: # Test random sampling when only some examples can be sampled:
# 100 samples, 20 positives, 10 positives cannot be sampled # 100 samples, 20 positives, 10 positives cannot be sampled
numpy_labels = np.arange(100) numpy_labels = np.arange(100)
...@@ -51,8 +58,9 @@ class BalancedPositiveNegativeSamplerTest(test_case.TestCase): ...@@ -51,8 +58,9 @@ class BalancedPositiveNegativeSamplerTest(test_case.TestCase):
labels = tf.constant(numpy_labels) labels = tf.constant(numpy_labels)
sampler = (balanced_positive_negative_sampler. sampler = (
BalancedPositiveNegativeSampler()) balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
is_static=is_static))
is_sampled = sampler.subsample(indicator, 64, labels) is_sampled = sampler.subsample(indicator, 64, labels)
with self.test_session() as sess: with self.test_session() as sess:
is_sampled = sess.run(is_sampled) is_sampled = sess.run(is_sampled)
...@@ -63,6 +71,42 @@ class BalancedPositiveNegativeSamplerTest(test_case.TestCase): ...@@ -63,6 +71,42 @@ class BalancedPositiveNegativeSamplerTest(test_case.TestCase):
self.assertAllEqual(is_sampled, np.logical_and(is_sampled, self.assertAllEqual(is_sampled, np.logical_and(is_sampled,
numpy_indicator)) numpy_indicator))
def test_subsample_selection_dynamic(self):
self._test_subsample_selection()
def test_subsample_selection_static(self):
self._test_subsample_selection(is_static=True)
def _test_subsample_selection_larger_batch_size(self, is_static=False):
    # Test random sampling when the total number of examples that can be
    # sampled is less than the batch size:
# 100 samples, 50 positives, 40 positives cannot be sampled, batch size 64.
numpy_labels = np.arange(100)
numpy_indicator = numpy_labels < 60
indicator = tf.constant(numpy_indicator)
numpy_labels = (numpy_labels - 50) >= 0
labels = tf.constant(numpy_labels)
sampler = (
balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
is_static=is_static))
is_sampled = sampler.subsample(indicator, 64, labels)
with self.test_session() as sess:
is_sampled = sess.run(is_sampled)
self.assertTrue(sum(is_sampled) == 60)
self.assertTrue(sum(np.logical_and(numpy_labels, is_sampled)) == 10)
self.assertTrue(
sum(np.logical_and(np.logical_not(numpy_labels), is_sampled)) == 50)
self.assertAllEqual(is_sampled, np.logical_and(is_sampled,
numpy_indicator))
def test_subsample_selection_larger_batch_size_dynamic(self):
self._test_subsample_selection_larger_batch_size()
def test_subsample_selection_larger_batch_size_static(self):
self._test_subsample_selection_larger_batch_size(is_static=True)
def test_subsample_selection_no_batch_size(self): def test_subsample_selection_no_batch_size(self):
# Test random sampling when only some examples can be sampled: # Test random sampling when only some examples can be sampled:
# 1000 samples, 6 positives (5 can be sampled). # 1000 samples, 6 positives (5 can be sampled).
...@@ -85,6 +129,14 @@ class BalancedPositiveNegativeSamplerTest(test_case.TestCase): ...@@ -85,6 +129,14 @@ class BalancedPositiveNegativeSamplerTest(test_case.TestCase):
self.assertAllEqual(is_sampled, np.logical_and(is_sampled, self.assertAllEqual(is_sampled, np.logical_and(is_sampled,
numpy_indicator)) numpy_indicator))
def test_subsample_selection_no_batch_size_static(self):
labels = tf.constant([[True, False, False]])
indicator = tf.constant([True, False, True])
sampler = (
balanced_positive_negative_sampler.BalancedPositiveNegativeSampler())
with self.assertRaises(ValueError):
sampler.subsample(indicator, None, labels)
def test_raises_error_with_incorrect_label_shape(self): def test_raises_error_with_incorrect_label_shape(self):
labels = tf.constant([[True, False, False]]) labels = tf.constant([[True, False, False]])
indicator = tf.constant([True, False, True]) indicator = tf.constant([True, False, True])
...@@ -101,6 +153,5 @@ class BalancedPositiveNegativeSamplerTest(test_case.TestCase): ...@@ -101,6 +153,5 @@ class BalancedPositiveNegativeSamplerTest(test_case.TestCase):
with self.assertRaises(ValueError): with self.assertRaises(ValueError):
sampler.subsample(indicator, 64, labels) sampler.subsample(indicator, 64, labels)
if __name__ == '__main__': if __name__ == '__main__':
tf.test.main() tf.test.main()
...@@ -2925,6 +2925,29 @@ def ssd_random_crop_pad_fixed_aspect_ratio( ...@@ -2925,6 +2925,29 @@ def ssd_random_crop_pad_fixed_aspect_ratio(
return result return result
def convert_class_logits_to_softmax(multiclass_scores, temperature=1.0):
"""Converts multiclass logits to softmax scores after applying temperature.
Args:
multiclass_scores: float32 tensor of shape
[num_instances, num_classes] representing the score for each box for each
class.
temperature: Scale factor to use prior to applying softmax. Larger
      temperatures give more uniform distributions after softmax.
Returns:
multiclass_scores: float32 tensor of shape
[num_instances, num_classes] with scaling and softmax applied.
"""
# Multiclass scores must be stored as logits. Apply temp and softmax.
multiclass_scores_scaled = tf.divide(
multiclass_scores, temperature, name='scale_logits')
multiclass_scores = tf.nn.softmax(multiclass_scores_scaled, name='softmax')
return multiclass_scores
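A quick standalone check of the temperature scaling above (plain NumPy, not part of this commit), matching the expected values used in the preprocessor test later in this diff: with temperature 2.0, logits [1.0, 0.0] become [0.5, 0.0] before softmax, which gives roughly [0.6225, 0.3775].

import numpy as np

def softmax_with_temperature(logits, temperature):
  scaled = np.asarray(logits, dtype=np.float64) / temperature
  exps = np.exp(scaled - scaled.max(axis=-1, keepdims=True))  # stable softmax
  return exps / exps.sum(axis=-1, keepdims=True)

print(softmax_with_temperature([[1.0, 0.0], [0.5, 0.5], [1000.0, 1.0]], 2.0))
# -> approximately [[0.6225, 0.3775], [0.5, 0.5], [1.0, 0.0]]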
def get_default_func_arg_map(include_label_scores=False, def get_default_func_arg_map(include_label_scores=False,
include_multiclass_scores=False, include_multiclass_scores=False,
include_instance_masks=False, include_instance_masks=False,
...@@ -3003,8 +3026,7 @@ def get_default_func_arg_map(include_label_scores=False, ...@@ -3003,8 +3026,7 @@ def get_default_func_arg_map(include_label_scores=False,
random_crop_pad_image: (fields.InputDataFields.image, random_crop_pad_image: (fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes, fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_classes, fields.InputDataFields.groundtruth_classes,
groundtruth_label_scores, groundtruth_label_scores, multiclass_scores),
multiclass_scores),
random_crop_to_aspect_ratio: ( random_crop_to_aspect_ratio: (
fields.InputDataFields.image, fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes, fields.InputDataFields.groundtruth_boxes,
...@@ -3051,20 +3073,15 @@ def get_default_func_arg_map(include_label_scores=False, ...@@ -3051,20 +3073,15 @@ def get_default_func_arg_map(include_label_scores=False,
subtract_channel_mean: (fields.InputDataFields.image,), subtract_channel_mean: (fields.InputDataFields.image,),
one_hot_encoding: (fields.InputDataFields.groundtruth_image_classes,), one_hot_encoding: (fields.InputDataFields.groundtruth_image_classes,),
rgb_to_gray: (fields.InputDataFields.image,), rgb_to_gray: (fields.InputDataFields.image,),
ssd_random_crop: ( ssd_random_crop: (fields.InputDataFields.image,
fields.InputDataFields.image, fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_boxes, fields.InputDataFields.groundtruth_classes,
fields.InputDataFields.groundtruth_classes, groundtruth_label_scores, multiclass_scores,
groundtruth_label_scores, groundtruth_instance_masks, groundtruth_keypoints),
multiclass_scores,
groundtruth_instance_masks,
groundtruth_keypoints
),
ssd_random_crop_pad: (fields.InputDataFields.image, ssd_random_crop_pad: (fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes, fields.InputDataFields.groundtruth_boxes,
fields.InputDataFields.groundtruth_classes, fields.InputDataFields.groundtruth_classes,
groundtruth_label_scores, groundtruth_label_scores, multiclass_scores),
multiclass_scores),
ssd_random_crop_fixed_aspect_ratio: ( ssd_random_crop_fixed_aspect_ratio: (
fields.InputDataFields.image, fields.InputDataFields.image,
fields.InputDataFields.groundtruth_boxes, fields.InputDataFields.groundtruth_boxes,
...@@ -3079,6 +3096,7 @@ def get_default_func_arg_map(include_label_scores=False, ...@@ -3079,6 +3096,7 @@ def get_default_func_arg_map(include_label_scores=False,
groundtruth_instance_masks, groundtruth_instance_masks,
groundtruth_keypoints, groundtruth_keypoints,
), ),
convert_class_logits_to_softmax: (multiclass_scores,),
} }
return prep_func_arg_map return prep_func_arg_map
......
...@@ -2844,5 +2844,24 @@ class PreprocessorTest(tf.test.TestCase): ...@@ -2844,5 +2844,24 @@ class PreprocessorTest(tf.test.TestCase):
include_instance_masks=True, include_instance_masks=True,
include_keypoints=True) include_keypoints=True)
def testConvertClassLogitsToSoftmax(self):
multiclass_scores = tf.constant(
[[1.0, 0.0], [0.5, 0.5], [1000, 1]], dtype=tf.float32)
temperature = 2.0
converted_multiclass_scores = (
preprocessor.convert_class_logits_to_softmax(
multiclass_scores=multiclass_scores, temperature=temperature))
expected_converted_multiclass_scores = [[[0.62245935, 0.37754068],
[0.5, 0.5], [1, 0]]]
with self.test_session() as sess:
(converted_multiclass_scores_) = sess.run([converted_multiclass_scores])
self.assertAllClose(converted_multiclass_scores_,
expected_converted_multiclass_scores)
if __name__ == '__main__': if __name__ == '__main__':
tf.test.main() tf.test.main()
...@@ -24,6 +24,7 @@ from abc import abstractmethod ...@@ -24,6 +24,7 @@ from abc import abstractmethod
import tensorflow as tf import tensorflow as tf
from object_detection.core import box_list_ops from object_detection.core import box_list_ops
from object_detection.core import standard_fields as fields
class RegionSimilarityCalculator(object): class RegionSimilarityCalculator(object):
...@@ -33,7 +34,7 @@ class RegionSimilarityCalculator(object): ...@@ -33,7 +34,7 @@ class RegionSimilarityCalculator(object):
def compare(self, boxlist1, boxlist2, scope=None): def compare(self, boxlist1, boxlist2, scope=None):
"""Computes matrix of pairwise similarity between BoxLists. """Computes matrix of pairwise similarity between BoxLists.
This op (to be overriden) computes a measure of pairwise similarity between This op (to be overridden) computes a measure of pairwise similarity between
the boxes in the given BoxLists. Higher values indicate more similarity. the boxes in the given BoxLists. Higher values indicate more similarity.
Note that this method simply measures similarity and does not explicitly Note that this method simply measures similarity and does not explicitly
...@@ -112,3 +113,42 @@ class IoaSimilarity(RegionSimilarityCalculator): ...@@ -112,3 +113,42 @@ class IoaSimilarity(RegionSimilarityCalculator):
A tensor with shape [N, M] representing pairwise IOA scores. A tensor with shape [N, M] representing pairwise IOA scores.
""" """
return box_list_ops.ioa(boxlist1, boxlist2) return box_list_ops.ioa(boxlist1, boxlist2)
class ThresholdedIouSimilarity(RegionSimilarityCalculator):
"""Class to compute similarity based on thresholded IOU and score.
This class computes pairwise similarity between two BoxLists based on IOU and
a 'score' present in boxlist1. If IOU > threshold, then the entry in the
output pairwise tensor will contain `score`, otherwise 0.
"""
def __init__(self, iou_threshold=0):
"""Initialize the ThresholdedIouSimilarity.
Args:
iou_threshold: For a given pair of boxes, if the IOU is > iou_threshold,
then the comparison result will be the foreground probability of
the first box, otherwise it will be zero.
"""
self._iou_threshold = iou_threshold
def _compare(self, boxlist1, boxlist2):
"""Compute pairwise IOU similarity between the two BoxLists and score.
Args:
boxlist1: BoxList holding N boxes. Must have a score field.
boxlist2: BoxList holding M boxes.
Returns:
      A tensor with shape [N, M] representing scores from boxlist1,
      thresholded by the pairwise IOU scores.
"""
ious = box_list_ops.iou(boxlist1, boxlist2)
scores = boxlist1.get_field(fields.BoxListFields.scores)
scores = tf.expand_dims(scores, axis=1)
row_replicated_scores = tf.tile(scores, [1, tf.shape(ious)[-1]])
thresholded_ious = tf.where(ious > self._iou_threshold,
row_replicated_scores, tf.zeros_like(ious))
return thresholded_ious
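A hedged usage sketch of the new calculator, assuming the object_detection package is importable: boxlist1 carries a score field, and each output entry is that score where the pairwise IOU exceeds the threshold, and zero elsewhere.

import tensorflow as tf
from object_detection.core import box_list
from object_detection.core import region_similarity_calculator
from object_detection.core import standard_fields as fields

boxes1 = box_list.BoxList(tf.constant([[0.0, 0.0, 1.0, 1.0],
                                       [0.0, 0.0, 0.5, 0.5]]))
boxes1.add_field(fields.BoxListFields.scores, tf.constant([0.9, 0.2]))
boxes2 = box_list.BoxList(tf.constant([[0.0, 0.0, 1.0, 1.0]]))

calc = region_similarity_calculator.ThresholdedIouSimilarity(iou_threshold=0.5)
similarity = calc.compare(boxes1, boxes2)

with tf.Session() as sess:
  # First box overlaps fully (IOU 1.0 > 0.5) -> its score 0.9 is kept;
  # second box has IOU 0.25 <= 0.5 -> 0.0.
  print(sess.run(similarity))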