Unverified Commit ff88581a authored by vivek rathod, committed by GitHub

Merge pull request #2629 from tombstone/meta_arch_update

update post_processing module, builders, and meta architectures.
parents 018e62f0 aeeaf9a3
...@@ -28,8 +28,8 @@ def build(post_processing_config): ...@@ -28,8 +28,8 @@ def build(post_processing_config):
configuration. configuration.
Non-max suppression callable takes `boxes`, `scores`, and optionally Non-max suppression callable takes `boxes`, `scores`, and optionally
`clip_window`, `parallel_iterations` and `scope` as inputs. It returns `clip_window`, `parallel_iterations`, `masks`, and `scope` as inputs. It returns
`nms_boxes`, `nms_scores`, `nms_nms_classes` and `num_detections`. See `nms_boxes`, `nms_scores`, `nms_classes`, `nms_masks` and `num_detections`. See
post_processing.batch_multiclass_non_max_suppression for the type and shape post_processing.batch_multiclass_non_max_suppression for the type and shape
of these tensors. of these tensors.
...@@ -55,7 +55,8 @@ def build(post_processing_config): ...@@ -55,7 +55,8 @@ def build(post_processing_config):
non_max_suppressor_fn = _build_non_max_suppressor( non_max_suppressor_fn = _build_non_max_suppressor(
post_processing_config.batch_non_max_suppression) post_processing_config.batch_non_max_suppression)
score_converter_fn = _build_score_converter( score_converter_fn = _build_score_converter(
post_processing_config.score_converter) post_processing_config.score_converter,
post_processing_config.logit_scale)
return non_max_suppressor_fn, score_converter_fn return non_max_suppressor_fn, score_converter_fn
...@@ -87,7 +88,17 @@ def _build_non_max_suppressor(nms_config): ...@@ -87,7 +88,17 @@ def _build_non_max_suppressor(nms_config):
return non_max_suppressor_fn return non_max_suppressor_fn
def _build_score_converter(score_converter_config): def _score_converter_fn_with_logit_scale(tf_score_converter_fn, logit_scale):
"""Create a function to scale logits then apply a Tensorflow function."""
def score_converter_fn(logits):
scaled_logits = tf.divide(logits, logit_scale, name='scale_logits')
return tf_score_converter_fn(scaled_logits, name='convert_scores')
score_converter_fn.__name__ = '%s_with_logit_scale' % (
tf_score_converter_fn.__name__)
return score_converter_fn
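For intuition, the helper above only divides the logits by `logit_scale` before handing them to the TensorFlow op, so a larger scale produces a softer score distribution. A minimal NumPy sketch of that effect (illustrative only, not part of the change):

import numpy as np

def softmax(x):
  e = np.exp(x - np.max(x))
  return e / e.sum()

logits = np.array([2.0, 0.0])
for logit_scale in (1.0, 2.0, 10.0):
  scaled = logits / logit_scale  # same operation as tf.divide(logits, logit_scale)
  print(logit_scale, softmax(scaled))
# 1.0  -> [0.881, 0.119]
# 2.0  -> [0.731, 0.269]
# 10.0 -> [0.550, 0.450]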
def _build_score_converter(score_converter_config, logit_scale):
"""Builds score converter based on the config. """Builds score converter based on the config.
Builds one of [tf.identity, tf.sigmoid, tf.softmax] score converters based on Builds one of [tf.identity, tf.sigmoid, tf.softmax] score converters based on
...@@ -95,6 +106,7 @@ def _build_score_converter(score_converter_config): ...@@ -95,6 +106,7 @@ def _build_score_converter(score_converter_config):
Args: Args:
score_converter_config: post_processing_pb2.PostProcessing.score_converter. score_converter_config: post_processing_pb2.PostProcessing.score_converter.
logit_scale: temperature to scale logits by before applying the score converter.
Returns: Returns:
Callable score converter op. Callable score converter op.
...@@ -103,9 +115,9 @@ def _build_score_converter(score_converter_config): ...@@ -103,9 +115,9 @@ def _build_score_converter(score_converter_config):
ValueError: On unknown score converter. ValueError: On unknown score converter.
""" """
if score_converter_config == post_processing_pb2.PostProcessing.IDENTITY: if score_converter_config == post_processing_pb2.PostProcessing.IDENTITY:
return tf.identity return _score_converter_fn_with_logit_scale(tf.identity, logit_scale)
if score_converter_config == post_processing_pb2.PostProcessing.SIGMOID: if score_converter_config == post_processing_pb2.PostProcessing.SIGMOID:
return tf.sigmoid return _score_converter_fn_with_logit_scale(tf.sigmoid, logit_scale)
if score_converter_config == post_processing_pb2.PostProcessing.SOFTMAX: if score_converter_config == post_processing_pb2.PostProcessing.SOFTMAX:
return tf.nn.softmax return _score_converter_fn_with_logit_scale(tf.nn.softmax, logit_scale)
raise ValueError('Unknown score converter.') raise ValueError('Unknown score converter.')
...@@ -48,7 +48,31 @@ class PostProcessingBuilderTest(tf.test.TestCase): ...@@ -48,7 +48,31 @@ class PostProcessingBuilderTest(tf.test.TestCase):
post_processing_config = post_processing_pb2.PostProcessing() post_processing_config = post_processing_pb2.PostProcessing()
text_format.Merge(post_processing_text_proto, post_processing_config) text_format.Merge(post_processing_text_proto, post_processing_config)
_, score_converter = post_processing_builder.build(post_processing_config) _, score_converter = post_processing_builder.build(post_processing_config)
self.assertEqual(score_converter, tf.identity) self.assertEqual(score_converter.__name__, 'identity_with_logit_scale')
inputs = tf.constant([1, 1], tf.float32)
outputs = score_converter(inputs)
with self.test_session() as sess:
converted_scores = sess.run(outputs)
expected_converted_scores = sess.run(inputs)
self.assertAllClose(converted_scores, expected_converted_scores)
def test_build_identity_score_converter_with_logit_scale(self):
post_processing_text_proto = """
score_converter: IDENTITY
logit_scale: 2.0
"""
post_processing_config = post_processing_pb2.PostProcessing()
text_format.Merge(post_processing_text_proto, post_processing_config)
_, score_converter = post_processing_builder.build(post_processing_config)
self.assertEqual(score_converter.__name__, 'identity_with_logit_scale')
inputs = tf.constant([1, 1], tf.float32)
outputs = score_converter(inputs)
with self.test_session() as sess:
converted_scores = sess.run(outputs)
expected_converted_scores = sess.run(tf.constant([.5, .5], tf.float32))
self.assertAllClose(converted_scores, expected_converted_scores)
def test_build_sigmoid_score_converter(self): def test_build_sigmoid_score_converter(self):
post_processing_text_proto = """ post_processing_text_proto = """
...@@ -57,7 +81,7 @@ class PostProcessingBuilderTest(tf.test.TestCase): ...@@ -57,7 +81,7 @@ class PostProcessingBuilderTest(tf.test.TestCase):
post_processing_config = post_processing_pb2.PostProcessing() post_processing_config = post_processing_pb2.PostProcessing()
text_format.Merge(post_processing_text_proto, post_processing_config) text_format.Merge(post_processing_text_proto, post_processing_config)
_, score_converter = post_processing_builder.build(post_processing_config) _, score_converter = post_processing_builder.build(post_processing_config)
self.assertEqual(score_converter, tf.sigmoid) self.assertEqual(score_converter.__name__, 'sigmoid_with_logit_scale')
def test_build_softmax_score_converter(self): def test_build_softmax_score_converter(self):
post_processing_text_proto = """ post_processing_text_proto = """
...@@ -66,7 +90,17 @@ class PostProcessingBuilderTest(tf.test.TestCase): ...@@ -66,7 +90,17 @@ class PostProcessingBuilderTest(tf.test.TestCase):
post_processing_config = post_processing_pb2.PostProcessing() post_processing_config = post_processing_pb2.PostProcessing()
text_format.Merge(post_processing_text_proto, post_processing_config) text_format.Merge(post_processing_text_proto, post_processing_config)
_, score_converter = post_processing_builder.build(post_processing_config) _, score_converter = post_processing_builder.build(post_processing_config)
self.assertEqual(score_converter, tf.nn.softmax) self.assertEqual(score_converter.__name__, 'softmax_with_logit_scale')
def test_build_softmax_score_converter_with_temperature(self):
post_processing_text_proto = """
score_converter: SOFTMAX
logit_scale: 2.0
"""
post_processing_config = post_processing_pb2.PostProcessing()
text_format.Merge(post_processing_text_proto, post_processing_config)
_, score_converter = post_processing_builder.build(post_processing_config)
self.assertEqual(score_converter.__name__, 'softmax_with_logit_scale')
if __name__ == '__main__': if __name__ == '__main__':
......
...@@ -76,8 +76,6 @@ def multiclass_non_max_suppression(boxes, ...@@ -76,8 +76,6 @@ def multiclass_non_max_suppression(boxes,
a BoxList holding M boxes with a rank-1 scores field representing a BoxList holding M boxes with a rank-1 scores field representing
corresponding scores for each box with scores sorted in decreasing order corresponding scores for each box with scores sorted in decreasing order
and a rank-1 classes field representing a class label for each box. and a rank-1 classes field representing a class label for each box.
If masks, keypoints, keypoint_heatmaps is not None, the boxlist will
contain masks, keypoints, keypoint_heatmaps corresponding to boxes.
Raises: Raises:
ValueError: if iou_thresh is not in [0, 1] or if input boxlist does not have ValueError: if iou_thresh is not in [0, 1] or if input boxlist does not have
...@@ -174,6 +172,7 @@ def batch_multiclass_non_max_suppression(boxes, ...@@ -174,6 +172,7 @@ def batch_multiclass_non_max_suppression(boxes,
change_coordinate_frame=False, change_coordinate_frame=False,
num_valid_boxes=None, num_valid_boxes=None,
masks=None, masks=None,
additional_fields=None,
scope=None, scope=None,
parallel_iterations=32): parallel_iterations=32):
"""Multi-class version of non maximum suppression that operates on a batch. """Multi-class version of non maximum suppression that operates on a batch.
...@@ -203,11 +202,13 @@ def batch_multiclass_non_max_suppression(boxes, ...@@ -203,11 +202,13 @@ def batch_multiclass_non_max_suppression(boxes,
is provided) is provided)
num_valid_boxes: (optional) a Tensor of type `int32`. A 1-D tensor of shape num_valid_boxes: (optional) a Tensor of type `int32`. A 1-D tensor of shape
[batch_size] representing the number of valid boxes to be considered [batch_size] representing the number of valid boxes to be considered
for each image in the batch. This parameter allows for ignoring zero for each image in the batch. This parameter allows for ignoring zero
paddings. paddings.
masks: (optional) a [batch_size, num_anchors, q, mask_height, mask_width] masks: (optional) a [batch_size, num_anchors, q, mask_height, mask_width]
float32 tensor containing box masks. `q` can be either number of classes float32 tensor containing box masks. `q` can be either number of classes
or 1 depending on whether a separate mask is predicted per class. or 1 depending on whether a separate mask is predicted per class.
additional_fields: (optional) If not None, a dictionary that maps keys to
tensors whose dimensions are [batch_size, num_anchors, ...].
scope: tf scope name. scope: tf scope name.
parallel_iterations: (optional) number of batch items to process in parallel_iterations: (optional) number of batch items to process in
parallel. parallel.
...@@ -223,9 +224,13 @@ def batch_multiclass_non_max_suppression(boxes, ...@@ -223,9 +224,13 @@ def batch_multiclass_non_max_suppression(boxes,
[batch_size, max_detections, mask_height, mask_width] float32 tensor [batch_size, max_detections, mask_height, mask_width] float32 tensor
containing masks for each selected box. This is set to None if input containing masks for each selected box. This is set to None if input
`masks` is None. `masks` is None.
'nmsed_additional_fields': (optional) a dictionary of
[batch_size, max_detections, ...] float32 tensors corresponding to the
tensors specified in the input `additional_fields`. This is not returned
if input `additional_fields` is None.
'num_detections': A [batch_size] int32 tensor indicating the number of 'num_detections': A [batch_size] int32 tensor indicating the number of
valid detections per batch item. Only the top num_detections[i] entries in valid detections per batch item. Only the top num_detections[i] entries in
nms_boxes[i], nms_scores[i] and nms_class[i] are valid. the rest of the nms_boxes[i], nms_scores[i] and nms_class[i] are valid. The rest of the
entries are zero paddings. entries are zero paddings.
Raises: Raises:
...@@ -239,6 +244,7 @@ def batch_multiclass_non_max_suppression(boxes, ...@@ -239,6 +244,7 @@ def batch_multiclass_non_max_suppression(boxes,
'to the third dimension of scores') 'to the third dimension of scores')
original_masks = masks original_masks = masks
original_additional_fields = additional_fields
with tf.name_scope(scope, 'BatchMultiClassNonMaxSuppression'): with tf.name_scope(scope, 'BatchMultiClassNonMaxSuppression'):
boxes_shape = boxes.shape boxes_shape = boxes.shape
batch_size = boxes_shape[0].value batch_size = boxes_shape[0].value
...@@ -255,15 +261,61 @@ def batch_multiclass_non_max_suppression(boxes, ...@@ -255,15 +261,61 @@ def batch_multiclass_non_max_suppression(boxes,
num_valid_boxes = tf.ones([batch_size], dtype=tf.int32) * num_anchors num_valid_boxes = tf.ones([batch_size], dtype=tf.int32) * num_anchors
# If masks aren't provided, create dummy masks so we can only have one copy # If masks aren't provided, create dummy masks so we can only have one copy
# of single_image_nms_fn and discard the dummy masks after map_fn. # of _single_image_nms_fn and discard the dummy masks after map_fn.
if masks is None: if masks is None:
masks_shape = tf.stack([batch_size, num_anchors, 1, 0, 0]) masks_shape = tf.stack([batch_size, num_anchors, 1, 0, 0])
masks = tf.zeros(masks_shape) masks = tf.zeros(masks_shape)
def single_image_nms_fn(args): if additional_fields is None:
"""Runs NMS on a single image and returns padded output.""" additional_fields = {}
(per_image_boxes, per_image_scores, per_image_masks,
per_image_num_valid_boxes) = args def _single_image_nms_fn(args):
"""Runs NMS on a single image and returns padded output.
Args:
args: A list of tensors consisting of the following:
per_image_boxes - A [num_anchors, q, 4] float32 tensor containing
detections. If `q` is 1 then same boxes are used for all classes
otherwise, if `q` is equal to number of classes, class-specific
boxes are used.
per_image_scores - A [num_anchors, num_classes] float32 tensor
containing the scores for each of the `num_anchors` detections.
per_image_masks - A [num_anchors, q, mask_height, mask_width] float32
tensor containing box masks. `q` can be either number of classes
or 1 depending on whether a separate mask is predicted per class.
per_image_additional_fields - (optional) A variable number of float32
tensors each with size [num_anchors, ...].
per_image_num_valid_boxes - A tensor of type `int32`. A 1-D tensor of
shape [batch_size] representing the number of valid boxes to be
considered for each image in the batch. This parameter allows for
ignoring zero paddings.
Returns:
'nmsed_boxes': A [max_detections, 4] float32 tensor containing the
non-max suppressed boxes.
'nmsed_scores': A [max_detections] float32 tensor containing the scores
for the boxes.
'nmsed_classes': A [max_detections] float32 tensor containing the class
for boxes.
'nmsed_masks': (optional) a [max_detections, mask_height, mask_width]
float32 tensor containing masks for each selected box. This is set to
None if input `masks` is None.
'nmsed_additional_fields': (optional) A variable number of float32
tensors each with size [max_detections, ...] corresponding to the
input `per_image_additional_fields`.
'num_detections': A [batch_size] int32 tensor indicating the number of
valid detections per batch item. Only the top num_detections[i]
entries in nms_boxes[i], nms_scores[i] and nms_class[i] are valid. The
rest of the entries are zero paddings.
"""
per_image_boxes = args[0]
per_image_scores = args[1]
per_image_masks = args[2]
per_image_additional_fields = {
key: value
for key, value in zip(additional_fields, args[3:-1])
}
per_image_num_valid_boxes = args[-1]
per_image_boxes = tf.reshape( per_image_boxes = tf.reshape(
tf.slice(per_image_boxes, 3 * [0], tf.slice(per_image_boxes, 3 * [0],
tf.stack([per_image_num_valid_boxes, -1, -1])), [-1, q, 4]) tf.stack([per_image_num_valid_boxes, -1, -1])), [-1, q, 4])
...@@ -271,12 +323,21 @@ def batch_multiclass_non_max_suppression(boxes, ...@@ -271,12 +323,21 @@ def batch_multiclass_non_max_suppression(boxes,
tf.slice(per_image_scores, [0, 0], tf.slice(per_image_scores, [0, 0],
tf.stack([per_image_num_valid_boxes, -1])), tf.stack([per_image_num_valid_boxes, -1])),
[-1, num_classes]) [-1, num_classes])
per_image_masks = tf.reshape( per_image_masks = tf.reshape(
tf.slice(per_image_masks, 4 * [0], tf.slice(per_image_masks, 4 * [0],
tf.stack([per_image_num_valid_boxes, -1, -1, -1])), tf.stack([per_image_num_valid_boxes, -1, -1, -1])),
[-1, q, per_image_masks.shape[2].value, [-1, q, per_image_masks.shape[2].value,
per_image_masks.shape[3].value]) per_image_masks.shape[3].value])
if per_image_additional_fields is not None:
for key, tensor in per_image_additional_fields.items():
additional_field_shape = tensor.get_shape()
additional_field_dim = len(additional_field_shape)
per_image_additional_fields[key] = tf.reshape(
tf.slice(per_image_additional_fields[key],
additional_field_dim * [0],
tf.stack([per_image_num_valid_boxes] +
(additional_field_dim - 1) * [-1])),
[-1] + [dim.value for dim in additional_field_shape[1:]])
nmsed_boxlist = multiclass_non_max_suppression( nmsed_boxlist = multiclass_non_max_suppression(
per_image_boxes, per_image_boxes,
per_image_scores, per_image_scores,
...@@ -284,9 +345,10 @@ def batch_multiclass_non_max_suppression(boxes, ...@@ -284,9 +345,10 @@ def batch_multiclass_non_max_suppression(boxes,
iou_thresh, iou_thresh,
max_size_per_class, max_size_per_class,
max_total_size, max_total_size,
masks=per_image_masks,
clip_window=clip_window, clip_window=clip_window,
change_coordinate_frame=change_coordinate_frame) change_coordinate_frame=change_coordinate_frame,
masks=per_image_masks,
additional_fields=per_image_additional_fields)
padded_boxlist = box_list_ops.pad_or_clip_box_list(nmsed_boxlist, padded_boxlist = box_list_ops.pad_or_clip_box_list(nmsed_boxlist,
max_total_size) max_total_size)
num_detections = nmsed_boxlist.num_boxes() num_detections = nmsed_boxlist.num_boxes()
...@@ -294,19 +356,40 @@ def batch_multiclass_non_max_suppression(boxes, ...@@ -294,19 +356,40 @@ def batch_multiclass_non_max_suppression(boxes,
nmsed_scores = padded_boxlist.get_field(fields.BoxListFields.scores) nmsed_scores = padded_boxlist.get_field(fields.BoxListFields.scores)
nmsed_classes = padded_boxlist.get_field(fields.BoxListFields.classes) nmsed_classes = padded_boxlist.get_field(fields.BoxListFields.classes)
nmsed_masks = padded_boxlist.get_field(fields.BoxListFields.masks) nmsed_masks = padded_boxlist.get_field(fields.BoxListFields.masks)
return [nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, nmsed_additional_fields = [
num_detections] padded_boxlist.get_field(key) for key in per_image_additional_fields
]
return ([nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks] +
nmsed_additional_fields + [num_detections])
num_additional_fields = 0
if additional_fields is not None:
num_additional_fields = len(additional_fields)
num_nmsed_outputs = 4 + num_additional_fields
(batch_nmsed_boxes, batch_nmsed_scores, batch_outputs = tf.map_fn(
batch_nmsed_classes, batch_nmsed_masks, _single_image_nms_fn,
batch_num_detections) = tf.map_fn( elems=([boxes, scores, masks] + list(additional_fields.values()) +
single_image_nms_fn, [num_valid_boxes]),
elems=[boxes, scores, masks, num_valid_boxes], dtype=(num_nmsed_outputs * [tf.float32] + [tf.int32]),
dtype=[tf.float32, tf.float32, tf.float32, tf.float32, tf.int32], parallel_iterations=parallel_iterations)
parallel_iterations=parallel_iterations)
batch_nmsed_boxes = batch_outputs[0]
batch_nmsed_scores = batch_outputs[1]
batch_nmsed_classes = batch_outputs[2]
batch_nmsed_masks = batch_outputs[3]
batch_nmsed_additional_fields = {
key: value
for key, value in zip(additional_fields, batch_outputs[4:-1])
}
batch_num_detections = batch_outputs[-1]
if original_masks is None: if original_masks is None:
batch_nmsed_masks = None batch_nmsed_masks = None
if original_additional_fields is None:
batch_nmsed_additional_fields = None
return (batch_nmsed_boxes, batch_nmsed_scores, batch_nmsed_classes, return (batch_nmsed_boxes, batch_nmsed_scores, batch_nmsed_classes,
batch_nmsed_masks, batch_num_detections) batch_nmsed_masks, batch_nmsed_additional_fields,
batch_num_detections)
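A minimal usage sketch of the new `additional_fields` argument (TF1 graph mode, mirroring the keypoints test added below; shapes and values are illustrative):

import tensorflow as tf
from object_detection.core import post_processing

# boxes: [batch, num_anchors, q, 4]; scores: [batch, num_anchors, num_classes].
boxes = tf.constant([[[[0, 0, 1, 1]], [[0, 10, 1, 11]]]], tf.float32)
scores = tf.constant([[[.9, .1], [.8, .2]]], tf.float32)
# Any per-anchor tensor can ride along, e.g. keypoints with shape
# [batch, num_anchors, num_keypoints, 2].
keypoints = tf.constant([[[[.1, .1], [.9, .9]], [[.2, .2], [.8, .8]]]], tf.float32)
score_thresh = 0.1
iou_thresh = 0.5

(nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
 nmsed_additional_fields, num_detections
) = post_processing.batch_multiclass_non_max_suppression(
    boxes, scores, score_thresh, iou_thresh,
    max_size_per_class=2, max_total_size=2,
    additional_fields={'keypoints': keypoints})
# nmsed_additional_fields['keypoints']: [batch, max_total_size, num_keypoints, 2].
# nmsed_masks is None because no masks were passed in.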
...@@ -497,11 +497,13 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase): ...@@ -497,11 +497,13 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
exp_nms_classes = [[0, 0, 1, 0]] exp_nms_classes = [[0, 0, 1, 0]]
(nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
num_detections) = post_processing.batch_multiclass_non_max_suppression( nmsed_additional_fields, num_detections
boxes, scores, score_thresh, iou_thresh, ) = post_processing.batch_multiclass_non_max_suppression(
max_size_per_class=max_output_size, max_total_size=max_output_size) boxes, scores, score_thresh, iou_thresh,
max_size_per_class=max_output_size, max_total_size=max_output_size)
self.assertIsNone(nmsed_masks) self.assertIsNone(nmsed_masks)
self.assertIsNone(nmsed_additional_fields)
with self.test_session() as sess: with self.test_session() as sess:
(nmsed_boxes, nmsed_scores, nmsed_classes, (nmsed_boxes, nmsed_scores, nmsed_classes,
...@@ -544,11 +546,13 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase): ...@@ -544,11 +546,13 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
[1, 0, 0, 0]]) [1, 0, 0, 0]])
(nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
num_detections) = post_processing.batch_multiclass_non_max_suppression( nmsed_additional_fields, num_detections
boxes, scores, score_thresh, iou_thresh, ) = post_processing.batch_multiclass_non_max_suppression(
max_size_per_class=max_output_size, max_total_size=max_output_size) boxes, scores, score_thresh, iou_thresh,
max_size_per_class=max_output_size, max_total_size=max_output_size)
self.assertIsNone(nmsed_masks) self.assertIsNone(nmsed_masks)
self.assertIsNone(nmsed_additional_fields)
# Check static shapes # Check static shapes
self.assertAllEqual(nmsed_boxes.shape.as_list(), self.assertAllEqual(nmsed_boxes.shape.as_list(),
exp_nms_corners.shape) exp_nms_corners.shape)
...@@ -616,11 +620,13 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase): ...@@ -616,11 +620,13 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
[[0, 0], [0, 0]]]]) [[0, 0], [0, 0]]]])
(nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
num_detections) = post_processing.batch_multiclass_non_max_suppression( nmsed_additional_fields, num_detections
boxes, scores, score_thresh, iou_thresh, ) = post_processing.batch_multiclass_non_max_suppression(
max_size_per_class=max_output_size, max_total_size=max_output_size, boxes, scores, score_thresh, iou_thresh,
masks=masks) max_size_per_class=max_output_size, max_total_size=max_output_size,
masks=masks)
self.assertIsNone(nmsed_additional_fields)
# Check static shapes # Check static shapes
self.assertAllEqual(nmsed_boxes.shape.as_list(), exp_nms_corners.shape) self.assertAllEqual(nmsed_boxes.shape.as_list(), exp_nms_corners.shape)
self.assertAllEqual(nmsed_scores.shape.as_list(), exp_nms_scores.shape) self.assertAllEqual(nmsed_scores.shape.as_list(), exp_nms_scores.shape)
...@@ -639,6 +645,91 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase): ...@@ -639,6 +645,91 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
self.assertAllClose(num_detections, [2, 3]) self.assertAllClose(num_detections, [2, 3])
self.assertAllClose(nmsed_masks, exp_nms_masks) self.assertAllClose(nmsed_masks, exp_nms_masks)
def test_batch_multiclass_nms_with_additional_fields(self):
boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
[[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
[[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
[[0, 10, 1, 11], [0, 10, 1, 11]]],
[[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
[[0, 100, 1, 101], [0, 100, 1, 101]],
[[0, 1000, 1, 1002], [0, 999, 2, 1004]],
[[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
tf.float32)
scores = tf.constant([[[.9, 0.01], [.75, 0.05],
[.6, 0.01], [.95, 0]],
[[.5, 0.01], [.3, 0.01],
[.01, .85], [.01, .5]]])
additional_fields = {
'keypoints': tf.constant(
[[[[6, 7], [8, 9]],
[[0, 1], [2, 3]],
[[0, 0], [0, 0]],
[[0, 0], [0, 0]]],
[[[13, 14], [15, 16]],
[[8, 9], [10, 11]],
[[10, 11], [12, 13]],
[[0, 0], [0, 0]]]],
tf.float32)
}
score_thresh = 0.1
iou_thresh = .5
max_output_size = 4
exp_nms_corners = np.array([[[0, 10, 1, 11],
[0, 0, 1, 1],
[0, 0, 0, 0],
[0, 0, 0, 0]],
[[0, 999, 2, 1004],
[0, 10.1, 1, 11.1],
[0, 100, 1, 101],
[0, 0, 0, 0]]])
exp_nms_scores = np.array([[.95, .9, 0, 0],
[.85, .5, .3, 0]])
exp_nms_classes = np.array([[0, 0, 0, 0],
[1, 0, 0, 0]])
exp_nms_additional_fields = {
'keypoints': np.array([[[[0, 0], [0, 0]],
[[6, 7], [8, 9]],
[[0, 0], [0, 0]],
[[0, 0], [0, 0]]],
[[[10, 11], [12, 13]],
[[13, 14], [15, 16]],
[[8, 9], [10, 11]],
[[0, 0], [0, 0]]]])
}
(nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
nmsed_additional_fields, num_detections
) = post_processing.batch_multiclass_non_max_suppression(
boxes, scores, score_thresh, iou_thresh,
max_size_per_class=max_output_size, max_total_size=max_output_size,
additional_fields=additional_fields)
self.assertIsNone(nmsed_masks)
# Check static shapes
self.assertAllEqual(nmsed_boxes.shape.as_list(), exp_nms_corners.shape)
self.assertAllEqual(nmsed_scores.shape.as_list(), exp_nms_scores.shape)
self.assertAllEqual(nmsed_classes.shape.as_list(), exp_nms_classes.shape)
self.assertEqual(len(nmsed_additional_fields),
len(exp_nms_additional_fields))
for key in exp_nms_additional_fields:
self.assertAllEqual(nmsed_additional_fields[key].shape.as_list(),
exp_nms_additional_fields[key].shape)
self.assertEqual(num_detections.shape.as_list(), [2])
with self.test_session() as sess:
(nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_additional_fields,
num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,
nmsed_additional_fields, num_detections])
self.assertAllClose(nmsed_boxes, exp_nms_corners)
self.assertAllClose(nmsed_scores, exp_nms_scores)
self.assertAllClose(nmsed_classes, exp_nms_classes)
for key in exp_nms_additional_fields:
self.assertAllClose(nmsed_additional_fields[key],
exp_nms_additional_fields[key])
self.assertAllClose(num_detections, [2, 3])
def test_batch_multiclass_nms_with_dynamic_batch_size(self): def test_batch_multiclass_nms_with_dynamic_batch_size(self):
boxes_placeholder = tf.placeholder(tf.float32, shape=(None, None, 2, 4)) boxes_placeholder = tf.placeholder(tf.float32, shape=(None, None, 2, 4))
scores_placeholder = tf.placeholder(tf.float32, shape=(None, None, 2)) scores_placeholder = tf.placeholder(tf.float32, shape=(None, None, 2))
...@@ -690,11 +781,13 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase): ...@@ -690,11 +781,13 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
[[0, 0], [0, 0]]]]) [[0, 0], [0, 0]]]])
(nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
num_detections) = post_processing.batch_multiclass_non_max_suppression( nmsed_additional_fields, num_detections
boxes_placeholder, scores_placeholder, score_thresh, iou_thresh, ) = post_processing.batch_multiclass_non_max_suppression(
max_size_per_class=max_output_size, max_total_size=max_output_size, boxes_placeholder, scores_placeholder, score_thresh, iou_thresh,
masks=masks_placeholder) max_size_per_class=max_output_size, max_total_size=max_output_size,
masks=masks_placeholder)
self.assertIsNone(nmsed_additional_fields)
# Check static shapes # Check static shapes
self.assertAllEqual(nmsed_boxes.shape.as_list(), [None, 4, 4]) self.assertAllEqual(nmsed_boxes.shape.as_list(), [None, 4, 4])
self.assertAllEqual(nmsed_scores.shape.as_list(), [None, 4]) self.assertAllEqual(nmsed_scores.shape.as_list(), [None, 4])
...@@ -765,10 +858,13 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase): ...@@ -765,10 +858,13 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
[[0, 0], [0, 0]]]] [[0, 0], [0, 0]]]]
(nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
num_detections) = post_processing.batch_multiclass_non_max_suppression( nmsed_additional_fields, num_detections
boxes, scores, score_thresh, iou_thresh, ) = post_processing.batch_multiclass_non_max_suppression(
max_size_per_class=max_output_size, max_total_size=max_output_size, boxes, scores, score_thresh, iou_thresh,
num_valid_boxes=num_valid_boxes, masks=masks) max_size_per_class=max_output_size, max_total_size=max_output_size,
num_valid_boxes=num_valid_boxes, masks=masks)
self.assertIsNone(nmsed_additional_fields)
with self.test_session() as sess: with self.test_session() as sess:
(nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
...@@ -780,6 +876,84 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase): ...@@ -780,6 +876,84 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
self.assertAllClose(num_detections, [1, 1]) self.assertAllClose(num_detections, [1, 1])
self.assertAllClose(nmsed_masks, exp_nms_masks) self.assertAllClose(nmsed_masks, exp_nms_masks)
def test_batch_multiclass_nms_with_additional_fields_and_num_valid_boxes(
self):
boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
[[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
[[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
[[0, 10, 1, 11], [0, 10, 1, 11]]],
[[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
[[0, 100, 1, 101], [0, 100, 1, 101]],
[[0, 1000, 1, 1002], [0, 999, 2, 1004]],
[[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
tf.float32)
scores = tf.constant([[[.9, 0.01], [.75, 0.05],
[.6, 0.01], [.95, 0]],
[[.5, 0.01], [.3, 0.01],
[.01, .85], [.01, .5]]])
additional_fields = {
'keypoints': tf.constant(
[[[[6, 7], [8, 9]],
[[0, 1], [2, 3]],
[[0, 0], [0, 0]],
[[0, 0], [0, 0]]],
[[[13, 14], [15, 16]],
[[8, 9], [10, 11]],
[[10, 11], [12, 13]],
[[0, 0], [0, 0]]]],
tf.float32)
}
num_valid_boxes = tf.constant([1, 1], tf.int32)
score_thresh = 0.1
iou_thresh = .5
max_output_size = 4
exp_nms_corners = [[[0, 0, 1, 1],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]],
[[0, 10.1, 1, 11.1],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]]]
exp_nms_scores = [[.9, 0, 0, 0],
[.5, 0, 0, 0]]
exp_nms_classes = [[0, 0, 0, 0],
[0, 0, 0, 0]]
exp_nms_additional_fields = {
'keypoints': np.array([[[[6, 7], [8, 9]],
[[0, 0], [0, 0]],
[[0, 0], [0, 0]],
[[0, 0], [0, 0]]],
[[[13, 14], [15, 16]],
[[0, 0], [0, 0]],
[[0, 0], [0, 0]],
[[0, 0], [0, 0]]]])
}
(nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
nmsed_additional_fields, num_detections
) = post_processing.batch_multiclass_non_max_suppression(
boxes, scores, score_thresh, iou_thresh,
max_size_per_class=max_output_size, max_total_size=max_output_size,
num_valid_boxes=num_valid_boxes,
additional_fields=additional_fields)
self.assertIsNone(nmsed_masks)
with self.test_session() as sess:
(nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_additional_fields,
num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,
nmsed_additional_fields, num_detections])
self.assertAllClose(nmsed_boxes, exp_nms_corners)
self.assertAllClose(nmsed_scores, exp_nms_scores)
self.assertAllClose(nmsed_classes, exp_nms_classes)
for key in exp_nms_additional_fields:
self.assertAllClose(nmsed_additional_fields[key],
exp_nms_additional_fields[key])
self.assertAllClose(num_detections, [1, 1])
if __name__ == '__main__': if __name__ == '__main__':
tf.test.main() tf.test.main()
...@@ -18,6 +18,7 @@ py_library( ...@@ -18,6 +18,7 @@ py_library(
"//tensorflow_models/object_detection/core:model", "//tensorflow_models/object_detection/core:model",
"//tensorflow_models/object_detection/core:target_assigner", "//tensorflow_models/object_detection/core:target_assigner",
"//tensorflow_models/object_detection/utils:shape_utils", "//tensorflow_models/object_detection/utils:shape_utils",
"//tensorflow_models/object_detection/utils:visualization_utils",
], ],
) )
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
"""Tests for object_detection.meta_architectures.faster_rcnn_meta_arch.""" """Tests for object_detection.meta_architectures.faster_rcnn_meta_arch."""
import numpy as np
import tensorflow as tf import tensorflow as tf
from object_detection.meta_architectures import faster_rcnn_meta_arch_test_lib from object_detection.meta_architectures import faster_rcnn_meta_arch_test_lib
...@@ -46,19 +47,19 @@ class FasterRCNNMetaArchTest( ...@@ -46,19 +47,19 @@ class FasterRCNNMetaArchTest(
mask_height = 2 mask_height = 2
mask_width = 2 mask_width = 2
mask_predictions = .6 * tf.ones( mask_predictions = 30. * tf.ones(
[total_num_padded_proposals, model.num_classes, [total_num_padded_proposals, model.num_classes,
mask_height, mask_width], dtype=tf.float32) mask_height, mask_width], dtype=tf.float32)
exp_detection_masks = [[[[1, 1], [1, 1]], exp_detection_masks = np.array([[[[1, 1], [1, 1]],
[[1, 1], [1, 1]], [[1, 1], [1, 1]],
[[1, 1], [1, 1]], [[1, 1], [1, 1]],
[[1, 1], [1, 1]], [[1, 1], [1, 1]],
[[1, 1], [1, 1]]], [[1, 1], [1, 1]]],
[[[1, 1], [1, 1]], [[[1, 1], [1, 1]],
[[1, 1], [1, 1]], [[1, 1], [1, 1]],
[[1, 1], [1, 1]], [[1, 1], [1, 1]],
[[1, 1], [1, 1]], [[1, 1], [1, 1]],
[[0, 0], [0, 0]]]] [[0, 0], [0, 0]]]])
detections = model.postprocess({ detections = model.postprocess({
'refined_box_encodings': refined_box_encodings, 'refined_box_encodings': refined_box_encodings,
...@@ -79,6 +80,17 @@ class FasterRCNNMetaArchTest( ...@@ -79,6 +80,17 @@ class FasterRCNNMetaArchTest(
self.assertAllClose(detections_out['detection_masks'], self.assertAllClose(detections_out['detection_masks'],
exp_detection_masks) exp_detection_masks)
def _get_box_classifier_features_shape(self,
image_size,
batch_size,
max_num_proposals,
initial_crop_size,
maxpool_stride,
num_features):
return (batch_size * max_num_proposals,
initial_crop_size/maxpool_stride,
initial_crop_size/maxpool_stride,
num_features)
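The helper above mirrors the usual Faster R-CNN geometry: ROI crops of size `initial_crop_size` are max-pooled with stride `maxpool_stride` before the box classifier, and proposals are folded into the batch dimension. A quick arithmetic check with made-up values:

batch_size, max_num_proposals = 2, 8
initial_crop_size, maxpool_stride, num_features = 14, 2, 576
expected_shape = (batch_size * max_num_proposals,
                  initial_crop_size // maxpool_stride,
                  initial_crop_size // maxpool_stride,
                  num_features)
print(expected_shape)  # (16, 7, 7, 576)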
if __name__ == '__main__': if __name__ == '__main__':
tf.test.main() tf.test.main()
...@@ -73,6 +73,7 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch): ...@@ -73,6 +73,7 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
second_stage_score_conversion_fn, second_stage_score_conversion_fn,
second_stage_localization_loss_weight, second_stage_localization_loss_weight,
second_stage_classification_loss_weight, second_stage_classification_loss_weight,
second_stage_classification_loss,
hard_example_miner, hard_example_miner,
parallel_iterations=16): parallel_iterations=16):
"""RFCNMetaArch Constructor. """RFCNMetaArch Constructor.
...@@ -149,6 +150,8 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch): ...@@ -149,6 +150,8 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
used to convert logits to probabilities. used to convert logits to probabilities.
second_stage_localization_loss_weight: A float second_stage_localization_loss_weight: A float
second_stage_classification_loss_weight: A float second_stage_classification_loss_weight: A float
second_stage_classification_loss: A string indicating which loss function
to use, supports 'softmax' and 'sigmoid'.
hard_example_miner: A losses.HardExampleMiner object (can be None). hard_example_miner: A losses.HardExampleMiner object (can be None).
parallel_iterations: (Optional) The number of iterations allowed to run parallel_iterations: (Optional) The number of iterations allowed to run
in parallel for calls to tf.map_fn. in parallel for calls to tf.map_fn.
...@@ -185,6 +188,8 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch): ...@@ -185,6 +188,8 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
second_stage_score_conversion_fn, second_stage_score_conversion_fn,
second_stage_localization_loss_weight, second_stage_localization_loss_weight,
second_stage_classification_loss_weight, second_stage_classification_loss_weight,
second_stage_classification_loss,
1.0, # second stage mask prediction loss weight isn't used in R-FCN.
hard_example_miner, hard_example_miner,
parallel_iterations) parallel_iterations)
...@@ -198,10 +203,10 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch): ...@@ -198,10 +203,10 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
"""Predicts the output tensors from 2nd stage of FasterRCNN. """Predicts the output tensors from 2nd stage of FasterRCNN.
Args: Args:
rpn_box_encodings: 3-D float tensor of shape rpn_box_encodings: 4-D float tensor of shape
[batch_size, num_valid_anchors, self._box_coder.code_size] containing [batch_size, num_valid_anchors, self._box_coder.code_size] containing
predicted boxes. predicted boxes.
rpn_objectness_predictions_with_background: 3-D float tensor of shape rpn_objectness_predictions_with_background: 2-D float tensor of shape
[batch_size, num_valid_anchors, 2] containing class [batch_size, num_valid_anchors, 2] containing class
predictions (logits) for each of the anchors. Note that this predictions (logits) for each of the anchors. Note that this
tensor *includes* background class predictions (at class index 0). tensor *includes* background class predictions (at class index 0).
...@@ -225,13 +230,22 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch): ...@@ -225,13 +230,22 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
Note that this tensor *includes* background class predictions Note that this tensor *includes* background class predictions
(at class index 0). (at class index 0).
3) num_proposals: An int32 tensor of shape [batch_size] representing the 3) num_proposals: An int32 tensor of shape [batch_size] representing the
number of proposals generated by the RPN. `num_proposals` allows us number of proposals generated by the RPN. `num_proposals` allows us
to keep track of which entries are to be treated as zero paddings and to keep track of which entries are to be treated as zero paddings and
which are not since we always pad the number of proposals to be which are not since we always pad the number of proposals to be
`self.max_num_proposals` for each image. `self.max_num_proposals` for each image.
4) proposal_boxes: A float32 tensor of shape 4) proposal_boxes: A float32 tensor of shape
[batch_size, self.max_num_proposals, 4] representing [batch_size, self.max_num_proposals, 4] representing
decoded proposal bounding boxes (in absolute coordinates). decoded proposal bounding boxes (in absolute coordinates).
5) proposal_boxes_normalized: A float32 tensor of shape
[batch_size, self.max_num_proposals, 4] representing decoded proposal
bounding boxes (in normalized coordinates). Can be used to override
the boxes proposed by the RPN, thus enabling one to extract box
classification and prediction for externally selected areas of the
image.
6) box_classifier_features: a 4-D float32 tensor, of shape
[batch_size, feature_map_height, feature_map_width, depth],
representing the box classifier features.
""" """
proposal_boxes_normalized, _, num_proposals = self._postprocess_rpn( proposal_boxes_normalized, _, num_proposals = self._postprocess_rpn(
rpn_box_encodings, rpn_objectness_predictions_with_background, rpn_box_encodings, rpn_objectness_predictions_with_background,
...@@ -263,5 +277,7 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch): ...@@ -263,5 +277,7 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
class_predictions_with_background, class_predictions_with_background,
'num_proposals': num_proposals, 'num_proposals': num_proposals,
'proposal_boxes': absolute_proposal_boxes, 'proposal_boxes': absolute_proposal_boxes,
'box_classifier_features': box_classifier_features,
'proposal_boxes_normalized': proposal_boxes_normalized,
} }
return prediction_dict return prediction_dict
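With the two new entries, a caller can read the normalized proposals and the shared classifier features straight off the prediction dictionary. A hypothetical caller sketch (assumes an already constructed `model` and `preprocessed_inputs`; names are illustrative):

prediction_dict = model.predict(preprocessed_inputs)
# [batch_size, max_num_proposals, 4], in normalized coordinates:
proposal_boxes_normalized = prediction_dict['proposal_boxes_normalized']
# [batch_size, feature_map_height, feature_map_width, depth]:
box_classifier_features = prediction_dict['box_classifier_features']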
...@@ -51,6 +51,15 @@ class RFCNMetaArchTest( ...@@ -51,6 +51,15 @@ class RFCNMetaArchTest(
return rfcn_meta_arch.RFCNMetaArch( return rfcn_meta_arch.RFCNMetaArch(
second_stage_rfcn_box_predictor=box_predictor, **common_kwargs) second_stage_rfcn_box_predictor=box_predictor, **common_kwargs)
def _get_box_classifier_features_shape(self,
image_size,
batch_size,
max_num_proposals,
initial_crop_size,
maxpool_stride,
num_features):
return (batch_size, image_size, image_size, num_features)
if __name__ == '__main__': if __name__ == '__main__':
tf.test.main() tf.test.main()
...@@ -12,7 +12,6 @@ ...@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# ============================================================================== # ==============================================================================
"""SSD Meta-architecture definition. """SSD Meta-architecture definition.
General tensorflow implementation of convolutional Multibox/SSD detection General tensorflow implementation of convolutional Multibox/SSD detection
...@@ -29,6 +28,7 @@ from object_detection.core import model ...@@ -29,6 +28,7 @@ from object_detection.core import model
from object_detection.core import standard_fields as fields from object_detection.core import standard_fields as fields
from object_detection.core import target_assigner from object_detection.core import target_assigner
from object_detection.utils import shape_utils from object_detection.utils import shape_utils
from object_detection.utils import visualization_utils
slim = tf.contrib.slim slim = tf.contrib.slim
...@@ -37,13 +37,34 @@ class SSDFeatureExtractor(object): ...@@ -37,13 +37,34 @@ class SSDFeatureExtractor(object):
"""SSD Feature Extractor definition.""" """SSD Feature Extractor definition."""
def __init__(self, def __init__(self,
is_training,
depth_multiplier, depth_multiplier,
min_depth, min_depth,
pad_to_multiple,
conv_hyperparams, conv_hyperparams,
batch_norm_trainable=True,
reuse_weights=None): reuse_weights=None):
"""Constructor.
Args:
is_training: whether the network is in training mode.
depth_multiplier: float depth multiplier for feature extractor.
min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
batch_norm_trainable: Whether to update batch norm parameters during
training or not. When training with a small batch size
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
reuse_weights: whether to reuse variables. Default is None.
"""
self._is_training = is_training
self._depth_multiplier = depth_multiplier self._depth_multiplier = depth_multiplier
self._min_depth = min_depth self._min_depth = min_depth
self._pad_to_multiple = pad_to_multiple
self._conv_hyperparams = conv_hyperparams self._conv_hyperparams = conv_hyperparams
self._batch_norm_trainable = batch_norm_trainable
self._reuse_weights = reuse_weights self._reuse_weights = reuse_weights
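`pad_to_multiple` lets a feature extractor zero-pad the input so that every stride divides the spatial size evenly. A minimal sketch of the intended rounding (illustrative; the actual padding op lives in the concrete feature extractors):

import math

def padded_dim(dim, pad_to_multiple):
  # Round dim up to the nearest multiple, e.g. 300 -> 320 for a multiple of 32.
  return int(math.ceil(dim / float(pad_to_multiple)) * pad_to_multiple)

print(padded_dim(300, 1))   # 300 (a multiple of 1 means no padding)
print(padded_dim(300, 32))  # 320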
@abstractmethod @abstractmethod
...@@ -101,9 +122,9 @@ class SSDMetaArch(model.DetectionModel): ...@@ -101,9 +122,9 @@ class SSDMetaArch(model.DetectionModel):
add_summaries=True): add_summaries=True):
"""SSDMetaArch Constructor. """SSDMetaArch Constructor.
TODO: group NMS parameters + score converter into TODO: group NMS parameters + score converter into a class and loss
a class and loss parameters into a class and write config protos for parameters into a class and write config protos for postprocessing
postprocessing and losses. and losses.
Args: Args:
is_training: A boolean indicating whether the training version of the is_training: A boolean indicating whether the training version of the
...@@ -204,8 +225,8 @@ class SSDMetaArch(model.DetectionModel): ...@@ -204,8 +225,8 @@ class SSDMetaArch(model.DetectionModel):
if inputs.dtype is not tf.float32: if inputs.dtype is not tf.float32:
raise ValueError('`preprocess` expects a tf.float32 tensor') raise ValueError('`preprocess` expects a tf.float32 tensor')
with tf.name_scope('Preprocessor'): with tf.name_scope('Preprocessor'):
# TODO: revisit whether to always use batch size as the number of # TODO: revisit whether to always use batch size as the number of parallel
# parallel iterations vs allow for dynamic batching. # iterations vs allow for dynamic batching.
resized_inputs = tf.map_fn(self._image_resizer_fn, resized_inputs = tf.map_fn(self._image_resizer_fn,
elems=inputs, elems=inputs,
dtype=tf.float32) dtype=tf.float32)
...@@ -226,7 +247,7 @@ class SSDMetaArch(model.DetectionModel): ...@@ -226,7 +247,7 @@ class SSDMetaArch(model.DetectionModel):
Returns: Returns:
prediction_dict: a dictionary holding "raw" prediction tensors: prediction_dict: a dictionary holding "raw" prediction tensors:
1) box_encodings: 3-D float tensor of shape [batch_size, num_anchors, 1) box_encodings: 4-D float tensor of shape [batch_size, num_anchors,
box_code_dimension] containing predicted boxes. box_code_dimension] containing predicted boxes.
2) class_predictions_with_background: 3-D float tensor of shape 2) class_predictions_with_background: 3-D float tensor of shape
[batch_size, num_anchors, num_classes+1] containing class predictions [batch_size, num_anchors, num_classes+1] containing class predictions
...@@ -234,19 +255,26 @@ class SSDMetaArch(model.DetectionModel): ...@@ -234,19 +255,26 @@ class SSDMetaArch(model.DetectionModel):
background class predictions (at class index 0). background class predictions (at class index 0).
3) feature_maps: a list of tensors where the ith tensor has shape 3) feature_maps: a list of tensors where the ith tensor has shape
[batch, height_i, width_i, depth_i]. [batch, height_i, width_i, depth_i].
4) anchors: 2-D float tensor of shape [num_anchors, 4] containing
the generated anchors in normalized coordinates.
""" """
with tf.variable_scope(None, self._extract_features_scope, with tf.variable_scope(None, self._extract_features_scope,
[preprocessed_inputs]): [preprocessed_inputs]):
feature_maps = self._feature_extractor.extract_features( feature_maps = self._feature_extractor.extract_features(
preprocessed_inputs) preprocessed_inputs)
feature_map_spatial_dims = self._get_feature_map_spatial_dims(feature_maps) feature_map_spatial_dims = self._get_feature_map_spatial_dims(feature_maps)
self._anchors = self._anchor_generator.generate(feature_map_spatial_dims) image_shape = tf.shape(preprocessed_inputs)
self._anchors = self._anchor_generator.generate(
feature_map_spatial_dims,
im_height=image_shape[1],
im_width=image_shape[2])
(box_encodings, class_predictions_with_background (box_encodings, class_predictions_with_background
) = self._add_box_predictions_to_feature_maps(feature_maps) ) = self._add_box_predictions_to_feature_maps(feature_maps)
predictions_dict = { predictions_dict = {
'box_encodings': box_encodings, 'box_encodings': box_encodings,
'class_predictions_with_background': class_predictions_with_background, 'class_predictions_with_background': class_predictions_with_background,
'feature_maps': feature_maps 'feature_maps': feature_maps,
'anchors': self._anchors.get()
} }
return predictions_dict return predictions_dict
...@@ -351,9 +379,11 @@ class SSDMetaArch(model.DetectionModel): ...@@ -351,9 +379,11 @@ class SSDMetaArch(model.DetectionModel):
Returns: Returns:
detections: a dictionary containing the following fields detections: a dictionary containing the following fields
detection_boxes: [batch, max_detection, 4] detection_boxes: [batch, max_detections, 4]
detection_scores: [batch, max_detections] detection_scores: [batch, max_detections]
detection_classes: [batch, max_detections] detection_classes: [batch, max_detections]
detection_keypoints: [batch, max_detections, num_keypoints, 2] (if
encoded in the prediction_dict 'box_encodings')
num_detections: [batch] num_detections: [batch]
Raises: Raises:
ValueError: if prediction_dict does not contain `box_encodings` or ValueError: if prediction_dict does not contain `box_encodings` or
...@@ -365,7 +395,7 @@ class SSDMetaArch(model.DetectionModel): ...@@ -365,7 +395,7 @@ class SSDMetaArch(model.DetectionModel):
with tf.name_scope('Postprocessor'): with tf.name_scope('Postprocessor'):
box_encodings = prediction_dict['box_encodings'] box_encodings = prediction_dict['box_encodings']
class_predictions = prediction_dict['class_predictions_with_background'] class_predictions = prediction_dict['class_predictions_with_background']
detection_boxes = self._batch_decode(box_encodings) detection_boxes, detection_keypoints = self._batch_decode(box_encodings)
detection_boxes = tf.expand_dims(detection_boxes, axis=2) detection_boxes = tf.expand_dims(detection_boxes, axis=2)
class_predictions_without_background = tf.slice(class_predictions, class_predictions_without_background = tf.slice(class_predictions,
...@@ -374,14 +404,25 @@ class SSDMetaArch(model.DetectionModel): ...@@ -374,14 +404,25 @@ class SSDMetaArch(model.DetectionModel):
detection_scores = self._score_conversion_fn( detection_scores = self._score_conversion_fn(
class_predictions_without_background) class_predictions_without_background)
clip_window = tf.constant([0, 0, 1, 1], tf.float32) clip_window = tf.constant([0, 0, 1, 1], tf.float32)
(nmsed_boxes, nmsed_scores, nmsed_classes, _, additional_fields = None
num_detections) = self._non_max_suppression_fn(detection_boxes, if detection_keypoints is not None:
detection_scores, additional_fields = {
clip_window=clip_window) fields.BoxListFields.keypoints: detection_keypoints}
return {'detection_boxes': nmsed_boxes, (nmsed_boxes, nmsed_scores, nmsed_classes, _, nmsed_additional_fields,
'detection_scores': nmsed_scores, num_detections) = self._non_max_suppression_fn(
'detection_classes': nmsed_classes, detection_boxes,
'num_detections': tf.to_float(num_detections)} detection_scores,
clip_window=clip_window,
additional_fields=additional_fields)
detection_dict = {'detection_boxes': nmsed_boxes,
'detection_scores': nmsed_scores,
'detection_classes': nmsed_classes,
'num_detections': tf.to_float(num_detections)}
if (nmsed_additional_fields is not None and
fields.BoxListFields.keypoints in nmsed_additional_fields):
detection_dict['detection_keypoints'] = nmsed_additional_fields[
fields.BoxListFields.keypoints]
return detection_dict
def loss(self, prediction_dict, scope=None): def loss(self, prediction_dict, scope=None):
"""Compute scalar loss tensors with respect to provided groundtruth. """Compute scalar loss tensors with respect to provided groundtruth.
...@@ -395,7 +436,7 @@ class SSDMetaArch(model.DetectionModel): ...@@ -395,7 +436,7 @@ class SSDMetaArch(model.DetectionModel):
box_code_dimension] containing predicted boxes. box_code_dimension] containing predicted boxes.
2) class_predictions_with_background: 3-D float tensor of shape 2) class_predictions_with_background: 3-D float tensor of shape
[batch_size, num_anchors, num_classes+1] containing class predictions [batch_size, num_anchors, num_classes+1] containing class predictions
(logits) for each of the anchors. Note that this tensor *includes* (logits) for each of the anchors. Note that this tensor *includes*
background class predictions. background class predictions.
scope: Optional scope name. scope: Optional scope name.
...@@ -405,10 +446,14 @@ class SSDMetaArch(model.DetectionModel): ...@@ -405,10 +446,14 @@ class SSDMetaArch(model.DetectionModel):
values. values.
""" """
with tf.name_scope(scope, 'Loss', prediction_dict.values()): with tf.name_scope(scope, 'Loss', prediction_dict.values()):
keypoints = None
if self.groundtruth_has_field(fields.BoxListFields.keypoints):
keypoints = self.groundtruth_lists(fields.BoxListFields.keypoints)
(batch_cls_targets, batch_cls_weights, batch_reg_targets, (batch_cls_targets, batch_cls_weights, batch_reg_targets,
batch_reg_weights, match_list) = self._assign_targets( batch_reg_weights, match_list) = self._assign_targets(
self.groundtruth_lists(fields.BoxListFields.boxes), self.groundtruth_lists(fields.BoxListFields.boxes),
self.groundtruth_lists(fields.BoxListFields.classes)) self.groundtruth_lists(fields.BoxListFields.classes),
keypoints)
if self._add_summaries: if self._add_summaries:
self._summarize_input( self._summarize_input(
self.groundtruth_lists(fields.BoxListFields.boxes), match_list) self.groundtruth_lists(fields.BoxListFields.boxes), match_list)
...@@ -417,35 +462,60 @@ class SSDMetaArch(model.DetectionModel): ...@@ -417,35 +462,60 @@ class SSDMetaArch(model.DetectionModel):
location_losses = self._localization_loss( location_losses = self._localization_loss(
prediction_dict['box_encodings'], prediction_dict['box_encodings'],
batch_reg_targets, batch_reg_targets,
ignore_nan_targets=True,
weights=batch_reg_weights) weights=batch_reg_weights)
cls_losses = self._classification_loss( cls_losses = self._classification_loss(
prediction_dict['class_predictions_with_background'], prediction_dict['class_predictions_with_background'],
batch_cls_targets, batch_cls_targets,
weights=batch_cls_weights) weights=batch_cls_weights)
# Optionally apply hard mining on top of loss values
localization_loss = tf.reduce_sum(location_losses)
classification_loss = tf.reduce_sum(cls_losses)
if self._hard_example_miner: if self._hard_example_miner:
(localization_loss, classification_loss) = self._apply_hard_mining( (localization_loss, classification_loss) = self._apply_hard_mining(
location_losses, cls_losses, prediction_dict, match_list) location_losses, cls_losses, prediction_dict, match_list)
if self._add_summaries: if self._add_summaries:
self._hard_example_miner.summarize() self._hard_example_miner.summarize()
else:
if self._add_summaries:
class_ids = tf.argmax(batch_cls_targets, axis=2)
flattened_class_ids = tf.reshape(class_ids, [-1])
flattened_classification_losses = tf.reshape(cls_losses, [-1])
self._summarize_anchor_classification_loss(
flattened_class_ids, flattened_classification_losses)
localization_loss = tf.reduce_sum(location_losses)
classification_loss = tf.reduce_sum(cls_losses)
# Optionally normalize by number of positive matches # Optionally normalize by number of positive matches
normalizer = tf.constant(1.0, dtype=tf.float32) normalizer = tf.constant(1.0, dtype=tf.float32)
if self._normalize_loss_by_num_matches: if self._normalize_loss_by_num_matches:
normalizer = tf.maximum(tf.to_float(tf.reduce_sum(num_matches)), 1.0) normalizer = tf.maximum(tf.to_float(tf.reduce_sum(num_matches)), 1.0)
with tf.name_scope('localization_loss'):
localization_loss = ((self._localization_loss_weight / normalizer) *
localization_loss)
with tf.name_scope('classification_loss'):
classification_loss = ((self._classification_loss_weight / normalizer) *
classification_loss)
loss_dict = { loss_dict = {
'localization_loss': (self._localization_loss_weight / normalizer) * 'localization_loss': localization_loss,
localization_loss, 'classification_loss': classification_loss
'classification_loss': (self._classification_loss_weight /
normalizer) * classification_loss
} }
return loss_dict return loss_dict
def _assign_targets(self, groundtruth_boxes_list, groundtruth_classes_list): def _summarize_anchor_classification_loss(self, class_ids, cls_losses):
positive_indices = tf.where(tf.greater(class_ids, 0))
positive_anchor_cls_loss = tf.squeeze(
tf.gather(cls_losses, positive_indices), axis=1)
visualization_utils.add_cdf_image_summary(positive_anchor_cls_loss,
'PositiveAnchorLossCDF')
negative_indices = tf.where(tf.equal(class_ids, 0))
negative_anchor_cls_loss = tf.squeeze(
tf.gather(cls_losses, negative_indices), axis=1)
visualization_utils.add_cdf_image_summary(negative_anchor_cls_loss,
'NegativeAnchorLossCDF')
def _assign_targets(self, groundtruth_boxes_list, groundtruth_classes_list,
groundtruth_keypoints_list=None):
"""Assign groundtruth targets. """Assign groundtruth targets.
Adds a background class to each one-hot encoding of groundtruth classes Adds a background class to each one-hot encoding of groundtruth classes
...@@ -460,6 +530,8 @@ class SSDMetaArch(model.DetectionModel): ...@@ -460,6 +530,8 @@ class SSDMetaArch(model.DetectionModel):
groundtruth_classes_list: a list of 2-D one-hot (or k-hot) tensors of groundtruth_classes_list: a list of 2-D one-hot (or k-hot) tensors of
shape [num_boxes, num_classes] containing the class targets with the 0th shape [num_boxes, num_classes] containing the class targets with the 0th
index assumed to map to the first non-background class. index assumed to map to the first non-background class.
groundtruth_keypoints_list: (optional) a list of 3-D tensors of shape
[num_boxes, num_keypoints, 2]
Returns: Returns:
batch_cls_targets: a tensor with shape [batch_size, num_anchors, batch_cls_targets: a tensor with shape [batch_size, num_anchors,
...@@ -480,6 +552,10 @@ class SSDMetaArch(model.DetectionModel): ...@@ -480,6 +552,10 @@ class SSDMetaArch(model.DetectionModel):
tf.pad(one_hot_encoding, [[0, 0], [1, 0]], mode='CONSTANT') tf.pad(one_hot_encoding, [[0, 0], [1, 0]], mode='CONSTANT')
for one_hot_encoding in groundtruth_classes_list for one_hot_encoding in groundtruth_classes_list
] ]
if groundtruth_keypoints_list is not None:
for boxlist, keypoints in zip(
groundtruth_boxlists, groundtruth_keypoints_list):
boxlist.add_field(fields.BoxListFields.keypoints, keypoints)
return target_assigner.batch_assign_targets( return target_assigner.batch_assign_targets(
self._target_assigner, self.anchors, groundtruth_boxlists, self._target_assigner, self.anchors, groundtruth_boxlists,
groundtruth_classes_with_background_list) groundtruth_classes_with_background_list)
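The keypoints simply ride along as an extra field on each groundtruth BoxList so the target assigner can propagate them together with the boxes. A standalone sketch of that pattern (not the meta-arch code itself):

import tensorflow as tf
from object_detection.core import box_list
from object_detection.core import standard_fields as fields

groundtruth_boxes = tf.constant([[0., 0., 1., 1.]])          # [num_boxes, 4]
groundtruth_keypoints = tf.constant([[[.1, .1], [.9, .9]]])  # [num_boxes, num_keypoints, 2]
boxlist = box_list.BoxList(groundtruth_boxes)
boxlist.add_field(fields.BoxListFields.keypoints, groundtruth_keypoints)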
...@@ -544,12 +620,11 @@ class SSDMetaArch(model.DetectionModel): ...@@ -544,12 +620,11 @@ class SSDMetaArch(model.DetectionModel):
mined_cls_loss: a float scalar with sum of classification losses from mined_cls_loss: a float scalar with sum of classification losses from
selected hard examples. selected hard examples.
""" """
class_pred_shape = [-1, self.anchors.num_boxes_static(), self.num_classes] class_predictions = tf.slice(
class_predictions = tf.reshape( prediction_dict['class_predictions_with_background'], [0, 0,
tf.slice(prediction_dict['class_predictions_with_background'], 1], [-1, -1, -1])
[0, 0, 1], class_pred_shape), class_pred_shape)
decoded_boxes = self._batch_decode(prediction_dict['box_encodings']) decoded_boxes, _ = self._batch_decode(prediction_dict['box_encodings'])
decoded_box_tensors_list = tf.unstack(decoded_boxes) decoded_box_tensors_list = tf.unstack(decoded_boxes)
class_prediction_list = tf.unstack(class_predictions) class_prediction_list = tf.unstack(class_predictions)
decoded_boxlist_list = [] decoded_boxlist_list = []
...@@ -574,6 +649,9 @@ class SSDMetaArch(model.DetectionModel): ...@@ -574,6 +649,9 @@ class SSDMetaArch(model.DetectionModel):
Returns: Returns:
decoded_boxes: A float32 tensor of shape decoded_boxes: A float32 tensor of shape
[batch_size, num_anchors, 4] containing the decoded boxes. [batch_size, num_anchors, 4] containing the decoded boxes.
decoded_keypoints: A float32 tensor of shape
[batch_size, num_anchors, num_keypoints, 2] containing the decoded
keypoints if present in the input `box_encodings`, None otherwise.
""" """
combined_shape = shape_utils.combined_static_and_dynamic_shape( combined_shape = shape_utils.combined_static_and_dynamic_shape(
box_encodings) box_encodings)
...@@ -581,13 +659,21 @@ class SSDMetaArch(model.DetectionModel): ...@@ -581,13 +659,21 @@ class SSDMetaArch(model.DetectionModel):
tiled_anchor_boxes = tf.tile( tiled_anchor_boxes = tf.tile(
tf.expand_dims(self.anchors.get(), 0), [batch_size, 1, 1]) tf.expand_dims(self.anchors.get(), 0), [batch_size, 1, 1])
tiled_anchors_boxlist = box_list.BoxList( tiled_anchors_boxlist = box_list.BoxList(
tf.reshape(tiled_anchor_boxes, [-1, self._box_coder.code_size])) tf.reshape(tiled_anchor_boxes, [-1, 4]))
decoded_boxes = self._box_coder.decode( decoded_boxes = self._box_coder.decode(
tf.reshape(box_encodings, [-1, self._box_coder.code_size]), tf.reshape(box_encodings, [-1, self._box_coder.code_size]),
tiled_anchors_boxlist) tiled_anchors_boxlist)
return tf.reshape(decoded_boxes.get(), decoded_keypoints = None
tf.stack([combined_shape[0], combined_shape[1], if decoded_boxes.has_field(fields.BoxListFields.keypoints):
4])) decoded_keypoints = decoded_boxes.get_field(
fields.BoxListFields.keypoints)
num_keypoints = decoded_keypoints.get_shape()[1]
decoded_keypoints = tf.reshape(
decoded_keypoints,
tf.stack([combined_shape[0], combined_shape[1], num_keypoints, 2]))
decoded_boxes = tf.reshape(decoded_boxes.get(), tf.stack(
[combined_shape[0], combined_shape[1], 4]))
return decoded_boxes, decoded_keypoints
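The tile-decode-reshape pattern above can be exercised in isolation. A minimal sketch with the Faster R-CNN box coder (illustrative; all-zero encodings decode back to the anchors themselves):

import tensorflow as tf
from object_detection.box_coders import faster_rcnn_box_coder
from object_detection.core import box_list

box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
batch_size, num_anchors = 2, 3
box_encodings = tf.zeros([batch_size, num_anchors, box_coder.code_size])
anchors = tf.constant([[0., 0., 1., 1.], [0., 0., .5, .5], [.5, .5, 1., 1.]])

tiled_anchors_boxlist = box_list.BoxList(
    tf.reshape(tf.tile(tf.expand_dims(anchors, 0), [batch_size, 1, 1]), [-1, 4]))
decoded = box_coder.decode(
    tf.reshape(box_encodings, [-1, box_coder.code_size]), tiled_anchors_boxlist)
decoded_boxes = tf.reshape(decoded.get(), [batch_size, num_anchors, 4])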
def restore_map(self, from_detection_checkpoint=True): def restore_map(self, from_detection_checkpoint=True):
"""Returns a map of variables to load from a foreign checkpoint. """Returns a map of variables to load from a foreign checkpoint.
......
...@@ -18,7 +18,6 @@ import functools ...@@ -18,7 +18,6 @@ import functools
import numpy as np import numpy as np
import tensorflow as tf import tensorflow as tf
from tensorflow.python.training import saver as tf_saver
from object_detection.core import anchor_generator from object_detection.core import anchor_generator
from object_detection.core import box_list from object_detection.core import box_list
from object_detection.core import losses from object_detection.core import losses
...@@ -34,7 +33,12 @@ class FakeSSDFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor): ...@@ -34,7 +33,12 @@ class FakeSSDFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
def __init__(self): def __init__(self):
super(FakeSSDFeatureExtractor, self).__init__( super(FakeSSDFeatureExtractor, self).__init__(
depth_multiplier=0, min_depth=0, conv_hyperparams=None) is_training=True,
depth_multiplier=0,
min_depth=0,
pad_to_multiple=1,
batch_norm_trainable=True,
conv_hyperparams=None)
def preprocess(self, resized_inputs): def preprocess(self, resized_inputs):
return tf.identity(resized_inputs) return tf.identity(resized_inputs)
...@@ -55,7 +59,7 @@ class MockAnchorGenerator2x2(anchor_generator.AnchorGenerator): ...@@ -55,7 +59,7 @@ class MockAnchorGenerator2x2(anchor_generator.AnchorGenerator):
def num_anchors_per_location(self): def num_anchors_per_location(self):
return [1] return [1]
def _generate(self, feature_map_shape_list): def _generate(self, feature_map_shape_list, im_height, im_width):
return box_list.BoxList( return box_list.BoxList(
tf.constant([[0, 0, .5, .5], tf.constant([[0, 0, .5, .5],
[0, .5, .5, 1], [0, .5, .5, 1],
...@@ -147,6 +151,7 @@ class SsdMetaArchTest(tf.test.TestCase): ...@@ -147,6 +151,7 @@ class SsdMetaArchTest(tf.test.TestCase):
self.assertTrue('box_encodings' in prediction_dict) self.assertTrue('box_encodings' in prediction_dict)
self.assertTrue('class_predictions_with_background' in prediction_dict) self.assertTrue('class_predictions_with_background' in prediction_dict)
self.assertTrue('feature_maps' in prediction_dict) self.assertTrue('feature_maps' in prediction_dict)
self.assertTrue('anchors' in prediction_dict)
init_op = tf.global_variables_initializer() init_op = tf.global_variables_initializer()
with self.test_session(graph=tf_graph) as sess: with self.test_session(graph=tf_graph) as sess:
...@@ -242,7 +247,7 @@ class SsdMetaArchTest(tf.test.TestCase): ...@@ -242,7 +247,7 @@ class SsdMetaArchTest(tf.test.TestCase):
def test_restore_map_for_detection_ckpt(self): def test_restore_map_for_detection_ckpt(self):
init_op = tf.global_variables_initializer() init_op = tf.global_variables_initializer()
saver = tf_saver.Saver() saver = tf.train.Saver()
save_path = self.get_temp_dir() save_path = self.get_temp_dir()
with self.test_session() as sess: with self.test_session() as sess:
sess.run(init_op) sess.run(init_op)
......