Commit 0ba5a72b authored by TF Object Detection Team

Merge pull request #8895 from syiming:adjust_frcnn_meta_arch_to_multilevel_rpn_feature

PiperOrigin-RevId: 325370846
parents 80a6318b 18d95442
@@ -524,9 +524,31 @@ def _build_faster_rcnn_keras_feature_extractor(
         feature_type))
   feature_extractor_class = FASTER_RCNN_KERAS_FEATURE_EXTRACTOR_CLASS_MAP[
       feature_type]
+  kwargs = {}
+
+  if feature_extractor_config.HasField('conv_hyperparams'):
+    kwargs.update({
+        'conv_hyperparams':
+            hyperparams_builder.KerasLayerHyperparams(
+                feature_extractor_config.conv_hyperparams),
+        'override_base_feature_extractor_hyperparams':
+            feature_extractor_config.override_base_feature_extractor_hyperparams
+    })
+
+  if feature_extractor_config.HasField('fpn'):
+    kwargs.update({
+        'fpn_min_level':
+            feature_extractor_config.fpn.min_level,
+        'fpn_max_level':
+            feature_extractor_config.fpn.max_level,
+        'additional_layer_depth':
+            feature_extractor_config.fpn.additional_layer_depth,
+    })
+
   return feature_extractor_class(
       is_training, first_stage_features_stride,
-      batch_norm_trainable)
+      batch_norm_trainable, **kwargs)

 def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
...
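The builder change above forwards optional config fields to the feature extractor constructor as keyword arguments, so extractors that accept FPN arguments receive them while legacy extractors are untouched. A standalone sketch of that dispatch pattern (illustrative only; `FpnConfig` and `StubExtractor` stand in for the real protobuf config and registered extractor classes):

```python
class FpnConfig(object):

  def __init__(self, min_level=3, max_level=7, additional_layer_depth=256):
    self.min_level = min_level
    self.max_level = max_level
    self.additional_layer_depth = additional_layer_depth


class StubExtractor(object):

  def __init__(self, is_training, first_stage_features_stride,
               batch_norm_trainable, fpn_min_level=None, fpn_max_level=None,
               additional_layer_depth=None):
    self.fpn_min_level = fpn_min_level
    self.fpn_max_level = fpn_max_level
    self.additional_layer_depth = additional_layer_depth


def build_extractor(fpn_config=None):
  kwargs = {}
  if fpn_config is not None:  # stands in for HasField('fpn')
    kwargs.update({
        'fpn_min_level': fpn_config.min_level,
        'fpn_max_level': fpn_config.max_level,
        'additional_layer_depth': fpn_config.additional_layer_depth,
    })
  # Without an fpn config, **kwargs is empty and extractors that do not
  # accept FPN arguments keep working unchanged.
  return StubExtractor(True, 16, False, **kwargs)


print(build_extractor(FpnConfig()).fpn_min_level)  # 3
print(build_extractor().fpn_min_level)             # None
```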
@@ -310,6 +310,7 @@ class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
   def _compute_second_stage_input_feature_maps(self, features_to_crop,
                                                proposal_boxes_normalized,
+                                               image_shape,
                                                context_features,
                                                valid_context_size):
     """Crops to a set of proposals from the feature map for a batch of images.
@@ -324,6 +325,7 @@ class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
       proposal_boxes_normalized: A float32 Tensor with shape [batch_size,
         num_proposals, box_code_size] containing proposal boxes in normalized
         coordinates.
+      image_shape: A 1D int32 tensor of size [4] containing the image shape.
       context_features: A float Tensor of shape [batch_size, context_size,
         num_context_features].
       valid_context_size: A int32 Tensor of shape [batch_size].
@@ -331,9 +333,9 @@ class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
     Returns:
       A float32 Tensor with shape [K, new_height, new_width, depth].
     """
+    del image_shape
     box_features = self._crop_and_resize_fn(
-        [features_to_crop], proposal_boxes_normalized, None,
+        features_to_crop, proposal_boxes_normalized, None,
         [self._initial_crop_size, self._initial_crop_size])
     attention_features = self._context_feature_extract_fn(
...
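Context R-CNN can drop its explicit `[features_to_crop]` wrapping because `_crop_and_resize_fn` now takes a list of feature maps plus optional per-box level indices. A rough sketch of what such a multi-level crop does (ours, not the OD API's actual implementation, which is more efficient):

```python
import tensorflow.compat.v1 as tf


def multilevel_crop_and_resize(features, boxes, box_levels, crop_size):
  """Crops each box from the pyramid level it is assigned to (sketch).

  features: list of [batch, h_l, w_l, depth] tensors, one per level.
  boxes: [batch, num_boxes, 4] boxes in normalized coordinates.
  box_levels: [batch, num_boxes] int32 level per box, or None for level 0.
  crop_size: [crop_height, crop_width].
  """
  if box_levels is None:
    box_levels = tf.zeros(tf.shape(boxes)[:2], dtype=tf.int32)
  batch = tf.shape(boxes)[0]
  num_boxes = tf.shape(boxes)[1]
  flat_boxes = tf.reshape(boxes, [-1, 4])
  flat_levels = tf.reshape(box_levels, [-1])
  box_indices = tf.reshape(
      tf.tile(tf.range(batch)[:, tf.newaxis], [1, num_boxes]), [-1])
  # Naive strategy: crop every box from every level, then gather each box's
  # crop from its assigned level. O(levels * boxes), fine for a sketch.
  per_level = [
      tf.image.crop_and_resize(f, flat_boxes, box_indices, crop_size)
      for f in features
  ]
  stacked = tf.stack(per_level, axis=0)  # [levels, batch*num_boxes, h, w, d]
  gather_indices = tf.stack(
      [flat_levels, tf.range(batch * num_boxes)], axis=1)
  return tf.gather_nd(stacked, gather_indices)  # [batch*num_boxes, h, w, d]
```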
@@ -529,7 +529,8 @@ class ContextRCNNMetaArchTest(test_case.TestCase, parameterized.TestCase):
     (rpn_box_predictor_features, rpn_box_encodings, refined_box_encodings,
      proposal_boxes_normalized, proposal_boxes) = execute_fn(graph_fn, [],
                                                              graph=g)
-    self.assertAllEqual(rpn_box_predictor_features.shape, [2, 20, 20, 512])
+    self.assertAllEqual(len(rpn_box_predictor_features), 1)
+    self.assertAllEqual(rpn_box_predictor_features[0].shape, [2, 20, 20, 512])
     self.assertAllEqual(rpn_box_encodings.shape, [2, 3600, 4])
     self.assertAllEqual(refined_box_encodings.shape, [16, 42, 4])
     self.assertAllEqual(proposal_boxes_normalized.shape, [2, 8, 4])
...
@@ -99,7 +99,6 @@ import functools
 import tensorflow.compat.v1 as tf
 import tf_slim as slim

-from object_detection.anchor_generators import grid_anchor_generator
 from object_detection.builders import box_predictor_builder
 from object_detection.builders import hyperparams_builder
 from object_detection.core import box_list
@@ -451,11 +450,6 @@ class FasterRCNNMetaArch(model.DetectionModel):
     # in the future.
     super(FasterRCNNMetaArch, self).__init__(num_classes=num_classes)

-    if not isinstance(first_stage_anchor_generator,
-                      grid_anchor_generator.GridAnchorGenerator):
-      raise ValueError('first_stage_anchor_generator must be of type '
-                       'grid_anchor_generator.GridAnchorGenerator.')
-
     self._is_training = is_training
     self._image_resizer_fn = image_resizer_fn
     self._resize_masks = resize_masks
@@ -492,9 +486,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
                   hyperparams_builder.KerasLayerHyperparams):
       num_anchors_per_location = (
           self._first_stage_anchor_generator.num_anchors_per_location())
-      if len(num_anchors_per_location) != 1:
-        raise ValueError('anchor_generator is expected to generate anchors '
-                         'corresponding to a single feature map.')
+
       conv_hyperparams = (
           first_stage_box_predictor_arg_scope_fn)
       self._first_stage_box_predictor_first_conv = (
@@ -533,11 +525,10 @@ class FasterRCNNMetaArch(model.DetectionModel):
     else:
       self._first_stage_box_predictor_arg_scope_fn = (
           first_stage_box_predictor_arg_scope_fn)

-      def rpn_box_predictor_feature_extractor(rpn_features_to_crop):
+      def rpn_box_predictor_feature_extractor(single_rpn_features_to_crop):
         with slim.arg_scope(self._first_stage_box_predictor_arg_scope_fn()):
-          reuse = tf.get_variable_scope().reuse
           return slim.conv2d(
-              rpn_features_to_crop,
+              single_rpn_features_to_crop,
               self._first_stage_box_predictor_depth,
               kernel_size=[
                   self._first_stage_box_predictor_kernel_size,
@@ -546,7 +537,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
               rate=self._first_stage_atrous_rate,
               activation_fn=tf.nn.relu6,
               scope='Conv',
-              reuse=reuse)
+              reuse=tf.AUTO_REUSE)
       self._first_stage_box_predictor_first_conv = (
           rpn_box_predictor_feature_extractor)
     self._first_stage_box_predictor = (
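Replacing the captured `reuse` flag with `tf.AUTO_REUSE` is what allows the same slim conv to be invoked once per feature level while sharing its variables. A minimal, self-contained demonstration of the mechanism (not the meta-arch code itself):

```python
import tensorflow.compat.v1 as tf
import tf_slim as slim

tf.disable_v2_behavior()


def rpn_first_conv(features):
  # AUTO_REUSE creates 'Conv/weights' and 'Conv/biases' on the first call
  # and silently reuses them afterwards, so all pyramid levels share one
  # kernel instead of raising a variable-already-exists error.
  return slim.conv2d(features, 512, [3, 3], activation_fn=tf.nn.relu6,
                     scope='Conv', reuse=tf.AUTO_REUSE)


level_3 = tf.placeholder(tf.float32, [1, 64, 64, 256])
level_4 = tf.placeholder(tf.float32, [1, 32, 32, 256])
out_3 = rpn_first_conv(level_3)
out_4 = rpn_first_conv(level_4)
print([v.name for v in tf.global_variables()])
# ['Conv/weights:0', 'Conv/biases:0'] (one variable set despite two calls)
```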
@@ -762,10 +753,10 @@ class FasterRCNNMetaArch(model.DetectionModel):
     Returns:
       prediction_dict: a dictionary holding "raw" prediction tensors:
-        1) rpn_box_predictor_features: A 4-D float32 tensor with shape
-          [batch_size, height, width, depth] to be used for predicting proposal
-          boxes and corresponding objectness scores.
-        2) rpn_features_to_crop: A 4-D float32 tensor with shape
+        1) rpn_box_predictor_features: A list of 4-D float32 tensors with shape
+          [batch_size, height_i, width_i, depth] to be used for predicting
+          proposal boxes and corresponding objectness scores.
+        2) rpn_features_to_crop: A list of 4-D float32 tensors with shape
           [batch_size, height, width, depth] representing image features to crop
           using the proposal boxes predicted by the RPN.
         3) image_shape: a 1-D tensor of shape [4] representing the input
@@ -850,12 +841,12 @@ class FasterRCNNMetaArch(model.DetectionModel):
     Returns:
       prediction_dict: a dictionary holding "raw" prediction tensors:
-        1) rpn_box_predictor_features: A 4-D float32/bfloat16 tensor with shape
-          [batch_size, height, width, depth] to be used for predicting proposal
-          boxes and corresponding objectness scores.
-        2) rpn_features_to_crop: A 4-D float32/bfloat16 tensor with shape
-          [batch_size, height, width, depth] representing image features to crop
-          using the proposal boxes predicted by the RPN.
+        1) rpn_box_predictor_features: A list of 4-D float32/bfloat16 tensors
+          with shape [batch_size, height_i, width_i, depth] to be used for
+          predicting proposal boxes and corresponding objectness scores.
+        2) rpn_features_to_crop: A list of 4-D float32/bfloat16 tensors with
+          shape [batch_size, height, width, depth] representing image features
+          to crop using the proposal boxes predicted by the RPN.
         3) image_shape: a 1-D tensor of shape [4] representing the input
           image shape.
         4) rpn_box_encodings: 3-D float32 tensor of shape
...@@ -911,7 +902,7 @@ class FasterRCNNMetaArch(model.DetectionModel): ...@@ -911,7 +902,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
dtype=tf.float32), dtype=tf.float32),
'anchors': 'anchors':
anchors_boxlist.data['boxes'], anchors_boxlist.data['boxes'],
fields.PredictionFields.feature_maps: [rpn_features_to_crop] fields.PredictionFields.feature_maps: rpn_features_to_crop
} }
return prediction_dict return prediction_dict
@@ -947,9 +938,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
         [batch_size, num_valid_anchors, 2] containing class
         predictions (logits) for each of the anchors.  Note that this
         tensor *includes* background class predictions (at class index 0).
-      rpn_features_to_crop: A 4-D float32 or bfloat16 tensor with shape
-        [batch_size, height, width, depth] representing image features to crop
-        using the proposal boxes predicted by the RPN.
+      rpn_features_to_crop: A list of 4-D float32 or bfloat16 tensors with
+        shape [batch_size, height_i, width_i, depth] representing image
+        features to crop using the proposal boxes predicted by the RPN.
       anchors: 2-D float tensor of shape
         [num_anchors, self._box_coder.code_size].
       image_shape: A 1D int32 tensors of size [4] containing the image shape.
@@ -1012,9 +1003,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
     """Predicts the output tensors from second stage of Faster R-CNN.

     Args:
-      rpn_features_to_crop: A 4-D float32 or bfloat16 tensor with shape
-        [batch_size, height, width, depth] representing image features to crop
-        using the proposal boxes predicted by the RPN.
+      rpn_features_to_crop: A list of 4-D float32 or bfloat16 tensors with
+        shape [batch_size, height_i, width_i, depth] representing image
+        features to crop using the proposal boxes predicted by the RPN.
       proposal_boxes_normalized: A float tensor with shape [batch_size,
         max_num_proposals, 4] representing the (potentially zero padded)
         proposal boxes for all images in the batch.  These boxes are represented
@@ -1064,7 +1055,8 @@ class FasterRCNNMetaArch(model.DetectionModel):
     """
     flattened_proposal_feature_maps = (
         self._compute_second_stage_input_feature_maps(
-            rpn_features_to_crop, proposal_boxes_normalized, **side_inputs))
+            rpn_features_to_crop, proposal_boxes_normalized,
+            image_shape, **side_inputs))

     box_classifier_features = self._extract_box_classifier_features(
         flattened_proposal_feature_maps)
@@ -1196,6 +1188,8 @@ class FasterRCNNMetaArch(model.DetectionModel):
           decoded proposal bounding boxes in absolute coordinates.
         5) box_classifier_features: a 4-D float32 tensor representing the
           features for each proposal.
+        6) image_shape: a 1-D tensor of shape [4] representing the input
+          image shape.
       image_shapes: A 2-D int32 tensors of shape [batch_size, 3] containing
         shapes of images in the batch.
@@ -1234,11 +1228,12 @@ class FasterRCNNMetaArch(model.DetectionModel):
       detection_classes = detections_dict[
           fields.DetectionResultFields.detection_classes]
       rpn_features_to_crop = prediction_dict['rpn_features_to_crop']
+      image_shape = prediction_dict['image_shape']
       batch_size = tf.shape(detection_boxes)[0]
       max_detection = tf.shape(detection_boxes)[1]
       flattened_detected_feature_maps = (
           self._compute_second_stage_input_feature_maps(
-              rpn_features_to_crop, detection_boxes))
+              rpn_features_to_crop, detection_boxes, image_shape))
       curr_box_classifier_features = self._extract_box_classifier_features(
           flattened_detected_feature_maps)
@@ -1302,13 +1297,13 @@ class FasterRCNNMetaArch(model.DetectionModel):
       preprocessed_inputs: a [batch, height, width, channels] image tensor.

     Returns:
-      rpn_box_predictor_features: A 4-D float32 tensor with shape
-        [batch, height, width, depth] to be used for predicting proposal boxes
-        and corresponding objectness scores.
-      rpn_features_to_crop: A 4-D float32 tensor with shape
+      rpn_box_predictor_features: A list of 4-D float32 tensors with shape
+        [batch, height_i, width_i, depth] to be used for predicting proposal
+        boxes and corresponding objectness scores.
+      rpn_features_to_crop: A list of 4-D float32 tensors with shape
         [batch, height, width, depth] representing image features to crop using
         the proposals boxes.
-      anchors: A BoxList representing anchors (for the RPN) in
+      anchors: A BoxList representing anchors from all levels (for the RPN) in
         absolute coordinates.
       image_shape: A 1-D tensor representing the input image shape.
     """
@@ -1317,12 +1312,21 @@ class FasterRCNNMetaArch(model.DetectionModel):
     rpn_features_to_crop, self.endpoints = self._extract_proposal_features(
         preprocessed_inputs)

-    feature_map_shape = tf.shape(rpn_features_to_crop)
-    anchors = box_list_ops.concatenate(
-        self._first_stage_anchor_generator.generate([(feature_map_shape[1],
-                                                      feature_map_shape[2])]))
-    rpn_box_predictor_features = (
-        self._first_stage_box_predictor_first_conv(rpn_features_to_crop))
+    # If rpn_features_to_crop is not a list, make it one so that single-level
+    # and multi-level feature extractors are handled uniformly.
+    if not isinstance(rpn_features_to_crop, list):
+      rpn_features_to_crop = [rpn_features_to_crop]
+
+    feature_map_shapes = []
+    rpn_box_predictor_features = []
+    for single_rpn_features_to_crop in rpn_features_to_crop:
+      single_shape = tf.shape(single_rpn_features_to_crop)
+      feature_map_shapes.append((single_shape[1], single_shape[2]))
+      single_rpn_box_predictor_features = (
+          self._first_stage_box_predictor_first_conv(
+              single_rpn_features_to_crop))
+      rpn_box_predictor_features.append(single_rpn_box_predictor_features)
+    anchors = box_list_ops.concatenate(
+        self._first_stage_anchor_generator.generate(feature_map_shapes))
+
     return (rpn_box_predictor_features, rpn_features_to_crop,
             anchors, image_shape)
@@ -1349,9 +1353,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
     Note resulting tensors will not have been postprocessed.

     Args:
-      rpn_box_predictor_features: A 4-D float32 tensor with shape
-        [batch, height, width, depth] to be used for predicting proposal boxes
-        and corresponding objectness scores.
+      rpn_box_predictor_features: A list of 4-D float32 tensors with shape
+        [batch, height_i, width_i, depth] to be used for predicting proposal
+        boxes and corresponding objectness scores.

     Returns:
       box_encodings: 3-D float tensor of shape
@@ -1369,15 +1373,13 @@ class FasterRCNNMetaArch(model.DetectionModel):
     """
     num_anchors_per_location = (
         self._first_stage_anchor_generator.num_anchors_per_location())
-    if len(num_anchors_per_location) != 1:
-      raise RuntimeError('anchor_generator is expected to generate anchors '
-                         'corresponding to a single feature map.')
+
     if self._first_stage_box_predictor.is_keras_model:
       box_predictions = self._first_stage_box_predictor(
-          [rpn_box_predictor_features])
+          rpn_box_predictor_features)
     else:
       box_predictions = self._first_stage_box_predictor.predict(
-          [rpn_box_predictor_features],
+          rpn_box_predictor_features,
           num_anchors_per_location,
           scope=self.first_stage_box_predictor_scope)
...@@ -1547,7 +1549,8 @@ class FasterRCNNMetaArch(model.DetectionModel): ...@@ -1547,7 +1549,8 @@ class FasterRCNNMetaArch(model.DetectionModel):
detections_dict[ detections_dict[
'detection_features'] = self._add_detection_features_output_node( 'detection_features'] = self._add_detection_features_output_node(
detections_dict[fields.DetectionResultFields.detection_boxes], detections_dict[fields.DetectionResultFields.detection_boxes],
prediction_dict['rpn_features_to_crop']) prediction_dict['rpn_features_to_crop'],
prediction_dict['image_shape'])
return detections_dict return detections_dict
...@@ -1564,7 +1567,7 @@ class FasterRCNNMetaArch(model.DetectionModel): ...@@ -1564,7 +1567,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
return prediction_dict return prediction_dict
def _add_detection_features_output_node(self, detection_boxes, def _add_detection_features_output_node(self, detection_boxes,
rpn_features_to_crop): rpn_features_to_crop, image_shape):
"""Add detection features to outputs. """Add detection features to outputs.
This function extracts box features for each box in rpn_features_to_crop. This function extracts box features for each box in rpn_features_to_crop.
...@@ -1576,9 +1579,10 @@ class FasterRCNNMetaArch(model.DetectionModel): ...@@ -1576,9 +1579,10 @@ class FasterRCNNMetaArch(model.DetectionModel):
Args: Args:
detection_boxes: a 3-D float32 tensor of shape detection_boxes: a 3-D float32 tensor of shape
[batch_size, max_detections, 4] which represents the bounding boxes. [batch_size, max_detections, 4] which represents the bounding boxes.
rpn_features_to_crop: A 4-D float32 tensor with shape rpn_features_to_crop: A list of 4-D float32 tensor with shape
[batch, height, width, depth] representing image features to crop using [batch, height, width, depth] representing image features to crop using
the proposals boxes. the proposals boxes.
image_shape: a 1-D tensor of shape [4] representing the image shape.
Returns: Returns:
detection_features: a 4-D float32 tensor of shape detection_features: a 4-D float32 tensor of shape
...@@ -1588,7 +1592,7 @@ class FasterRCNNMetaArch(model.DetectionModel): ...@@ -1588,7 +1592,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
with tf.name_scope('SecondStageDetectionFeaturesExtract'): with tf.name_scope('SecondStageDetectionFeaturesExtract'):
flattened_detected_feature_maps = ( flattened_detected_feature_maps = (
self._compute_second_stage_input_feature_maps( self._compute_second_stage_input_feature_maps(
rpn_features_to_crop, detection_boxes)) rpn_features_to_crop, detection_boxes, image_shape))
detection_features_unpooled = self._extract_box_classifier_features( detection_features_unpooled = self._extract_box_classifier_features(
flattened_detected_feature_maps) flattened_detected_feature_maps)
@@ -1930,6 +1934,7 @@ class FasterRCNNMetaArch(model.DetectionModel):

   def _compute_second_stage_input_feature_maps(self, features_to_crop,
                                                proposal_boxes_normalized,
+                                               image_shape,
                                                **side_inputs):
     """Crops to a set of proposals from the feature map for a batch of images.
@@ -1943,18 +1948,24 @@ class FasterRCNNMetaArch(model.DetectionModel):
       proposal_boxes_normalized: A float32 tensor with shape [batch_size,
         num_proposals, box_code_size] containing proposal boxes in
         normalized coordinates.
+      image_shape: A 1D int32 tensor of size [4] containing the image shape.
       **side_inputs: additional tensors that are required by the network.

     Returns:
       A float32 tensor with shape [K, new_height, new_width, depth].
     """
-    features_to_crop = [features_to_crop]
     num_levels = len(features_to_crop)
     box_levels = None
     if num_levels != 1:
       # If there are multiple levels to select, get the box levels
-      box_levels = ops.fpn_feature_levels(num_levels, num_levels - 1,
-                                          1.0/224, proposal_boxes_normalized)
+      # unit_scale_index: num_levels - 2 is chosen based on section 4.2 of
+      # https://arxiv.org/pdf/1612.03144.pdf and works best for ResNet-based
+      # feature extractors.
+      box_levels = ops.fpn_feature_levels(
+          num_levels, num_levels - 2,
+          tf.sqrt(tf.cast(image_shape[1] * image_shape[2], tf.float32)) / 224.0,
+          proposal_boxes_normalized)
     cropped_regions = self._flatten_first_two_dimensions(
         self._crop_and_resize_fn(
             features_to_crop, proposal_boxes_normalized, box_levels,
...
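The new `fpn_feature_levels` call derives the unit scale from the actual image size: a proposal whose absolute size is 224x224 pixels is assigned to `unit_scale_index = num_levels - 2`, and each doubling or halving of box size moves it up or down one level, per eq. (1) in section 4.2 of the FPN paper (https://arxiv.org/abs/1612.03144). A standalone numpy sketch of that assignment (the real `ops.fpn_feature_levels` may differ in details such as clipping and epsilons):

```python
import numpy as np


def fpn_feature_levels(num_levels, unit_scale_index, image_ratio, boxes):
  # boxes: [batch, num_boxes, 4] in normalized [ymin, xmin, ymax, xmax].
  # image_ratio is sqrt(image_height * image_width) / 224, so
  # image_ratio * sqrt(normalized box area) is the box side length in
  # units of 224 pixels.
  side = np.sqrt((boxes[..., 2] - boxes[..., 0]) *
                 (boxes[..., 3] - boxes[..., 1]))
  levels = np.floor(unit_scale_index + np.log2(image_ratio * side + 1e-8))
  return np.clip(levels, 0, num_levels - 1).astype(np.int32)


# Example: 640x640 input, 4 levels; a box covering 1/4 of each image side
# (160x160 pixels) lands one level below the unit scale.
boxes = np.array([[[0.0, 0.0, 0.25, 0.25]]])
print(fpn_feature_levels(4, 2, np.sqrt(640.0 * 640.0) / 224.0, boxes))
# [[1]]
```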
@@ -484,7 +484,7 @@ class FasterRCNNMetaArchTest(
           'mask_predictions':
               mask_predictions,
           'rpn_features_to_crop':
-              rpn_features_to_crop
+              [rpn_features_to_crop]
       }, true_image_shapes)
     self.assertIn('detection_features', detections)
     return (detections['detection_boxes'], detections['detection_scores'],
...
@@ -23,6 +23,7 @@ import tensorflow.compat.v1 as tf
 from google.protobuf import text_format

 from object_detection.anchor_generators import grid_anchor_generator
+from object_detection.anchor_generators import multiscale_grid_anchor_generator
 from object_detection.builders import box_predictor_builder
 from object_detection.builders import hyperparams_builder
 from object_detection.builders import post_processing_builder
@@ -76,6 +77,36 @@ class FakeFasterRCNNFeatureExtractor(
           proposal_feature_maps, num_outputs=3, kernel_size=1, scope='layer2')

+class FakeFasterRCNNMultiLevelFeatureExtractor(
+    faster_rcnn_meta_arch.FasterRCNNFeatureExtractor):
+  """Fake multi-level feature extractor to use in tests."""
+
+  def __init__(self):
+    super(FakeFasterRCNNMultiLevelFeatureExtractor, self).__init__(
+        is_training=False,
+        first_stage_features_stride=32,
+        reuse_weights=None,
+        weight_decay=0.0)
+
+  def preprocess(self, resized_inputs):
+    return tf.identity(resized_inputs)
+
+  def _extract_proposal_features(self, preprocessed_inputs, scope):
+    # Two VALID 3x3 convs: each successive level shrinks by 2 pixels per
+    # spatial dimension.
+    with tf.variable_scope('mock_model'):
+      proposal_features_1 = 0 * slim.conv2d(
+          preprocessed_inputs, num_outputs=3, kernel_size=3, scope='layer1',
+          padding='VALID')
+      proposal_features_2 = 0 * slim.conv2d(
+          proposal_features_1, num_outputs=3, kernel_size=3, scope='layer2',
+          padding='VALID')
+      return [proposal_features_1, proposal_features_2], {}
+
+  def _extract_box_classifier_features(self, proposal_feature_maps, scope):
+    with tf.variable_scope('mock_model'):
+      return 0 * slim.conv2d(
+          proposal_feature_maps, num_outputs=3, kernel_size=1, scope='layer3')

 class FakeFasterRCNNKerasFeatureExtractor(
     faster_rcnn_meta_arch.FasterRCNNKerasFeatureExtractor):
   """Fake feature extractor to use in tests."""
@@ -112,6 +143,42 @@ class FakeFasterRCNNKerasFeatureExtractor(
             3, kernel_size=1, padding='SAME', name=name + '_layer2')])

+class FakeFasterRCNNKerasMultilevelFeatureExtractor(
+    faster_rcnn_meta_arch.FasterRCNNKerasFeatureExtractor):
+  """Fake multi-level Keras feature extractor to use in tests."""
+
+  def __init__(self):
+    super(FakeFasterRCNNKerasMultilevelFeatureExtractor, self).__init__(
+        is_training=False,
+        first_stage_features_stride=32,
+        weight_decay=0.0)
+
+  def preprocess(self, resized_inputs):
+    return tf.identity(resized_inputs)
+
+  def get_proposal_feature_extractor_model(self, name):
+
+    class ProposalFeatureExtractor(tf.keras.Model):
+      """Dummy proposal feature extraction, returning two levels."""
+
+      def __init__(self, name):
+        super(ProposalFeatureExtractor, self).__init__(name=name)
+        self.conv = None
+
+      def build(self, input_shape):
+        self.conv = tf.keras.layers.Conv2D(
+            3, kernel_size=3, name='layer1')
+        self.conv_1 = tf.keras.layers.Conv2D(
+            3, kernel_size=3, name='layer2')
+
+      def call(self, inputs):
+        output_1 = self.conv(inputs)
+        output_2 = self.conv_1(output_1)
+        return [output_1, output_2]
+
+    return ProposalFeatureExtractor(name=name)

 class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
   """Base class to test Faster R-CNN and R-FCN meta architectures."""
@@ -234,7 +301,8 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
                    calibration_mapping_value=None,
                    share_box_across_classes=False,
                    return_raw_detections_during_predict=False,
-                   output_final_box_features=False):
+                   output_final_box_features=False,
+                   multi_level=False):
     use_keras = tf_version.is_tf2()
     def image_resizer_fn(image, masks=None):
       """Fake image resizer function."""
@@ -260,6 +328,19 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
     # anchors in this test are designed so that a subset of anchors are inside
     # the image and a subset of anchors are outside.
-    first_stage_anchor_scales = (0.001, 0.005, 0.1)
-    first_stage_anchor_aspect_ratios = (0.5, 1.0, 2.0)
-    first_stage_anchor_strides = (1, 1)
+    first_stage_anchor_generator = None
+    if multi_level:
+      min_level = 0
+      max_level = 1
+      anchor_scale = 0.1
+      aspect_ratios = [1.0, 2.0, 0.5]
+      scales_per_octave = 2
+      normalize_coordinates = False
+      (first_stage_anchor_generator
+      ) = multiscale_grid_anchor_generator.MultiscaleGridAnchorGenerator(
+          min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
+          normalize_coordinates)
+    else:
+      first_stage_anchor_scales = (0.001, 0.005, 0.1)
+      first_stage_anchor_aspect_ratios = (0.5, 1.0, 2.0)
+      first_stage_anchor_strides = (1, 1)
@@ -273,7 +354,13 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
         use_matmul_gather=use_matmul_gather_in_matcher)

     if use_keras:
-      fake_feature_extractor = FakeFasterRCNNKerasFeatureExtractor()
+      if multi_level:
+        fake_feature_extractor = FakeFasterRCNNKerasMultilevelFeatureExtractor()
+      else:
+        fake_feature_extractor = FakeFasterRCNNKerasFeatureExtractor()
     else:
-      fake_feature_extractor = FakeFasterRCNNFeatureExtractor()
+      if multi_level:
+        fake_feature_extractor = FakeFasterRCNNMultiLevelFeatureExtractor()
+      else:
+        fake_feature_extractor = FakeFasterRCNNFeatureExtractor()
...@@ -479,8 +566,8 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase): ...@@ -479,8 +566,8 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
preprocessed_inputs, true_image_shapes = model.preprocess(images) preprocessed_inputs, true_image_shapes = model.preprocess(images)
prediction_dict = model.predict(preprocessed_inputs, true_image_shapes) prediction_dict = model.predict(preprocessed_inputs, true_image_shapes)
return (prediction_dict['rpn_box_predictor_features'], return (prediction_dict['rpn_box_predictor_features'][0],
prediction_dict['rpn_features_to_crop'], prediction_dict['rpn_features_to_crop'][0],
prediction_dict['image_shape'], prediction_dict['image_shape'],
prediction_dict['rpn_box_encodings'], prediction_dict['rpn_box_encodings'],
prediction_dict['rpn_objectness_predictions_with_background'], prediction_dict['rpn_objectness_predictions_with_background'],
@@ -529,6 +616,92 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
       self.assertTrue(np.all(np.less_equal(anchors[:, 2], height)))
       self.assertTrue(np.all(np.less_equal(anchors[:, 3], width)))

+  @parameterized.parameters(
+      {'use_static_shapes': False},
+      {'use_static_shapes': True},
+  )
+  def test_predict_shape_in_inference_mode_first_stage_only_multi_level(
+      self, use_static_shapes):
+    batch_size = 2
+    height = 50
+    width = 52
+    input_image_shape = (batch_size, height, width, 3)
+
+    with test_utils.GraphContextOrNone() as g:
+      model = self._build_model(
+          is_training=False,
+          number_of_stages=1,
+          second_stage_batch_size=2,
+          clip_anchors_to_image=use_static_shapes,
+          use_static_shapes=use_static_shapes,
+          multi_level=True)
+
+    def graph_fn(images):
+      """Function to construct tf graph for the test."""
+      preprocessed_inputs, true_image_shapes = model.preprocess(images)
+      prediction_dict = model.predict(preprocessed_inputs, true_image_shapes)
+      return (prediction_dict['rpn_box_predictor_features'][0],
+              prediction_dict['rpn_box_predictor_features'][1],
+              prediction_dict['rpn_features_to_crop'][0],
+              prediction_dict['rpn_features_to_crop'][1],
+              prediction_dict['image_shape'],
+              prediction_dict['rpn_box_encodings'],
+              prediction_dict['rpn_objectness_predictions_with_background'],
+              prediction_dict['anchors'])
+
+    images = np.zeros(input_image_shape, dtype=np.float32)
+
+    # In inference mode, anchors are clipped to the image window, but not
+    # pruned.  The fake multi-level extractor applies two VALID 3x3 convs,
+    # so the two levels have shapes (height-2, width-2) and (height-4,
+    # width-4), with 3 aspect ratios x 2 scales per octave = 6 anchors per
+    # location.
+    expected_num_anchors = ((height-2) * (width-2) + (height-4) * (width-4)) * 6
+    expected_output_shapes = {
+        'rpn_box_predictor_features_0': (batch_size, height-2, width-2, 512),
+        'rpn_box_predictor_features_1': (batch_size, height-4, width-4, 512),
+        'rpn_features_to_crop_0': (batch_size, height-2, width-2, 3),
+        'rpn_features_to_crop_1': (batch_size, height-4, width-4, 3),
+        'rpn_box_encodings': (batch_size, expected_num_anchors, 4),
+        'rpn_objectness_predictions_with_background':
+            (batch_size, expected_num_anchors, 2),
+    }
+
+    if use_static_shapes:
+      expected_output_shapes['anchors'] = (expected_num_anchors, 4)
+    else:
+      expected_output_shapes['anchors'] = (18300, 4)
+
+    if use_static_shapes:
+      results = self.execute(graph_fn, [images], graph=g)
+    else:
+      results = self.execute_cpu(graph_fn, [images], graph=g)
+
+    self.assertAllEqual(results[0].shape,
+                        expected_output_shapes['rpn_box_predictor_features_0'])
+    self.assertAllEqual(results[1].shape,
+                        expected_output_shapes['rpn_box_predictor_features_1'])
+    self.assertAllEqual(results[2].shape,
+                        expected_output_shapes['rpn_features_to_crop_0'])
+    self.assertAllEqual(results[3].shape,
+                        expected_output_shapes['rpn_features_to_crop_1'])
+    self.assertAllEqual(results[4], input_image_shape)
+    self.assertAllEqual(results[5].shape,
+                        expected_output_shapes['rpn_box_encodings'])
+    self.assertAllEqual(
+        results[6].shape,
+        expected_output_shapes['rpn_objectness_predictions_with_background'])
+    self.assertAllEqual(results[7].shape,
+                        expected_output_shapes['anchors'])
+
+    # Check that anchors are clipped to window.
+    anchors = results[7]
+    self.assertTrue(np.all(np.greater_equal(anchors, 0)))
+    self.assertTrue(np.all(np.less_equal(anchors[:, 0], height)))
+    self.assertTrue(np.all(np.less_equal(anchors[:, 1], width)))
+    self.assertTrue(np.all(np.less_equal(anchors[:, 2], height)))
+    self.assertTrue(np.all(np.less_equal(anchors[:, 3], width)))

   def test_regularization_losses(self):
     with test_utils.GraphContextOrNone() as g:
       model = self._build_model(
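As a quick sanity check of the anchor count in the test above (ours, not part of the commit): the fake extractor's two VALID 3x3 convs shrink a 50x52 input to 48x50 and then 46x48, and the multiscale generator places len(aspect_ratios) * scales_per_octave = 3 * 2 = 6 anchors per location:

```python
height, width = 50, 52
levels = [(height - 2, width - 2), (height - 4, width - 4)]  # VALID 3x3 convs
anchors_per_location = 3 * 2  # 3 aspect ratios x 2 scales per octave
print(sum(h * w for h, w in levels) * anchors_per_location)  # 27648
```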
@@ -601,9 +774,9 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
     def compare_results(results, expected_output_shapes):
       """Checks that the shapes of the predictions are as expected."""
-      self.assertAllEqual(results[0].shape,
+      self.assertAllEqual(results[0][0].shape,
                           expected_output_shapes['rpn_box_predictor_features'])
-      self.assertAllEqual(results[1].shape,
+      self.assertAllEqual(results[1][0].shape,
                           expected_output_shapes['rpn_features_to_crop'])
       self.assertAllEqual(results[2].shape,
                           expected_output_shapes['image_shape'])
@@ -746,8 +919,8 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
           result_tensor_dict['anchors'],
           result_tensor_dict['rpn_box_encodings'],
           result_tensor_dict['rpn_objectness_predictions_with_background'],
-          result_tensor_dict['rpn_features_to_crop'],
-          result_tensor_dict['rpn_box_predictor_features'],
+          result_tensor_dict['rpn_features_to_crop'][0],
+          result_tensor_dict['rpn_box_predictor_features'][0],
           result_tensor_dict['final_anchors'],
       )
...
@@ -265,7 +265,7 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
         [batch_size, num_valid_anchors, 2] containing class
         predictions (logits) for each of the anchors.  Note that this
         tensor *includes* background class predictions (at class index 0).
-      rpn_features: A 4-D float32 tensor with shape
+      rpn_features: A list containing a single 4-D float32 tensor with shape
         [batch_size, height, width, depth] representing image features from the
         RPN.
       anchors: 2-D float tensor of shape
@@ -313,6 +313,7 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
                                  rpn_objectness_predictions_with_background,
                                  anchors, image_shape_2d, true_image_shapes)

+    rpn_features = rpn_features[0]
     box_classifier_features = (
         self._extract_box_classifier_features(rpn_features))
...