Commit 0ba5a72b authored by TF Object Detection Team

Merge pull request #8895 from syiming:adjust_frcnn_meta_arch_to_multilevel_rpn_feature

PiperOrigin-RevId: 325370846
parents 80a6318b 18d95442
@@ -524,9 +524,31 @@ def _build_faster_rcnn_keras_feature_extractor(
feature_type))
feature_extractor_class = FASTER_RCNN_KERAS_FEATURE_EXTRACTOR_CLASS_MAP[
feature_type]
kwargs = {}
if feature_extractor_config.HasField('conv_hyperparams'):
kwargs.update({
'conv_hyperparams':
hyperparams_builder.KerasLayerHyperparams(
feature_extractor_config.conv_hyperparams),
'override_base_feature_extractor_hyperparams':
feature_extractor_config.override_base_feature_extractor_hyperparams
})
if feature_extractor_config.HasField('fpn'):
kwargs.update({
'fpn_min_level':
feature_extractor_config.fpn.min_level,
'fpn_max_level':
feature_extractor_config.fpn.max_level,
'additional_layer_depth':
feature_extractor_config.fpn.additional_layer_depth,
})
return feature_extractor_class(
is_training, first_stage_features_stride,
batch_norm_trainable)
batch_norm_trainable, **kwargs)
def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
......
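For orientation, here is a hedged sketch of how the new `fpn` fields flow into the feature extractor constructor. The text-proto excerpt and concrete values are illustrative, not taken from a shipped config:

    # Hypothetical feature_extractor config (text proto):
    #   feature_extractor {
    #     type: 'faster_rcnn_resnet_keras'   # illustrative type name
    #     fpn { min_level: 2 max_level: 6 additional_layer_depth: 256 }
    #   }
    # With such a config, the branch above would forward:
    kwargs = {
        'fpn_min_level': 2,
        'fpn_max_level': 6,
        'additional_layer_depth': 256,
    }
    feature_extractor = feature_extractor_class(
        is_training, first_stage_features_stride, batch_norm_trainable,
        **kwargs)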
@@ -310,6 +310,7 @@ class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
def _compute_second_stage_input_feature_maps(self, features_to_crop,
proposal_boxes_normalized,
image_shape,
context_features,
valid_context_size):
"""Crops to a set of proposals from the feature map for a batch of images.
@@ -324,6 +325,7 @@ class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
proposal_boxes_normalized: A float32 Tensor with shape [batch_size,
num_proposals, box_code_size] containing proposal boxes in normalized
coordinates.
image_shape: A 1D int32 tensor of size [4] containing the image shape.
context_features: A float Tensor of shape [batch_size, context_size,
num_context_features].
valid_context_size: An int32 Tensor of shape [batch_size].
@@ -331,9 +333,9 @@ class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
Returns:
A float32 Tensor with shape [K, new_height, new_width, depth].
"""
del image_shape
box_features = self._crop_and_resize_fn(
[features_to_crop], proposal_boxes_normalized, None,
features_to_crop, proposal_boxes_normalized, None,
[self._initial_crop_size, self._initial_crop_size])
attention_features = self._context_feature_extract_fn(
......
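The removed wrapping reflects the new contract: `features_to_crop` already arrives as a list of per-level feature maps. A minimal sketch of the call under that assumption (shapes illustrative):

    # features_to_crop: list of [batch, h_i, w_i, depth] tensors, one per
    # level. With box_levels=None, all boxes are cropped from level 0.
    box_features = crop_and_resize_fn(
        features_to_crop,             # a list, no longer [features_to_crop]
        proposal_boxes_normalized,    # [batch, num_proposals, 4], normalized
        None,                         # box_levels
        [initial_crop_size, initial_crop_size])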
@@ -529,7 +529,8 @@ class ContextRCNNMetaArchTest(test_case.TestCase, parameterized.TestCase):
(rpn_box_predictor_features, rpn_box_encodings, refined_box_encodings,
proposal_boxes_normalized, proposal_boxes) = execute_fn(graph_fn, [],
graph=g)
self.assertAllEqual(rpn_box_predictor_features.shape, [2, 20, 20, 512])
self.assertAllEqual(len(rpn_box_predictor_features), 1)
self.assertAllEqual(rpn_box_predictor_features[0].shape, [2, 20, 20, 512])
self.assertAllEqual(rpn_box_encodings.shape, [2, 3600, 4])
self.assertAllEqual(refined_box_encodings.shape, [16, 42, 4])
self.assertAllEqual(proposal_boxes_normalized.shape, [2, 8, 4])
......
@@ -99,7 +99,6 @@ import functools
import tensorflow.compat.v1 as tf
import tf_slim as slim
from object_detection.anchor_generators import grid_anchor_generator
from object_detection.builders import box_predictor_builder
from object_detection.builders import hyperparams_builder
from object_detection.core import box_list
@@ -451,11 +450,6 @@ class FasterRCNNMetaArch(model.DetectionModel):
# in the future.
super(FasterRCNNMetaArch, self).__init__(num_classes=num_classes)
if not isinstance(first_stage_anchor_generator,
grid_anchor_generator.GridAnchorGenerator):
raise ValueError('first_stage_anchor_generator must be of type '
'grid_anchor_generator.GridAnchorGenerator.')
self._is_training = is_training
self._image_resizer_fn = image_resizer_fn
self._resize_masks = resize_masks
@@ -492,9 +486,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
hyperparams_builder.KerasLayerHyperparams):
num_anchors_per_location = (
self._first_stage_anchor_generator.num_anchors_per_location())
if len(num_anchors_per_location) != 1:
raise ValueError('anchor_generator is expected to generate anchors '
'corresponding to a single feature map.')
conv_hyperparams = (
first_stage_box_predictor_arg_scope_fn)
self._first_stage_box_predictor_first_conv = (
@@ -533,11 +525,10 @@ class FasterRCNNMetaArch(model.DetectionModel):
else:
self._first_stage_box_predictor_arg_scope_fn = (
first_stage_box_predictor_arg_scope_fn)
def rpn_box_predictor_feature_extractor(rpn_features_to_crop):
def rpn_box_predictor_feature_extractor(single_rpn_features_to_crop):
with slim.arg_scope(self._first_stage_box_predictor_arg_scope_fn()):
reuse = tf.get_variable_scope().reuse
return slim.conv2d(
rpn_features_to_crop,
single_rpn_features_to_crop,
self._first_stage_box_predictor_depth,
kernel_size=[
self._first_stage_box_predictor_kernel_size,
@@ -546,7 +537,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
rate=self._first_stage_atrous_rate,
activation_fn=tf.nn.relu6,
scope='Conv',
reuse=reuse)
reuse=tf.AUTO_REUSE)
self._first_stage_box_predictor_first_conv = (
rpn_box_predictor_feature_extractor)
self._first_stage_box_predictor = (
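The switch to `tf.AUTO_REUSE` matters because this conv is now invoked once per feature level: the first call must create the 'Conv' variables and every later call must reuse them, regardless of the enclosing scope's reuse flag. A self-contained sketch of the pattern (shapes and names assumed):

    import tensorflow.compat.v1 as tf
    import tf_slim as slim

    def shared_rpn_conv(features):
      # Creates the 'Conv' variables on the first call, reuses them after.
      return slim.conv2d(
          features, 512, kernel_size=[3, 3], activation_fn=tf.nn.relu6,
          scope='Conv', reuse=tf.AUTO_REUSE)

    # Two assumed FPN levels; both calls share one set of RPN weights.
    multi_level_features = [
        tf.zeros([2, 48, 50, 3]), tf.zeros([2, 46, 48, 3])]
    rpn_outputs = [shared_rpn_conv(level) for level in multi_level_features]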
@@ -762,10 +753,10 @@ class FasterRCNNMetaArch(model.DetectionModel):
Returns:
prediction_dict: a dictionary holding "raw" prediction tensors:
1) rpn_box_predictor_features: A 4-D float32 tensor with shape
[batch_size, height, width, depth] to be used for predicting proposal
boxes and corresponding objectness scores.
2) rpn_features_to_crop: A 4-D float32 tensor with shape
1) rpn_box_predictor_features: A list of 4-D float32 tensors with shape
[batch_size, height_i, width_i, depth] to be used for predicting
proposal boxes and corresponding objectness scores.
2) rpn_features_to_crop: A list of 4-D float32 tensors with shape
[batch_size, height, width, depth] representing image features to crop
using the proposal boxes predicted by the RPN.
3) image_shape: a 1-D tensor of shape [4] representing the input
@@ -850,12 +841,12 @@ class FasterRCNNMetaArch(model.DetectionModel):
Returns:
prediction_dict: a dictionary holding "raw" prediction tensors:
1) rpn_box_predictor_features: A 4-D float32/bfloat16 tensor with shape
[batch_size, height, width, depth] to be used for predicting proposal
boxes and corresponding objectness scores.
2) rpn_features_to_crop: A 4-D float32/bfloat16 tensor with shape
[batch_size, height, width, depth] representing image features to crop
using the proposal boxes predicted by the RPN.
1) rpn_box_predictor_features: A list of 4-D float32/bfloat16 tensors
with shape [batch_size, height_i, width_i, depth] to be used for
predicting proposal boxes and corresponding objectness scores.
2) rpn_features_to_crop: A list of 4-D float32/bfloat16 tensors with
shape [batch_size, height, width, depth] representing image features
to crop using the proposal boxes predicted by the RPN.
3) image_shape: a 1-D tensor of shape [4] representing the input
image shape.
4) rpn_box_encodings: 3-D float32 tensor of shape
@@ -911,7 +902,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
dtype=tf.float32),
'anchors':
anchors_boxlist.data['boxes'],
fields.PredictionFields.feature_maps: [rpn_features_to_crop]
fields.PredictionFields.feature_maps: rpn_features_to_crop
}
return prediction_dict
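Net effect on the prediction contract: the RPN keys now hold Python lists (length one for single-level extractors), and `feature_maps` is that list itself rather than a fresh singleton wrapper. A quick sanity sketch against a hypothetical model instance:

    prediction_dict = model.predict(preprocessed_inputs, true_image_shapes)
    assert isinstance(prediction_dict['rpn_features_to_crop'], list)
    assert isinstance(prediction_dict['rpn_box_predictor_features'], list)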
@@ -947,9 +938,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
[batch_size, num_valid_anchors, 2] containing class
predictions (logits) for each of the anchors. Note that this
tensor *includes* background class predictions (at class index 0).
rpn_features_to_crop: A 4-D float32 or bfloat16 tensor with shape
[batch_size, height, width, depth] representing image features to crop
using the proposal boxes predicted by the RPN.
rpn_features_to_crop: A list of 4-D float32 or bfloat16 tensors with
shape [batch_size, height_i, width_i, depth] representing image features
to crop using the proposal boxes predicted by the RPN.
anchors: 2-D float tensor of shape
[num_anchors, self._box_coder.code_size].
image_shape: A 1D int32 tensor of size [4] containing the image shape.
@@ -1012,9 +1003,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
"""Predicts the output tensors from second stage of Faster R-CNN.
Args:
rpn_features_to_crop: A 4-D float32 or bfloat16 tensor with shape
[batch_size, height, width, depth] representing image features to crop
using the proposal boxes predicted by the RPN.
rpn_features_to_crop: A list of 4-D float32 or bfloat16 tensors with
shape [batch_size, height_i, width_i, depth] representing image features
to crop using the proposal boxes predicted by the RPN.
proposal_boxes_normalized: A float tensor with shape [batch_size,
max_num_proposals, 4] representing the (potentially zero padded)
proposal boxes for all images in the batch. These boxes are represented
@@ -1064,7 +1055,8 @@ class FasterRCNNMetaArch(model.DetectionModel):
"""
flattened_proposal_feature_maps = (
self._compute_second_stage_input_feature_maps(
rpn_features_to_crop, proposal_boxes_normalized, **side_inputs))
rpn_features_to_crop, proposal_boxes_normalized,
image_shape, **side_inputs))
box_classifier_features = self._extract_box_classifier_features(
flattened_proposal_feature_maps)
@@ -1196,6 +1188,8 @@ class FasterRCNNMetaArch(model.DetectionModel):
decoded proposal bounding boxes in absolute coordinates.
5) box_classifier_features: a 4-D float32 tensor representing the
features for each proposal.
6) image_shape: a 1-D tensor of shape [4] representing the input
image shape.
image_shapes: A 2-D int32 tensor of shape [batch_size, 3] containing
shapes of images in the batch.
@@ -1234,11 +1228,12 @@ class FasterRCNNMetaArch(model.DetectionModel):
detection_classes = detections_dict[
fields.DetectionResultFields.detection_classes]
rpn_features_to_crop = prediction_dict['rpn_features_to_crop']
image_shape = prediction_dict['image_shape']
batch_size = tf.shape(detection_boxes)[0]
max_detection = tf.shape(detection_boxes)[1]
flattened_detected_feature_maps = (
self._compute_second_stage_input_feature_maps(
rpn_features_to_crop, detection_boxes))
rpn_features_to_crop, detection_boxes, image_shape))
curr_box_classifier_features = self._extract_box_classifier_features(
flattened_detected_feature_maps)
@@ -1302,13 +1297,13 @@ class FasterRCNNMetaArch(model.DetectionModel):
preprocessed_inputs: a [batch, height, width, channels] image tensor.
Returns:
rpn_box_predictor_features: A 4-D float32 tensor with shape
[batch, height, width, depth] to be used for predicting proposal boxes
and corresponding objectness scores.
rpn_features_to_crop: A 4-D float32 tensor with shape
rpn_box_predictor_features: A list of 4-D float32 tensors with shape
[batch, height_i, width_i, depth] to be used for predicting proposal
boxes and corresponding objectness scores.
rpn_features_to_crop: A list of 4-D float32 tensors with shape
[batch, height, width, depth] representing image features to crop using
the proposal boxes.
anchors: A BoxList representing anchors (for the RPN) in
anchors: A BoxList of anchors (for the RPN), concatenated over all levels, in
absolute coordinates.
image_shape: A 1-D tensor representing the input image shape.
"""
@@ -1317,12 +1312,21 @@ class FasterRCNNMetaArch(model.DetectionModel):
rpn_features_to_crop, self.endpoints = self._extract_proposal_features(
preprocessed_inputs)
feature_map_shape = tf.shape(rpn_features_to_crop)
# The extractor may return a single feature map; normalize it to a list.
if not isinstance(rpn_features_to_crop, list):
rpn_features_to_crop = [rpn_features_to_crop]
feature_map_shapes = []
rpn_box_predictor_features = []
for single_rpn_features_to_crop in rpn_features_to_crop:
single_shape = tf.shape(single_rpn_features_to_crop)
feature_map_shapes.append((single_shape[1], single_shape[2]))
single_rpn_box_predictor_features = (
self._first_stage_box_predictor_first_conv(
single_rpn_features_to_crop))
rpn_box_predictor_features.append(single_rpn_box_predictor_features)
anchors = box_list_ops.concatenate(
self._first_stage_anchor_generator.generate([(feature_map_shape[1],
feature_map_shape[2])]))
rpn_box_predictor_features = (
self._first_stage_box_predictor_first_conv(rpn_features_to_crop))
self._first_stage_anchor_generator.generate(feature_map_shapes))
return (rpn_box_predictor_features, rpn_features_to_crop,
anchors, image_shape)
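Shape sketch of the rewritten loop, using the two-level fake extractor from the tests below (a [2, 50, 52, 3] input passed through two VALID 3x3 convs):

    # Per-level spatial shapes collected by the loop above:
    feature_map_shapes = [(48, 50), (46, 48)]
    # Anchors are generated per level and concatenated into one BoxList
    # (names as in the hunk above):
    anchors = box_list_ops.concatenate(
        first_stage_anchor_generator.generate(feature_map_shapes))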
@@ -1349,9 +1353,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
Note resulting tensors will not have been postprocessed.
Args:
rpn_box_predictor_features: A 4-D float32 tensor with shape
[batch, height, width, depth] to be used for predicting proposal boxes
and corresponding objectness scores.
rpn_box_predictor_features: A list of 4-D float32 tensors with shape
[batch, height_i, width_i, depth] to be used for predicting proposal
boxes and corresponding objectness scores.
Returns:
box_encodings: 3-D float tensor of shape
@@ -1369,15 +1373,13 @@ class FasterRCNNMetaArch(model.DetectionModel):
"""
num_anchors_per_location = (
self._first_stage_anchor_generator.num_anchors_per_location())
if len(num_anchors_per_location) != 1:
raise RuntimeError('anchor_generator is expected to generate anchors '
'corresponding to a single feature map.')
if self._first_stage_box_predictor.is_keras_model:
box_predictions = self._first_stage_box_predictor(
[rpn_box_predictor_features])
rpn_box_predictor_features)
else:
box_predictions = self._first_stage_box_predictor.predict(
[rpn_box_predictor_features],
rpn_box_predictor_features,
num_anchors_per_location,
scope=self.first_stage_box_predictor_scope)
@@ -1547,7 +1549,8 @@ class FasterRCNNMetaArch(model.DetectionModel):
detections_dict[
'detection_features'] = self._add_detection_features_output_node(
detections_dict[fields.DetectionResultFields.detection_boxes],
prediction_dict['rpn_features_to_crop'])
prediction_dict['rpn_features_to_crop'],
prediction_dict['image_shape'])
return detections_dict
@@ -1564,7 +1567,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
return prediction_dict
def _add_detection_features_output_node(self, detection_boxes,
rpn_features_to_crop):
rpn_features_to_crop, image_shape):
"""Add detection features to outputs.
This function extracts box features for each box in rpn_features_to_crop.
@@ -1576,9 +1579,10 @@ class FasterRCNNMetaArch(model.DetectionModel):
Args:
detection_boxes: a 3-D float32 tensor of shape
[batch_size, max_detections, 4] which represents the bounding boxes.
rpn_features_to_crop: A 4-D float32 tensor with shape
rpn_features_to_crop: A list of 4-D float32 tensors with shape
[batch, height, width, depth] representing image features to crop using
the proposal boxes.
image_shape: a 1-D tensor of shape [4] representing the image shape.
Returns:
detection_features: a 4-D float32 tensor of shape
@@ -1588,7 +1592,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
with tf.name_scope('SecondStageDetectionFeaturesExtract'):
flattened_detected_feature_maps = (
self._compute_second_stage_input_feature_maps(
rpn_features_to_crop, detection_boxes))
rpn_features_to_crop, detection_boxes, image_shape))
detection_features_unpooled = self._extract_box_classifier_features(
flattened_detected_feature_maps)
@@ -1930,6 +1934,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
def _compute_second_stage_input_feature_maps(self, features_to_crop,
proposal_boxes_normalized,
image_shape,
**side_inputs):
"""Crops to a set of proposals from the feature map for a batch of images.
@@ -1943,18 +1948,24 @@ class FasterRCNNMetaArch(model.DetectionModel):
proposal_boxes_normalized: A float32 tensor with shape [batch_size,
num_proposals, box_code_size] containing proposal boxes in
normalized coordinates.
image_shape: A 1D int32 tensor of size [4] containing the image shape.
**side_inputs: additional tensors that are required by the network.
Returns:
A float32 tensor with shape [K, new_height, new_width, depth].
"""
features_to_crop = [features_to_crop]
num_levels = len(features_to_crop)
box_levels = None
if num_levels != 1:
# If there are multiple levels to select, get the box levels
box_levels = ops.fpn_feature_levels(num_levels, num_levels - 1,
1.0/224, proposal_boxes_normalized)
# unit_scale_index: num_levels - 2 is chosen based on section 4.2 of
# https://arxiv.org/pdf/1612.03144.pdf and works best for ResNet-based
# feature extractors.
box_levels = ops.fpn_feature_levels(
num_levels, num_levels - 2,
tf.sqrt(tf.cast(image_shape[1] * image_shape[2], tf.float32)) / 224.0,
proposal_boxes_normalized)
cropped_regions = self._flatten_first_two_dimensions(
self._crop_and_resize_fn(
features_to_crop, proposal_boxes_normalized, box_levels,
......
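The new arguments implement Eq. (1) of the FPN paper: k = k0 + floor(log2(sqrt(box_area_in_pixels) / 224)), clipped to the available levels, with k0 = num_levels - 2 as the unit-scale index. A standalone numeric sketch of the same arithmetic (pure Python; `ops.fpn_feature_levels` is the actual implementation):

    import math

    def assign_fpn_level(norm_h, norm_w, image_h, image_w,
                         num_levels=4, unit_scale_index=2):
      # sqrt of the box area in pixels, relative to the 224px canonical box.
      scale = math.sqrt(norm_h * image_h * norm_w * image_w) / 224.0
      level = unit_scale_index + math.floor(math.log2(scale))
      return min(max(level, 0), num_levels - 1)

    print(assign_fpn_level(0.219, 0.219, 1024, 1024))  # ~224px box -> 2
    print(assign_fpn_level(0.05, 0.05, 1024, 1024))    # ~51px box  -> 0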
@@ -484,7 +484,7 @@ class FasterRCNNMetaArchTest(
'mask_predictions':
mask_predictions,
'rpn_features_to_crop':
rpn_features_to_crop
[rpn_features_to_crop]
}, true_image_shapes)
self.assertIn('detection_features', detections)
return (detections['detection_boxes'], detections['detection_scores'],
......
@@ -23,6 +23,7 @@ import tensorflow.compat.v1 as tf
from google.protobuf import text_format
from object_detection.anchor_generators import grid_anchor_generator
from object_detection.anchor_generators import multiscale_grid_anchor_generator
from object_detection.builders import box_predictor_builder
from object_detection.builders import hyperparams_builder
from object_detection.builders import post_processing_builder
@@ -76,6 +77,36 @@ class FakeFasterRCNNFeatureExtractor(
proposal_feature_maps, num_outputs=3, kernel_size=1, scope='layer2')
class FakeFasterRCNNMultiLevelFeatureExtractor(
faster_rcnn_meta_arch.FasterRCNNFeatureExtractor):
"""Fake feature extractor to use in tests."""
def __init__(self):
super(FakeFasterRCNNMultiLevelFeatureExtractor, self).__init__(
is_training=False,
first_stage_features_stride=32,
reuse_weights=None,
weight_decay=0.0)
def preprocess(self, resized_inputs):
return tf.identity(resized_inputs)
def _extract_proposal_features(self, preprocessed_inputs, scope):
with tf.variable_scope('mock_model'):
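# Multiplying by zero presumably keeps the outputs deterministic while
# still creating the conv variables (and their regularizers); the same
# trick is used by the single-level fake extractor above.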
proposal_features_1 = 0 * slim.conv2d(
preprocessed_inputs, num_outputs=3, kernel_size=3, scope='layer1',
padding='VALID')
proposal_features_2 = 0 * slim.conv2d(
proposal_features_1, num_outputs=3, kernel_size=3, scope='layer2',
padding='VALID')
return [proposal_features_1, proposal_features_2], {}
def _extract_box_classifier_features(self, proposal_feature_maps, scope):
with tf.variable_scope('mock_model'):
return 0 * slim.conv2d(
proposal_feature_maps, num_outputs=3, kernel_size=1, scope='layer3')
class FakeFasterRCNNKerasFeatureExtractor(
faster_rcnn_meta_arch.FasterRCNNKerasFeatureExtractor):
"""Fake feature extractor to use in tests."""
@@ -112,6 +143,42 @@ class FakeFasterRCNNKerasFeatureExtractor(
3, kernel_size=1, padding='SAME', name=name + '_layer2')])
class FakeFasterRCNNKerasMultilevelFeatureExtractor(
faster_rcnn_meta_arch.FasterRCNNKerasFeatureExtractor):
"""Fake feature extractor to use in tests."""
def __init__(self):
super(FakeFasterRCNNKerasMultilevelFeatureExtractor, self).__init__(
is_training=False,
first_stage_features_stride=32,
weight_decay=0.0)
def preprocess(self, resized_inputs):
return tf.identity(resized_inputs)
def get_proposal_feature_extractor_model(self, name):
class ProposalFeatureExtractor(tf.keras.Model):
"""Dummy proposal feature extraction."""
def __init__(self, name):
super(ProposalFeatureExtractor, self).__init__(name=name)
self.conv = None
def build(self, input_shape):
self.conv = tf.keras.layers.Conv2D(
3, kernel_size=3, name='layer1')
self.conv_1 = tf.keras.layers.Conv2D(
3, kernel_size=3, name='layer2')
def call(self, inputs):
output_1 = self.conv(inputs)
output_2 = self.conv_1(output_1)
return [output_1, output_2]
return ProposalFeatureExtractor(name=name)
class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
"""Base class to test Faster R-CNN and R-FCN meta architectures."""
@@ -234,7 +301,8 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
calibration_mapping_value=None,
share_box_across_classes=False,
return_raw_detections_during_predict=False,
output_final_box_features=False):
output_final_box_features=False,
multi_level=False):
use_keras = tf_version.is_tf2()
def image_resizer_fn(image, masks=None):
"""Fake image resizer function."""
@@ -260,22 +328,41 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
# anchors in this test are designed so that a subset of anchors are inside
# the image and a subset of anchors are outside.
first_stage_anchor_scales = (0.001, 0.005, 0.1)
first_stage_anchor_aspect_ratios = (0.5, 1.0, 2.0)
first_stage_anchor_strides = (1, 1)
first_stage_anchor_generator = grid_anchor_generator.GridAnchorGenerator(
first_stage_anchor_scales,
first_stage_anchor_aspect_ratios,
anchor_stride=first_stage_anchor_strides)
first_stage_anchor_generator = None
if multi_level:
min_level = 0
max_level = 1
anchor_scale = 0.1
aspect_ratios = [1.0, 2.0, 0.5]
scales_per_octave = 2
normalize_coordinates = False
(first_stage_anchor_generator
) = multiscale_grid_anchor_generator.MultiscaleGridAnchorGenerator(
min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
normalize_coordinates)
else:
first_stage_anchor_scales = (0.001, 0.005, 0.1)
first_stage_anchor_aspect_ratios = (0.5, 1.0, 2.0)
first_stage_anchor_strides = (1, 1)
first_stage_anchor_generator = grid_anchor_generator.GridAnchorGenerator(
first_stage_anchor_scales,
first_stage_anchor_aspect_ratios,
anchor_stride=first_stage_anchor_strides)
first_stage_target_assigner = target_assigner.create_target_assigner(
'FasterRCNN',
'proposal',
use_matmul_gather=use_matmul_gather_in_matcher)
if use_keras:
fake_feature_extractor = FakeFasterRCNNKerasFeatureExtractor()
if multi_level:
fake_feature_extractor = FakeFasterRCNNKerasMultilevelFeatureExtractor()
else:
fake_feature_extractor = FakeFasterRCNNKerasFeatureExtractor()
else:
fake_feature_extractor = FakeFasterRCNNFeatureExtractor()
if multi_level:
fake_feature_extractor = FakeFasterRCNNMultiLevelFeatureExtractor()
else:
fake_feature_extractor = FakeFasterRCNNFeatureExtractor()
first_stage_box_predictor_hyperparams_text_proto = """
op: CONV
@@ -479,8 +566,8 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
preprocessed_inputs, true_image_shapes = model.preprocess(images)
prediction_dict = model.predict(preprocessed_inputs, true_image_shapes)
return (prediction_dict['rpn_box_predictor_features'],
prediction_dict['rpn_features_to_crop'],
return (prediction_dict['rpn_box_predictor_features'][0],
prediction_dict['rpn_features_to_crop'][0],
prediction_dict['image_shape'],
prediction_dict['rpn_box_encodings'],
prediction_dict['rpn_objectness_predictions_with_background'],
@@ -529,6 +616,92 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
self.assertTrue(np.all(np.less_equal(anchors[:, 2], height)))
self.assertTrue(np.all(np.less_equal(anchors[:, 3], width)))
@parameterized.parameters(
{'use_static_shapes': False},
{'use_static_shapes': True},
)
def test_predict_shape_in_inference_mode_first_stage_only_multi_level(
self, use_static_shapes):
batch_size = 2
height = 50
width = 52
input_image_shape = (batch_size, height, width, 3)
with test_utils.GraphContextOrNone() as g:
model = self._build_model(
is_training=False,
number_of_stages=1,
second_stage_batch_size=2,
clip_anchors_to_image=use_static_shapes,
use_static_shapes=use_static_shapes,
multi_level=True)
def graph_fn(images):
"""Function to construct tf graph for the test."""
preprocessed_inputs, true_image_shapes = model.preprocess(images)
prediction_dict = model.predict(preprocessed_inputs, true_image_shapes)
return (prediction_dict['rpn_box_predictor_features'][0],
prediction_dict['rpn_box_predictor_features'][1],
prediction_dict['rpn_features_to_crop'][0],
prediction_dict['rpn_features_to_crop'][1],
prediction_dict['image_shape'],
prediction_dict['rpn_box_encodings'],
prediction_dict['rpn_objectness_predictions_with_background'],
prediction_dict['anchors'])
images = np.zeros(input_image_shape, dtype=np.float32)
# In inference mode, anchors are clipped to the image window, but not
# pruned. The fake multi-level extractor applies two VALID 3x3 convs, so
# the two feature maps have spatial sizes (height-2, width-2) and
# (height-4, width-4), with 3 aspect ratios x 2 scales per octave = 6
# anchors per location.
expected_num_anchors = ((height-2) * (width-2) + (height-4) * (width-4)) * 6
expected_output_shapes = {
'rpn_box_predictor_features_0': (batch_size, height-2, width-2, 512),
'rpn_box_predictor_features_1': (batch_size, height-4, width-4, 512),
'rpn_features_to_crop_0': (batch_size, height-2, width-2, 3),
'rpn_features_to_crop_1': (batch_size, height-4, width-4, 3),
'rpn_box_encodings': (batch_size, expected_num_anchors, 4),
'rpn_objectness_predictions_with_background':
(batch_size, expected_num_anchors, 2),
}
if use_static_shapes:
expected_output_shapes['anchors'] = (expected_num_anchors, 4)
else:
expected_output_shapes['anchors'] = (18300, 4)
if use_static_shapes:
results = self.execute(graph_fn, [images], graph=g)
else:
results = self.execute_cpu(graph_fn, [images], graph=g)
self.assertAllEqual(results[0].shape,
expected_output_shapes['rpn_box_predictor_features_0'])
self.assertAllEqual(results[1].shape,
expected_output_shapes['rpn_box_predictor_features_1'])
self.assertAllEqual(results[2].shape,
expected_output_shapes['rpn_features_to_crop_0'])
self.assertAllEqual(results[3].shape,
expected_output_shapes['rpn_features_to_crop_1'])
self.assertAllEqual(results[4],
input_image_shape)
self.assertAllEqual(results[5].shape,
expected_output_shapes['rpn_box_encodings'])
self.assertAllEqual(
results[6].shape,
expected_output_shapes['rpn_objectness_predictions_with_background'])
self.assertAllEqual(results[7].shape,
expected_output_shapes['anchors'])
# Check that anchors are clipped to window.
anchors = results[7]
self.assertTrue(np.all(np.greater_equal(anchors, 0)))
self.assertTrue(np.all(np.less_equal(anchors[:, 0], height)))
self.assertTrue(np.all(np.less_equal(anchors[:, 1], width)))
self.assertTrue(np.all(np.less_equal(anchors[:, 2], height)))
self.assertTrue(np.all(np.less_equal(anchors[:, 3], width)))
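# Anchor-count arithmetic, for reference: with height=50 and width=52
# the two feature maps are 48x50 and 46x48, so expected_num_anchors is
# (48 * 50 + 46 * 48) * 6 = 27648.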
def test_regularization_losses(self):
with test_utils.GraphContextOrNone() as g:
model = self._build_model(
@@ -601,9 +774,9 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
def compare_results(results, expected_output_shapes):
"""Checks if the shape of the predictions are as expected."""
self.assertAllEqual(results[0].shape,
self.assertAllEqual(results[0][0].shape,
expected_output_shapes['rpn_box_predictor_features'])
self.assertAllEqual(results[1].shape,
self.assertAllEqual(results[1][0].shape,
expected_output_shapes['rpn_features_to_crop'])
self.assertAllEqual(results[2].shape,
expected_output_shapes['image_shape'])
@@ -746,8 +919,8 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
result_tensor_dict['anchors'],
result_tensor_dict['rpn_box_encodings'],
result_tensor_dict['rpn_objectness_predictions_with_background'],
result_tensor_dict['rpn_features_to_crop'],
result_tensor_dict['rpn_box_predictor_features'],
result_tensor_dict['rpn_features_to_crop'][0],
result_tensor_dict['rpn_box_predictor_features'][0],
result_tensor_dict['final_anchors'],
)
......
@@ -265,7 +265,7 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
[batch_size, num_valid_anchors, 2] containing class
predictions (logits) for each of the anchors. Note that this
tensor *includes* background class predictions (at class index 0).
rpn_features: A 4-D float32 tensor with shape
rpn_features: A list containing a single 4-D float32 tensor with shape
[batch_size, height, width, depth] representing image features from the
RPN.
anchors: 2-D float tensor of shape
@@ -313,6 +313,7 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
rpn_objectness_predictions_with_background,
anchors, image_shape_2d, true_image_shapes)
rpn_features = rpn_features[0]
box_classifier_features = (
self._extract_box_classifier_features(rpn_features))
......
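R-FCN itself remains single-level: it unwraps the one-element list before handing the feature map to the box classifier. A defensive sketch of the assumption the line above encodes:

    # rpn_features arrives as a list under the new contract; R-FCN
    # supports exactly one level.
    assert len(rpn_features) == 1, 'R-FCN expects a single RPN feature level.'
    rpn_features = rpn_features[0]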