Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
0ba5a72b
Commit
0ba5a72b
authored
Aug 06, 2020
by
TF Object Detection Team
Browse files
Merge pull request #8895 from syiming:adjust_frcnn_meta_arch_to_multilevel_rpn_feature
PiperOrigin-RevId: 325370846
parents
80a6318b
18d95442
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
289 additions
and
79 deletions
+289
-79
research/object_detection/builders/model_builder.py
research/object_detection/builders/model_builder.py
+23
-1
research/object_detection/meta_architectures/context_rcnn_meta_arch.py
...ct_detection/meta_architectures/context_rcnn_meta_arch.py
+4
-2
research/object_detection/meta_architectures/context_rcnn_meta_arch_test.py
...tection/meta_architectures/context_rcnn_meta_arch_test.py
+2
-1
research/object_detection/meta_architectures/faster_rcnn_meta_arch.py
...ect_detection/meta_architectures/faster_rcnn_meta_arch.py
+68
-57
research/object_detection/meta_architectures/faster_rcnn_meta_arch_test.py
...etection/meta_architectures/faster_rcnn_meta_arch_test.py
+1
-1
research/object_detection/meta_architectures/faster_rcnn_meta_arch_test_lib.py
...tion/meta_architectures/faster_rcnn_meta_arch_test_lib.py
+189
-16
research/object_detection/meta_architectures/rfcn_meta_arch.py
...rch/object_detection/meta_architectures/rfcn_meta_arch.py
+2
-1
No files found.
research/object_detection/builders/model_builder.py
View file @
0ba5a72b
...
...
@@ -524,9 +524,31 @@ def _build_faster_rcnn_keras_feature_extractor(
feature_type
))
feature_extractor_class
=
FASTER_RCNN_KERAS_FEATURE_EXTRACTOR_CLASS_MAP
[
feature_type
]
kwargs
=
{}
if
feature_extractor_config
.
HasField
(
'conv_hyperparams'
):
kwargs
.
update
({
'conv_hyperparams'
:
hyperparams_builder
.
KerasLayerHyperparams
(
feature_extractor_config
.
conv_hyperparams
),
'override_base_feature_extractor_hyperparams'
:
feature_extractor_config
.
override_base_feature_extractor_hyperparams
})
if
feature_extractor_config
.
HasField
(
'fpn'
):
kwargs
.
update
({
'fpn_min_level'
:
feature_extractor_config
.
fpn
.
min_level
,
'fpn_max_level'
:
feature_extractor_config
.
fpn
.
max_level
,
'additional_layer_depth'
:
feature_extractor_config
.
fpn
.
additional_layer_depth
,
})
return
feature_extractor_class
(
is_training
,
first_stage_features_stride
,
batch_norm_trainable
)
batch_norm_trainable
,
**
kwargs
)
def
_build_faster_rcnn_model
(
frcnn_config
,
is_training
,
add_summaries
):
...
...
research/object_detection/meta_architectures/context_rcnn_meta_arch.py
View file @
0ba5a72b
...
...
@@ -310,6 +310,7 @@ class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
def
_compute_second_stage_input_feature_maps
(
self
,
features_to_crop
,
proposal_boxes_normalized
,
image_shape
,
context_features
,
valid_context_size
):
"""Crops to a set of proposals from the feature map for a batch of images.
...
...
@@ -324,6 +325,7 @@ class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
proposal_boxes_normalized: A float32 Tensor with shape [batch_size,
num_proposals, box_code_size] containing proposal boxes in normalized
coordinates.
image_shape: A 1-D int32 tensor of shape [4] containing the image shape.
context_features: A float Tensor of shape [batch_size, context_size,
num_context_features].
valid_context_size: An int32 Tensor of shape [batch_size].
...
...
@@ -331,9 +333,9 @@ class ContextRCNNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
Returns:
A float32 Tensor with shape [K, new_height, new_width, depth].
"""
del
image_shape
box_features
=
self
.
_crop_and_resize_fn
(
[
features_to_crop
]
,
proposal_boxes_normalized
,
None
,
features_to_crop
,
proposal_boxes_normalized
,
None
,
[
self
.
_initial_crop_size
,
self
.
_initial_crop_size
])
attention_features
=
self
.
_context_feature_extract_fn
(
...
...
research/object_detection/meta_architectures/context_rcnn_meta_arch_test.py
View file @
0ba5a72b
...
...
@@ -529,7 +529,8 @@ class ContextRCNNMetaArchTest(test_case.TestCase, parameterized.TestCase):
(
rpn_box_predictor_features
,
rpn_box_encodings
,
refined_box_encodings
,
proposal_boxes_normalized
,
proposal_boxes
)
=
execute_fn
(
graph_fn
,
[],
graph
=
g
)
self
.
assertAllEqual
(
rpn_box_predictor_features
.
shape
,
[
2
,
20
,
20
,
512
])
self
.
assertAllEqual
(
len
(
rpn_box_predictor_features
),
1
)
self
.
assertAllEqual
(
rpn_box_predictor_features
[
0
].
shape
,
[
2
,
20
,
20
,
512
])
self
.
assertAllEqual
(
rpn_box_encodings
.
shape
,
[
2
,
3600
,
4
])
self
.
assertAllEqual
(
refined_box_encodings
.
shape
,
[
16
,
42
,
4
])
self
.
assertAllEqual
(
proposal_boxes_normalized
.
shape
,
[
2
,
8
,
4
])
...
...
research/object_detection/meta_architectures/faster_rcnn_meta_arch.py
View file @
0ba5a72b
...
...
@@ -99,7 +99,6 @@ import functools
import
tensorflow.compat.v1
as
tf
import
tf_slim
as
slim
from
object_detection.anchor_generators
import
grid_anchor_generator
from
object_detection.builders
import
box_predictor_builder
from
object_detection.builders
import
hyperparams_builder
from
object_detection.core
import
box_list
...
...
@@ -451,11 +450,6 @@ class FasterRCNNMetaArch(model.DetectionModel):
# in the future.
super
(
FasterRCNNMetaArch
,
self
).
__init__
(
num_classes
=
num_classes
)
if
not
isinstance
(
first_stage_anchor_generator
,
grid_anchor_generator
.
GridAnchorGenerator
):
raise
ValueError
(
'first_stage_anchor_generator must be of type '
'grid_anchor_generator.GridAnchorGenerator.'
)
self
.
_is_training
=
is_training
self
.
_image_resizer_fn
=
image_resizer_fn
self
.
_resize_masks
=
resize_masks
...
...
@@ -492,9 +486,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
hyperparams_builder
.
KerasLayerHyperparams
):
num_anchors_per_location
=
(
self
.
_first_stage_anchor_generator
.
num_anchors_per_location
())
if
len
(
num_anchors_per_location
)
!=
1
:
raise
ValueError
(
'anchor_generator is expected to generate anchors '
'corresponding to a single feature map.'
)
conv_hyperparams
=
(
first_stage_box_predictor_arg_scope_fn
)
self
.
_first_stage_box_predictor_first_conv
=
(
...
...
@@ -533,11 +525,10 @@ class FasterRCNNMetaArch(model.DetectionModel):
else
:
self
.
_first_stage_box_predictor_arg_scope_fn
=
(
first_stage_box_predictor_arg_scope_fn
)
def
rpn_box_predictor_feature_extractor
(
rpn_features_to_crop
):
def
rpn_box_predictor_feature_extractor
(
single_
rpn_features_to_crop
):
with
slim
.
arg_scope
(
self
.
_first_stage_box_predictor_arg_scope_fn
()):
reuse
=
tf
.
get_variable_scope
().
reuse
return
slim
.
conv2d
(
rpn_features_to_crop
,
single_
rpn_features_to_crop
,
self
.
_first_stage_box_predictor_depth
,
kernel_size
=
[
self
.
_first_stage_box_predictor_kernel_size
,
...
...
@@ -546,7 +537,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
rate
=
self
.
_first_stage_atrous_rate
,
activation_fn
=
tf
.
nn
.
relu6
,
scope
=
'Conv'
,
reuse
=
reuse
)
reuse
=
tf
.
AUTO_REUSE
)
self
.
_first_stage_box_predictor_first_conv
=
(
rpn_box_predictor_feature_extractor
)
self
.
_first_stage_box_predictor
=
(
...
...
@@ -762,10 +753,10 @@ class FasterRCNNMetaArch(model.DetectionModel):
Returns:
prediction_dict: a dictionary holding "raw" prediction tensors:
1) rpn_box_predictor_features: A 4-D float32 tensor with shape
[batch_size, height, width, depth] to be used for predicting
proposal
boxes and corresponding objectness scores.
2) rpn_features_to_crop: A 4-D float32 tensor with shape
1) rpn_box_predictor_features: A
list of
4-D float32 tensor with shape
[batch_size, height
_i
, width
_j
, depth] to be used for predicting
proposal
boxes and corresponding objectness scores.
2) rpn_features_to_crop: A
list of
4-D float32 tensor with shape
[batch_size, height, width, depth] representing image features to crop
using the proposal boxes predicted by the RPN.
3) image_shape: a 1-D tensor of shape [4] representing the input
...
...
@@ -850,12 +841,12 @@ class FasterRCNNMetaArch(model.DetectionModel):
Returns:
prediction_dict: a dictionary holding "raw" prediction tensors:
1) rpn_box_predictor_features: A 4-D float32/bfloat16 tensor
with shape
[batch_size, height, width, depth] to be used for
predicting proposal
boxes and corresponding objectness scores.
2) rpn_features_to_crop: A 4-D float32/bfloat16 tensor with
shape
[batch_size, height, width, depth] representing image features
to crop
using the proposal boxes predicted by the RPN.
1) rpn_box_predictor_features: A
list of
4-D float32/bfloat16 tensor
with shape
[batch_size, height
_i
, width
_j
, depth] to be used for
predicting proposal
boxes and corresponding objectness scores.
2) rpn_features_to_crop: A
list of
4-D float32/bfloat16 tensor with
shape
[batch_size, height, width, depth] representing image features
to crop
using the proposal boxes predicted by the RPN.
3) image_shape: a 1-D tensor of shape [4] representing the input
image shape.
4) rpn_box_encodings: 3-D float32 tensor of shape
...
...
@@ -911,7 +902,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
dtype
=
tf
.
float32
),
'anchors'
:
anchors_boxlist
.
data
[
'boxes'
],
fields
.
PredictionFields
.
feature_maps
:
[
rpn_features_to_crop
]
fields
.
PredictionFields
.
feature_maps
:
rpn_features_to_crop
}
return
prediction_dict
...
...
@@ -947,9 +938,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
[batch_size, num_valid_anchors, 2] containing class
predictions (logits) for each of the anchors. Note that this
tensor *includes* background class predictions (at class index 0).
rpn_features_to_crop: A 4-D float32 or bfloat16 tensor with shape
[batch_size, height, width, depth] representing image features to
crop
using the proposal boxes predicted by the RPN.
rpn_features_to_crop: A
list of
4-D float32 or bfloat16 tensor with shape
[batch_size, height
_i
, width
_i
, depth] representing image features to
crop
using the proposal boxes predicted by the RPN.
anchors: 2-D float tensor of shape
[num_anchors, self._box_coder.code_size].
image_shape: A 1D int32 tensors of size [4] containing the image shape.
...
...
@@ -1012,9 +1003,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
"""Predicts the output tensors from second stage of Faster R-CNN.
Args:
rpn_features_to_crop: A 4-D float32 or bfloat16 tensor with shape
[batch_size, height, width, depth] representing image features to
crop
using the proposal boxes predicted by the RPN.
rpn_features_to_crop: A
list of
4-D float32 or bfloat16 tensor with shape
[batch_size, height
_i
, width
_i
, depth] representing image features to
crop
using the proposal boxes predicted by the RPN.
proposal_boxes_normalized: A float tensor with shape [batch_size,
max_num_proposals, 4] representing the (potentially zero padded)
proposal boxes for all images in the batch. These boxes are represented
...
...
@@ -1064,7 +1055,8 @@ class FasterRCNNMetaArch(model.DetectionModel):
"""
flattened_proposal_feature_maps
=
(
self
.
_compute_second_stage_input_feature_maps
(
rpn_features_to_crop
,
proposal_boxes_normalized
,
**
side_inputs
))
rpn_features_to_crop
,
proposal_boxes_normalized
,
image_shape
,
**
side_inputs
))
box_classifier_features
=
self
.
_extract_box_classifier_features
(
flattened_proposal_feature_maps
)
...
...
@@ -1196,6 +1188,8 @@ class FasterRCNNMetaArch(model.DetectionModel):
decoded proposal bounding boxes in absolute coordinates.
5) box_classifier_features: a 4-D float32 tensor representing the
features for each proposal.
6) image_shape: a 1-D tensor of shape [4] representing the input
image shape.
image_shapes: A 2-D int32 tensors of shape [batch_size, 3] containing
shapes of images in the batch.
...
...
@@ -1234,11 +1228,12 @@ class FasterRCNNMetaArch(model.DetectionModel):
detection_classes
=
detections_dict
[
fields
.
DetectionResultFields
.
detection_classes
]
rpn_features_to_crop
=
prediction_dict
[
'rpn_features_to_crop'
]
image_shape
=
prediction_dict
[
'image_shape'
]
batch_size
=
tf
.
shape
(
detection_boxes
)[
0
]
max_detection
=
tf
.
shape
(
detection_boxes
)[
1
]
flattened_detected_feature_maps
=
(
self
.
_compute_second_stage_input_feature_maps
(
rpn_features_to_crop
,
detection_boxes
))
rpn_features_to_crop
,
detection_boxes
,
image_shape
))
curr_box_classifier_features
=
self
.
_extract_box_classifier_features
(
flattened_detected_feature_maps
)
...
...
@@ -1302,13 +1297,13 @@ class FasterRCNNMetaArch(model.DetectionModel):
preprocessed_inputs: a [batch, height, width, channels] image tensor.
Returns:
rpn_box_predictor_features: A 4-D float32 tensor with shape
[batch, height, width, depth] to be used for predicting proposal
boxes
and corresponding objectness scores.
rpn_features_to_crop: A 4-D float32 tensor with shape
rpn_box_predictor_features: A
list of
4-D float32 tensor with shape
[batch, height
_i
, width
_j
, depth] to be used for predicting proposal
boxes
and corresponding objectness scores.
rpn_features_to_crop: A
list of
4-D float32 tensor with shape
[batch, height, width, depth] representing image features to crop using
the proposals boxes.
anchors: A BoxList representing anchors (for the RPN) in
anchors: A
list of
BoxList representing anchors (for the RPN) in
absolute coordinates.
image_shape: A 1-D tensor representing the input image shape.
"""
...
...
@@ -1317,12 +1312,21 @@ class FasterRCNNMetaArch(model.DetectionModel):
rpn_features_to_crop
,
self
.
endpoints
=
self
.
_extract_proposal_features
(
preprocessed_inputs
)
feature_map_shape
=
tf
.
shape
(
rpn_features_to_crop
)
# Decide if rpn_features_to_crop is a list. If not make it a list
if
not
isinstance
(
rpn_features_to_crop
,
list
):
rpn_features_to_crop
=
[
rpn_features_to_crop
]
feature_map_shapes
=
[]
rpn_box_predictor_features
=
[]
for
single_rpn_features_to_crop
in
rpn_features_to_crop
:
single_shape
=
tf
.
shape
(
single_rpn_features_to_crop
)
feature_map_shapes
.
append
((
single_shape
[
1
],
single_shape
[
2
]))
single_rpn_box_predictor_features
=
(
self
.
_first_stage_box_predictor_first_conv
(
single_rpn_features_to_crop
))
rpn_box_predictor_features
.
append
(
single_rpn_box_predictor_features
)
anchors
=
box_list_ops
.
concatenate
(
self
.
_first_stage_anchor_generator
.
generate
([(
feature_map_shape
[
1
],
feature_map_shape
[
2
])]))
rpn_box_predictor_features
=
(
self
.
_first_stage_box_predictor_first_conv
(
rpn_features_to_crop
))
self
.
_first_stage_anchor_generator
.
generate
(
feature_map_shapes
))
return
(
rpn_box_predictor_features
,
rpn_features_to_crop
,
anchors
,
image_shape
)
...
...
@@ -1349,9 +1353,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
Note resulting tensors will not have been postprocessed.
Args:
rpn_box_predictor_features: A 4-D float32 tensor with shape
[batch, height, width, depth] to be used for predicting proposal
boxes
and corresponding objectness scores.
rpn_box_predictor_features: A
list of
4-D float32 tensor with shape
[batch, height
_i
, width
_j
, depth] to be used for predicting proposal
boxes
and corresponding objectness scores.
Returns:
box_encodings: 3-D float tensor of shape
...
...
@@ -1369,15 +1373,13 @@ class FasterRCNNMetaArch(model.DetectionModel):
"""
num_anchors_per_location
=
(
self
.
_first_stage_anchor_generator
.
num_anchors_per_location
())
if
len
(
num_anchors_per_location
)
!=
1
:
raise
RuntimeError
(
'anchor_generator is expected to generate anchors '
'corresponding to a single feature map.'
)
if
self
.
_first_stage_box_predictor
.
is_keras_model
:
box_predictions
=
self
.
_first_stage_box_predictor
(
[
rpn_box_predictor_features
]
)
rpn_box_predictor_features
)
else
:
box_predictions
=
self
.
_first_stage_box_predictor
.
predict
(
[
rpn_box_predictor_features
]
,
rpn_box_predictor_features
,
num_anchors_per_location
,
scope
=
self
.
first_stage_box_predictor_scope
)
...
...
@@ -1547,7 +1549,8 @@ class FasterRCNNMetaArch(model.DetectionModel):
detections_dict
[
'detection_features'
]
=
self
.
_add_detection_features_output_node
(
detections_dict
[
fields
.
DetectionResultFields
.
detection_boxes
],
prediction_dict
[
'rpn_features_to_crop'
])
prediction_dict
[
'rpn_features_to_crop'
],
prediction_dict
[
'image_shape'
])
return
detections_dict
...
...
@@ -1564,7 +1567,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
return
prediction_dict
def
_add_detection_features_output_node
(
self
,
detection_boxes
,
rpn_features_to_crop
):
rpn_features_to_crop
,
image_shape
):
"""Add detection features to outputs.
This function extracts box features for each box in rpn_features_to_crop.
...
...
@@ -1576,9 +1579,10 @@ class FasterRCNNMetaArch(model.DetectionModel):
Args:
detection_boxes: a 3-D float32 tensor of shape
[batch_size, max_detections, 4] which represents the bounding boxes.
rpn_features_to_crop: A 4-D float32 tensor with shape
rpn_features_to_crop: A
list of
4-D float32 tensor with shape
[batch, height, width, depth] representing image features to crop using
the proposals boxes.
image_shape: a 1-D tensor of shape [4] representing the image shape.
Returns:
detection_features: a 4-D float32 tensor of shape
...
...
@@ -1588,7 +1592,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
with
tf
.
name_scope
(
'SecondStageDetectionFeaturesExtract'
):
flattened_detected_feature_maps
=
(
self
.
_compute_second_stage_input_feature_maps
(
rpn_features_to_crop
,
detection_boxes
))
rpn_features_to_crop
,
detection_boxes
,
image_shape
))
detection_features_unpooled
=
self
.
_extract_box_classifier_features
(
flattened_detected_feature_maps
)
...
...
@@ -1930,6 +1934,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
def
_compute_second_stage_input_feature_maps
(
self
,
features_to_crop
,
proposal_boxes_normalized
,
image_shape
,
**
side_inputs
):
"""Crops to a set of proposals from the feature map for a batch of images.
...
...
@@ -1943,18 +1948,24 @@ class FasterRCNNMetaArch(model.DetectionModel):
proposal_boxes_normalized: A float32 tensor with shape [batch_size,
num_proposals, box_code_size] containing proposal boxes in
normalized coordinates.
image_shape: A 1-D int32 tensor of shape [4] containing the image shape.
**side_inputs: additional tensors that are required by the network.
Returns:
A float32 tensor with shape [K, new_height, new_width, depth].
"""
features_to_crop
=
[
features_to_crop
]
num_levels
=
len
(
features_to_crop
)
box_levels
=
None
if
num_levels
!=
1
:
# If there are multiple levels to select, get the box levels
box_levels
=
ops
.
fpn_feature_levels
(
num_levels
,
num_levels
-
1
,
1.0
/
224
,
proposal_boxes_normalized
)
# unit_scale_index: num_levels-2 is chosen based on section 4.2 of
# https://arxiv.org/pdf/1612.03144.pdf and works best for Resnet based
# feature extractor.
box_levels
=
ops
.
fpn_feature_levels
(
num_levels
,
num_levels
-
2
,
tf
.
sqrt
(
tf
.
cast
(
image_shape
[
1
]
*
image_shape
[
2
],
tf
.
float32
))
/
224.0
,
proposal_boxes_normalized
)
cropped_regions
=
self
.
_flatten_first_two_dimensions
(
self
.
_crop_and_resize_fn
(
features_to_crop
,
proposal_boxes_normalized
,
box_levels
,
...
...
research/object_detection/meta_architectures/faster_rcnn_meta_arch_test.py
View file @
0ba5a72b
...
...
@@ -484,7 +484,7 @@ class FasterRCNNMetaArchTest(
'mask_predictions'
:
mask_predictions
,
'rpn_features_to_crop'
:
rpn_features_to_crop
[
rpn_features_to_crop
]
},
true_image_shapes
)
self
.
assertIn
(
'detection_features'
,
detections
)
return
(
detections
[
'detection_boxes'
],
detections
[
'detection_scores'
],
...
...
research/object_detection/meta_architectures/faster_rcnn_meta_arch_test_lib.py
View file @
0ba5a72b
...
...
@@ -23,6 +23,7 @@ import tensorflow.compat.v1 as tf
from
google.protobuf
import
text_format
from
object_detection.anchor_generators
import
grid_anchor_generator
from
object_detection.anchor_generators
import
multiscale_grid_anchor_generator
from
object_detection.builders
import
box_predictor_builder
from
object_detection.builders
import
hyperparams_builder
from
object_detection.builders
import
post_processing_builder
...
...
@@ -76,6 +77,36 @@ class FakeFasterRCNNFeatureExtractor(
proposal_feature_maps
,
num_outputs
=
3
,
kernel_size
=
1
,
scope
=
'layer2'
)
class FakeFasterRCNNMultiLevelFeatureExtractor(
    faster_rcnn_meta_arch.FasterRCNNFeatureExtractor):
  """Fake feature extractor to use in tests.

  The proposal stage returns a list of two feature maps (one per level)
  instead of a single tensor. Every convolution output is multiplied by zero.
  """

  def __init__(self):
    """Configures the base extractor with fixed, inference-only settings."""
    base_kwargs = dict(
        is_training=False,
        first_stage_features_stride=32,
        reuse_weights=None,
        weight_decay=0.0)
    super(FakeFasterRCNNMultiLevelFeatureExtractor, self).__init__(
        **base_kwargs)

  def preprocess(self, resized_inputs):
    """Returns the resized inputs passed through `tf.identity`."""
    return tf.identity(resized_inputs)

  def _extract_proposal_features(self, preprocessed_inputs, scope):
    """Builds two stacked 3x3 VALID convolutions and returns both outputs.

    Args:
      preprocessed_inputs: input tensor fed to the first convolution.
      scope: unused here; the variable scope is always 'mock_model'.

    Returns:
      A tuple of ([level_1_features, level_2_features], {}), where the second
      level is computed from the first and the dict of end points is empty.
    """
    with tf.variable_scope('mock_model'):
      first_conv = slim.conv2d(
          preprocessed_inputs,
          num_outputs=3,
          kernel_size=3,
          scope='layer1',
          padding='VALID')
      proposal_features_1 = 0 * first_conv

      second_conv = slim.conv2d(
          proposal_features_1,
          num_outputs=3,
          kernel_size=3,
          scope='layer2',
          padding='VALID')
      proposal_features_2 = 0 * second_conv

      multi_level_features = [proposal_features_1, proposal_features_2]
      return multi_level_features, {}

  def _extract_box_classifier_features(self, proposal_feature_maps, scope):
    """Applies a single 1x1 convolution ('layer3') and zeroes its output.

    Args:
      proposal_feature_maps: input tensor fed to the convolution.
      scope: unused here; the variable scope is always 'mock_model'.

    Returns:
      The convolution output multiplied by zero.
    """
    with tf.variable_scope('mock_model'):
      classifier_conv = slim.conv2d(
          proposal_feature_maps,
          num_outputs=3,
          kernel_size=1,
          scope='layer3')
      return 0 * classifier_conv
class
FakeFasterRCNNKerasFeatureExtractor
(
faster_rcnn_meta_arch
.
FasterRCNNKerasFeatureExtractor
):
"""Fake feature extractor to use in tests."""
...
...
@@ -112,6 +143,42 @@ class FakeFasterRCNNKerasFeatureExtractor(
3
,
kernel_size
=
1
,
padding
=
'SAME'
,
name
=
name
+
'_layer2'
)])
class FakeFasterRCNNKerasMultilevelFeatureExtractor(
    faster_rcnn_meta_arch.FasterRCNNKerasFeatureExtractor):
  """Fake Keras feature extractor to use in tests.

  Its proposal feature extractor model returns a list of two feature maps
  (one per level) instead of a single tensor.
  """

  def __init__(self):
    """Configures the base extractor with fixed, inference-only settings."""
    super(FakeFasterRCNNKerasMultilevelFeatureExtractor, self).__init__(
        is_training=False,
        first_stage_features_stride=32,
        weight_decay=0.0)

  def preprocess(self, resized_inputs):
    """Returns the resized inputs passed through `tf.identity`."""
    return tf.identity(resized_inputs)

  def get_proposal_feature_extractor_model(self, name):
    """Returns a Keras model that maps an image to two stacked conv outputs.

    Args:
      name: name given to the returned Keras model.

    Returns:
      A `tf.keras.Model` whose call returns [output_1, output_2], where
      output_2 is computed from output_1.
    """

    class ProposalFeatureExtractor(tf.keras.Model):
      """Dummy proposal feature extraction."""

      def __init__(self, name):
        super(ProposalFeatureExtractor, self).__init__(name=name)
        # Both layers are created lazily in build(); declare them together so
        # the model's attributes are visible in one place.
        self.conv = None
        self.conv_1 = None

      def build(self, input_shape):
        self.conv = tf.keras.layers.Conv2D(
            3, kernel_size=3, name='layer1')
        # Give the second level its own name: previously both layers were
        # called 'layer1', unlike the non-Keras fake which uses
        # 'layer1'/'layer2'.
        self.conv_1 = tf.keras.layers.Conv2D(
            3, kernel_size=3, name='layer2')

      def call(self, inputs):
        output_1 = self.conv(inputs)
        output_2 = self.conv_1(output_1)
        return [output_1, output_2]

    return ProposalFeatureExtractor(name=name)
class
FasterRCNNMetaArchTestBase
(
test_case
.
TestCase
,
parameterized
.
TestCase
):
"""Base class to test Faster R-CNN and R-FCN meta architectures."""
...
...
@@ -234,7 +301,8 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
calibration_mapping_value
=
None
,
share_box_across_classes
=
False
,
return_raw_detections_during_predict
=
False
,
output_final_box_features
=
False
):
output_final_box_features
=
False
,
multi_level
=
False
):
use_keras
=
tf_version
.
is_tf2
()
def
image_resizer_fn
(
image
,
masks
=
None
):
"""Fake image resizer function."""
...
...
@@ -260,22 +328,41 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
# anchors in this test are designed so that a subset of anchors are inside
# the image and a subset of anchors are outside.
first_stage_anchor_scales
=
(
0.001
,
0.005
,
0.1
)
first_stage_anchor_aspect_ratios
=
(
0.5
,
1.0
,
2.0
)
first_stage_anchor_strides
=
(
1
,
1
)
first_stage_anchor_generator
=
grid_anchor_generator
.
GridAnchorGenerator
(
first_stage_anchor_scales
,
first_stage_anchor_aspect_ratios
,
anchor_stride
=
first_stage_anchor_strides
)
first_stage_anchor_generator
=
None
if
multi_level
:
min_level
=
0
max_level
=
1
anchor_scale
=
0.1
aspect_ratios
=
[
1.0
,
2.0
,
0.5
]
scales_per_octave
=
2
normalize_coordinates
=
False
(
first_stage_anchor_generator
)
=
multiscale_grid_anchor_generator
.
MultiscaleGridAnchorGenerator
(
min_level
,
max_level
,
anchor_scale
,
aspect_ratios
,
scales_per_octave
,
normalize_coordinates
)
else
:
first_stage_anchor_scales
=
(
0.001
,
0.005
,
0.1
)
first_stage_anchor_aspect_ratios
=
(
0.5
,
1.0
,
2.0
)
first_stage_anchor_strides
=
(
1
,
1
)
first_stage_anchor_generator
=
grid_anchor_generator
.
GridAnchorGenerator
(
first_stage_anchor_scales
,
first_stage_anchor_aspect_ratios
,
anchor_stride
=
first_stage_anchor_strides
)
first_stage_target_assigner
=
target_assigner
.
create_target_assigner
(
'FasterRCNN'
,
'proposal'
,
use_matmul_gather
=
use_matmul_gather_in_matcher
)
if
use_keras
:
fake_feature_extractor
=
FakeFasterRCNNKerasFeatureExtractor
()
if
multi_level
:
fake_feature_extractor
=
FakeFasterRCNNKerasMultilevelFeatureExtractor
()
else
:
fake_feature_extractor
=
FakeFasterRCNNKerasFeatureExtractor
()
else
:
fake_feature_extractor
=
FakeFasterRCNNFeatureExtractor
()
if
multi_level
:
fake_feature_extractor
=
FakeFasterRCNNMultiLevelFeatureExtractor
()
else
:
fake_feature_extractor
=
FakeFasterRCNNFeatureExtractor
()
first_stage_box_predictor_hyperparams_text_proto
=
"""
op: CONV
...
...
@@ -479,8 +566,8 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
preprocessed_inputs
,
true_image_shapes
=
model
.
preprocess
(
images
)
prediction_dict
=
model
.
predict
(
preprocessed_inputs
,
true_image_shapes
)
return
(
prediction_dict
[
'rpn_box_predictor_features'
],
prediction_dict
[
'rpn_features_to_crop'
],
return
(
prediction_dict
[
'rpn_box_predictor_features'
]
[
0
]
,
prediction_dict
[
'rpn_features_to_crop'
]
[
0
]
,
prediction_dict
[
'image_shape'
],
prediction_dict
[
'rpn_box_encodings'
],
prediction_dict
[
'rpn_objectness_predictions_with_background'
],
...
...
@@ -529,6 +616,92 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
self
.
assertTrue
(
np
.
all
(
np
.
less_equal
(
anchors
[:,
2
],
height
)))
self
.
assertTrue
(
np
.
all
(
np
.
less_equal
(
anchors
[:,
3
],
width
)))
@parameterized.parameters(
    {'use_static_shapes': False},
    {'use_static_shapes': True},
)
def test_predict_shape_in_inference_mode_first_stage_only_multi_level(
    self, use_static_shapes):
  """Checks first-stage prediction shapes for a two-level RPN at inference.

  Builds a one-stage model with `multi_level=True`, runs `predict` on a zero
  image batch, and verifies the shapes of the per-level RPN features, the box
  encodings, the objectness predictions and the anchors. It also checks that
  the returned anchors lie inside the image window.

  Args:
    use_static_shapes: forwarded to `_build_model` both as
      `use_static_shapes` and as `clip_anchors_to_image`; also selects
      `execute` versus `execute_cpu`.
  """
  batch_size = 2
  height = 50
  width = 52
  input_image_shape = (batch_size, height, width, 3)

  with test_utils.GraphContextOrNone() as g:
    model = self._build_model(
        is_training=False,
        number_of_stages=1,
        second_stage_batch_size=2,
        clip_anchors_to_image=use_static_shapes,
        use_static_shapes=use_static_shapes,
        multi_level=True)

  def graph_fn(images):
    """Function to construct tf graph for the test."""
    preprocessed_inputs, true_image_shapes = model.preprocess(images)
    prediction_dict = model.predict(preprocessed_inputs, true_image_shapes)
    return (prediction_dict['rpn_box_predictor_features'][0],
            prediction_dict['rpn_box_predictor_features'][1],
            prediction_dict['rpn_features_to_crop'][0],
            prediction_dict['rpn_features_to_crop'][1],
            prediction_dict['image_shape'],
            prediction_dict['rpn_box_encodings'],
            prediction_dict['rpn_objectness_predictions_with_background'],
            prediction_dict['anchors'])

  images = np.zeros(input_image_shape, dtype=np.float32)

  # The two feature levels are expected to shrink by 2 pixels per spatial
  # dimension each: (height - 2, width - 2) and (height - 4, width - 4).
  # The anchor count is the number of locations summed over both levels times
  # 6 anchors per location (presumably 3 aspect ratios x 2 scales per octave,
  # as configured in _build_model -- confirm if that configuration changes).
  level_0_hw = (height - 2, width - 2)
  level_1_hw = (height - 4, width - 4)
  expected_num_anchors = (
      level_0_hw[0] * level_0_hw[1] + level_1_hw[0] * level_1_hw[1]) * 6

  expected_output_shapes = {
      'rpn_box_predictor_features_0':
          (batch_size, level_0_hw[0], level_0_hw[1], 512),
      'rpn_box_predictor_features_1':
          (batch_size, level_1_hw[0], level_1_hw[1], 512),
      'rpn_features_to_crop_0': (batch_size, level_0_hw[0], level_0_hw[1], 3),
      'rpn_features_to_crop_1': (batch_size, level_1_hw[0], level_1_hw[1], 3),
      'rpn_box_encodings': (batch_size, expected_num_anchors, 4),
      'rpn_objectness_predictions_with_background':
          (batch_size, expected_num_anchors, 2),
  }

  if use_static_shapes:
    expected_output_shapes['anchors'] = (expected_num_anchors, 4)
    results = self.execute(graph_fn, [images], graph=g)
  else:
    # NOTE(review): 18300 is the anchor count expected without static shapes;
    # presumably some anchors are filtered out in that mode -- confirm.
    expected_output_shapes['anchors'] = (18300, 4)
    results = self.execute_cpu(graph_fn, [images], graph=g)

  # Unpack by name so that each check below targets the intended output.
  (rpn_box_predictor_features_0, rpn_box_predictor_features_1,
   rpn_features_to_crop_0, rpn_features_to_crop_1, image_shape,
   rpn_box_encodings, rpn_objectness_predictions_with_background,
   anchors) = results

  self.assertAllEqual(rpn_box_predictor_features_0.shape,
                      expected_output_shapes['rpn_box_predictor_features_0'])
  self.assertAllEqual(rpn_box_predictor_features_1.shape,
                      expected_output_shapes['rpn_box_predictor_features_1'])
  self.assertAllEqual(rpn_features_to_crop_0.shape,
                      expected_output_shapes['rpn_features_to_crop_0'])
  self.assertAllEqual(rpn_features_to_crop_1.shape,
                      expected_output_shapes['rpn_features_to_crop_1'])
  self.assertAllEqual(image_shape, input_image_shape)
  self.assertAllEqual(rpn_box_encodings.shape,
                      expected_output_shapes['rpn_box_encodings'])
  self.assertAllEqual(
      rpn_objectness_predictions_with_background.shape,
      expected_output_shapes['rpn_objectness_predictions_with_background'])
  self.assertAllEqual(anchors.shape, expected_output_shapes['anchors'])

  # Check that anchors are clipped to window. The anchors are the last
  # returned output (index 7); index 5 holds the box encodings, which the
  # previous version of this check inspected by mistake.
  self.assertTrue(np.all(np.greater_equal(anchors, 0)))
  self.assertTrue(np.all(np.less_equal(anchors[:, 0], height)))
  self.assertTrue(np.all(np.less_equal(anchors[:, 1], width)))
  self.assertTrue(np.all(np.less_equal(anchors[:, 2], height)))
  self.assertTrue(np.all(np.less_equal(anchors[:, 3], width)))
def
test_regularization_losses
(
self
):
with
test_utils
.
GraphContextOrNone
()
as
g
:
model
=
self
.
_build_model
(
...
...
@@ -601,9 +774,9 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
def
compare_results
(
results
,
expected_output_shapes
):
"""Checks if the shape of the predictions are as expected."""
self
.
assertAllEqual
(
results
[
0
].
shape
,
self
.
assertAllEqual
(
results
[
0
]
[
0
]
.
shape
,
expected_output_shapes
[
'rpn_box_predictor_features'
])
self
.
assertAllEqual
(
results
[
1
].
shape
,
self
.
assertAllEqual
(
results
[
1
]
[
0
]
.
shape
,
expected_output_shapes
[
'rpn_features_to_crop'
])
self
.
assertAllEqual
(
results
[
2
].
shape
,
expected_output_shapes
[
'image_shape'
])
...
...
@@ -746,8 +919,8 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
result_tensor_dict
[
'anchors'
],
result_tensor_dict
[
'rpn_box_encodings'
],
result_tensor_dict
[
'rpn_objectness_predictions_with_background'
],
result_tensor_dict
[
'rpn_features_to_crop'
],
result_tensor_dict
[
'rpn_box_predictor_features'
],
result_tensor_dict
[
'rpn_features_to_crop'
]
[
0
]
,
result_tensor_dict
[
'rpn_box_predictor_features'
]
[
0
]
,
result_tensor_dict
[
'final_anchors'
],
)
...
...
research/object_detection/meta_architectures/rfcn_meta_arch.py
View file @
0ba5a72b
...
...
@@ -265,7 +265,7 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
[batch_size, num_valid_anchors, 2] containing class
predictions (logits) for each of the anchors. Note that this
tensor *includes* background class predictions (at class index 0).
rpn_features: A 4-D float32 tensor with shape
rpn_features: A
single-element list containing a
4-D float32 tensor with shape
[batch_size, height, width, depth] representing image features from the
RPN.
anchors: 2-D float tensor of shape
...
...
@@ -313,6 +313,7 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
rpn_objectness_predictions_with_background
,
anchors
,
image_shape_2d
,
true_image_shapes
)
rpn_features
=
rpn_features
[
0
]
box_classifier_features
=
(
self
.
_extract_box_classifier_features
(
rpn_features
))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment