ModelZoo / ResNet50_tensorflow · Commit e00e0e13

Authored Dec 03, 2018 by dreamdragon

    Merge remote-tracking branch 'upstream/master'

Parents: b915db4e, 402b561b

Changes: 205 files changed in the merge; showing 20 changed files with 1040 additions and 356 deletions (+1040 −356).
Changed files:
  research/object_detection/inputs.py                                             +5   −3
  research/object_detection/inputs_test.py                                        +3   −0
  research/object_detection/legacy/trainer_test.py                                +23  −0
  research/object_detection/meta_architectures/faster_rcnn_meta_arch.py           +97  −28
  research/object_detection/meta_architectures/faster_rcnn_meta_arch_test.py      +4   −1
  research/object_detection/meta_architectures/faster_rcnn_meta_arch_test_lib.py  +275 −187
  research/object_detection/meta_architectures/ssd_meta_arch.py                   +140 −28
  research/object_detection/meta_architectures/ssd_meta_arch_test.py              +10  −11
  research/object_detection/meta_architectures/ssd_meta_arch_test_lib.py          +6   −4
  research/object_detection/metrics/coco_evaluation.py                            +108 −45
  research/object_detection/metrics/coco_evaluation_test.py                       +227 −16
  research/object_detection/model_lib.py                                          +26  −8
  research/object_detection/model_lib_test.py                                     +1   −1
  research/object_detection/model_tpu_main.py                                     +1   −0
  research/object_detection/models/faster_rcnn_resnet_v1_feature_extractor.py     +7   −2
  research/object_detection/models/feature_map_generators.py                      +34  −5
  research/object_detection/models/feature_map_generators_test.py                 +37  −2
  research/object_detection/models/keras_applications/mobilenet_v2.py             +7   −0
  research/object_detection/models/ssd_mobilenet_v1_fpn_feature_extractor.py      +14  −6
  research/object_detection/models/ssd_mobilenet_v2_fpn_feature_extractor.py      +15  −9
research/object_detection/inputs.py

@@ -124,6 +124,8 @@ def transform_input_data(tensor_dict,
   if fields.InputDataFields.groundtruth_instance_masks in tensor_dict:
     masks = tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
     _, resized_masks, _ = image_resizer_fn(image, masks)
+    if use_bfloat16:
+      resized_masks = tf.cast(resized_masks, tf.bfloat16)
     tensor_dict[fields.InputDataFields.groundtruth_instance_masks] = (
         resized_masks)
@@ -161,6 +163,9 @@ def transform_input_data(tensor_dict,
     tensor_dict[fields.InputDataFields.groundtruth_classes] = merged_classes
     tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
         merged_confidences)
+  if fields.InputDataFields.groundtruth_boxes in tensor_dict:
+    tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = tf.shape(
+        tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]
   return tensor_dict
@@ -282,12 +287,9 @@ def augment_input_data(tensor_dict, data_augmentation_options):
                             in tensor_dict)
   include_keypoints = (fields.InputDataFields.groundtruth_keypoints
                        in tensor_dict)
   include_label_scores = (fields.InputDataFields.groundtruth_confidences
                           in tensor_dict)
   tensor_dict = preprocessor.preprocess(
       tensor_dict, data_augmentation_options,
       func_arg_map=preprocessor.get_default_func_arg_map(
           include_label_scores=include_label_scores,
           include_instance_masks=include_instance_masks,
           include_keypoints=include_keypoints))
   tensor_dict[fields.InputDataFields.image] = tf.squeeze(
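For context on the num_groundtruth_boxes bookkeeping added above: the field records the dynamic, unpadded box count so downstream batching code can slice padding back off. A minimal standalone sketch (toy tensors, not the project's real input pipeline):

import tensorflow as tf

# Toy ground-truth boxes for one image; two real boxes.
groundtruth_boxes = tf.constant([[0., 0., .5, .5],
                                 [.5, .5, 1., 1.]])
# tf.shape(...)[0] yields the dynamic box count (here 2), which the
# transform stores under fields.InputDataFields.num_groundtruth_boxes.
num_groundtruth_boxes = tf.shape(groundtruth_boxes)[0]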
research/object_detection/inputs_test.py

@@ -630,6 +630,9 @@ class DataTransformationFnTest(test_case.TestCase):
     self.assertAllClose(
         transformed_inputs[fields.InputDataFields.groundtruth_confidences],
         [[1, 0, 1]])
+    self.assertAllClose(
+        transformed_inputs[fields.InputDataFields.num_groundtruth_boxes],
+        1)

   def test_returns_resized_masks(self):
     tensor_dict = {
research/object_detection/legacy/trainer_test.py

@@ -160,6 +160,17 @@ class FakeDetectionModel(model.DetectionModel):
     }
     return loss_dict

+  def regularization_losses(self):
+    """Returns a list of regularization losses for this model.
+
+    Returns a list of regularization losses for this model that the estimator
+    needs to use during training/optimization.
+
+    Returns:
+      A list of regularization loss tensors.
+    """
+    pass
+
   def restore_map(self, fine_tune_checkpoint_type='detection'):
     """Returns a map of variables to load from a foreign checkpoint.
@@ -174,6 +185,18 @@ class FakeDetectionModel(model.DetectionModel):
     """
     return {var.op.name: var for var in tf.global_variables()}

+  def updates(self):
+    """Returns a list of update operators for this model.
+
+    Returns a list of update operators for this model that must be executed at
+    each training step. The estimator's train op needs to have a control
+    dependency on these updates.
+
+    Returns:
+      A list of update operators.
+    """
+    pass
+

 class TrainerTest(tf.test.TestCase):
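The updates() docstring above describes the standard TF1 pattern for batch-norm style update ops. A minimal sketch of how an estimator's train op might consume it; model, optimizer, and total_loss are assumed stand-ins for this illustration, not names from the diff:

# Hypothetical wiring; the real estimator plumbing lives in model_lib.py.
update_ops = model.updates()  # e.g. moving-average updates from batch norm
with tf.control_dependencies(update_ops):
  # The train op now runs only after every update op has executed.
  train_op = optimizer.minimize(total_loss)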
research/object_detection/meta_architectures/faster_rcnn_meta_arch.py

@@ -662,7 +662,8 @@ class FasterRCNNMetaArch(model.DetectionModel):
             anchors_boxlist, clip_window)
       else:
         anchors_boxlist = box_list_ops.clip_to_window(
-            anchors_boxlist, clip_window)
+            anchors_boxlist, clip_window,
+            filter_nonoverlapping=not self._use_static_shapes)
       self._anchors = anchors_boxlist
       prediction_dict = {
@@ -917,12 +918,14 @@ class FasterRCNNMetaArch(model.DetectionModel):
         _, num_classes, mask_height, mask_width = (
             detection_masks.get_shape().as_list())
         _, max_detection = detection_classes.get_shape().as_list()
+        prediction_dict['mask_predictions'] = tf.reshape(
+            detection_masks, [-1, num_classes, mask_height, mask_width])
         if num_classes > 1:
           detection_masks = self._gather_instance_masks(
               detection_masks, detection_classes)
         prediction_dict[fields.DetectionResultFields.detection_masks] = (
-            tf.reshape(detection_masks,
+            tf.reshape(tf.sigmoid(detection_masks),
                        [batch_size, max_detection, mask_height, mask_width]))
     return prediction_dict
@@ -1159,9 +1162,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
     }
     # TODO(jrru): Remove mask_predictions from _post_process_box_classifier.
-    with tf.name_scope('SecondStagePostprocessor'):
-      if (self._number_of_stages == 2 or
-          (self._number_of_stages == 3 and self._is_training)):
+    if (self._number_of_stages == 2 or
+        (self._number_of_stages == 3 and self._is_training)):
+      with tf.name_scope('SecondStagePostprocessor'):
         mask_predictions = prediction_dict.get(box_predictor.MASK_PREDICTIONS)
         detections_dict = self._postprocess_box_classifier(
             prediction_dict['refined_box_encodings'],
@@ -1170,18 +1173,53 @@ class FasterRCNNMetaArch(model.DetectionModel):
             prediction_dict['num_proposals'],
             true_image_shapes,
             mask_predictions=mask_predictions)
+      if 'rpn_features_to_crop' in prediction_dict and self._initial_crop_size:
+        self._add_detection_features_output_node(
+            detections_dict[fields.DetectionResultFields.detection_boxes],
+            prediction_dict['rpn_features_to_crop'])
       return detections_dict

     if self._number_of_stages == 3:
       # Post processing is already performed in 3rd stage. We need to transfer
       # postprocessed tensors from `prediction_dict` to `detections_dict`.
       detections_dict = {}
       for key in prediction_dict:
         if key == fields.DetectionResultFields.detection_masks:
           detections_dict[key] = tf.sigmoid(prediction_dict[key])
         elif 'detection' in key:
           detections_dict[key] = prediction_dict[key]
       return detections_dict
     return prediction_dict

+  def _add_detection_features_output_node(self, detection_boxes,
+                                          rpn_features_to_crop):
+    """Add the detection features to the output node.
+
+    The detection features are from cropping rpn_features with boxes.
+    Each bounding box has one feature vector of length depth, which comes from
+    mean_pooling of the cropped rpn_features.
+
+    Args:
+      detection_boxes: a 3-D float32 tensor of shape
+        [batch_size, max_detection, 4] which represents the bounding boxes.
+      rpn_features_to_crop: A 4-D float32 tensor with shape
+        [batch, height, width, depth] representing image features to crop using
+        the proposals boxes.
+    """
+    with tf.name_scope('SecondStageDetectionFeaturesExtract'):
+      flattened_detected_feature_maps = (
+          self._compute_second_stage_input_feature_maps(
+              rpn_features_to_crop, detection_boxes))
+      detection_features_unpooled = (
+          self._feature_extractor.extract_box_classifier_features(
+              flattened_detected_feature_maps,
+              scope=self.second_stage_feature_extractor_scope))
+      batch_size = tf.shape(detection_boxes)[0]
+      max_detection = tf.shape(detection_boxes)[1]
+      detection_features_pool = tf.reduce_mean(
+          detection_features_unpooled, axis=[1, 2])
+      detection_features = tf.reshape(
+          detection_features_pool,
+          [batch_size, max_detection, tf.shape(detection_features_pool)[-1]])
+      detection_features = tf.identity(detection_features,
+                                       'detection_features')
+
   def _postprocess_rpn(self,
                        rpn_box_encodings_batch,
@@ -1454,6 +1492,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
     # to cls_weights. This could happen as boxes within certain IOU ranges
     # are ignored. If triggered, the selected boxes will still be ignored
     # during loss computation.
+    cls_weights = tf.reduce_mean(cls_weights, axis=-1)
     positive_indicator = tf.greater(tf.argmax(cls_targets, axis=1), 0)
     valid_indicator = tf.logical_and(
         tf.range(proposal_boxlist.num_boxes()) < num_valid_proposals,
@@ -1566,6 +1605,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
       mask_predictions_batch = tf.reshape(
           mask_predictions, [-1, self.max_num_proposals,
                              self.num_classes, mask_height, mask_width])
     (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, _,
      num_detections) = self._second_stage_nms_fn(
          refined_decoded_boxes_batch,
@@ -1713,6 +1753,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
         gt_box_batch=groundtruth_boxlists,
         gt_class_targets_batch=(len(groundtruth_boxlists) * [None]),
         gt_weights_batch=groundtruth_weights_list)
+    batch_cls_weights = tf.reduce_mean(batch_cls_weights, axis=2)
     batch_cls_targets = tf.squeeze(batch_cls_targets, axis=2)

     def _minibatch_subsample_fn(inputs):
@@ -1743,7 +1784,8 @@ class FasterRCNNMetaArch(model.DetectionModel):
         losses_mask=losses_mask)
     objectness_losses = self._first_stage_objectness_loss(
         rpn_objectness_predictions_with_background,
-        batch_one_hot_targets, weights=batch_sampled_indices,
+        batch_one_hot_targets,
+        weights=tf.expand_dims(batch_sampled_indices, axis=-1),
         losses_mask=losses_mask)
     localization_loss = tf.reduce_mean(
         tf.reduce_sum(localization_losses, axis=1) / normalizer)
@@ -1960,25 +2002,28 @@ class FasterRCNNMetaArch(model.DetectionModel):
             tf.expand_dims(flat_gt_masks, -1),
             tf.expand_dims(flat_normalized_proposals, axis=1),
             [mask_height, mask_width])
         # Without stopping gradients into cropped groundtruth masks the
         # performance with 100-padded groundtruth masks when batch size > 1 is
         # about 4% worse.
         # TODO(rathodv): Investigate this since we don't expect any variables
         # upstream of flat_cropped_gt_mask.
         flat_cropped_gt_mask = tf.stop_gradient(flat_cropped_gt_mask)
         batch_cropped_gt_mask = tf.reshape(
             flat_cropped_gt_mask,
             [batch_size, -1, mask_height * mask_width])

-        second_stage_mask_losses = ops.reduce_sum_trailing_dimensions(
-            self._second_stage_mask_loss(
-                reshaped_prediction_masks,
-                batch_cropped_gt_mask,
-                weights=batch_mask_target_weights,
-                losses_mask=losses_mask),
-            ndims=2) / (
-                mask_height * mask_width * tf.maximum(
-                    tf.reduce_sum(batch_mask_target_weights, axis=1,
-                                  keep_dims=True),
-                    tf.ones((batch_size, 1))))
-        second_stage_mask_loss = tf.reduce_sum(
-            tf.where(paddings_indicator, second_stage_mask_losses,
-                     tf.zeros_like(second_stage_mask_losses)))
+        mask_losses_weights = (
+            batch_mask_target_weights * tf.to_float(paddings_indicator))
+        mask_losses = self._second_stage_mask_loss(
+            reshaped_prediction_masks,
+            batch_cropped_gt_mask,
+            weights=tf.expand_dims(mask_losses_weights, axis=-1),
+            losses_mask=losses_mask)
+        total_mask_loss = tf.reduce_sum(mask_losses)
+        normalizer = tf.maximum(
+            tf.reduce_sum(mask_losses_weights * mask_height * mask_width),
+            1.0)
+        second_stage_mask_loss = total_mask_loss / normalizer

       if second_stage_mask_loss is not None:
         mask_loss = tf.multiply(self._second_stage_mask_loss_weight,
@@ -2073,6 +2118,17 @@ class FasterRCNNMetaArch(model.DetectionModel):
         cls_losses=tf.expand_dims(single_image_cls_loss, 0),
         decoded_boxlist_list=[proposal_boxlist])

+  def regularization_losses(self):
+    """Returns a list of regularization losses for this model.
+
+    Returns a list of regularization losses for this model that the estimator
+    needs to use during training/optimization.
+
+    Returns:
+      A list of regularization loss tensors.
+    """
+    return tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
+
   def restore_map(self,
                   fine_tune_checkpoint_type='detection',
                   load_all_detection_checkpoint_vars=False):
@@ -2117,3 +2173,16 @@ class FasterRCNNMetaArch(model.DetectionModel):
     feature_extractor_variables = tf.contrib.framework.filter_variables(
         variables_to_restore, include_patterns=include_patterns)
     return {var.op.name: var for var in feature_extractor_variables}
+
+  def updates(self):
+    """Returns a list of update operators for this model.
+
+    Returns a list of update operators for this model that must be executed at
+    each training step. The estimator's train op needs to have a control
+    dependency on these updates.
+
+    Returns:
+      A list of update operators.
+    """
+    return tf.get_collection(tf.GraphKeys.UPDATE_OPS)
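The _add_detection_features_output_node docstring says each detection box ends up with one depth-length feature vector obtained by mean-pooling its cropped RPN features. A standalone sketch of just that pooling step; the shapes here are invented for illustration:

import tensorflow as tf

batch_size, max_detection, crop_h, crop_w, depth = 2, 5, 7, 7, 64
# Stand-in for the per-box feature crops: one [crop_h, crop_w, depth]
# patch per detection, flattened across the batch.
cropped_features = tf.ones([batch_size * max_detection, crop_h, crop_w, depth])
# Mean-pool over the two spatial axes, then group the vectors back per image.
pooled = tf.reduce_mean(cropped_features, axis=[1, 2])
detection_features = tf.reshape(pooled, [batch_size, max_detection, depth])
# detection_features has shape [2, 5, 64]: one feature vector per box.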
research/object_detection/meta_architectures/faster_rcnn_meta_arch_test.py

@@ -189,7 +189,7 @@ class FasterRCNNMetaArchTest(
         set(expected_shapes.keys()).union(
             set([
                 'detection_boxes', 'detection_scores', 'detection_classes',
-                'detection_masks', 'num_detections'
+                'detection_masks', 'num_detections', 'mask_predictions',
             ])))
     for key in expected_shapes:
       self.assertAllEqual(tensor_dict_out[key].shape, expected_shapes[key])
@@ -199,6 +199,9 @@ class FasterRCNNMetaArchTest(
     self.assertAllEqual(tensor_dict_out['detection_classes'].shape, [2, 5])
     self.assertAllEqual(tensor_dict_out['detection_scores'].shape, [2, 5])
     self.assertAllEqual(tensor_dict_out['num_detections'].shape, [2])
+    num_classes = 1 if masks_are_class_agnostic else 2
+    self.assertAllEqual(tensor_dict_out['mask_predictions'].shape,
+                        [10, num_classes, 14, 14])

   @parameterized.parameters(
       {'masks_are_class_agnostic': False},
research/object_detection/meta_architectures/faster_rcnn_meta_arch_test_lib.py

@@ -250,6 +250,7 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
         iou_threshold: 1.0
         max_detections_per_class: 5
         max_total_detections: 5
+        use_static_shapes: """ + '{}'.format(use_static_shapes) + """
       }
     """
    post_processing_config = post_processing_pb2.PostProcessing()
@@ -336,61 +337,71 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
             masks_are_class_agnostic=masks_are_class_agnostic),
         **common_kwargs)

   def test_predict_gives_correct_shapes_in_inference_mode_first_stage_only(
-      self):
-    test_graph = tf.Graph()
-    with test_graph.as_default():
-      model = self._build_model(
-          is_training=False, number_of_stages=1, second_stage_batch_size=2)
-      batch_size = 2
-      height = 10
-      width = 12
-      input_image_shape = (batch_size, height, width, 3)
-      _, true_image_shapes = model.preprocess(tf.zeros(input_image_shape))
-      preprocessed_inputs = tf.placeholder(
-          dtype=tf.float32, shape=(batch_size, None, None, 3))
-      prediction_dict = model.predict(preprocessed_inputs, true_image_shapes)
+      self, use_static_shapes=False):
+    batch_size = 2
+    height = 10
+    width = 12
+    input_image_shape = (batch_size, height, width, 3)
+
+    def graph_fn(images):
+      """Function to construct tf graph for the test."""
+      model = self._build_model(
+          is_training=False,
+          number_of_stages=1,
+          second_stage_batch_size=2,
+          clip_anchors_to_image=use_static_shapes,
+          use_static_shapes=use_static_shapes)
+      preprocessed_inputs, true_image_shapes = model.preprocess(images)
+      prediction_dict = model.predict(preprocessed_inputs, true_image_shapes)
+      return (prediction_dict['rpn_box_predictor_features'],
+              prediction_dict['rpn_features_to_crop'],
+              prediction_dict['image_shape'],
+              prediction_dict['rpn_box_encodings'],
+              prediction_dict['rpn_objectness_predictions_with_background'],
+              prediction_dict['anchors'])
+
+    images = np.zeros(input_image_shape, dtype=np.float32)

     # In inference mode, anchors are clipped to the image window, but not
     # pruned. Since MockFasterRCNN.extract_proposal_features returns a
     # tensor with the same shape as its input, the expected number of anchors
     # is height * width * the number of anchors per location (i.e. 3x3).
     expected_num_anchors = height * width * 3 * 3
-    expected_output_keys = set([
-        'rpn_box_predictor_features', 'rpn_features_to_crop', 'image_shape',
-        'rpn_box_encodings', 'rpn_objectness_predictions_with_background',
-        'anchors'])
     expected_output_shapes = {
         'rpn_box_predictor_features': (batch_size, height, width, 512),
         'rpn_features_to_crop': (batch_size, height, width, 3),
         'rpn_box_encodings': (batch_size, expected_num_anchors, 4),
         'rpn_objectness_predictions_with_background':
         (batch_size, expected_num_anchors, 2),
         'anchors': (expected_num_anchors, 4)
     }
-    init_op = tf.global_variables_initializer()
-    with self.test_session(graph=test_graph) as sess:
-      sess.run(init_op)
-      prediction_out = sess.run(prediction_dict,
-                                feed_dict={
-                                    preprocessed_inputs:
-                                    np.zeros(input_image_shape)})
-      self.assertEqual(set(prediction_out.keys()), expected_output_keys)
-      self.assertAllEqual(prediction_out['image_shape'], input_image_shape)
-      for output_key, expected_shape in expected_output_shapes.items():
-        self.assertAllEqual(prediction_out[output_key].shape, expected_shape)
+    if use_static_shapes:
+      results = self.execute(graph_fn, [images])
+    else:
+      results = self.execute_cpu(graph_fn, [images])
+    self.assertAllEqual(results[0].shape,
+                        expected_output_shapes['rpn_box_predictor_features'])
+    self.assertAllEqual(results[1].shape,
+                        expected_output_shapes['rpn_features_to_crop'])
+    self.assertAllEqual(results[2], input_image_shape)
+    self.assertAllEqual(results[3].shape,
+                        expected_output_shapes['rpn_box_encodings'])
+    self.assertAllEqual(
+        results[4].shape,
+        expected_output_shapes['rpn_objectness_predictions_with_background'])
+    self.assertAllEqual(results[5].shape, expected_output_shapes['anchors'])

     # Check that anchors are clipped to window.
-    anchors = prediction_out['anchors']
+    anchors = results[5]
     self.assertTrue(np.all(np.greater_equal(anchors, 0)))
     self.assertTrue(np.all(np.less_equal(anchors[:, 0], height)))
     self.assertTrue(np.all(np.less_equal(anchors[:, 1], width)))
     self.assertTrue(np.all(np.less_equal(anchors[:, 2], height)))
     self.assertTrue(np.all(np.less_equal(anchors[:, 3], width)))

   def test_predict_gives_valid_anchors_in_training_mode_first_stage_only(self):
     test_graph = tf.Graph()
@@ -446,7 +457,38 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
         prediction_out['rpn_objectness_predictions_with_background'].shape,
         (batch_size, num_anchors_out, 2))

-  def test_predict_correct_shapes_in_inference_mode_two_stages(self):
+  def test_predict_correct_shapes_in_inference_mode_two_stages(
+      self, use_static_shapes=False):
+
+    def compare_results(results, expected_output_shapes):
+      """Checks if the shape of the predictions are as expected."""
+      self.assertAllEqual(results[0].shape,
+                          expected_output_shapes['rpn_box_predictor_features'])
+      self.assertAllEqual(results[1].shape,
+                          expected_output_shapes['rpn_features_to_crop'])
+      self.assertAllEqual(results[2].shape,
+                          expected_output_shapes['image_shape'])
+      self.assertAllEqual(results[3].shape,
+                          expected_output_shapes['rpn_box_encodings'])
+      self.assertAllEqual(
+          results[4].shape,
+          expected_output_shapes['rpn_objectness_predictions_with_background'])
+      self.assertAllEqual(results[5].shape,
+                          expected_output_shapes['anchors'])
+      self.assertAllEqual(results[6].shape,
+                          expected_output_shapes['refined_box_encodings'])
+      self.assertAllEqual(
+          results[7].shape,
+          expected_output_shapes['class_predictions_with_background'])
+      self.assertAllEqual(results[8].shape,
+                          expected_output_shapes['num_proposals'])
+      self.assertAllEqual(results[9].shape,
+                          expected_output_shapes['proposal_boxes'])
+      self.assertAllEqual(results[10].shape,
+                          expected_output_shapes['proposal_boxes_normalized'])
+      self.assertAllEqual(results[11].shape,
+                          expected_output_shapes['box_classifier_features'])
+
     batch_size = 2
     image_size = 10
     max_num_proposals = 8
@@ -457,6 +499,32 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
         (None, image_size, image_size, 3),
         (batch_size, None, None, 3),
         (None, None, None, 3)]
+
+    def graph_fn_tpu(images):
+      """Function to construct tf graph for the test."""
+      model = self._build_model(
+          is_training=False,
+          number_of_stages=2,
+          second_stage_batch_size=2,
+          predict_masks=False,
+          use_matmul_crop_and_resize=use_static_shapes,
+          clip_anchors_to_image=use_static_shapes,
+          use_static_shapes=use_static_shapes)
+      preprocessed_inputs, true_image_shapes = model.preprocess(images)
+      prediction_dict = model.predict(preprocessed_inputs, true_image_shapes)
+      return (prediction_dict['rpn_box_predictor_features'],
+              prediction_dict['rpn_features_to_crop'],
+              prediction_dict['image_shape'],
+              prediction_dict['rpn_box_encodings'],
+              prediction_dict['rpn_objectness_predictions_with_background'],
+              prediction_dict['anchors'],
+              prediction_dict['refined_box_encodings'],
+              prediction_dict['class_predictions_with_background'],
+              prediction_dict['num_proposals'],
+              prediction_dict['proposal_boxes'],
+              prediction_dict['proposal_boxes_normalized'],
+              prediction_dict['box_classifier_features'])
+
     expected_num_anchors = image_size * image_size * 3 * 3
     expected_shapes = {
         'rpn_box_predictor_features':
@@ -481,28 +549,34 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
             3)
     }

-    for input_shape in input_shapes:
-      test_graph = tf.Graph()
-      with test_graph.as_default():
-        model = self._build_model(
-            is_training=False,
-            number_of_stages=2,
-            second_stage_batch_size=2,
-            predict_masks=False)
-        preprocessed_inputs = tf.placeholder(tf.float32, shape=input_shape)
-        _, true_image_shapes = model.preprocess(preprocessed_inputs)
-        result_tensor_dict = model.predict(preprocessed_inputs,
-                                           true_image_shapes)
-        init_op = tf.global_variables_initializer()
-      with self.test_session(graph=test_graph) as sess:
-        sess.run(init_op)
-        tensor_dict_out = sess.run(result_tensor_dict, feed_dict={
-            preprocessed_inputs:
-            np.zeros((batch_size, image_size, image_size, 3))})
-      self.assertEqual(set(tensor_dict_out.keys()),
-                       set(expected_shapes.keys()))
-      for key in expected_shapes:
-        self.assertAllEqual(tensor_dict_out[key].shape, expected_shapes[key])
+    if use_static_shapes:
+      input_shape = (batch_size, image_size, image_size, 3)
+      images = np.zeros(input_shape, dtype=np.float32)
+      results = self.execute(graph_fn_tpu, [images])
+      compare_results(results, expected_shapes)
+    else:
+      for input_shape in input_shapes:
+        test_graph = tf.Graph()
+        with test_graph.as_default():
+          model = self._build_model(
+              is_training=False,
+              number_of_stages=2,
+              second_stage_batch_size=2,
+              predict_masks=False)
+          preprocessed_inputs = tf.placeholder(tf.float32, shape=input_shape)
+          _, true_image_shapes = model.preprocess(preprocessed_inputs)
+          result_tensor_dict = model.predict(preprocessed_inputs,
+                                             true_image_shapes)
+          init_op = tf.global_variables_initializer()
+        with self.test_session(graph=test_graph) as sess:
+          sess.run(init_op)
+          tensor_dict_out = sess.run(result_tensor_dict, feed_dict={
+              preprocessed_inputs:
+              np.zeros((batch_size, image_size, image_size, 3))})
+        self.assertEqual(set(tensor_dict_out.keys()),
+                         set(expected_shapes.keys()))
+        for key in expected_shapes:
+          self.assertAllEqual(tensor_dict_out[key].shape,
+                              expected_shapes[key])

   def test_predict_gives_correct_shapes_in_train_mode_both_stages(
       self,
@@ -596,23 +670,46 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
     self.assertAllEqual(results[8].shape,
                         expected_shapes['rpn_box_predictor_features'])

-  def _test_postprocess_first_stage_only_inference_mode(
-      self, pad_to_max_dimension=None):
-    model = self._build_model(
-        is_training=False, number_of_stages=1, second_stage_batch_size=6,
-        pad_to_max_dimension=pad_to_max_dimension)
+  def test_postprocess_first_stage_only_inference_mode(
+      self, use_static_shapes=False, pad_to_max_dimension=None):
     batch_size = 2
-    anchors = tf.constant(
+    first_stage_max_proposals = 4 if use_static_shapes else 8
+
+    def graph_fn(images,
+                 rpn_box_encodings,
+                 rpn_objectness_predictions_with_background,
+                 rpn_features_to_crop,
+                 anchors):
+      """Function to construct tf graph for the test."""
+      model = self._build_model(
+          is_training=False,
+          number_of_stages=1,
+          second_stage_batch_size=6,
+          use_matmul_crop_and_resize=use_static_shapes,
+          clip_anchors_to_image=use_static_shapes,
+          use_static_shapes=use_static_shapes,
+          use_matmul_gather_in_matcher=use_static_shapes,
+          first_stage_max_proposals=first_stage_max_proposals,
+          pad_to_max_dimension=pad_to_max_dimension)
+      _, true_image_shapes = model.preprocess(images)
+      proposals = model.postprocess({
+          'rpn_box_encodings': rpn_box_encodings,
+          'rpn_objectness_predictions_with_background':
+          rpn_objectness_predictions_with_background,
+          'rpn_features_to_crop': rpn_features_to_crop,
+          'anchors': anchors}, true_image_shapes)
+      return (proposals['num_detections'], proposals['detection_boxes'],
+              proposals['detection_scores'])
+
+    anchors = np.array(
         [[0, 0, 16, 16],
          [0, 16, 16, 32],
          [16, 0, 32, 16],
-         [16, 16, 32, 32]], dtype=tf.float32)
-    rpn_box_encodings = tf.zeros(
-        [batch_size, anchors.get_shape().as_list()[0],
-         BOX_CODE_SIZE], dtype=tf.float32)
+         [16, 16, 32, 32]], dtype=np.float32)
+    rpn_box_encodings = np.zeros(
+        (batch_size, anchors.shape[0], BOX_CODE_SIZE), dtype=np.float32)
     # use different numbers for the objectness category to break ties in
     # order of boxes returned by NMS
-    rpn_objectness_predictions_with_background = tf.constant([
+    rpn_objectness_predictions_with_background = np.array([
        [[-10, 13],
         [10, -10],
         [10, -11],
@@ -620,16 +717,22 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
        [[10, -10],
         [-10, 13],
         [-10, 12],
-        [10, -11]]], dtype=tf.float32)
-    rpn_features_to_crop = tf.ones((batch_size, 8, 8, 10), dtype=tf.float32)
-    image_shape = tf.constant([batch_size, 32, 32, 3], dtype=tf.int32)
-    _, true_image_shapes = model.preprocess(tf.zeros(image_shape))
-    proposals = model.postprocess({
-        'rpn_box_encodings': rpn_box_encodings,
-        'rpn_objectness_predictions_with_background':
-        rpn_objectness_predictions_with_background,
-        'rpn_features_to_crop': rpn_features_to_crop,
-        'anchors': anchors}, true_image_shapes)
+        [10, -11]]], dtype=np.float32)
+    rpn_features_to_crop = np.ones((batch_size, 8, 8, 10), dtype=np.float32)
+    image_shape = (batch_size, 32, 32, 3)
+    images = np.zeros(image_shape, dtype=np.float32)
+
+    if use_static_shapes:
+      results = self.execute(graph_fn,
+                             [images, rpn_box_encodings,
+                              rpn_objectness_predictions_with_background,
+                              rpn_features_to_crop, anchors])
+    else:
+      results = self.execute_cpu(graph_fn,
+                                 [images, rpn_box_encodings,
+                                  rpn_objectness_predictions_with_background,
+                                  rpn_features_to_crop, anchors])
     expected_proposal_boxes = [
         [[0, 0, .5, .5], [.5, .5, 1, 1], [0, .5, .5, 1], [.5, 0, 1.0, .5]]
         + 4 * [4 * [0]],
@@ -639,24 +742,12 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
         [1, 1, 0, 0, 0, 0, 0, 0]]
     expected_num_proposals = [4, 4]

-    expected_output_keys = set(['detection_boxes', 'detection_scores',
-                                'num_detections'])
-    self.assertEqual(set(proposals.keys()), expected_output_keys)
-    with self.test_session() as sess:
-      proposals_out = sess.run(proposals)
-      self.assertAllClose(proposals_out['detection_boxes'],
-                          expected_proposal_boxes)
-      self.assertAllClose(proposals_out['detection_scores'],
-                          expected_proposal_scores)
-      self.assertAllEqual(proposals_out['num_detections'],
-                          expected_num_proposals)
-
-  def test_postprocess_first_stage_only_inference_mode(self):
-    self._test_postprocess_first_stage_only_inference_mode()
-
-  def test_postprocess_first_stage_only_inference_mode_padded_image(self):
-    self._test_postprocess_first_stage_only_inference_mode(
-        pad_to_max_dimension=56)
+    self.assertAllClose(results[0], expected_num_proposals)
+    for indx, num_proposals in enumerate(expected_num_proposals):
+      self.assertAllClose(results[1][indx][0:num_proposals],
+                          expected_proposal_boxes[indx][0:num_proposals])
+      self.assertAllClose(results[2][indx][0:num_proposals],
+                          expected_proposal_scores[indx][0:num_proposals])

   def _test_postprocess_first_stage_only_train_mode(self,
                                                     pad_to_max_dimension=None):
@@ -733,83 +824,80 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
   def test_postprocess_first_stage_only_train_mode_padded_image(self):
     self._test_postprocess_first_stage_only_train_mode(pad_to_max_dimension=56)

-  def _test_postprocess_second_stage_only_inference_mode(
-      self, pad_to_max_dimension=None):
-    num_proposals_shapes = [(2), (None,)]
-    refined_box_encodings_shapes = [(16, 2, 4), (None, 2, 4)]
-    class_predictions_with_background_shapes = [(16, 3), (None, 3)]
-    proposal_boxes_shapes = [(2, 8, 4), (None, 8, 4)]
+  def test_postprocess_second_stage_only_inference_mode(
+      self, use_static_shapes=False, pad_to_max_dimension=None):
     batch_size = 2
+    num_classes = 2
     image_shape = np.array((2, 36, 48, 3), dtype=np.int32)
-    for (num_proposals_shape, refined_box_encoding_shape,
-         class_predictions_with_background_shape,
-         proposal_boxes_shape) in zip(num_proposals_shapes,
-                                      refined_box_encodings_shapes,
-                                      class_predictions_with_background_shapes,
-                                      proposal_boxes_shapes):
-      tf_graph = tf.Graph()
-      with tf_graph.as_default():
-        model = self._build_model(
-            is_training=False, number_of_stages=2,
-            second_stage_batch_size=6,
-            pad_to_max_dimension=pad_to_max_dimension)
-        _, true_image_shapes = model.preprocess(tf.zeros(image_shape))
-        total_num_padded_proposals = batch_size * model.max_num_proposals
-        proposal_boxes = np.array(
-            [[[1, 1, 2, 3],
-              [0, 0, 1, 1],
-              [.5, .5, .6, .6],
-              4 * [0], 4 * [0], 4 * [0], 4 * [0], 4 * [0]],
-             [[2, 3, 6, 8],
-              [1, 2, 5, 3],
-              4 * [0], 4 * [0], 4 * [0], 4 * [0], 4 * [0], 4 * [0]]])
-        num_proposals = np.array([3, 2], dtype=np.int32)
-        refined_box_encodings = np.zeros(
-            [total_num_padded_proposals, model.num_classes, 4])
-        class_predictions_with_background = np.ones(
-            [total_num_padded_proposals, model.num_classes + 1])
-        num_proposals_placeholder = tf.placeholder(
-            tf.int32, shape=num_proposals_shape)
-        refined_box_encodings_placeholder = tf.placeholder(
-            tf.float32, shape=refined_box_encoding_shape)
-        class_predictions_with_background_placeholder = tf.placeholder(
-            tf.float32, shape=class_predictions_with_background_shape)
-        proposal_boxes_placeholder = tf.placeholder(
-            tf.float32, shape=proposal_boxes_shape)
-        image_shape_placeholder = tf.placeholder(tf.int32, shape=(4))
-        detections = model.postprocess({
-            'refined_box_encodings': refined_box_encodings_placeholder,
-            'class_predictions_with_background':
-            class_predictions_with_background_placeholder,
-            'num_proposals': num_proposals_placeholder,
-            'proposal_boxes': proposal_boxes_placeholder,
-        }, true_image_shapes)
-      with self.test_session(graph=tf_graph) as sess:
-        detections_out = sess.run(
-            detections,
-            feed_dict={
-                refined_box_encodings_placeholder: refined_box_encodings,
-                class_predictions_with_background_placeholder:
-                class_predictions_with_background,
-                num_proposals_placeholder: num_proposals,
-                proposal_boxes_placeholder: proposal_boxes,
-                image_shape_placeholder: image_shape
-            })
-      self.assertAllEqual(detections_out['detection_boxes'].shape, [2, 5, 4])
-      self.assertAllClose(detections_out['detection_scores'],
-                          [[1, 1, 1, 1, 1], [1, 1, 1, 1, 0]])
-      self.assertAllClose(detections_out['detection_classes'],
-                          [[0, 0, 0, 1, 1], [0, 0, 1, 1, 0]])
-      self.assertAllClose(detections_out['num_detections'], [5, 4])
-
-  def test_postprocess_second_stage_only_inference_mode(self):
-    self._test_postprocess_second_stage_only_inference_mode()
-
-  def test_postprocess_second_stage_only_inference_mode_padded_image(self):
-    self._test_postprocess_second_stage_only_inference_mode(
-        pad_to_max_dimension=56)
+    first_stage_max_proposals = 8
+    total_num_padded_proposals = batch_size * first_stage_max_proposals
+
+    def graph_fn(images,
+                 refined_box_encodings,
+                 class_predictions_with_background,
+                 num_proposals,
+                 proposal_boxes):
+      """Function to construct tf graph for the test."""
+      model = self._build_model(
+          is_training=False,
+          number_of_stages=2,
+          second_stage_batch_size=6,
+          use_matmul_crop_and_resize=use_static_shapes,
+          clip_anchors_to_image=use_static_shapes,
+          use_static_shapes=use_static_shapes,
+          use_matmul_gather_in_matcher=use_static_shapes,
+          pad_to_max_dimension=pad_to_max_dimension)
+      _, true_image_shapes = model.preprocess(images)
+      detections = model.postprocess({
+          'refined_box_encodings': refined_box_encodings,
+          'class_predictions_with_background':
+          class_predictions_with_background,
+          'num_proposals': num_proposals,
+          'proposal_boxes': proposal_boxes,
+      }, true_image_shapes)
+      return (detections['num_detections'], detections['detection_boxes'],
+              detections['detection_scores'], detections['detection_classes'])
+
+    proposal_boxes = np.array(
+        [[[1, 1, 2, 3],
+          [0, 0, 1, 1],
+          [.5, .5, .6, .6],
+          4 * [0], 4 * [0], 4 * [0], 4 * [0], 4 * [0]],
+         [[2, 3, 6, 8],
+          [1, 2, 5, 3],
+          4 * [0], 4 * [0], 4 * [0], 4 * [0], 4 * [0], 4 * [0]]],
+        dtype=np.float32)
+    num_proposals = np.array([3, 2], dtype=np.int32)
+    refined_box_encodings = np.zeros(
+        [total_num_padded_proposals, num_classes, 4], dtype=np.float32)
+    class_predictions_with_background = np.ones(
+        [total_num_padded_proposals, num_classes + 1], dtype=np.float32)
+    images = np.zeros(image_shape, dtype=np.float32)
+
+    if use_static_shapes:
+      results = self.execute(graph_fn,
+                             [images, refined_box_encodings,
+                              class_predictions_with_background,
+                              num_proposals, proposal_boxes])
+    else:
+      results = self.execute_cpu(graph_fn,
+                                 [images, refined_box_encodings,
+                                  class_predictions_with_background,
+                                  num_proposals, proposal_boxes])
+    expected_num_detections = [5, 4]
+    expected_detection_classes = [[0, 0, 0, 1, 1], [0, 0, 1, 1, 0]]
+    expected_detection_scores = [[1, 1, 1, 1, 1], [1, 1, 1, 1, 0]]
+
+    self.assertAllClose(results[0], expected_num_detections)
+    for indx, num_proposals in enumerate(expected_num_detections):
+      self.assertAllClose(results[2][indx][0:num_proposals],
+                          expected_detection_scores[indx][0:num_proposals])
+      self.assertAllClose(results[3][indx][0:num_proposals],
+                          expected_detection_classes[indx][0:num_proposals])
+    if not use_static_shapes:
+      self.assertAllEqual(results[1].shape, [2, 5, 4])

   def test_preprocess_preserves_input_shapes(self):
     image_shapes = [(3, None, None, 3),
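The rewritten tests above all follow the repo's test_case pattern: build the graph inside a graph_fn, then run it through self.execute (static-shape/TPU path) or self.execute_cpu (dynamic shapes). A schematic sketch of that pattern; the add-one graph here is invented purely for illustration:

import numpy as np

def graph_fn(images):
  # Any graph construction goes here; the test returns output tensors
  # as a tuple, which execute()/execute_cpu() evaluate to numpy arrays.
  return (images + 1.0,)

images = np.zeros((2, 4, 4, 3), dtype=np.float32)
# Inside a test_case.TestCase:
#   results = self.execute(graph_fn, [images])      # when use_static_shapes
#   results = self.execute_cpu(graph_fn, [images])  # otherwise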
research/object_detection/meta_architectures/ssd_meta_arch.py

@@ -19,7 +19,6 @@ models.
 """
 from abc import abstractmethod
-import re

 import tensorflow as tf

 from object_detection.core import box_list
@@ -116,6 +115,25 @@ class SSDFeatureExtractor(object):
     """
     raise NotImplementedError

+  def restore_from_classification_checkpoint_fn(self, feature_extractor_scope):
+    """Returns a map of variables to load from a foreign checkpoint.
+
+    Args:
+      feature_extractor_scope: A scope name for the feature extractor.
+
+    Returns:
+      A dict mapping variable names (to load from a checkpoint) to variables in
+      the model graph.
+    """
+    variables_to_restore = {}
+    for variable in tf.global_variables():
+      var_name = variable.op.name
+      if var_name.startswith(feature_extractor_scope + '/'):
+        var_name = var_name.replace(feature_extractor_scope + '/', '')
+        variables_to_restore[var_name] = variable
+    return variables_to_restore
+

 class SSDKerasFeatureExtractor(tf.keras.Model):
   """SSD Feature Extractor definition."""
@@ -218,6 +236,25 @@ class SSDKerasFeatureExtractor(tf.keras.Model):
   def call(self, inputs, **kwargs):
     return self._extract_features(inputs)

+  def restore_from_classification_checkpoint_fn(self, feature_extractor_scope):
+    """Returns a map of variables to load from a foreign checkpoint.
+
+    Args:
+      feature_extractor_scope: A scope name for the feature extractor.
+
+    Returns:
+      A dict mapping variable names (to load from a checkpoint) to variables in
+      the model graph.
+    """
+    variables_to_restore = {}
+    for variable in tf.global_variables():
+      var_name = variable.op.name
+      if var_name.startswith(feature_extractor_scope + '/'):
+        var_name = var_name.replace(feature_extractor_scope + '/', '')
+        variables_to_restore[var_name] = variable
+    return variables_to_restore
+

 class SSDMetaArch(model.DetectionModel):
   """SSD Meta-architecture definition."""
@@ -333,13 +370,15 @@ class SSDMetaArch(model.DetectionModel):
       # Slim feature extractors get an explicit naming scope
       self._extract_features_scope = 'FeatureExtractor'

     # TODO(jonathanhuang): handle agnostic mode
     # weights
-    self._unmatched_class_label = tf.constant([1] + self.num_classes * [0],
-                                              tf.float32)
-    if encode_background_as_zeros:
+    if self._add_background_class and encode_background_as_zeros:
       self._unmatched_class_label = tf.constant((self.num_classes + 1) * [0],
                                                 tf.float32)
+    elif self._add_background_class:
+      self._unmatched_class_label = tf.constant([1] + self.num_classes * [0],
+                                                tf.float32)
+    else:
+      self._unmatched_class_label = tf.constant(self.num_classes * [0],
+                                                tf.float32)

     self._target_assigner = target_assigner_instance
@@ -606,14 +645,22 @@ class SSDMetaArch(model.DetectionModel):
       detection_boxes = tf.identity(detection_boxes, 'raw_box_locations')
       detection_boxes = tf.expand_dims(detection_boxes, axis=2)

-      detection_scores_with_background = self._score_conversion_fn(
-          class_predictions)
-      detection_scores_with_background = tf.identity(
-          detection_scores_with_background, 'raw_box_scores')
-      detection_scores = tf.slice(detection_scores_with_background,
-                                  [0, 0, 1], [-1, -1, -1])
+      detection_scores = self._score_conversion_fn(class_predictions)
+      detection_scores = tf.identity(detection_scores, 'raw_box_scores')
+      if self._add_background_class:
+        detection_scores = tf.slice(detection_scores, [0, 0, 1],
+                                    [-1, -1, -1])
       additional_fields = None
+
+      batch_size = (
+          shape_utils.combined_static_and_dynamic_shape(
+              preprocessed_images)[0])
+      if 'feature_maps' in prediction_dict:
+        feature_map_list = []
+        for feature_map in prediction_dict['feature_maps']:
+          feature_map_list.append(tf.reshape(feature_map, [batch_size, -1]))
+        box_features = tf.concat(feature_map_list, 1)
+        box_features = tf.identity(box_features, 'raw_box_features')
+
       if detection_keypoints is not None:
         additional_fields = {
             fields.BoxListFields.keypoints: detection_keypoints}
@@ -683,17 +730,20 @@ class SSDMetaArch(model.DetectionModel):
         self.groundtruth_lists(fields.BoxListFields.boxes),
         match_list)
     if self._random_example_sampler:
+      batch_cls_per_anchor_weights = tf.reduce_mean(batch_cls_weights, axis=-1)
       batch_sampled_indicator = tf.to_float(
          shape_utils.static_or_dynamic_map_fn(
              self._minibatch_subsample_fn,
-              [batch_cls_targets, batch_cls_weights],
+              [batch_cls_targets, batch_cls_per_anchor_weights],
              dtype=tf.bool,
              parallel_iterations=self._parallel_iterations,
              back_prop=True))
       batch_reg_weights = tf.multiply(batch_sampled_indicator,
                                       batch_reg_weights)
-      batch_cls_weights = tf.multiply(batch_sampled_indicator,
-                                      batch_cls_weights)
+      batch_cls_weights = tf.multiply(
+          tf.expand_dims(batch_sampled_indicator, -1),
+          batch_cls_weights)

     losses_mask = None
     if self.groundtruth_has_field(fields.InputDataFields.is_annotated):
@@ -713,16 +763,32 @@ class SSDMetaArch(model.DetectionModel):
          losses_mask=losses_mask)

     if self._expected_classification_loss_under_sampling:
+      # Need to compute losses for assigned targets against the
+      # unmatched_class_label as well as their assigned targets.
+      # simplest thing (but wasteful) is just to calculate all losses
+      # twice
       batch_size, num_anchors, num_classes = batch_cls_targets.get_shape()
+      unmatched_targets = tf.ones([batch_size, num_anchors, 1
+                                  ]) * self._unmatched_class_label
+      unmatched_cls_losses = self._classification_loss(
+          prediction_dict['class_predictions_with_background'],
+          unmatched_targets,
+          weights=batch_cls_weights,
+          losses_mask=losses_mask)
+
+      if cls_losses.get_shape().ndims == 3:
+        batch_size, num_anchors, num_classes = cls_losses.get_shape()
+        cls_losses = tf.reshape(cls_losses, [batch_size, -1])
+        unmatched_cls_losses = tf.reshape(unmatched_cls_losses,
+                                          [batch_size, -1])
+        batch_cls_targets = tf.reshape(
+            batch_cls_targets, [batch_size, num_anchors * num_classes, -1])
+        batch_cls_targets = tf.concat(
+            [1 - batch_cls_targets, batch_cls_targets], axis=-1)
       cls_losses = self._expected_classification_loss_under_sampling(
-          batch_cls_targets, cls_losses)
+          batch_cls_targets, cls_losses, unmatched_cls_losses)

       classification_loss = tf.reduce_sum(cls_losses)
       localization_loss = tf.reduce_sum(location_losses)
@@ -971,6 +1037,26 @@ class SSDMetaArch(model.DetectionModel):
             [combined_shape[0], combined_shape[1], 4]))
     return decoded_boxes, decoded_keypoints

+  def regularization_losses(self):
+    """Returns a list of regularization losses for this model.
+
+    Returns a list of regularization losses for this model that the estimator
+    needs to use during training/optimization.
+
+    Returns:
+      A list of regularization loss tensors.
+    """
+    losses = []
+    slim_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
+    # Copy the slim losses to avoid modifying the collection
+    if slim_losses:
+      losses.extend(slim_losses)
+    if self._box_predictor.is_keras_model:
+      losses.extend(self._box_predictor.losses)
+    if self._feature_extractor.is_keras_model:
+      losses.extend(self._feature_extractor.losses)
+    return losses
+
   def restore_map(self,
                   fine_tune_checkpoint_type='detection',
                   load_all_detection_checkpoint_vars=False):
@@ -997,18 +1083,44 @@ class SSDMetaArch(model.DetectionModel):
     if fine_tune_checkpoint_type not in ['detection', 'classification']:
       raise ValueError('Not supported fine_tune_checkpoint_type: {}'.format(
           fine_tune_checkpoint_type))
-    variables_to_restore = {}
-    for variable in tf.global_variables():
-      var_name = variable.op.name
-      if (fine_tune_checkpoint_type == 'detection' and
-          load_all_detection_checkpoint_vars):
-        variables_to_restore[var_name] = variable
-      else:
-        if var_name.startswith(self._extract_features_scope):
-          if fine_tune_checkpoint_type == 'classification':
-            var_name = (
-                re.split('^' + self._extract_features_scope + '/',
-                         var_name)[-1])
-          variables_to_restore[var_name] = variable
-    return variables_to_restore
+
+    if fine_tune_checkpoint_type == 'classification':
+      return self._feature_extractor.restore_from_classification_checkpoint_fn(
+          self._extract_features_scope)
+
+    if fine_tune_checkpoint_type == 'detection':
+      variables_to_restore = {}
+      for variable in tf.global_variables():
+        var_name = variable.op.name
+        if load_all_detection_checkpoint_vars:
+          variables_to_restore[var_name] = variable
+        else:
+          if var_name.startswith(self._extract_features_scope):
+            variables_to_restore[var_name] = variable
+    return variables_to_restore
+
+  def updates(self):
+    """Returns a list of update operators for this model.
+
+    Returns a list of update operators for this model that must be executed at
+    each training step. The estimator's train op needs to have a control
+    dependency on these updates.
+
+    Returns:
+      A list of update operators.
+    """
+    update_ops = []
+    slim_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
+    # Copy the slim ops to avoid modifying the collection
+    if slim_update_ops:
+      update_ops.extend(slim_update_ops)
+    if self._box_predictor.is_keras_model:
+      update_ops.extend(self._box_predictor.get_updates_for(None))
+      update_ops.extend(self._box_predictor.get_updates_for(
+          self._box_predictor.inputs))
+    if self._feature_extractor.is_keras_model:
+      update_ops.extend(self._feature_extractor.get_updates_for(None))
+      update_ops.extend(self._feature_extractor.get_updates_for(
+          self._feature_extractor.inputs))
+    return update_ops
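The new restore_from_classification_checkpoint_fn strips the feature-extractor scope prefix so that classification-checkpoint variable names line up with the detection graph. A toy illustration of the renaming; the variable names below are invented:

# Graph variable:      FeatureExtractor/MobilenetV2/Conv/weights
# Checkpoint variable: MobilenetV2/Conv/weights
scope = 'FeatureExtractor'
var_name = 'FeatureExtractor/MobilenetV2/Conv/weights'
if var_name.startswith(scope + '/'):
  ckpt_name = var_name.replace(scope + '/', '')  # 'MobilenetV2/Conv/weights'
# The resulting {ckpt_name: variable} map is what a tf.train.Saver would be
# given to initialize the detector from a classification checkpoint.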
research/object_detection/meta_architectures/ssd_meta_arch_test.py

@@ -42,7 +42,7 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
                    random_example_sampling=False,
                    weight_regression_loss_by_score=False,
                    use_expected_classification_loss_under_sampling=False,
-                   minimum_negative_sampling=1,
+                   min_num_negative_samples=1,
                    desired_negative_sampling_ratio=3,
                    use_keras=False,
                    predict_mask=False,
@@ -57,7 +57,7 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
        weight_regression_loss_by_score=weight_regression_loss_by_score,
        use_expected_classification_loss_under_sampling=
        use_expected_classification_loss_under_sampling,
-        minimum_negative_sampling=minimum_negative_sampling,
+        min_num_negative_samples=min_num_negative_samples,
        desired_negative_sampling_ratio=desired_negative_sampling_ratio,
        use_keras=use_keras,
        predict_mask=predict_mask,
@@ -344,11 +344,11 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
     preprocessed_input = np.random.rand(batch_size, 2, 2, 3).astype(np.float32)
     groundtruth_boxes1 = np.array([[0, 0, .5, .5]], dtype=np.float32)
     groundtruth_boxes2 = np.array([[0, 0, .5, .5]], dtype=np.float32)
-    groundtruth_classes1 = np.array([[0, 1]], dtype=np.float32)
-    groundtruth_classes2 = np.array([[0, 1]], dtype=np.float32)
+    groundtruth_classes1 = np.array([[1]], dtype=np.float32)
+    groundtruth_classes2 = np.array([[1]], dtype=np.float32)
     expected_localization_loss = 0.0
     expected_classification_loss = (
-        batch_size * num_anchors * (num_classes + 1) * np.log(2.0))
+        batch_size * num_anchors * num_classes * np.log(2.0))
     (localization_loss, classification_loss) = self.execute(
         graph_fn, [preprocessed_input, groundtruth_boxes1,
                    groundtruth_boxes2,
@@ -371,7 +371,7 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
        apply_hard_mining=False,
        add_background_class=True,
        use_expected_classification_loss_under_sampling=True,
-        minimum_negative_sampling=1,
+        min_num_negative_samples=1,
        desired_negative_sampling_ratio=desired_negative_sampling_ratio)
     model.provide_groundtruth(groundtruth_boxes_list,
                               groundtruth_classes_list)
@@ -391,8 +391,7 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
     expected_localization_loss = 0.0
     expected_classification_loss = (
-        batch_size * (desired_negative_sampling_ratio * num_anchors +
-                      num_classes * num_anchors) * np.log(2.0))
+        batch_size * (num_anchors + num_classes * num_anchors) * np.log(2.0))
     (localization_loss, classification_loss) = self.execute(
         graph_fn, [preprocessed_input, groundtruth_boxes1,
                    groundtruth_boxes2,
@@ -432,11 +431,11 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
     preprocessed_input = np.random.rand(batch_size, 2, 2, 3).astype(np.float32)
     groundtruth_boxes1 = np.array([[0, 0, 1, 1]], dtype=np.float32)
     groundtruth_boxes2 = np.array([[0, 0, 1, 1]], dtype=np.float32)
-    groundtruth_classes1 = np.array([[0, 1]], dtype=np.float32)
-    groundtruth_classes2 = np.array([[1, 0]], dtype=np.float32)
+    groundtruth_classes1 = np.array([[1]], dtype=np.float32)
+    groundtruth_classes2 = np.array([[0]], dtype=np.float32)
     expected_localization_loss = 0.25
     expected_classification_loss = (
-        batch_size * num_anchors * (num_classes + 1) * np.log(2.0))
+        batch_size * num_anchors * num_classes * np.log(2.0))
     (localization_loss, classification_loss) = self.execute(
         graph_fn, [preprocessed_input, groundtruth_boxes1,
                    groundtruth_boxes2,
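The revised expected losses above drop the (num_classes + 1) factor: with no background column emitted by the mock predictor, each anchor contributes num_classes logits of cross entropy at probability 0.5, i.e. log(2) nats per logit. A worked check, assuming the mock 2x2 anchor grid (four anchors) and one class, since those constants are not all visible in the hunks:

import numpy as np

batch_size, num_anchors, num_classes = 2, 4, 1
# Each logit at p = 0.5 contributes log(2) nats of cross entropy.
expected_classification_loss = (
    batch_size * num_anchors * num_classes * np.log(2.0))
print(expected_classification_loss)  # ~5.545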
research/object_detection/meta_architectures/ssd_meta_arch_test_lib.py

@@ -119,7 +119,7 @@ class SSDMetaArchTestBase(test_case.TestCase):
                    random_example_sampling=False,
                    weight_regression_loss_by_score=False,
                    use_expected_classification_loss_under_sampling=False,
-                    minimum_negative_sampling=1,
+                    min_num_negative_samples=1,
                    desired_negative_sampling_ratio=3,
                    use_keras=False,
                    predict_mask=False,
@@ -130,10 +130,12 @@ class SSDMetaArchTestBase(test_case.TestCase):
     mock_anchor_generator = MockAnchorGenerator2x2()
     if use_keras:
       mock_box_predictor = test_utils.MockKerasBoxPredictor(
-          is_training, num_classes, predict_mask=predict_mask)
+          is_training, num_classes, add_background_class=add_background_class,
+          predict_mask=predict_mask)
     else:
       mock_box_predictor = test_utils.MockBoxPredictor(
-          is_training, num_classes, predict_mask=predict_mask)
+          is_training, num_classes, add_background_class=add_background_class,
+          predict_mask=predict_mask)
     mock_box_coder = test_utils.MockBoxCoder()
     if use_keras:
       fake_feature_extractor = FakeSSDKerasFeatureExtractor()
@@ -182,7 +184,7 @@ class SSDMetaArchTestBase(test_case.TestCase):
     if use_expected_classification_loss_under_sampling:
       expected_classification_loss_under_sampling = functools.partial(
           ops.expected_classification_loss_under_sampling,
-          minimum_negative_sampling=minimum_negative_sampling,
+          min_num_negative_samples=min_num_negative_samples,
          desired_negative_sampling_ratio=desired_negative_sampling_ratio)

     code_size = 4
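The renamed keyword is threaded through functools.partial above; a generic sketch of that binding pattern, with a stand-in loss function rather than the project's ops.expected_classification_loss_under_sampling:

import functools

def loss_fn(targets, losses, min_num_negative_samples=1,
            desired_negative_sampling_ratio=3):
  # Stand-in body; the real function reweights negative-class losses.
  return losses

# Bind the sampling hyperparameters once; call sites then pass only tensors.
bound_loss_fn = functools.partial(
    loss_fn, min_num_negative_samples=1, desired_negative_sampling_ratio=3)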
research/object_detection/metrics/coco_evaluation.py
View file @
e00e0e13
...
...
@@ -248,27 +248,30 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
detection_boxes_batched
,
detection_scores_batched
,
detection_classes_batched
,
num_det_boxes_per_image
):
num_det_boxes_per_image
,
is_annotated_batched
):
"""Update operation for adding batch of images to Coco evaluator."""
for
(
image_id
,
gt_box
,
gt_class
,
gt_is_crowd
,
num_gt_box
,
det_box
,
det_score
,
det_class
,
num_det_box
)
in
zip
(
det_score
,
det_class
,
num_det_box
,
is_annotated
)
in
zip
(
image_id_batched
,
groundtruth_boxes_batched
,
groundtruth_classes_batched
,
groundtruth_is_crowd_batched
,
num_gt_boxes_per_image
,
detection_boxes_batched
,
detection_scores_batched
,
detection_classes_batched
,
num_det_boxes_per_image
):
self
.
add_single_ground_truth_image_info
(
image_id
,
{
'groundtruth_boxes'
:
gt_box
[:
num_gt_box
],
'groundtruth_classes'
:
gt_class
[:
num_gt_box
],
'groundtruth_is_crowd'
:
gt_is_crowd
[:
num_gt_box
]
})
self
.
add_single_detected_image_info
(
image_id
,
{
'detection_boxes'
:
det_box
[:
num_det_box
],
'detection_scores'
:
det_score
[:
num_det_box
],
'detection_classes'
:
det_class
[:
num_det_box
]})
detection_classes_batched
,
num_det_boxes_per_image
,
is_annotated_batched
):
if
is_annotated
:
self
.
add_single_ground_truth_image_info
(
image_id
,
{
'groundtruth_boxes'
:
gt_box
[:
num_gt_box
],
'groundtruth_classes'
:
gt_class
[:
num_gt_box
],
'groundtruth_is_crowd'
:
gt_is_crowd
[:
num_gt_box
]
})
self
.
add_single_detected_image_info
(
image_id
,
{
'detection_boxes'
:
det_box
[:
num_det_box
],
'detection_scores'
:
det_score
[:
num_det_box
],
'detection_classes'
:
det_class
[:
num_det_box
]})
# Unpack items from the evaluation dictionary.
input_data_fields
=
standard_fields
.
InputDataFields
...
...
@@ -284,6 +287,7 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
num_gt_boxes_per_image
=
eval_dict
.
get
(
'num_groundtruth_boxes_per_image'
,
None
)
num_det_boxes_per_image
=
eval_dict
.
get
(
'num_det_boxes_per_image'
,
None
)
is_annotated
=
eval_dict
.
get
(
'is_annotated'
,
None
)
if
groundtruth_is_crowd
is
None
:
groundtruth_is_crowd
=
tf
.
zeros_like
(
groundtruth_classes
,
dtype
=
tf
.
bool
)
...
...
@@ -306,6 +310,11 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
num_det_boxes_per_image
=
tf
.
shape
(
detection_boxes
)[
1
:
2
]
else
:
num_det_boxes_per_image
=
tf
.
expand_dims
(
num_det_boxes_per_image
,
0
)
if
is_annotated
is
None
:
is_annotated
=
tf
.
constant
([
True
])
else
:
is_annotated
=
tf
.
expand_dims
(
is_annotated
,
0
)
else
:
if
num_gt_boxes_per_image
is
None
:
num_gt_boxes_per_image
=
tf
.
tile
(
...
...
@@ -315,6 +324,8 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
        num_det_boxes_per_image = tf.tile(
            tf.shape(detection_boxes)[1:2],
            multiples=tf.shape(detection_boxes)[0:1])
+     if is_annotated is None:
+       is_annotated = tf.ones_like(image_id, dtype=tf.bool)

    update_op = tf.py_func(update_op, [image_id,
                                       groundtruth_boxes,
...
...
@@ -324,7 +335,8 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
                                       detection_boxes,
                                       detection_scores,
                                       detection_classes,
-                                      num_det_boxes_per_image], [])
+                                      num_det_boxes_per_image,
+                                      is_annotated], [])

    metric_names = ['DetectionBoxes_Precision/mAP',
                    'DetectionBoxes_Precision/mAP@.50IOU',
                    'DetectionBoxes_Precision/mAP@.75IOU',
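tf.py_func with an empty Tout list, as used above, is the TF 1.x idiom for running a Python-side accumulator inside the graph; the returned value is an op with no outputs. A self-contained sketch of the pattern (the accumulator here is illustrative, not the evaluator's real state):

import tensorflow as tf

_image_ids = []

def _update(image_id, is_annotated):
  # Runs in Python at session-run time; mutates accumulator state.
  if is_annotated:
    _image_ids.append(image_id)

image_id = tf.constant('image1')
is_annotated = tf.constant(True)
update_op = tf.py_func(_update, [image_id, is_annotated], [])  # no outputs

with tf.Session() as sess:
  sess.run(update_op)
  print(_image_ids)  # [b'image1'] under Python 3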
...
...
@@ -581,8 +593,11 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
    Args:
      eval_dict: A dictionary that holds tensors for evaluating object detection
-       performance. This dictionary may be produced from
-       eval_util.result_dict_for_single_example().
+       performance. For single-image evaluation, this dictionary may be
+       produced from eval_util.result_dict_for_single_example(). For
+       multi-image evaluation, `eval_dict` should contain the fields
+       'num_groundtruth_boxes_per_image' and 'num_det_boxes_per_image' so the
+       padded tensors in the batch can be properly unpadded.
Returns:
a dictionary of metric names to tuple of value_op and update_op that can
...
...
@@ -590,27 +605,41 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
update ops must be run together and similarly all value ops must be run
together to guarantee correct behaviour.
"""
-    def update_op(image_id,
-                  groundtruth_boxes,
-                  groundtruth_classes,
-                  groundtruth_instance_masks,
-                  groundtruth_is_crowd,
-                  detection_scores,
-                  detection_classes,
-                  detection_masks):
+    def update_op(image_id_batched,
+                  groundtruth_boxes_batched,
+                  groundtruth_classes_batched,
+                  groundtruth_instance_masks_batched,
+                  groundtruth_is_crowd_batched,
+                  num_gt_boxes_per_image,
+                  detection_scores_batched,
+                  detection_classes_batched,
+                  detection_masks_batched,
+                  num_det_boxes_per_image):
      """Update op for metrics."""
-      self.add_single_ground_truth_image_info(
-          image_id,
-          {'groundtruth_boxes': groundtruth_boxes,
-           'groundtruth_classes': groundtruth_classes,
-           'groundtruth_instance_masks': groundtruth_instance_masks,
-           'groundtruth_is_crowd': groundtruth_is_crowd})
-      self.add_single_detected_image_info(
-          image_id,
-          {'detection_scores': detection_scores,
-           'detection_classes': detection_classes,
-           'detection_masks': detection_masks})
+      for (image_id, groundtruth_boxes, groundtruth_classes,
+           groundtruth_instance_masks, groundtruth_is_crowd, num_gt_box,
+           detection_scores, detection_classes,
+           detection_masks, num_det_box) in zip(
+               image_id_batched, groundtruth_boxes_batched,
+               groundtruth_classes_batched, groundtruth_instance_masks_batched,
+               groundtruth_is_crowd_batched, num_gt_boxes_per_image,
+               detection_scores_batched, detection_classes_batched,
+               detection_masks_batched, num_det_boxes_per_image):
+        self.add_single_ground_truth_image_info(
+            image_id, {
+                'groundtruth_boxes': groundtruth_boxes[:num_gt_box],
+                'groundtruth_classes': groundtruth_classes[:num_gt_box],
+                'groundtruth_instance_masks':
+                    groundtruth_instance_masks[:num_gt_box],
+                'groundtruth_is_crowd': groundtruth_is_crowd[:num_gt_box]
+            })
+        self.add_single_detected_image_info(
+            image_id, {
+                'detection_scores': detection_scores[:num_det_box],
+                'detection_classes': detection_classes[:num_det_box],
+                'detection_masks': detection_masks[:num_det_box]
+            })

    # Unpack items from the evaluation dictionary.
    input_data_fields = standard_fields.InputDataFields
...
...
@@ -622,20 +651,54 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
        input_data_fields.groundtruth_instance_masks]
    groundtruth_is_crowd = eval_dict.get(
        input_data_fields.groundtruth_is_crowd, None)
+   num_gt_boxes_per_image = eval_dict.get(
+       input_data_fields.num_groundtruth_boxes, None)
    detection_scores = eval_dict[detection_fields.detection_scores]
    detection_classes = eval_dict[detection_fields.detection_classes]
    detection_masks = eval_dict[detection_fields.detection_masks]
+   num_det_boxes_per_image = eval_dict.get(detection_fields.num_detections,
+                                           None)

    if groundtruth_is_crowd is None:
      groundtruth_is_crowd = tf.zeros_like(groundtruth_classes, dtype=tf.bool)

-   update_op = tf.py_func(update_op, [image_id,
-                                      groundtruth_boxes,
-                                      groundtruth_classes,
-                                      groundtruth_instance_masks,
-                                      groundtruth_is_crowd,
-                                      detection_scores,
-                                      detection_classes,
-                                      detection_masks], [])
+   if not image_id.shape.as_list():
+     # Apply a batch dimension to all tensors.
+     image_id = tf.expand_dims(image_id, 0)
+     groundtruth_boxes = tf.expand_dims(groundtruth_boxes, 0)
+     groundtruth_classes = tf.expand_dims(groundtruth_classes, 0)
+     groundtruth_instance_masks = tf.expand_dims(groundtruth_instance_masks, 0)
+     groundtruth_is_crowd = tf.expand_dims(groundtruth_is_crowd, 0)
+     detection_scores = tf.expand_dims(detection_scores, 0)
+     detection_classes = tf.expand_dims(detection_classes, 0)
+     detection_masks = tf.expand_dims(detection_masks, 0)
+
+     if num_gt_boxes_per_image is None:
+       num_gt_boxes_per_image = tf.shape(groundtruth_boxes)[1:2]
+     else:
+       num_gt_boxes_per_image = tf.expand_dims(num_gt_boxes_per_image, 0)
+
+     if num_det_boxes_per_image is None:
+       num_det_boxes_per_image = tf.shape(detection_scores)[1:2]
+     else:
+       num_det_boxes_per_image = tf.expand_dims(num_det_boxes_per_image, 0)
+   else:
+     if num_gt_boxes_per_image is None:
+       num_gt_boxes_per_image = tf.tile(
+           tf.shape(groundtruth_boxes)[1:2],
+           multiples=tf.shape(groundtruth_boxes)[0:1])
+     if num_det_boxes_per_image is None:
+       num_det_boxes_per_image = tf.tile(
+           tf.shape(detection_scores)[1:2],
+           multiples=tf.shape(detection_scores)[0:1])
+
+   update_op = tf.py_func(update_op, [
+       image_id, groundtruth_boxes, groundtruth_classes,
+       groundtruth_instance_masks, groundtruth_is_crowd,
+       num_gt_boxes_per_image, detection_scores, detection_classes,
+       detection_masks, num_det_boxes_per_image
+   ], [])

    metric_names = ['DetectionMasks_Precision/mAP',
                    'DetectionMasks_Precision/mAP@.50IOU',
                    'DetectionMasks_Precision/mAP@.75IOU',
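For batched mask evaluation, the per-image counts come in through the standard field names read above. A hedged sketch of the extra eval_dict entries a caller would provide (placeholder shapes assume a padded batch; the groundtruth and detection tensors themselves are omitted):

import tensorflow as tf
from object_detection.core import standard_fields

input_data_fields = standard_fields.InputDataFields
detection_fields = standard_fields.DetectionResultFields

batch_size = 2
# Counts of real (non-padding) boxes per image in the padded batch.
eval_dict_extras = {
    input_data_fields.num_groundtruth_boxes:
        tf.placeholder(tf.int32, shape=(batch_size,)),
    detection_fields.num_detections:
        tf.placeholder(tf.int32, shape=(batch_size,)),
}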
...
...
research/object_detection/metrics/coco_evaluation_test.py
View file @
e00e0e13
...
...
@@ -308,6 +308,99 @@ class CocoEvaluationPyFuncTest(tf.test.TestCase):
    self.assertFalse(coco_evaluator._detection_boxes_list)
    self.assertFalse(coco_evaluator._image_ids)

+  def testGetOneMAPWithMatchingGroundtruthAndDetectionsIsAnnotated(self):
+    coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
+        _get_categories_list())
+    image_id = tf.placeholder(tf.string, shape=())
+    groundtruth_boxes = tf.placeholder(tf.float32, shape=(None, 4))
+    groundtruth_classes = tf.placeholder(tf.float32, shape=(None))
+    is_annotated = tf.placeholder(tf.bool, shape=())
+    detection_boxes = tf.placeholder(tf.float32, shape=(None, 4))
+    detection_scores = tf.placeholder(tf.float32, shape=(None))
+    detection_classes = tf.placeholder(tf.float32, shape=(None))
+
+    input_data_fields = standard_fields.InputDataFields
+    detection_fields = standard_fields.DetectionResultFields
+    eval_dict = {
+        input_data_fields.key: image_id,
+        input_data_fields.groundtruth_boxes: groundtruth_boxes,
+        input_data_fields.groundtruth_classes: groundtruth_classes,
+        'is_annotated': is_annotated,
+        detection_fields.detection_boxes: detection_boxes,
+        detection_fields.detection_scores: detection_scores,
+        detection_fields.detection_classes: detection_classes
+    }
+
+    eval_metric_ops = coco_evaluator.get_estimator_eval_metric_ops(eval_dict)
+
+    _, update_op = eval_metric_ops['DetectionBoxes_Precision/mAP']
+
+    with self.test_session() as sess:
+      sess.run(update_op,
+               feed_dict={
+                   image_id: 'image1',
+                   groundtruth_boxes: np.array([[100., 100., 200., 200.]]),
+                   groundtruth_classes: np.array([1]),
+                   is_annotated: True,
+                   detection_boxes: np.array([[100., 100., 200., 200.]]),
+                   detection_scores: np.array([.8]),
+                   detection_classes: np.array([1])
+               })
+      sess.run(update_op,
+               feed_dict={
+                   image_id: 'image2',
+                   groundtruth_boxes: np.array([[50., 50., 100., 100.]]),
+                   groundtruth_classes: np.array([3]),
+                   is_annotated: True,
+                   detection_boxes: np.array([[50., 50., 100., 100.]]),
+                   detection_scores: np.array([.7]),
+                   detection_classes: np.array([3])
+               })
+      sess.run(update_op,
+               feed_dict={
+                   image_id: 'image3',
+                   groundtruth_boxes: np.array([[25., 25., 50., 50.]]),
+                   groundtruth_classes: np.array([2]),
+                   is_annotated: True,
+                   detection_boxes: np.array([[25., 25., 50., 50.]]),
+                   detection_scores: np.array([.9]),
+                   detection_classes: np.array([2])
+               })
+      sess.run(update_op,
+               feed_dict={
+                   image_id: 'image4',
+                   groundtruth_boxes: np.zeros((0, 4)),
+                   groundtruth_classes: np.zeros((0)),
+                   is_annotated: False,  # Note that this image isn't annotated.
+                   detection_boxes: np.array([[25., 25., 50., 50.],
+                                              [25., 25., 70., 50.],
+                                              [25., 25., 80., 50.],
+                                              [25., 25., 90., 50.]]),
+                   detection_scores: np.array([0.6, 0.7, 0.8, 0.9]),
+                   detection_classes: np.array([1, 2, 2, 3])
+               })
+      metrics = {}
+      for key, (value_op, _) in eval_metric_ops.iteritems():
+        metrics[key] = value_op
+      metrics = sess.run(metrics)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.50IOU'],
+                             1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.75IOU'],
+                             1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (large)'],
+                             1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (medium)'],
+                             1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (small)'],
+                             1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@1'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@10'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (large)'],
+                             1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (medium)'],
+                             1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (small)'],
+                             1.0)
+    self.assertFalse(coco_evaluator._groundtruth_list)
+    self.assertFalse(coco_evaluator._detection_boxes_list)
+    self.assertFalse(coco_evaluator._image_ids)

  def testGetOneMAPWithMatchingGroundtruthAndDetectionsPadded(self):
    coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
        _get_categories_list())
...
...
@@ -665,22 +758,40 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase):
    _, update_op = eval_metric_ops['DetectionMasks_Precision/mAP']

    with self.test_session() as sess:
-      sess.run(update_op,
-               feed_dict={
-                   image_id: 'image1',
-                   groundtruth_boxes: np.array([[100., 100., 200., 200.]]),
-                   groundtruth_classes: np.array([1]),
-                   groundtruth_masks: np.pad(np.ones([1, 100, 100],
-                                                     dtype=np.uint8),
-                                             ((0, 0), (10, 10), (10, 10)),
-                                             mode='constant'),
-                   detection_scores: np.array([.8]),
-                   detection_classes: np.array([1]),
-                   detection_masks: np.pad(np.ones([1, 100, 100],
-                                                   dtype=np.uint8),
-                                           ((0, 0), (10, 10), (10, 10)),
-                                           mode='constant')
-               })
+      sess.run(update_op,
+               feed_dict={
+                   image_id: 'image1',
+                   groundtruth_boxes: np.array([[100., 100., 200., 200.],
+                                                [50., 50., 100., 100.]]),
+                   groundtruth_classes: np.array([1, 2]),
+                   groundtruth_masks: np.stack([
+                       np.pad(np.ones([100, 100], dtype=np.uint8),
+                              ((10, 10), (10, 10)), mode='constant'),
+                       np.pad(np.ones([50, 50], dtype=np.uint8),
+                              ((0, 70), (0, 70)), mode='constant')
+                   ]),
+                   detection_scores: np.array([.9, .8]),
+                   detection_classes: np.array([2, 1]),
+                   detection_masks: np.stack([
+                       np.pad(np.ones([50, 50], dtype=np.uint8),
+                              ((0, 70), (0, 70)), mode='constant'),
+                       np.pad(np.ones([100, 100], dtype=np.uint8),
+                              ((10, 10), (10, 10)), mode='constant'),
+                   ])
+               })
      sess.run(update_op, feed_dict={image_id: 'image2',
...
...
@@ -735,6 +846,106 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase):
    self.assertFalse(coco_evaluator._image_id_to_mask_shape_map)
    self.assertFalse(coco_evaluator._detection_masks_list)

+  def testGetOneMAPWithMatchingGroundtruthAndDetectionsBatched(self):
+    coco_evaluator = coco_evaluation.CocoMaskEvaluator(_get_categories_list())
+    batch_size = 3
+    image_id = tf.placeholder(tf.string, shape=(batch_size))
+    groundtruth_boxes = tf.placeholder(tf.float32, shape=(batch_size, None, 4))
+    groundtruth_classes = tf.placeholder(tf.float32, shape=(batch_size, None))
+    groundtruth_masks = tf.placeholder(
+        tf.uint8, shape=(batch_size, None, None, None))
+    detection_scores = tf.placeholder(tf.float32, shape=(batch_size, None))
+    detection_classes = tf.placeholder(tf.float32, shape=(batch_size, None))
+    detection_masks = tf.placeholder(
+        tf.uint8, shape=(batch_size, None, None, None))
+
+    input_data_fields = standard_fields.InputDataFields
+    detection_fields = standard_fields.DetectionResultFields
+    eval_dict = {
+        input_data_fields.key: image_id,
+        input_data_fields.groundtruth_boxes: groundtruth_boxes,
+        input_data_fields.groundtruth_classes: groundtruth_classes,
+        input_data_fields.groundtruth_instance_masks: groundtruth_masks,
+        detection_fields.detection_scores: detection_scores,
+        detection_fields.detection_classes: detection_classes,
+        detection_fields.detection_masks: detection_masks,
+    }
+
+    eval_metric_ops = coco_evaluator.get_estimator_eval_metric_ops(eval_dict)
+
+    _, update_op = eval_metric_ops['DetectionMasks_Precision/mAP']
+
+    with self.test_session() as sess:
+      sess.run(
+          update_op,
+          feed_dict={
+              image_id: ['image1', 'image2', 'image3'],
+              groundtruth_boxes:
+                  np.array([[[100., 100., 200., 200.]],
+                            [[50., 50., 100., 100.]],
+                            [[25., 25., 50., 50.]]]),
+              groundtruth_classes:
+                  np.array([[1], [1], [1]]),
+              groundtruth_masks:
+                  np.stack([
+                      np.pad(np.ones([1, 100, 100], dtype=np.uint8),
+                             ((0, 0), (0, 0), (0, 0)), mode='constant'),
+                      np.pad(np.ones([1, 50, 50], dtype=np.uint8),
+                             ((0, 0), (25, 25), (25, 25)), mode='constant'),
+                      np.pad(np.ones([1, 25, 25], dtype=np.uint8),
+                             ((0, 0), (37, 38), (37, 38)), mode='constant')
+                  ], axis=0),
+              detection_scores:
+                  np.array([[.8], [.8], [.8]]),
+              detection_classes:
+                  np.array([[1], [1], [1]]),
+              detection_masks:
+                  np.stack([
+                      np.pad(np.ones([1, 100, 100], dtype=np.uint8),
+                             ((0, 0), (0, 0), (0, 0)), mode='constant'),
+                      np.pad(np.ones([1, 50, 50], dtype=np.uint8),
+                             ((0, 0), (25, 25), (25, 25)), mode='constant'),
+                      np.pad(np.ones([1, 25, 25], dtype=np.uint8),
+                             ((0, 0), (37, 38), (37, 38)), mode='constant')
+                  ], axis=0)
+          })
+      metrics = {}
+      for key, (value_op, _) in eval_metric_ops.iteritems():
+        metrics[key] = value_op
+      metrics = sess.run(metrics)
+      self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP@.50IOU'],
+                             1.0)
+      self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP@.75IOU'],
+                             1.0)
+      self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP (large)'],
+                             1.0)
+      self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP (medium)'],
+                             1.0)
+      self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP (small)'],
+                             1.0)
+      self.assertAlmostEqual(metrics['DetectionMasks_Recall/AR@1'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionMasks_Recall/AR@10'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionMasks_Recall/AR@100'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionMasks_Recall/AR@100 (large)'],
+                             1.0)
+      self.assertAlmostEqual(metrics['DetectionMasks_Recall/AR@100 (medium)'],
+                             1.0)
+      self.assertAlmostEqual(metrics['DetectionMasks_Recall/AR@100 (small)'],
+                             1.0)
+    self.assertFalse(coco_evaluator._groundtruth_list)
+    self.assertFalse(coco_evaluator._image_ids_with_detections)
+    self.assertFalse(coco_evaluator._image_id_to_mask_shape_map)
+    self.assertFalse(coco_evaluator._detection_masks_list)


if __name__ == '__main__':
  tf.test.main()
research/object_detection/model_lib.py
View file @
e00e0e13
...
...
@@ -25,6 +25,7 @@ import os
 import tensorflow as tf

 from object_detection import eval_util
+from object_detection import exporter as exporter_lib
 from object_detection import inputs
 from object_detection.builders import graph_rewriter_builder
 from object_detection.builders import model_builder
...
...
@@ -306,8 +307,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
          prediction_dict, features[fields.InputDataFields.true_image_shape])
      losses = [loss_tensor for loss_tensor in losses_dict.values()]
      if train_config.add_regularization_loss:
-       regularization_losses = tf.get_collection(
-           tf.GraphKeys.REGULARIZATION_LOSSES)
+       regularization_losses = detection_model.regularization_losses()
        if regularization_losses:
          regularization_loss = tf.add_n(regularization_losses,
                                         name='regularization_loss')
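The replaced lines swap a graph-wide collection lookup for losses reported by the model object itself, which keeps the total loss scoped to this model even when several models share one graph. A minimal sketch of the difference (DummyModel is illustrative, not the real DetectionModel interface):

import tensorflow as tf

# Old approach: pull every regularization term from the global collection.
collection_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)

# New approach: ask the model for its own regularization terms.
class DummyModel(object):
  def regularization_losses(self):
    return [tf.constant(0.1), tf.constant(0.2)]

detection_model = DummyModel()
regularization_losses = detection_model.regularization_losses()
if regularization_losses:
  regularization_loss = tf.add_n(regularization_losses,
                                 name='regularization_loss')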
...
...
@@ -353,20 +353,24 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
      for var in optimizer_summary_vars:
        tf.summary.scalar(var.op.name, var)

    summaries = [] if use_tpu else None
    if train_config.summarize_gradients:
      summaries = ['gradients', 'gradient_norm', 'global_gradient_norm']
    train_op = tf.contrib.layers.optimize_loss(
        loss=total_loss,
        global_step=global_step,
        learning_rate=None,
        clip_gradients=clip_gradients_value,
        optimizer=training_optimizer,
        update_ops=detection_model.updates(),
        variables=trainable_variables,
        summaries=summaries,
        name='')  # Preventing scope prefix on all variables.

  if mode == tf.estimator.ModeKeys.PREDICT:
+   exported_output = exporter_lib.add_output_tensor_nodes(detections)
    export_outputs = {
        tf.saved_model.signature_constants.PREDICT_METHOD_NAME:
-           tf.estimator.export.PredictOutput(detections)
+           tf.estimator.export.PredictOutput(exported_output)
    }

  eval_metric_ops = None
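exporter_lib.add_output_tensor_nodes renames the raw detection tensors to the exporter's canonical serving names before they are wrapped. A sketch of the serving wiring with a fabricated detections dict (the pass-through below only stands in for the real renaming step):

import tensorflow as tf

detections = {
    'detection_boxes': tf.zeros([1, 100, 4]),
    'detection_scores': tf.zeros([1, 100]),
    'num_detections': tf.constant([100.0]),
}

# The real code calls exporter_lib.add_output_tensor_nodes(detections);
# passing the dict through unchanged keeps the sketch self-contained.
exported_output = detections

export_outputs = {
    tf.saved_model.signature_constants.PREDICT_METHOD_NAME:
        tf.estimator.export.PredictOutput(exported_output)
}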
...
...
@@ -456,6 +460,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
 def create_estimator_and_inputs(run_config,
                                 hparams,
                                 pipeline_config_path,
+                                config_override=None,
                                 train_steps=None,
                                 sample_1_of_n_eval_examples=1,
                                 sample_1_of_n_eval_on_train_examples=1,
...
...
@@ -465,6 +470,7 @@ def create_estimator_and_inputs(run_config,
                                 num_shards=1,
                                 params=None,
                                 override_eval_num_epochs=True,
+                                save_final_config=False,
                                 **kwargs):
  """Creates `Estimator`, input functions, and steps.
...
...
@@ -472,6 +478,8 @@ def create_estimator_and_inputs(run_config,
run_config: A `RunConfig`.
hparams: A `HParams`.
pipeline_config_path: A path to a pipeline config file.
config_override: A pipeline_pb2.TrainEvalPipelineConfig text proto to
override the config from `pipeline_config_path`.
train_steps: Number of training steps. If None, the number of training steps
is set from the `TrainConfig` proto.
sample_1_of_n_eval_examples: Integer representing how often an eval example
...
...
@@ -499,6 +507,8 @@ def create_estimator_and_inputs(run_config,
`use_tpu_estimator` is True.
override_eval_num_epochs: Whether to overwrite the number of epochs to
1 for eval_input.
save_final_config: Whether to save final config (obtained after applying
overrides) to `estimator.model_dir`.
**kwargs: Additional keyword arguments for configuration override.
Returns:
...
...
@@ -522,7 +532,8 @@ def create_estimator_and_inputs(run_config,
  create_eval_input_fn = MODEL_BUILD_UTIL_MAP['create_eval_input_fn']
  create_predict_input_fn = MODEL_BUILD_UTIL_MAP['create_predict_input_fn']

- configs = get_configs_from_pipeline_file(pipeline_config_path)
+ configs = get_configs_from_pipeline_file(pipeline_config_path,
+                                          config_override=config_override)
  kwargs.update({
      'train_steps': train_steps,
      'sample_1_of_n_eval_examples': sample_1_of_n_eval_examples
...
...
@@ -595,7 +606,7 @@ def create_estimator_and_inputs(run_config,
    estimator = tf.estimator.Estimator(model_fn=model_fn, config=run_config)

  # Write the as-run pipeline config to disk.
- if run_config.is_chief:
+ if run_config.is_chief and save_final_config:
    pipeline_config_final = create_pipeline_proto_from_configs(configs)
    config_util.save_pipeline_config(pipeline_config_final,
                                     estimator.model_dir)
...
...
@@ -641,11 +652,17 @@ def create_train_and_eval_specs(train_input_fn,
      input_fn=train_input_fn, max_steps=train_steps)

  if eval_spec_names is None:
-   eval_spec_names = [str(i) for i in range(len(eval_input_fns))
-   ]
+   eval_spec_names = [str(i) for i in range(len(eval_input_fns))]

  eval_specs = []
- for eval_spec_name, eval_input_fn in zip(eval_spec_names, eval_input_fns):
-   exporter_name = '{}_{}'.format(final_exporter_name, eval_spec_name)
+ for index, (eval_spec_name, eval_input_fn) in enumerate(
+     zip(eval_spec_names, eval_input_fns)):
+   # Uses final_exporter_name as exporter_name for the first eval spec for
+   # backward compatibility.
+   if index == 0:
+     exporter_name = final_exporter_name
+   else:
+     exporter_name = '{}_{}'.format(final_exporter_name, eval_spec_name)
    exporter = tf.estimator.FinalExporter(
        name=exporter_name, serving_input_receiver_fn=predict_input_fn)
    eval_specs.append(
...
...
@@ -747,6 +764,7 @@ def populate_experiment(run_config,
      train_steps=train_steps,
      eval_steps=eval_steps,
      model_fn_creator=model_fn_creator,
+     save_final_config=True,
      **kwargs)
  estimator = train_and_eval_dict['estimator']
  train_input_fn = train_and_eval_dict['train_input_fn']
...
...
research/object_detection/model_lib_test.py
View file @
e00e0e13
...
...
@@ -310,7 +310,7 @@ class ModelLibTest(tf.test.TestCase):
    self.assertEqual(2, len(eval_specs))
    self.assertEqual(None, eval_specs[0].steps)
    self.assertEqual('holdout', eval_specs[0].name)
-   self.assertEqual('exporter_holdout', eval_specs[0].exporters[0].name)
+   self.assertEqual('exporter', eval_specs[0].exporters[0].name)
    self.assertEqual(None, eval_specs[1].steps)
    self.assertEqual('eval_on_train', eval_specs[1].name)
...
...
research/object_detection/model_tpu_main.py
View file @
e00e0e13
...
...
@@ -114,6 +114,7 @@ def main(unused_argv):
      use_tpu_estimator=True,
      use_tpu=FLAGS.use_tpu,
      num_shards=FLAGS.num_shards,
+     save_final_config=FLAGS.mode == 'train',
      **kwargs)
  estimator = train_and_eval_dict['estimator']
  train_input_fn = train_and_eval_dict['train_input_fn']
...
...
research/object_detection/models/faster_rcnn_resnet_v1_feature_extractor.py
View file @
e00e0e13
...
...
@@ -72,6 +72,8 @@ class FasterRCNNResnetV1FeatureExtractor(
VGG style channel mean subtraction as described here:
https://gist.github.com/ksimonyan/211839e770f7b538e2d8#file-readme-md
Note that if the number of channels is not equal to 3, the mean subtraction
will be skipped and the original resized_inputs will be returned.
Args:
resized_inputs: A [batch, height_in, width_in, channels] float32 tensor
...
...
@@ -82,8 +84,11 @@ class FasterRCNNResnetV1FeatureExtractor(
      tensor representing a batch of images.
    """
-   channel_means = [123.68, 116.779, 103.939]
-   return resized_inputs - [[channel_means]]
+   if resized_inputs.shape.as_list()[3] == 3:
+     channel_means = [123.68, 116.779, 103.939]
+     return resized_inputs - [[channel_means]]
+   else:
+     return resized_inputs

  def _extract_proposal_features(self, preprocessed_inputs, scope):
    """Extracts first stage RPN features.
...
...
research/object_detection/models/feature_map_generators.py
View file @
e00e0e13
...
...
@@ -146,7 +146,6 @@ class KerasMultiResolutionFeatureMaps(tf.keras.Model):
      use_depthwise = feature_map_layout['use_depthwise']
    for index, from_layer in enumerate(feature_map_layout['from_layer']):
      net = []
-     self.convolutions.append(net)
      layer_depth = feature_map_layout['layer_depth'][index]
      conv_kernel_size = 3
      if 'conv_kernel_size' in feature_map_layout:
...
...
@@ -231,6 +230,10 @@ class KerasMultiResolutionFeatureMaps(tf.keras.Model):
            conv_hyperparams.build_activation_layer(
                name=layer_name))

+     # Until certain bugs are fixed in checkpointable lists, this net must
+     # be appended only once it's been filled with layers.
+     self.convolutions.append(net)

  def call(self, image_features):
    """Generate the multi-resolution feature maps.
...
...
@@ -263,7 +266,8 @@ class KerasMultiResolutionFeatureMaps(tf.keras.Model):
 def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
-                                  min_depth, insert_1x1_conv, image_features):
+                                  min_depth, insert_1x1_conv, image_features,
+                                  pool_residual=False):
  """Generates multi resolution feature maps from input image features.

  Generates multi-scale feature maps for detection as in the SSD papers by
...
...
@@ -317,6 +321,13 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
should be inserted before shrinking the feature map.
image_features: A dictionary of handles to activation tensors from the
base feature extractor.
    pool_residual: Whether to add an average pooling layer followed by a
      residual connection between subsequent feature maps when the channel
      depths match. For example, with option 'layer_depth': [-1, 512, 256,
      256], a pooling and residual layer is added between the third and
      fourth feature map. This option is best used with the Weight Shared
      Convolution Box Predictor when all feature maps have the same channel
      depth, to encourage more consistent features across multi-scale
      feature maps.
Returns:
feature_maps: an OrderedDict mapping keys (feature map names) to
...
...
@@ -350,6 +361,7 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
        feature_map_keys.append(from_layer)
      else:
        pre_layer = feature_maps[-1]
+       pre_layer_depth = pre_layer.get_shape().as_list()[3]
        intermediate_layer = pre_layer
        if insert_1x1_conv:
          layer_name = '{}_1_Conv2d_{}_1x1_{}'.format(
...
...
@@ -383,6 +395,12 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
              padding='SAME',
              stride=1,
              scope=layer_name)
+         if pool_residual and pre_layer_depth == depth_fn(layer_depth):
+           feature_map += slim.avg_pool2d(
+               pre_layer, [3, 3],
+               padding='SAME',
+               stride=2,
+               scope=layer_name + '_pool')
        else:
          feature_map = slim.conv2d(
              intermediate_layer,
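The pooled residual only fires when the previous map's channel depth equals the new layer's, and the stride-2 pooling halves the spatial size so the two maps line up. A NumPy sketch of the shape bookkeeping (the strided slice is a crude stand-in for slim.avg_pool2d, used only to show the shapes):

import numpy as np

pre_layer = np.random.rand(1, 8, 8, 256).astype(np.float32)
feature_map = np.random.rand(1, 4, 4, 256).astype(np.float32)

# A stride-2, SAME-padded pool halves each spatial dimension, so the pooled
# pre_layer lines up with the stride-2 feature_map and can be added.
pooled = pre_layer[:, ::2, ::2, :]  # crude stand-in for slim.avg_pool2d
assert pooled.shape == feature_map.shape
feature_map = feature_map + pooled
print(feature_map.shape)  # (1, 4, 4, 256)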
...
...
@@ -399,6 +417,7 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
 def fpn_top_down_feature_maps(image_features,
                               depth,
                               use_depthwise=False,
+                              use_explicit_padding=False,
                               scope=None):
  """Generates `top-down` feature maps for Feature Pyramid Networks.
...
...
@@ -409,7 +428,9 @@ def fpn_top_down_feature_maps(image_features,
      Spatial resolutions of successive tensors must reduce exactly by a
      factor of 2.
    depth: depth of output feature maps.
-   use_depthwise: use depthwise separable conv instead of regular conv.
+   use_depthwise: whether to use depthwise separable conv instead of regular
+     conv.
+   use_explicit_padding: whether to use explicit padding.
    scope: A scope name to wrap this op under.

  Returns:
...
...
@@ -420,8 +441,10 @@ def fpn_top_down_feature_maps(image_features,
  num_levels = len(image_features)
  output_feature_maps_list = []
  output_feature_map_keys = []
+ padding = 'VALID' if use_explicit_padding else 'SAME'
+ kernel_size = 3
  with slim.arg_scope(
-     [slim.conv2d, slim.separable_conv2d], padding='SAME', stride=1):
+     [slim.conv2d, slim.separable_conv2d], padding=padding, stride=1):
    top_down = slim.conv2d(
        image_features[-1][1],
        depth, [1, 1], activation_fn=None, normalizer_fn=None,
...
...
@@ -436,14 +459,20 @@ def fpn_top_down_feature_maps(image_features,
          image_features[level][1], depth, [1, 1],
          activation_fn=None, normalizer_fn=None,
          scope='projection_%d' % (level + 1))
+     if use_explicit_padding:
+       # slice top_down to the same shape as residual
+       residual_shape = tf.shape(residual)
+       top_down = top_down[:, :residual_shape[1], :residual_shape[2], :]
      top_down += residual
      if use_depthwise:
        conv_op = functools.partial(slim.separable_conv2d, depth_multiplier=1)
      else:
        conv_op = slim.conv2d
+     if use_explicit_padding:
+       top_down = ops.fixed_padding(top_down, kernel_size)
      output_feature_maps_list.append(conv_op(
          top_down,
-         depth, [3, 3],
+         depth, [kernel_size, kernel_size],
          scope='smoothing_%d' % (level + 1)))
      output_feature_map_keys.append('top_down_%s' % image_features[level][0])

  return collections.OrderedDict(reversed(
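With the new flag, callers get VALID convolutions preceded by ops.fixed_padding throughout the top-down pathway, plus the slicing above to realign shapes. A hedged usage sketch, assuming backbone feature maps whose resolutions halve at each level (the block names are illustrative):

import tensorflow as tf
from object_detection.models import feature_map_generators

# Fake backbone outputs; each level halves the spatial resolution.
image_features = [
    ('block2', tf.random_uniform([1, 32, 32, 64])),
    ('block3', tf.random_uniform([1, 16, 16, 128])),
    ('block4', tf.random_uniform([1, 8, 8, 256])),
]
fpn_features = feature_map_generators.fpn_top_down_feature_maps(
    image_features,
    depth=256,
    use_depthwise=False,
    use_explicit_padding=True)  # VALID convs preceded by fixed_padding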
...
...
research/object_detection/models/feature_map_generators_test.py
View file @
e00e0e13
...
...
@@ -45,6 +45,11 @@ EMBEDDED_SSD_MOBILENET_V1_LAYOUT = {
    'conv_kernel_size': [-1, -1, 3, 3, 2],
}

+SSD_MOBILENET_V1_WEIGHT_SHARED_LAYOUT = {
+    'from_layer': ['Conv2d_13_pointwise', '', '', ''],
+    'layer_depth': [-1, 256, 256, 256],
+}


@parameterized.parameters(
    {'use_keras': False},
...
...
@@ -67,7 +72,8 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
    text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
    return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)

- def _build_feature_map_generator(self, feature_map_layout, use_keras):
+ def _build_feature_map_generator(self, feature_map_layout, use_keras,
+                                  pool_residual=False):
    if use_keras:
      return feature_map_generators.KerasMultiResolutionFeatureMaps(
          feature_map_layout=feature_map_layout,
...
...
@@ -86,7 +92,8 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
          depth_multiplier=1,
          min_depth=32,
          insert_1x1_conv=True,
-         image_features=image_features)
+         image_features=image_features,
+         pool_residual=pool_residual)
    return feature_map_generator

  def test_get_expected_feature_map_shapes_with_inception_v2(self, use_keras):
...
...
@@ -209,6 +216,34 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
          (key, value.shape) for key, value in out_feature_maps.items())
    self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)

+ def test_feature_map_shapes_with_pool_residual_ssd_mobilenet_v1(
+     self, use_keras):
+   image_features = {
+       'Conv2d_13_pointwise': tf.random_uniform([4, 8, 8, 1024],
+                                                dtype=tf.float32),
+   }
+   feature_map_generator = self._build_feature_map_generator(
+       feature_map_layout=SSD_MOBILENET_V1_WEIGHT_SHARED_LAYOUT,
+       use_keras=use_keras,
+       pool_residual=True)
+   feature_maps = feature_map_generator(image_features)
+
+   expected_feature_map_shapes = {
+       'Conv2d_13_pointwise': (4, 8, 8, 1024),
+       'Conv2d_13_pointwise_2_Conv2d_1_3x3_s2_256': (4, 4, 4, 256),
+       'Conv2d_13_pointwise_2_Conv2d_2_3x3_s2_256': (4, 2, 2, 256),
+       'Conv2d_13_pointwise_2_Conv2d_3_3x3_s2_256': (4, 1, 1, 256)}
+
+   init_op = tf.global_variables_initializer()
+   with self.test_session() as sess:
+     sess.run(init_op)
+     out_feature_maps = sess.run(feature_maps)
+     out_feature_map_shapes = dict(
+         (key, value.shape) for key, value in out_feature_maps.items())
+   self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)

  def test_get_expected_variable_names_with_inception_v2(self, use_keras):
    image_features = {
        'Mixed_3c': tf.random_uniform([4, 28, 28, 256], dtype=tf.float32),
...
...
research/object_detection/models/keras_applications/mobilenet_v2.py
View file @
e00e0e13
...
...
@@ -82,6 +82,8 @@ class _LayersOverride(object):
    self._conv_hyperparams = conv_hyperparams
    self._use_explicit_padding = use_explicit_padding
    self._min_depth = min_depth
+   self.regularizer = tf.keras.regularizers.l2(0.00004 * 0.5)
+   self.initializer = tf.truncated_normal_initializer(stddev=0.09)

  def _FixedPaddingLayer(self, kernel_size):
    return tf.keras.layers.Lambda(lambda x: ops.fixed_padding(x, kernel_size))
...
...
@@ -114,6 +116,9 @@ class _LayersOverride(object):
    if self._conv_hyperparams:
      kwargs = self._conv_hyperparams.params(**kwargs)
+   else:
+     kwargs['kernel_regularizer'] = self.regularizer
+     kwargs['kernel_initializer'] = self.initializer
    kwargs['padding'] = 'same'
    kernel_size = kwargs.get('kernel_size')
...
...
@@ -144,6 +149,8 @@ class _LayersOverride(object):
"""
if
self
.
_conv_hyperparams
:
kwargs
=
self
.
_conv_hyperparams
.
params
(
**
kwargs
)
else
:
kwargs
[
'depthwise_initializer'
]
=
self
.
initializer
kwargs
[
'padding'
]
=
'same'
kernel_size
=
kwargs
.
get
(
'kernel_size'
)
...
...
research/object_detection/models/ssd_mobilenet_v1_fpn_feature_extractor.py
View file @
e00e0e13
...
...
@@ -31,11 +31,10 @@ slim = tf.contrib.slim
 # A modified config of mobilenet v1 that makes it more detection friendly.
 def _create_modified_mobilenet_config():
-  conv_defs = copy.copy(mobilenet_v1.MOBILENETV1_CONV_DEFS)
+  conv_defs = copy.deepcopy(mobilenet_v1.MOBILENETV1_CONV_DEFS)
   conv_defs[-2] = mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=2, depth=512)
   conv_defs[-1] = mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=256)
   return conv_defs

-_CONV_DEFS = _create_modified_mobilenet_config()


 class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
...
...
@@ -98,6 +97,9 @@ class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
    self._fpn_min_level = fpn_min_level
    self._fpn_max_level = fpn_max_level
    self._additional_layer_depth = additional_layer_depth
+   self._conv_defs = None
+   if self._use_depthwise:
+     self._conv_defs = _create_modified_mobilenet_config()

  def preprocess(self, resized_inputs):
    """SSD preprocessing.
...
...
@@ -141,7 +143,7 @@ class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
          final_endpoint='Conv2d_13_pointwise',
          min_depth=self._min_depth,
          depth_multiplier=self._depth_multiplier,
-         conv_defs=_CONV_DEFS if self._use_depthwise else None,
+         conv_defs=self._conv_defs,
          use_explicit_padding=self._use_explicit_padding,
          scope=scope)
...
...
@@ -159,7 +161,8 @@ class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
      fpn_features = feature_map_generators.fpn_top_down_feature_maps(
          [(key, image_features[key]) for key in feature_block_list],
          depth=depth_fn(self._additional_layer_depth),
-         use_depthwise=self._use_depthwise)
+         use_depthwise=self._use_depthwise,
+         use_explicit_padding=self._use_explicit_padding)
      feature_maps = []
      for level in range(self._fpn_min_level, base_fpn_max_level + 1):
        feature_maps.append(fpn_features['top_down_{}'.format(
...
...
@@ -167,18 +170,23 @@ class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
      last_feature_map = fpn_features['top_down_{}'.format(
          feature_blocks[base_fpn_max_level - 2])]
      # Construct coarse features
+     padding = 'VALID' if self._use_explicit_padding else 'SAME'
+     kernel_size = 3
      for i in range(base_fpn_max_level + 1, self._fpn_max_level + 1):
        if self._use_depthwise:
          conv_op = functools.partial(
              slim.separable_conv2d, depth_multiplier=1)
        else:
          conv_op = slim.conv2d
+       if self._use_explicit_padding:
+         last_feature_map = ops.fixed_padding(last_feature_map, kernel_size)
        last_feature_map = conv_op(
            last_feature_map,
            num_outputs=depth_fn(self._additional_layer_depth),
-           kernel_size=[3, 3],
+           kernel_size=[kernel_size, kernel_size],
            stride=2,
-           padding='SAME',
+           padding=padding,
            scope='bottom_up_Conv2d_{}'.format(i - base_fpn_max_level + 13))
        feature_maps.append(last_feature_map)
    return feature_maps
research/object_detection/models/ssd_mobilenet_v2_fpn_feature_extractor.py
View file @
e00e0e13
...
...
@@ -30,17 +30,14 @@ from nets.mobilenet import mobilenet_v2
 slim = tf.contrib.slim


-# A modified config of mobilenet v2 that makes it more detection friendly,
+# A modified config of mobilenet v2 that makes it more detection friendly.
 def _create_modified_mobilenet_config():
-  conv_defs = copy.copy(mobilenet_v2.V2_DEF)
+  conv_defs = copy.deepcopy(mobilenet_v2.V2_DEF)
   conv_defs['spec'][-1] = mobilenet.op(
       slim.conv2d, stride=1, kernel_size=[1, 1], num_outputs=256)
   return conv_defs

-_CONV_DEFS = _create_modified_mobilenet_config()


 class SSDMobileNetV2FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
   """SSD Feature Extractor using MobilenetV2 FPN features."""
...
@@ -100,6 +97,9 @@ class SSDMobileNetV2FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
    self._fpn_min_level = fpn_min_level
    self._fpn_max_level = fpn_max_level
    self._additional_layer_depth = additional_layer_depth
+   self._conv_defs = None
+   if self._use_depthwise:
+     self._conv_defs = _create_modified_mobilenet_config()

  def preprocess(self, resized_inputs):
    """SSD preprocessing.
...
...
@@ -142,7 +142,7 @@ class SSDMobileNetV2FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
          ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
          final_endpoint='layer_19',
          depth_multiplier=self._depth_multiplier,
-         conv_defs=_CONV_DEFS if self._use_depthwise else None,
+         conv_defs=self._conv_defs,
          use_explicit_padding=self._use_explicit_padding,
          scope=scope)
      depth_fn = lambda d: max(int(d * self._depth_multiplier), self._min_depth)
...
@@ -158,7 +158,8 @@ class SSDMobileNetV2FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
      fpn_features = feature_map_generators.fpn_top_down_feature_maps(
          [(key, image_features[key]) for key in feature_block_list],
          depth=depth_fn(self._additional_layer_depth),
-         use_depthwise=self._use_depthwise)
+         use_depthwise=self._use_depthwise,
+         use_explicit_padding=self._use_explicit_padding)
      feature_maps = []
      for level in range(self._fpn_min_level, base_fpn_max_level + 1):
        feature_maps.append(fpn_features['top_down_{}'.format(
...
@@ -166,18 +167,23 @@ class SSDMobileNetV2FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
      last_feature_map = fpn_features['top_down_{}'.format(
          feature_blocks[base_fpn_max_level - 2])]
      # Construct coarse features
+     padding = 'VALID' if self._use_explicit_padding else 'SAME'
+     kernel_size = 3
      for i in range(base_fpn_max_level + 1, self._fpn_max_level + 1):
        if self._use_depthwise:
          conv_op = functools.partial(
              slim.separable_conv2d, depth_multiplier=1)
        else:
          conv_op = slim.conv2d
+       if self._use_explicit_padding:
+         last_feature_map = ops.fixed_padding(last_feature_map, kernel_size)
        last_feature_map = conv_op(
            last_feature_map,
            num_outputs=depth_fn(self._additional_layer_depth),
-           kernel_size=[3, 3],
+           kernel_size=[kernel_size, kernel_size],
            stride=2,
-           padding='SAME',
+           padding=padding,
            scope='bottom_up_Conv2d_{}'.format(i - base_fpn_max_level + 19))
        feature_maps.append(last_feature_map)
    return feature_maps