ModelZoo / ResNet50_tensorflow / Commits / e00e0e13

Commit e00e0e13, authored Dec 03, 2018 by dreamdragon
Merge remote-tracking branch 'upstream/master'
Parents: b915db4e, 402b561b
Showing 20 changed files with 1040 additions and 356 deletions (+1040 −356)
research/object_detection/inputs.py                                              +5   -3
research/object_detection/inputs_test.py                                         +3   -0
research/object_detection/legacy/trainer_test.py                                 +23  -0
research/object_detection/meta_architectures/faster_rcnn_meta_arch.py            +97  -28
research/object_detection/meta_architectures/faster_rcnn_meta_arch_test.py       +4   -1
research/object_detection/meta_architectures/faster_rcnn_meta_arch_test_lib.py   +275 -187
research/object_detection/meta_architectures/ssd_meta_arch.py                    +140 -28
research/object_detection/meta_architectures/ssd_meta_arch_test.py               +10  -11
research/object_detection/meta_architectures/ssd_meta_arch_test_lib.py           +6   -4
research/object_detection/metrics/coco_evaluation.py                             +108 -45
research/object_detection/metrics/coco_evaluation_test.py                        +227 -16
research/object_detection/model_lib.py                                           +26  -8
research/object_detection/model_lib_test.py                                      +1   -1
research/object_detection/model_tpu_main.py                                      +1   -0
research/object_detection/models/faster_rcnn_resnet_v1_feature_extractor.py      +7   -2
research/object_detection/models/feature_map_generators.py                       +34  -5
research/object_detection/models/feature_map_generators_test.py                  +37  -2
research/object_detection/models/keras_applications/mobilenet_v2.py              +7   -0
research/object_detection/models/ssd_mobilenet_v1_fpn_feature_extractor.py       +14  -6
research/object_detection/models/ssd_mobilenet_v2_fpn_feature_extractor.py       +15  -9
research/object_detection/inputs.py  View file @ e00e0e13

@@ -124,6 +124,8 @@ def transform_input_data(tensor_dict,
   if fields.InputDataFields.groundtruth_instance_masks in tensor_dict:
     masks = tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
     _, resized_masks, _ = image_resizer_fn(image, masks)
+    if use_bfloat16:
+      resized_masks = tf.cast(resized_masks, tf.bfloat16)
     tensor_dict[fields.InputDataFields.
                 groundtruth_instance_masks] = resized_masks

@@ -161,6 +163,9 @@ def transform_input_data(tensor_dict,
     tensor_dict[fields.InputDataFields.groundtruth_classes] = merged_classes
     tensor_dict[fields.InputDataFields.groundtruth_confidences] = (
         merged_confidences)
+  if fields.InputDataFields.groundtruth_boxes in tensor_dict:
+    tensor_dict[fields.InputDataFields.num_groundtruth_boxes] = tf.shape(
+        tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]
   return tensor_dict

@@ -282,12 +287,9 @@ def augment_input_data(tensor_dict, data_augmentation_options):
                             in tensor_dict)
   include_keypoints = (fields.InputDataFields.groundtruth_keypoints
                        in tensor_dict)
-  include_label_scores = (fields.InputDataFields.groundtruth_confidences
-                          in tensor_dict)
   tensor_dict = preprocessor.preprocess(
       tensor_dict, data_augmentation_options,
       func_arg_map=preprocessor.get_default_func_arg_map(
-          include_label_scores=include_label_scores,
          include_instance_masks=include_instance_masks,
          include_keypoints=include_keypoints))
   tensor_dict[fields.InputDataFields.image] = tf.squeeze(
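Note on the two additions above: resized instance masks are cast to bfloat16 (a TPU-friendly dtype) when `use_bfloat16` is set, and the transform now records the dynamic count of groundtruth boxes so padded batches can be unpadded downstream. A minimal sketch of the same pattern, assuming plain TF 1.x tensors and illustrative dictionary keys (not this repo's field constants):

import tensorflow as tf

def record_box_count(tensor_dict, use_bfloat16=False):
  # Cast masks to bfloat16 only when requested (mirrors the hunk above).
  if use_bfloat16 and 'groundtruth_instance_masks' in tensor_dict:
    tensor_dict['groundtruth_instance_masks'] = tf.cast(
        tensor_dict['groundtruth_instance_masks'], tf.bfloat16)
  # tf.shape(...)[0] is the dynamic (unpadded) number of boxes.
  if 'groundtruth_boxes' in tensor_dict:
    tensor_dict['num_groundtruth_boxes'] = tf.shape(
        tensor_dict['groundtruth_boxes'])[0]
  return tensor_dict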
research/object_detection/inputs_test.py  View file @ e00e0e13

@@ -630,6 +630,9 @@ class DataTransformationFnTest(test_case.TestCase):
     self.assertAllClose(
         transformed_inputs[fields.InputDataFields.groundtruth_confidences],
         [[1, 0, 1]])
+    self.assertAllClose(
+        transformed_inputs[fields.InputDataFields.num_groundtruth_boxes],
+        1)

   def test_returns_resized_masks(self):
     tensor_dict = {
research/object_detection/legacy/trainer_test.py  View file @ e00e0e13

@@ -160,6 +160,17 @@ class FakeDetectionModel(model.DetectionModel):
     }
     return loss_dict

+  def regularization_losses(self):
+    """Returns a list of regularization losses for this model.
+
+    Returns a list of regularization losses for this model that the estimator
+    needs to use during training/optimization.
+
+    Returns:
+      A list of regularization loss tensors.
+    """
+    pass
+
   def restore_map(self, fine_tune_checkpoint_type='detection'):
     """Returns a map of variables to load from a foreign checkpoint.

@@ -174,6 +185,18 @@ class FakeDetectionModel(model.DetectionModel):
     """
     return {var.op.name: var for var in tf.global_variables()}

+  def updates(self):
+    """Returns a list of update operators for this model.
+
+    Returns a list of update operators for this model that must be executed at
+    each training step. The estimator's train op needs to have a control
+    dependency on these updates.
+
+    Returns:
+      A list of update operators.
+    """
+    pass
+

 class TrainerTest(tf.test.TestCase):
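These stubs exist because `DetectionModel` gains two new hooks in this commit, `regularization_losses()` and `updates()`, which every subclass (even a test fake) must implement. A hedged sketch of how an estimator-style training loop might consume them; `build_train_op` and its arguments are illustrative, not this repo's API:

import tensorflow as tf

def build_train_op(model, total_loss, optimizer):
  # Fold the model's regularization losses into the objective.
  reg_losses = model.regularization_losses()
  if reg_losses:
    total_loss += tf.add_n(reg_losses)
  grads_and_vars = optimizer.compute_gradients(total_loss)
  # The train op must depend on the model's update ops (e.g. batch-norm
  # moving averages), as the docstrings above require.
  with tf.control_dependencies(model.updates()):
    return optimizer.apply_gradients(grads_and_vars)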
research/object_detection/meta_architectures/faster_rcnn_meta_arch.py  View file @ e00e0e13

@@ -662,7 +662,8 @@ class FasterRCNNMetaArch(model.DetectionModel):
             anchors_boxlist, clip_window)
       else:
         anchors_boxlist = box_list_ops.clip_to_window(
-            anchors_boxlist, clip_window)
+            anchors_boxlist, clip_window,
+            filter_nonoverlapping=not self._use_static_shapes)
       self._anchors = anchors_boxlist
       prediction_dict = {

@@ -917,12 +918,14 @@ class FasterRCNNMetaArch(model.DetectionModel):
         _, num_classes, mask_height, mask_width = (
             detection_masks.get_shape().as_list())
         _, max_detection = detection_classes.get_shape().as_list()
+        prediction_dict['mask_predictions'] = tf.reshape(
+            detection_masks, [-1, num_classes, mask_height, mask_width])
         if num_classes > 1:
           detection_masks = self._gather_instance_masks(
               detection_masks, detection_classes)

         prediction_dict[fields.DetectionResultFields.detection_masks] = (
-            tf.reshape(detection_masks,
+            tf.reshape(tf.sigmoid(detection_masks),
                        [batch_size, max_detection, mask_height, mask_width]))

     return prediction_dict

@@ -1159,9 +1162,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
     }
     # TODO(jrru): Remove mask_predictions from _post_process_box_classifier.
-    with tf.name_scope('SecondStagePostprocessor'):
-      if (self._number_of_stages == 2 or
-          (self._number_of_stages == 3 and self._is_training)):
+    if (self._number_of_stages == 2 or
+        (self._number_of_stages == 3 and self._is_training)):
+      with tf.name_scope('SecondStagePostprocessor'):
         mask_predictions = prediction_dict.get(box_predictor.MASK_PREDICTIONS)
         detections_dict = self._postprocess_box_classifier(
             prediction_dict['refined_box_encodings'],

@@ -1170,18 +1173,53 @@ class FasterRCNNMetaArch(model.DetectionModel):
             prediction_dict['num_proposals'],
             true_image_shapes,
             mask_predictions=mask_predictions)
+      if 'rpn_features_to_crop' in prediction_dict and self._initial_crop_size:
+        self._add_detection_features_output_node(
+            detections_dict[fields.DetectionResultFields.detection_boxes],
+            prediction_dict['rpn_features_to_crop'])
       return detections_dict

     if self._number_of_stages == 3:
       # Post processing is already performed in 3rd stage. We need to transfer
       # postprocessed tensors from `prediction_dict` to `detections_dict`.
-      detections_dict = {}
-      for key in prediction_dict:
-        if key == fields.DetectionResultFields.detection_masks:
-          detections_dict[key] = tf.sigmoid(prediction_dict[key])
-        elif 'detection' in key:
-          detections_dict[key] = prediction_dict[key]
-      return detections_dict
+      return prediction_dict
+
+  def _add_detection_features_output_node(self, detection_boxes,
+                                          rpn_features_to_crop):
+    """Add the detection features to the output node.
+
+    The detection features are from cropping rpn_features with boxes.
+    Each bounding box has one feature vector of length depth, which comes from
+    mean_pooling of the cropped rpn_features.
+
+    Args:
+      detection_boxes: a 3-D float32 tensor of shape
+        [batch_size, max_detection, 4] which represents the bounding boxes.
+      rpn_features_to_crop: A 4-D float32 tensor with shape
+        [batch, height, width, depth] representing image features to crop using
+        the proposals boxes.
+    """
+    with tf.name_scope('SecondStageDetectionFeaturesExtract'):
+      flattened_detected_feature_maps = (
+          self._compute_second_stage_input_feature_maps(
+              rpn_features_to_crop, detection_boxes))
+      detection_features_unpooled = (
+          self._feature_extractor.extract_box_classifier_features(
+              flattened_detected_feature_maps,
+              scope=self.second_stage_feature_extractor_scope))
+
+      batch_size = tf.shape(detection_boxes)[0]
+      max_detection = tf.shape(detection_boxes)[1]
+      detection_features_pool = tf.reduce_mean(
+          detection_features_unpooled, axis=[1, 2])
+      detection_features = tf.reshape(
+          detection_features_pool,
+          [batch_size, max_detection, tf.shape(detection_features_pool)[-1]])
+
+      detection_features = tf.identity(
+          detection_features, 'detection_features')

   def _postprocess_rpn(self,
                        rpn_box_encodings_batch,
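The new `_add_detection_features_output_node` crops the RPN feature map with the final detection boxes and mean-pools each crop into a single feature vector per box. A standalone sketch of just the pooling step; shapes and names here are illustrative, not the repo's API:

import tensorflow as tf

def mean_pool_box_features(cropped_features, batch_size, max_detection):
  # cropped_features: [batch_size * max_detection, crop_h, crop_w, depth],
  # one spatial crop per detection box.
  pooled = tf.reduce_mean(cropped_features, axis=[1, 2])  # [B*N, depth]
  # Regroup to one feature vector per box: [batch, max_detection, depth].
  return tf.reshape(pooled,
                    [batch_size, max_detection, tf.shape(pooled)[-1]])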
@@ -1454,6 +1492,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
     # to cls_weights. This could happen as boxes within certain IOU ranges
     # are ignored. If triggered, the selected boxes will still be ignored
     # during loss computation.
+    cls_weights = tf.reduce_mean(cls_weights, axis=-1)
     positive_indicator = tf.greater(tf.argmax(cls_targets, axis=1), 0)
     valid_indicator = tf.logical_and(
         tf.range(proposal_boxlist.num_boxes()) < num_valid_proposals,

@@ -1566,6 +1605,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
       mask_predictions_batch = tf.reshape(
           mask_predictions, [-1, self.max_num_proposals,
                              self.num_classes, mask_height, mask_width])
+
     (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks, _,
      num_detections) = self._second_stage_nms_fn(
         refined_decoded_boxes_batch,

@@ -1713,6 +1753,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
         gt_box_batch=groundtruth_boxlists,
         gt_class_targets_batch=(len(groundtruth_boxlists) * [None]),
         gt_weights_batch=groundtruth_weights_list)
+    batch_cls_weights = tf.reduce_mean(batch_cls_weights, axis=2)
     batch_cls_targets = tf.squeeze(batch_cls_targets, axis=2)

     def _minibatch_subsample_fn(inputs):

@@ -1743,7 +1784,8 @@ class FasterRCNNMetaArch(model.DetectionModel):
           losses_mask=losses_mask)
       objectness_losses = self._first_stage_objectness_loss(
           rpn_objectness_predictions_with_background,
-          batch_one_hot_targets, weights=batch_sampled_indices,
+          batch_one_hot_targets,
+          weights=tf.expand_dims(batch_sampled_indices, axis=-1),
           losses_mask=losses_mask)
       localization_loss = tf.reduce_mean(
           tf.reduce_sum(localization_losses, axis=1) / normalizer)
@@ -1960,25 +2002,28 @@ class FasterRCNNMetaArch(model.DetectionModel):
             tf.expand_dims(flat_gt_masks, -1),
             tf.expand_dims(flat_normalized_proposals, axis=1),
             [mask_height, mask_width])
+        # Without stopping gradients into cropped groundtruth masks the
+        # performance with 100-padded groundtruth masks when batch size > 1 is
+        # about 4% worse.
+        # TODO(rathodv): Investigate this since we don't expect any variables
+        # upstream of flat_cropped_gt_mask.
+        flat_cropped_gt_mask = tf.stop_gradient(flat_cropped_gt_mask)
         batch_cropped_gt_mask = tf.reshape(
             flat_cropped_gt_mask,
             [batch_size, -1, mask_height * mask_width])

-        second_stage_mask_losses = ops.reduce_sum_trailing_dimensions(
-            self._second_stage_mask_loss(
-                reshaped_prediction_masks,
-                batch_cropped_gt_mask,
-                weights=batch_mask_target_weights,
-                losses_mask=losses_mask),
-            ndims=2) / (
-                mask_height * mask_width * tf.maximum(
-                    tf.reduce_sum(
-                        batch_mask_target_weights, axis=1, keep_dims=True
-                    ), tf.ones((batch_size, 1))))
-        second_stage_mask_loss = tf.reduce_sum(
-            tf.where(paddings_indicator, second_stage_mask_losses,
-                     tf.zeros_like(second_stage_mask_losses)))
+        mask_losses_weights = (
+            batch_mask_target_weights * tf.to_float(paddings_indicator))
+        mask_losses = self._second_stage_mask_loss(
+            reshaped_prediction_masks,
+            batch_cropped_gt_mask,
+            weights=tf.expand_dims(mask_losses_weights, axis=-1),
+            losses_mask=losses_mask)
+        total_mask_loss = tf.reduce_sum(mask_losses)
+        normalizer = tf.maximum(
+            tf.reduce_sum(mask_losses_weights * mask_height * mask_width),
+            1.0)
+        second_stage_mask_loss = total_mask_loss / normalizer

       if second_stage_mask_loss is not None:
         mask_loss = tf.multiply(self._second_stage_mask_loss_weight,

@@ -2073,6 +2118,17 @@ class FasterRCNNMetaArch(model.DetectionModel):
         cls_losses=tf.expand_dims(single_image_cls_loss, 0),
         decoded_boxlist_list=[proposal_boxlist])

+  def regularization_losses(self):
+    """Returns a list of regularization losses for this model.
+
+    Returns a list of regularization losses for this model that the estimator
+    needs to use during training/optimization.
+
+    Returns:
+      A list of regularization loss tensors.
+    """
+    return tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
+
   def restore_map(self,
                   fine_tune_checkpoint_type='detection',
                   load_all_detection_checkpoint_vars=False):

@@ -2117,3 +2173,16 @@ class FasterRCNNMetaArch(model.DetectionModel):
     feature_extractor_variables = tf.contrib.framework.filter_variables(
         variables_to_restore, include_patterns=include_patterns)
     return {var.op.name: var for var in feature_extractor_variables}
+
+  def updates(self):
+    """Returns a list of update operators for this model.
+
+    Returns a list of update operators for this model that must be executed at
+    each training step. The estimator's train op needs to have a control
+    dependency on these updates.
+
+    Returns:
+      A list of update operators.
+    """
+    return tf.get_collection(tf.GraphKeys.UPDATE_OPS)
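The rewritten mask loss above folds the padding indicator into the weights and divides the summed loss by the number of valid loss terms. A self-contained sketch of just that normalization, assuming per-proposal losses already reduced to [batch, num_proposals] and 0/1 weights (names are illustrative):

import tensorflow as tf

def normalized_mask_loss(mask_losses, mask_losses_weights,
                         mask_height, mask_width):
  # Sum over every proposal in the batch.
  total_mask_loss = tf.reduce_sum(mask_losses)
  # Each valid proposal contributes mask_height * mask_width loss terms;
  # the tf.maximum guards against division by zero when nothing is valid.
  normalizer = tf.maximum(
      tf.reduce_sum(mask_losses_weights * mask_height * mask_width), 1.0)
  return total_mask_loss / normalizer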
research/object_detection/meta_architectures/faster_rcnn_meta_arch_test.py  View file @ e00e0e13

@@ -189,7 +189,7 @@ class FasterRCNNMetaArchTest(
         set(expected_shapes.keys()).union(
             set([
                 'detection_boxes', 'detection_scores', 'detection_classes',
-                'detection_masks', 'num_detections'
+                'detection_masks', 'num_detections', 'mask_predictions',
             ])))
     for key in expected_shapes:
       self.assertAllEqual(tensor_dict_out[key].shape, expected_shapes[key])

@@ -199,6 +199,9 @@ class FasterRCNNMetaArchTest(
     self.assertAllEqual(tensor_dict_out['detection_classes'].shape, [2, 5])
     self.assertAllEqual(tensor_dict_out['detection_scores'].shape, [2, 5])
     self.assertAllEqual(tensor_dict_out['num_detections'].shape, [2])
+    num_classes = 1 if masks_are_class_agnostic else 2
+    self.assertAllEqual(tensor_dict_out['mask_predictions'].shape,
+                        [10, num_classes, 14, 14])

   @parameterized.parameters(
       {'masks_are_class_agnostic': False},
research/object_detection/meta_architectures/faster_rcnn_meta_arch_test_lib.py  View file @ e00e0e13

@@ -250,6 +250,7 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
          iou_threshold: 1.0
          max_detections_per_class: 5
          max_total_detections: 5
+          use_static_shapes: """ + '{}'.format(use_static_shapes) + """
        }
    """
    post_processing_config = post_processing_pb2.PostProcessing()

@@ -336,61 +337,71 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
            masks_are_class_agnostic=masks_are_class_agnostic), **common_kwargs)

   def test_predict_gives_correct_shapes_in_inference_mode_first_stage_only(
-      self):
-    test_graph = tf.Graph()
-    with test_graph.as_default():
-      model = self._build_model(
-          is_training=False, number_of_stages=1, second_stage_batch_size=2)
-      batch_size = 2
-      height = 10
-      width = 12
-      input_image_shape = (batch_size, height, width, 3)
-
-      preprocessed_inputs = tf.placeholder(
-          dtype=tf.float32, shape=(batch_size, None, None, 3))
-      _, true_image_shapes = model.preprocess(tf.zeros(input_image_shape))
-      prediction_dict = model.predict(preprocessed_inputs, true_image_shapes)
-
-      # In inference mode, anchors are clipped to the image window, but not
-      # pruned.  Since MockFasterRCNN.extract_proposal_features returns a
-      # tensor with the same shape as its input, the expected number of anchors
-      # is height * width * the number of anchors per location (i.e. 3x3).
-      expected_num_anchors = height * width * 3 * 3
-      expected_output_keys = set([
-          'rpn_box_predictor_features', 'rpn_features_to_crop', 'image_shape',
-          'rpn_box_encodings', 'rpn_objectness_predictions_with_background',
-          'anchors'])
-      expected_output_shapes = {
-          'rpn_box_predictor_features': (batch_size, height, width, 512),
-          'rpn_features_to_crop': (batch_size, height, width, 3),
-          'rpn_box_encodings': (batch_size, expected_num_anchors, 4),
-          'rpn_objectness_predictions_with_background':
-          (batch_size, expected_num_anchors, 2),
-          'anchors': (expected_num_anchors, 4)
-      }
-
-      init_op = tf.global_variables_initializer()
-      with self.test_session(graph=test_graph) as sess:
-        sess.run(init_op)
-        prediction_out = sess.run(prediction_dict,
-                                  feed_dict={
-                                      preprocessed_inputs:
-                                      np.zeros(input_image_shape)
-                                  })
-
-        self.assertEqual(set(prediction_out.keys()), expected_output_keys)
-        self.assertAllEqual(prediction_out['image_shape'], input_image_shape)
-        for output_key, expected_shape in expected_output_shapes.items():
-          self.assertAllEqual(prediction_out[output_key].shape, expected_shape)
-
-        # Check that anchors are clipped to window.
-        anchors = prediction_out['anchors']
-        self.assertTrue(np.all(np.greater_equal(anchors, 0)))
-        self.assertTrue(np.all(np.less_equal(anchors[:, 0], height)))
-        self.assertTrue(np.all(np.less_equal(anchors[:, 1], width)))
-        self.assertTrue(np.all(np.less_equal(anchors[:, 2], height)))
-        self.assertTrue(np.all(np.less_equal(anchors[:, 3], width)))
+      self, use_static_shapes=False):
+    batch_size = 2
+    height = 10
+    width = 12
+    input_image_shape = (batch_size, height, width, 3)
+
+    def graph_fn(images):
+      """Function to construct tf graph for the test."""
+      model = self._build_model(
+          is_training=False,
+          number_of_stages=1,
+          second_stage_batch_size=2,
+          clip_anchors_to_image=use_static_shapes,
+          use_static_shapes=use_static_shapes)
+      preprocessed_inputs, true_image_shapes = model.preprocess(images)
+      prediction_dict = model.predict(preprocessed_inputs, true_image_shapes)
+      return (prediction_dict['rpn_box_predictor_features'],
+              prediction_dict['rpn_features_to_crop'],
+              prediction_dict['image_shape'],
+              prediction_dict['rpn_box_encodings'],
+              prediction_dict['rpn_objectness_predictions_with_background'],
+              prediction_dict['anchors'])
+
+    images = np.zeros(input_image_shape, dtype=np.float32)
+
+    # In inference mode, anchors are clipped to the image window, but not
+    # pruned.  Since MockFasterRCNN.extract_proposal_features returns a
+    # tensor with the same shape as its input, the expected number of anchors
+    # is height * width * the number of anchors per location (i.e. 3x3).
+    expected_num_anchors = height * width * 3 * 3
+    expected_output_shapes = {
+        'rpn_box_predictor_features': (batch_size, height, width, 512),
+        'rpn_features_to_crop': (batch_size, height, width, 3),
+        'rpn_box_encodings': (batch_size, expected_num_anchors, 4),
+        'rpn_objectness_predictions_with_background':
+        (batch_size, expected_num_anchors, 2),
+        'anchors': (expected_num_anchors, 4)
+    }
+
+    if use_static_shapes:
+      results = self.execute(graph_fn, [images])
+    else:
+      results = self.execute_cpu(graph_fn, [images])
+
+    self.assertAllEqual(results[0].shape,
+                        expected_output_shapes['rpn_box_predictor_features'])
+    self.assertAllEqual(results[1].shape,
+                        expected_output_shapes['rpn_features_to_crop'])
+    self.assertAllEqual(results[2], input_image_shape)
+    self.assertAllEqual(results[3].shape,
+                        expected_output_shapes['rpn_box_encodings'])
+    self.assertAllEqual(
+        results[4].shape,
+        expected_output_shapes['rpn_objectness_predictions_with_background'])
+    self.assertAllEqual(results[5].shape,
+                        expected_output_shapes['anchors'])
+
+    # Check that anchors are clipped to window.
+    anchors = results[5]
+    self.assertTrue(np.all(np.greater_equal(anchors, 0)))
+    self.assertTrue(np.all(np.less_equal(anchors[:, 0], height)))
+    self.assertTrue(np.all(np.less_equal(anchors[:, 1], width)))
+    self.assertTrue(np.all(np.less_equal(anchors[:, 2], height)))
+    self.assertTrue(np.all(np.less_equal(anchors[:, 3], width)))

   def test_predict_gives_valid_anchors_in_training_mode_first_stage_only(self):
     test_graph = tf.Graph()
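The rewritten test drops placeholders and explicit sessions in favor of the base class's `execute` / `execute_cpu` helpers, which is what lets the `use_static_shapes` path run with fully static, TPU-compilable shapes. The dispatch pattern repeated throughout this file, as a sketch (a pattern summary, not code that exists in the repo):

def _run(self, graph_fn, inputs, use_static_shapes):
  # self.execute compiles graph_fn with static shapes (TPU-style);
  # self.execute_cpu runs it with ordinary feed semantics on CPU.
  if use_static_shapes:
    return self.execute(graph_fn, inputs)
  return self.execute_cpu(graph_fn, inputs)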
@@ -446,7 +457,38 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
         prediction_out['rpn_objectness_predictions_with_background'].shape,
         (batch_size, num_anchors_out, 2))

   def test_predict_correct_shapes_in_inference_mode_two_stages(
-      self):
+      self, use_static_shapes=False):
+
+    def compare_results(results, expected_output_shapes):
+      """Checks if the shape of the predictions are as expected."""
+      self.assertAllEqual(results[0].shape,
+                          expected_output_shapes['rpn_box_predictor_features'])
+      self.assertAllEqual(results[1].shape,
+                          expected_output_shapes['rpn_features_to_crop'])
+      self.assertAllEqual(results[2].shape,
+                          expected_output_shapes['image_shape'])
+      self.assertAllEqual(results[3].shape,
+                          expected_output_shapes['rpn_box_encodings'])
+      self.assertAllEqual(
+          results[4].shape,
+          expected_output_shapes['rpn_objectness_predictions_with_background'])
+      self.assertAllEqual(results[5].shape,
+                          expected_output_shapes['anchors'])
+      self.assertAllEqual(results[6].shape,
+                          expected_output_shapes['refined_box_encodings'])
+      self.assertAllEqual(
+          results[7].shape,
+          expected_output_shapes['class_predictions_with_background'])
+      self.assertAllEqual(results[8].shape,
+                          expected_output_shapes['num_proposals'])
+      self.assertAllEqual(results[9].shape,
+                          expected_output_shapes['proposal_boxes'])
+      self.assertAllEqual(results[10].shape,
+                          expected_output_shapes['proposal_boxes_normalized'])
+      self.assertAllEqual(results[11].shape,
+                          expected_output_shapes['box_classifier_features'])
+
     batch_size = 2
     image_size = 10
     max_num_proposals = 8

@@ -457,6 +499,32 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
                     (None, image_size, image_size, 3),
                     (batch_size, None, None, 3),
                     (None, None, None, 3)]

+    def graph_fn_tpu(images):
+      """Function to construct tf graph for the test."""
+      model = self._build_model(
+          is_training=False,
+          number_of_stages=2,
+          second_stage_batch_size=2,
+          predict_masks=False,
+          use_matmul_crop_and_resize=use_static_shapes,
+          clip_anchors_to_image=use_static_shapes,
+          use_static_shapes=use_static_shapes)
+      preprocessed_inputs, true_image_shapes = model.preprocess(images)
+      prediction_dict = model.predict(preprocessed_inputs, true_image_shapes)
+      return (prediction_dict['rpn_box_predictor_features'],
+              prediction_dict['rpn_features_to_crop'],
+              prediction_dict['image_shape'],
+              prediction_dict['rpn_box_encodings'],
+              prediction_dict['rpn_objectness_predictions_with_background'],
+              prediction_dict['anchors'],
+              prediction_dict['refined_box_encodings'],
+              prediction_dict['class_predictions_with_background'],
+              prediction_dict['num_proposals'],
+              prediction_dict['proposal_boxes'],
+              prediction_dict['proposal_boxes_normalized'],
+              prediction_dict['box_classifier_features'])
+
     expected_num_anchors = image_size * image_size * 3 * 3
     expected_shapes = {
         'rpn_box_predictor_features':

@@ -481,28 +549,34 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
         3)
     }
-    for input_shape in input_shapes:
-      test_graph = tf.Graph()
-      with test_graph.as_default():
-        model = self._build_model(
-            is_training=False,
-            number_of_stages=2,
-            second_stage_batch_size=2,
-            predict_masks=False)
-        preprocessed_inputs = tf.placeholder(tf.float32, shape=input_shape)
-        _, true_image_shapes = model.preprocess(preprocessed_inputs)
-        result_tensor_dict = model.predict(
-            preprocessed_inputs, true_image_shapes)
-        init_op = tf.global_variables_initializer()
-      with self.test_session(graph=test_graph) as sess:
-        sess.run(init_op)
-        tensor_dict_out = sess.run(result_tensor_dict, feed_dict={
-            preprocessed_inputs:
-            np.zeros((batch_size, image_size, image_size, 3))})
-      self.assertEqual(set(tensor_dict_out.keys()),
-                       set(expected_shapes.keys()))
-      for key in expected_shapes:
-        self.assertAllEqual(tensor_dict_out[key].shape, expected_shapes[key])
+    if use_static_shapes:
+      input_shape = (batch_size, image_size, image_size, 3)
+      images = np.zeros(input_shape, dtype=np.float32)
+      results = self.execute(graph_fn_tpu, [images])
+      compare_results(results, expected_shapes)
+    else:
+      for input_shape in input_shapes:
+        test_graph = tf.Graph()
+        with test_graph.as_default():
+          model = self._build_model(
+              is_training=False,
+              number_of_stages=2,
+              second_stage_batch_size=2,
+              predict_masks=False)
+          preprocessed_inputs = tf.placeholder(tf.float32, shape=input_shape)
+          _, true_image_shapes = model.preprocess(preprocessed_inputs)
+          result_tensor_dict = model.predict(
+              preprocessed_inputs, true_image_shapes)
+          init_op = tf.global_variables_initializer()
+        with self.test_session(graph=test_graph) as sess:
+          sess.run(init_op)
+          tensor_dict_out = sess.run(result_tensor_dict, feed_dict={
+              preprocessed_inputs:
+              np.zeros((batch_size, image_size, image_size, 3))})
+        self.assertEqual(set(tensor_dict_out.keys()),
+                         set(expected_shapes.keys()))
+        for key in expected_shapes:
+          self.assertAllEqual(tensor_dict_out[key].shape,
+                              expected_shapes[key])

   def test_predict_gives_correct_shapes_in_train_mode_both_stages(
       self,

@@ -596,23 +670,46 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
     self.assertAllEqual(results[8].shape,
                         expected_shapes['rpn_box_predictor_features'])

-  def _test_postprocess_first_stage_only_inference_mode(
-      self, pad_to_max_dimension=None):
-    model = self._build_model(
-        is_training=False, number_of_stages=1, second_stage_batch_size=6,
-        pad_to_max_dimension=pad_to_max_dimension)
+  def test_postprocess_first_stage_only_inference_mode(
+      self, use_static_shapes=False, pad_to_max_dimension=None):
     batch_size = 2
-    anchors = tf.constant(
+    first_stage_max_proposals = 4 if use_static_shapes else 8
+
+    def graph_fn(images,
+                 rpn_box_encodings,
+                 rpn_objectness_predictions_with_background,
+                 rpn_features_to_crop,
+                 anchors):
+      """Function to construct tf graph for the test."""
+      model = self._build_model(
+          is_training=False, number_of_stages=1, second_stage_batch_size=6,
+          use_matmul_crop_and_resize=use_static_shapes,
+          clip_anchors_to_image=use_static_shapes,
+          use_static_shapes=use_static_shapes,
+          use_matmul_gather_in_matcher=use_static_shapes,
+          first_stage_max_proposals=first_stage_max_proposals,
+          pad_to_max_dimension=pad_to_max_dimension)
+      _, true_image_shapes = model.preprocess(images)
+      proposals = model.postprocess({
+          'rpn_box_encodings': rpn_box_encodings,
+          'rpn_objectness_predictions_with_background':
+          rpn_objectness_predictions_with_background,
+          'rpn_features_to_crop': rpn_features_to_crop,
+          'anchors': anchors}, true_image_shapes)
+      return (proposals['num_detections'], proposals['detection_boxes'],
+              proposals['detection_scores'])
+
+    anchors = np.array(
         [[0, 0, 16, 16],
          [0, 16, 16, 32],
          [16, 0, 32, 16],
-         [16, 16, 32, 32]], dtype=tf.float32)
-    rpn_box_encodings = tf.zeros(
-        [batch_size, anchors.get_shape().as_list()[0],
-         BOX_CODE_SIZE], dtype=tf.float32)
+         [16, 16, 32, 32]], dtype=np.float32)
+    rpn_box_encodings = np.zeros(
+        (batch_size, anchors.shape[0], BOX_CODE_SIZE), dtype=np.float32)
     # use different numbers for the objectness category to break ties in
     # order of boxes returned by NMS
-    rpn_objectness_predictions_with_background = tf.constant([
+    rpn_objectness_predictions_with_background = np.array([
         [[-10, 13],
          [10, -10],
          [10, -11],

@@ -620,16 +717,22 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
         [[10, -10],
          [-10, 13],
          [-10, 12],
-         [10, -11]]], dtype=tf.float32)
-    rpn_features_to_crop = tf.ones((batch_size, 8, 8, 10), dtype=tf.float32)
-    image_shape = tf.constant([batch_size, 32, 32, 3], dtype=tf.int32)
-    _, true_image_shapes = model.preprocess(tf.zeros(image_shape))
-    proposals = model.postprocess({
-        'rpn_box_encodings': rpn_box_encodings,
-        'rpn_objectness_predictions_with_background':
-        rpn_objectness_predictions_with_background,
-        'rpn_features_to_crop': rpn_features_to_crop,
-        'anchors': anchors}, true_image_shapes)
+         [10, -11]]], dtype=np.float32)
+    rpn_features_to_crop = np.ones((batch_size, 8, 8, 10), dtype=np.float32)
+    image_shape = (batch_size, 32, 32, 3)
+    images = np.zeros(image_shape, dtype=np.float32)
+
+    if use_static_shapes:
+      results = self.execute(graph_fn,
+                             [images, rpn_box_encodings,
+                              rpn_objectness_predictions_with_background,
+                              rpn_features_to_crop, anchors])
+    else:
+      results = self.execute_cpu(graph_fn,
+                                 [images, rpn_box_encodings,
+                                  rpn_objectness_predictions_with_background,
+                                  rpn_features_to_crop, anchors])
     expected_proposal_boxes = [
         [[0, 0, .5, .5], [.5, .5, 1, 1], [0, .5, .5, 1], [.5, 0, 1.0, .5]]
         + 4 * [4 * [0]],

@@ -639,24 +742,12 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
         [1, 1, 0, 0, 0, 0, 0, 0]]
     expected_num_proposals = [4, 4]

-    expected_output_keys = set(['detection_boxes', 'detection_scores',
-                                'num_detections'])
-    self.assertEqual(set(proposals.keys()), expected_output_keys)
-    with self.test_session() as sess:
-      proposals_out = sess.run(proposals)
-      self.assertAllClose(proposals_out['detection_boxes'],
-                          expected_proposal_boxes)
-      self.assertAllClose(proposals_out['detection_scores'],
-                          expected_proposal_scores)
-      self.assertAllEqual(proposals_out['num_detections'],
-                          expected_num_proposals)
-
-  def test_postprocess_first_stage_only_inference_mode(self):
-    self._test_postprocess_first_stage_only_inference_mode()
-
-  def test_postprocess_first_stage_only_inference_mode_padded_image(self):
-    self._test_postprocess_first_stage_only_inference_mode(
-        pad_to_max_dimension=56)
+    self.assertAllClose(results[0], expected_num_proposals)
+    for indx, num_proposals in enumerate(expected_num_proposals):
+      self.assertAllClose(results[1][indx][0:num_proposals],
+                          expected_proposal_boxes[indx][0:num_proposals])
+      self.assertAllClose(results[2][indx][0:num_proposals],
+                          expected_proposal_scores[indx][0:num_proposals])

   def _test_postprocess_first_stage_only_train_mode(self,
                                                     pad_to_max_dimension=None):
@@ -733,83 +824,80 @@ class FasterRCNNMetaArchTestBase(test_case.TestCase, parameterized.TestCase):
   def test_postprocess_first_stage_only_train_mode_padded_image(self):
     self._test_postprocess_first_stage_only_train_mode(pad_to_max_dimension=56)

-  def _test_postprocess_second_stage_only_inference_mode(
-      self, pad_to_max_dimension=None):
-    num_proposals_shapes = [(2), (None,)]
-    refined_box_encodings_shapes = [(16, 2, 4), (None, 2, 4)]
-    class_predictions_with_background_shapes = [(16, 3), (None, 3)]
-    proposal_boxes_shapes = [(2, 8, 4), (None, 8, 4)]
+  def test_postprocess_second_stage_only_inference_mode(
+      self, use_static_shapes=False, pad_to_max_dimension=None):
     batch_size = 2
+    num_classes = 2
     image_shape = np.array((2, 36, 48, 3), dtype=np.int32)
-    for (num_proposals_shape, refined_box_encoding_shape,
-         class_predictions_with_background_shape,
-         proposal_boxes_shape) in zip(num_proposals_shapes,
-                                      refined_box_encodings_shapes,
-                                      class_predictions_with_background_shapes,
-                                      proposal_boxes_shapes):
-      tf_graph = tf.Graph()
-      with tf_graph.as_default():
-        model = self._build_model(
-            is_training=False, number_of_stages=2,
-            second_stage_batch_size=6,
-            pad_to_max_dimension=pad_to_max_dimension)
-        _, true_image_shapes = model.preprocess(tf.zeros(image_shape))
-        total_num_padded_proposals = batch_size * model.max_num_proposals
-        proposal_boxes = np.array(
-            [[[1, 1, 2, 3],
-              [0, 0, 1, 1],
-              [.5, .5, .6, .6],
-              4*[0], 4*[0], 4*[0], 4*[0], 4*[0]],
-             [[2, 3, 6, 8],
-              [1, 2, 5, 3],
-              4*[0], 4*[0], 4*[0], 4*[0], 4*[0], 4*[0]]])
-        num_proposals = np.array([3, 2], dtype=np.int32)
-        refined_box_encodings = np.zeros(
-            [total_num_padded_proposals, model.num_classes, 4])
-        class_predictions_with_background = np.ones(
-            [total_num_padded_proposals, model.num_classes+1])
-
-        num_proposals_placeholder = tf.placeholder(tf.int32,
-                                                   shape=num_proposals_shape)
-        refined_box_encodings_placeholder = tf.placeholder(
-            tf.float32, shape=refined_box_encoding_shape)
-        class_predictions_with_background_placeholder = tf.placeholder(
-            tf.float32, shape=class_predictions_with_background_shape)
-        proposal_boxes_placeholder = tf.placeholder(
-            tf.float32, shape=proposal_boxes_shape)
-        image_shape_placeholder = tf.placeholder(tf.int32, shape=(4))
-
-        detections = model.postprocess({
-            'refined_box_encodings': refined_box_encodings_placeholder,
-            'class_predictions_with_background':
-            class_predictions_with_background_placeholder,
-            'num_proposals': num_proposals_placeholder,
-            'proposal_boxes': proposal_boxes_placeholder,
-        }, true_image_shapes)
-      with self.test_session(graph=tf_graph) as sess:
-        detections_out = sess.run(
-            detections,
-            feed_dict={
-                refined_box_encodings_placeholder: refined_box_encodings,
-                class_predictions_with_background_placeholder:
-                class_predictions_with_background,
-                num_proposals_placeholder: num_proposals,
-                proposal_boxes_placeholder: proposal_boxes,
-                image_shape_placeholder: image_shape
-            })
-      self.assertAllEqual(detections_out['detection_boxes'].shape, [2, 5, 4])
-      self.assertAllClose(detections_out['detection_scores'],
-                          [[1, 1, 1, 1, 1], [1, 1, 1, 1, 0]])
-      self.assertAllClose(detections_out['detection_classes'],
-                          [[0, 0, 0, 1, 1], [0, 0, 1, 1, 0]])
-      self.assertAllClose(detections_out['num_detections'], [5, 4])
-
-  def test_postprocess_second_stage_only_inference_mode(self):
-    self._test_postprocess_second_stage_only_inference_mode()
-
-  def test_postprocess_second_stage_only_inference_mode_padded_image(self):
-    self._test_postprocess_second_stage_only_inference_mode(
-        pad_to_max_dimension=56)
+    first_stage_max_proposals = 8
+    total_num_padded_proposals = batch_size * first_stage_max_proposals
+
+    def graph_fn(images,
+                 refined_box_encodings,
+                 class_predictions_with_background,
+                 num_proposals,
+                 proposal_boxes):
+      """Function to construct tf graph for the test."""
+      model = self._build_model(
+          is_training=False, number_of_stages=2,
+          second_stage_batch_size=6,
+          use_matmul_crop_and_resize=use_static_shapes,
+          clip_anchors_to_image=use_static_shapes,
+          use_static_shapes=use_static_shapes,
+          use_matmul_gather_in_matcher=use_static_shapes,
+          pad_to_max_dimension=pad_to_max_dimension)
+      _, true_image_shapes = model.preprocess(images)
+      detections = model.postprocess({
+          'refined_box_encodings': refined_box_encodings,
+          'class_predictions_with_background':
+          class_predictions_with_background,
+          'num_proposals': num_proposals,
+          'proposal_boxes': proposal_boxes,
+      }, true_image_shapes)
+      return (detections['num_detections'],
+              detections['detection_boxes'],
+              detections['detection_scores'],
+              detections['detection_classes'])
+
+    proposal_boxes = np.array(
+        [[[1, 1, 2, 3],
+          [0, 0, 1, 1],
+          [.5, .5, .6, .6],
+          4*[0], 4*[0], 4*[0], 4*[0], 4*[0]],
+         [[2, 3, 6, 8],
+          [1, 2, 5, 3],
+          4*[0], 4*[0], 4*[0], 4*[0], 4*[0], 4*[0]]], dtype=np.float32)
+    num_proposals = np.array([3, 2], dtype=np.int32)
+    refined_box_encodings = np.zeros(
+        [total_num_padded_proposals, num_classes, 4], dtype=np.float32)
+    class_predictions_with_background = np.ones(
+        [total_num_padded_proposals, num_classes+1], dtype=np.float32)
+    images = np.zeros(image_shape, dtype=np.float32)
+
+    if use_static_shapes:
+      results = self.execute(graph_fn,
+                             [images, refined_box_encodings,
+                              class_predictions_with_background,
+                              num_proposals, proposal_boxes])
+    else:
+      results = self.execute_cpu(graph_fn,
+                                 [images, refined_box_encodings,
+                                  class_predictions_with_background,
+                                  num_proposals, proposal_boxes])
+    expected_num_detections = [5, 4]
+    expected_detection_classes = [[0, 0, 0, 1, 1], [0, 0, 1, 1, 0]]
+    expected_detection_scores = [[1, 1, 1, 1, 1], [1, 1, 1, 1, 0]]
+
+    self.assertAllClose(results[0], expected_num_detections)
+
+    for indx, num_proposals in enumerate(expected_num_detections):
+      self.assertAllClose(results[2][indx][0:num_proposals],
+                          expected_detection_scores[indx][0:num_proposals])
+      self.assertAllClose(results[3][indx][0:num_proposals],
+                          expected_detection_classes[indx][0:num_proposals])
+
+    if not use_static_shapes:
+      self.assertAllEqual(results[1].shape, [2, 5, 4])

   def test_preprocess_preserves_input_shapes(self):
     image_shapes = [(3, None, None, 3),
research/object_detection/meta_architectures/ssd_meta_arch.py  View file @ e00e0e13

@@ -19,7 +19,6 @@ models.
 """
 from abc import abstractmethod
-import re

 import tensorflow as tf

 from object_detection.core import box_list

@@ -116,6 +115,25 @@ class SSDFeatureExtractor(object):
     """
     raise NotImplementedError

+  def restore_from_classification_checkpoint_fn(self, feature_extractor_scope):
+    """Returns a map of variables to load from a foreign checkpoint.
+
+    Args:
+      feature_extractor_scope: A scope name for the feature extractor.
+
+    Returns:
+      A dict mapping variable names (to load from a checkpoint) to variables in
+      the model graph.
+    """
+    variables_to_restore = {}
+    for variable in tf.global_variables():
+      var_name = variable.op.name
+      if var_name.startswith(feature_extractor_scope + '/'):
+        var_name = var_name.replace(feature_extractor_scope + '/', '')
+        variables_to_restore[var_name] = variable
+    return variables_to_restore
+

 class SSDKerasFeatureExtractor(tf.keras.Model):
   """SSD Feature Extractor definition."""

@@ -218,6 +236,25 @@ class SSDKerasFeatureExtractor(tf.keras.Model):
   def call(self, inputs, **kwargs):
     return self._extract_features(inputs)

+  def restore_from_classification_checkpoint_fn(self, feature_extractor_scope):
+    """Returns a map of variables to load from a foreign checkpoint.
+
+    Args:
+      feature_extractor_scope: A scope name for the feature extractor.
+
+    Returns:
+      A dict mapping variable names (to load from a checkpoint) to variables in
+      the model graph.
+    """
+    variables_to_restore = {}
+    for variable in tf.global_variables():
+      var_name = variable.op.name
+      if var_name.startswith(feature_extractor_scope + '/'):
+        var_name = var_name.replace(feature_extractor_scope + '/', '')
+        variables_to_restore[var_name] = variable
+    return variables_to_restore
+

 class SSDMetaArch(model.DetectionModel):
   """SSD Meta-architecture definition."""
@@ -333,13 +370,15 @@ class SSDMetaArch(model.DetectionModel):
     # Slim feature extractors get an explicit naming scope
     self._extract_features_scope = 'FeatureExtractor'

-    # TODO(jonathanhuang): handle agnostic mode
-    # weights
-    self._unmatched_class_label = tf.constant([1] + self.num_classes * [0],
-                                              tf.float32)
-    if encode_background_as_zeros:
+    if self._add_background_class and encode_background_as_zeros:
       self._unmatched_class_label = tf.constant((self.num_classes + 1) * [0],
                                                 tf.float32)
+    elif self._add_background_class:
+      self._unmatched_class_label = tf.constant([1] + self.num_classes * [0],
+                                                tf.float32)
+    else:
+      self._unmatched_class_label = tf.constant(self.num_classes * [0],
+                                                tf.float32)

     self._target_assigner = target_assigner_instance
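A worked example of the three branches above with num_classes = 3, showing the target assigned to anchors that match no groundtruth box (values follow directly from the tf.constant expressions in the hunk):

import tensorflow as tf

num_classes = 3  # illustrative
# encode_background_as_zeros: all-zero target over num_classes + 1 slots.
tf.constant((num_classes + 1) * [0], tf.float32)   # [0., 0., 0., 0.]
# explicit background class: one-hot background in slot 0.
tf.constant([1] + num_classes * [0], tf.float32)   # [1., 0., 0., 0.]
# no background class at all: all-zero target over num_classes slots.
tf.constant(num_classes * [0], tf.float32)         # [0., 0., 0.]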
@@ -606,14 +645,22 @@ class SSDMetaArch(model.DetectionModel):
       detection_boxes = tf.identity(detection_boxes, 'raw_box_locations')
       detection_boxes = tf.expand_dims(detection_boxes, axis=2)

-      detection_scores = self._score_conversion_fn(class_predictions)
-      detection_scores = tf.identity(detection_scores, 'raw_box_scores')
-      detection_scores = tf.slice(detection_scores, [0, 0, 1],
-                                  [-1, -1, -1])
+      detection_scores_with_background = self._score_conversion_fn(
+          class_predictions)
+      detection_scores_with_background = tf.identity(
+          detection_scores_with_background, 'raw_box_scores')
+      if self._add_background_class:
+        detection_scores = tf.slice(detection_scores_with_background,
+                                    [0, 0, 1], [-1, -1, -1])

       additional_fields = None

+      batch_size = (
+          shape_utils.combined_static_and_dynamic_shape(preprocessed_images)[0])
+      if 'feature_maps' in prediction_dict:
+        feature_map_list = []
+        for feature_map in prediction_dict['feature_maps']:
+          feature_map_list.append(tf.reshape(feature_map, [batch_size, -1]))
+        box_features = tf.concat(feature_map_list, 1)
+        box_features = tf.identity(box_features, 'raw_box_features')
+
       if detection_keypoints is not None:
         additional_fields = {
             fields.BoxListFields.keypoints: detection_keypoints}
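The 'raw_box_features' export above flattens every feature map to a per-image vector and concatenates them. The same idea as a self-contained sketch (shapes and the function name are illustrative):

import tensorflow as tf

def flatten_feature_maps(feature_maps, batch_size):
  # Each map [batch, h_i, w_i, c_i] becomes [batch, h_i * w_i * c_i],
  # then all maps are concatenated along axis 1.
  feature_map_list = [tf.reshape(fm, [batch_size, -1]) for fm in feature_maps]
  return tf.identity(tf.concat(feature_map_list, 1), 'raw_box_features')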
...
@@ -683,17 +730,20 @@ class SSDMetaArch(model.DetectionModel):
...
@@ -683,17 +730,20 @@ class SSDMetaArch(model.DetectionModel):
self
.
groundtruth_lists
(
fields
.
BoxListFields
.
boxes
),
match_list
)
self
.
groundtruth_lists
(
fields
.
BoxListFields
.
boxes
),
match_list
)
if
self
.
_random_example_sampler
:
if
self
.
_random_example_sampler
:
batch_cls_per_anchor_weights
=
tf
.
reduce_mean
(
batch_cls_weights
,
axis
=-
1
)
batch_sampled_indicator
=
tf
.
to_float
(
batch_sampled_indicator
=
tf
.
to_float
(
shape_utils
.
static_or_dynamic_map_fn
(
shape_utils
.
static_or_dynamic_map_fn
(
self
.
_minibatch_subsample_fn
,
self
.
_minibatch_subsample_fn
,
[
batch_cls_targets
,
batch_cls_weights
],
[
batch_cls_targets
,
batch_cls_
per_anchor_
weights
],
dtype
=
tf
.
bool
,
dtype
=
tf
.
bool
,
parallel_iterations
=
self
.
_parallel_iterations
,
parallel_iterations
=
self
.
_parallel_iterations
,
back_prop
=
True
))
back_prop
=
True
))
batch_reg_weights
=
tf
.
multiply
(
batch_sampled_indicator
,
batch_reg_weights
=
tf
.
multiply
(
batch_sampled_indicator
,
batch_reg_weights
)
batch_reg_weights
)
batch_cls_weights
=
tf
.
multiply
(
batch_sampled_indicator
,
batch_cls_weights
=
tf
.
multiply
(
batch_cls_weights
)
tf
.
expand_dims
(
batch_sampled_indicator
,
-
1
),
batch_cls_weights
)
losses_mask
=
None
losses_mask
=
None
if
self
.
groundtruth_has_field
(
fields
.
InputDataFields
.
is_annotated
):
if
self
.
groundtruth_has_field
(
fields
.
InputDataFields
.
is_annotated
):
...
@@ -713,16 +763,32 @@ class SSDMetaArch(model.DetectionModel):
...
@@ -713,16 +763,32 @@ class SSDMetaArch(model.DetectionModel):
             losses_mask=losses_mask)
         if self._expected_classification_loss_under_sampling:
+          # Need to compute losses for assigned targets against the
+          # unmatched_class_label as well as their assigned targets.
+          # simplest thing (but wasteful) is just to calculate all losses
+          # twice
+          batch_size, num_anchors, num_classes = batch_cls_targets.get_shape()
+          unmatched_targets = tf.ones(
+              [batch_size, num_anchors, 1]) * self._unmatched_class_label
+          unmatched_cls_losses = self._classification_loss(
+              prediction_dict['class_predictions_with_background'],
+              unmatched_targets,
+              weights=batch_cls_weights,
+              losses_mask=losses_mask)
           if cls_losses.get_shape().ndims == 3:
             batch_size, num_anchors, num_classes = cls_losses.get_shape()
             cls_losses = tf.reshape(cls_losses, [batch_size, -1])
+            unmatched_cls_losses = tf.reshape(unmatched_cls_losses,
+                                              [batch_size, -1])
             batch_cls_targets = tf.reshape(
                 batch_cls_targets, [batch_size, num_anchors * num_classes, -1])
             batch_cls_targets = tf.concat(
                 [1 - batch_cls_targets, batch_cls_targets], axis=-1)
           cls_losses = self._expected_classification_loss_under_sampling(
-              batch_cls_targets, cls_losses)
+              batch_cls_targets, cls_losses, unmatched_cls_losses)

         classification_loss = tf.reduce_sum(cls_losses)
         localization_loss = tf.reduce_sum(location_losses)
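For readers following this hunk: the new unmatched_cls_losses term lets the expected-sampling loss weight each anchor's loss by how likely that anchor is to be kept as a positive or as a sampled negative, instead of hard-sampling anchors. A minimal NumPy sketch of that idea, using hypothetical per-anchor losses and a deliberately simplified keep-probability (this is not the library's exact ops.expected_classification_loss_under_sampling):

import numpy as np

# Hypothetical per-anchor losses for one image: loss against the matched
# target, and loss against the background (unmatched) label.
matched_losses = np.array([0.2, 1.3, 0.7, 0.9])
unmatched_losses = np.array([2.1, 0.4, 1.0, 0.6])
is_positive = np.array([1.0, 0.0, 1.0, 0.0])  # anchors matched to groundtruth

min_num_negative_samples = 1
desired_negative_sampling_ratio = 3

num_positives = is_positive.sum()
# Expected number of sampled negatives, mirroring the parameters renamed in
# this commit (min_num_negative_samples, desired_negative_sampling_ratio).
num_negatives = max(min_num_negative_samples,
                    desired_negative_sampling_ratio * num_positives)
num_candidates = (1.0 - is_positive).sum()
# Each negative anchor is kept with this probability, so its background loss
# contributes in expectation rather than through hard sampling.
p_keep_negative = min(1.0, num_negatives / max(num_candidates, 1.0))

expected_loss = (is_positive * matched_losses +
                 (1.0 - is_positive) * p_keep_negative * unmatched_losses).sum()
print(expected_loss)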
...
@@ -971,6 +1037,26 @@ class SSDMetaArch(model.DetectionModel):
             [combined_shape[0], combined_shape[1], 4]))
     return decoded_boxes, decoded_keypoints

+  def regularization_losses(self):
+    """Returns a list of regularization losses for this model.
+
+    Returns a list of regularization losses for this model that the estimator
+    needs to use during training/optimization.
+
+    Returns:
+      A list of regularization loss tensors.
+    """
+    losses = []
+    slim_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
+    # Copy the slim losses to avoid modifying the collection
+    if slim_losses:
+      losses.extend(slim_losses)
+    if self._box_predictor.is_keras_model:
+      losses.extend(self._box_predictor.losses)
+    if self._feature_extractor.is_keras_model:
+      losses.extend(self._feature_extractor.losses)
+    return losses
+
   def restore_map(self,
                   fine_tune_checkpoint_type='detection',
                   load_all_detection_checkpoint_vars=False):
...
@@ -997,18 +1083,44 @@ class SSDMetaArch(model.DetectionModel):
     if fine_tune_checkpoint_type not in ['detection', 'classification']:
       raise ValueError('Not supported fine_tune_checkpoint_type: {}'.format(
           fine_tune_checkpoint_type))
-    variables_to_restore = {}
-    for variable in tf.global_variables():
-      var_name = variable.op.name
-      if (fine_tune_checkpoint_type == 'detection' and
-          load_all_detection_checkpoint_vars):
-        variables_to_restore[var_name] = variable
-      else:
-        if var_name.startswith(self._extract_features_scope):
-          if fine_tune_checkpoint_type == 'classification':
-            var_name = (
-                re.split('^' + self._extract_features_scope + '/',
-                         var_name)[-1])
-          variables_to_restore[var_name] = variable
+
+    if fine_tune_checkpoint_type == 'classification':
+      return self._feature_extractor.restore_from_classification_checkpoint_fn(
+          self._extract_features_scope)
+
+    if fine_tune_checkpoint_type == 'detection':
+      variables_to_restore = {}
+      for variable in tf.global_variables():
+        var_name = variable.op.name
+        if load_all_detection_checkpoint_vars:
+          variables_to_restore[var_name] = variable
+        else:
+          if var_name.startswith(self._extract_features_scope):
+            variables_to_restore[var_name] = variable
     return variables_to_restore
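As context for the rewritten restore_map: the 'detection' branch returns a name-to-variable dict that callers hand to a saver for fine-tuning. A self-contained sketch that mirrors only the variable-selection logic, for illustration (the demo graph and names are hypothetical):

import tensorflow as tf

def select_detection_variables(extract_features_scope,
                               load_all_detection_checkpoint_vars):
  """Mirrors the new restore_map logic for the 'detection' checkpoint type."""
  variables_to_restore = {}
  for variable in tf.global_variables():
    var_name = variable.op.name
    if load_all_detection_checkpoint_vars:
      variables_to_restore[var_name] = variable
    elif var_name.startswith(extract_features_scope):
      variables_to_restore[var_name] = variable
  return variables_to_restore

# Tiny demo graph: one variable inside the feature-extractor scope, one outside.
with tf.variable_scope('FeatureExtractor'):
  tf.get_variable('w', shape=[1])
tf.get_variable('head_bias', shape=[1])

print(sorted(select_detection_variables('FeatureExtractor', False)))
# ['FeatureExtractor/w'] -- only feature-extractor variables are restored.

The resulting dict is typically passed to tf.train.Saver(variables_to_restore) before calling saver.restore on the fine-tune checkpoint.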
+
+  def updates(self):
+    """Returns a list of update operators for this model.
+
+    Returns a list of update operators for this model that must be executed at
+    each training step. The estimator's train op needs to have a control
+    dependency on these updates.
+
+    Returns:
+      A list of update operators.
+    """
+    update_ops = []
+    slim_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
+    # Copy the slim ops to avoid modifying the collection
+    if slim_update_ops:
+      update_ops.extend(slim_update_ops)
+    if self._box_predictor.is_keras_model:
+      update_ops.extend(self._box_predictor.get_updates_for(None))
+      update_ops.extend(self._box_predictor.get_updates_for(
+          self._box_predictor.inputs))
+    if self._feature_extractor.is_keras_model:
+      update_ops.extend(self._feature_extractor.get_updates_for(None))
+      update_ops.extend(self._feature_extractor.get_updates_for(
+          self._feature_extractor.inputs))
+    return update_ops
View file @
e00e0e13
...
@@ -42,7 +42,7 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
...
@@ -42,7 +42,7 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
random_example_sampling
=
False
,
random_example_sampling
=
False
,
weight_regression_loss_by_score
=
False
,
weight_regression_loss_by_score
=
False
,
use_expected_classification_loss_under_sampling
=
False
,
use_expected_classification_loss_under_sampling
=
False
,
min
im
um_negative_sampl
ing
=
1
,
min
_n
um_negative_sampl
es
=
1
,
desired_negative_sampling_ratio
=
3
,
desired_negative_sampling_ratio
=
3
,
use_keras
=
False
,
use_keras
=
False
,
predict_mask
=
False
,
predict_mask
=
False
,
...
@@ -57,7 +57,7 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
         weight_regression_loss_by_score=weight_regression_loss_by_score,
         use_expected_classification_loss_under_sampling=
         use_expected_classification_loss_under_sampling,
-        minimum_negative_sampling=minimum_negative_sampling,
+        min_num_negative_samples=min_num_negative_samples,
         desired_negative_sampling_ratio=desired_negative_sampling_ratio,
         use_keras=use_keras,
         predict_mask=predict_mask,
...
@@ -344,11 +344,11 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
     preprocessed_input = np.random.rand(batch_size, 2, 2, 3).astype(np.float32)
     groundtruth_boxes1 = np.array([[0, 0, .5, .5]], dtype=np.float32)
     groundtruth_boxes2 = np.array([[0, 0, .5, .5]], dtype=np.float32)
-    groundtruth_classes1 = np.array([[0, 1]], dtype=np.float32)
-    groundtruth_classes2 = np.array([[0, 1]], dtype=np.float32)
+    groundtruth_classes1 = np.array([[1]], dtype=np.float32)
+    groundtruth_classes2 = np.array([[1]], dtype=np.float32)
     expected_localization_loss = 0.0
     expected_classification_loss = (
-        batch_size * num_anchors * (num_classes + 1) * np.log(2.0))
+        batch_size * num_anchors * num_classes * np.log(2.0))
     (localization_loss, classification_loss) = self.execute(
         graph_fn, [
             preprocessed_input, groundtruth_boxes1, groundtruth_boxes2,
...
@@ -371,7 +371,7 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
         apply_hard_mining=False,
         add_background_class=True,
         use_expected_classification_loss_under_sampling=True,
-        minimum_negative_sampling=1,
+        min_num_negative_samples=1,
         desired_negative_sampling_ratio=desired_negative_sampling_ratio)
     model.provide_groundtruth(groundtruth_boxes_list,
                               groundtruth_classes_list)
...
@@ -391,8 +391,7 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
     expected_localization_loss = 0.0
     expected_classification_loss = (
-        batch_size * (desired_negative_sampling_ratio * num_anchors +
-                      num_classes * num_anchors) * np.log(2.0))
+        batch_size * (num_anchors + num_classes * num_anchors) * np.log(2.0))
     (localization_loss, classification_loss) = self.execute(
         graph_fn, [
             preprocessed_input, groundtruth_boxes1, groundtruth_boxes2,
...
@@ -432,11 +431,11 @@ class SsdMetaArchTest(ssd_meta_arch_test_lib.SSDMetaArchTestBase,
     preprocessed_input = np.random.rand(batch_size, 2, 2, 3).astype(np.float32)
     groundtruth_boxes1 = np.array([[0, 0, 1, 1]], dtype=np.float32)
     groundtruth_boxes2 = np.array([[0, 0, 1, 1]], dtype=np.float32)
-    groundtruth_classes1 = np.array([[0, 1]], dtype=np.float32)
-    groundtruth_classes2 = np.array([[1, 0]], dtype=np.float32)
+    groundtruth_classes1 = np.array([[1]], dtype=np.float32)
+    groundtruth_classes2 = np.array([[0]], dtype=np.float32)
     expected_localization_loss = 0.25
     expected_classification_loss = (
-        batch_size * num_anchors * (num_classes + 1) * np.log(2.0))
+        batch_size * num_anchors * num_classes * np.log(2.0))
     (localization_loss, classification_loss) = self.execute(
         graph_fn, [
             preprocessed_input, groundtruth_boxes1, groundtruth_boxes2,
...
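A note on the expected-loss constants in these tests: the log(2.0) factor suggests the mock predictor emits all-zero logits, so every class probability is 0.5 and each loss term contributes -log(0.5) = log(2); the expectation then reduces to counting loss terms. A quick check of that arithmetic under those assumptions (all values illustrative):

import numpy as np

batch_size = 2
num_anchors = 4   # e.g. the 2x2 mock anchor generator used by these tests
num_classes = 2   # illustrative; add_background_class handles background

# Zero logits give probability 0.5, and -log(0.5) = log(2) per loss term.
per_term = np.log(2.0)
expected_classification_loss = batch_size * num_anchors * num_classes * per_term
print(expected_classification_loss)  # 16 * log(2) under these assumptions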
research/object_detection/meta_architectures/ssd_meta_arch_test_lib.py  View file @ e00e0e13
...
@@ -119,7 +119,7 @@ class SSDMetaArchTestBase(test_case.TestCase):
                     random_example_sampling=False,
                     weight_regression_loss_by_score=False,
                     use_expected_classification_loss_under_sampling=False,
-                    minimum_negative_sampling=1,
+                    min_num_negative_samples=1,
                     desired_negative_sampling_ratio=3,
                     use_keras=False,
                     predict_mask=False,
...
@@ -130,10 +130,12 @@ class SSDMetaArchTestBase(test_case.TestCase):
     mock_anchor_generator = MockAnchorGenerator2x2()
     if use_keras:
       mock_box_predictor = test_utils.MockKerasBoxPredictor(
-          is_training, num_classes, predict_mask=predict_mask)
+          is_training, num_classes, add_background_class=add_background_class,
+          predict_mask=predict_mask)
     else:
       mock_box_predictor = test_utils.MockBoxPredictor(
-          is_training, num_classes, predict_mask=predict_mask)
+          is_training, num_classes, add_background_class=add_background_class,
+          predict_mask=predict_mask)
     mock_box_coder = test_utils.MockBoxCoder()
     if use_keras:
       fake_feature_extractor = FakeSSDKerasFeatureExtractor()
...
@@ -182,7 +184,7 @@ class SSDMetaArchTestBase(test_case.TestCase):
     if use_expected_classification_loss_under_sampling:
       expected_classification_loss_under_sampling = functools.partial(
           ops.expected_classification_loss_under_sampling,
-          minimum_negative_sampling=minimum_negative_sampling,
+          min_num_negative_samples=min_num_negative_samples,
          desired_negative_sampling_ratio=desired_negative_sampling_ratio)
     code_size = 4
...
research/object_detection/metrics/coco_evaluation.py  View file @ e00e0e13
...
@@ -248,27 +248,30 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
                   detection_boxes_batched,
                   detection_scores_batched,
                   detection_classes_batched,
-                  num_det_boxes_per_image):
+                  num_det_boxes_per_image,
+                  is_annotated_batched):
      """Update operation for adding batch of images to Coco evaluator."""
      for (image_id, gt_box, gt_class, gt_is_crowd, num_gt_box, det_box,
-           det_score, det_class, num_det_box) in zip(
+           det_score, det_class, num_det_box, is_annotated) in zip(
               image_id_batched, groundtruth_boxes_batched,
               groundtruth_classes_batched, groundtruth_is_crowd_batched,
               num_gt_boxes_per_image,
               detection_boxes_batched, detection_scores_batched,
-               detection_classes_batched, num_det_boxes_per_image):
-        self.add_single_ground_truth_image_info(
-            image_id, {
-                'groundtruth_boxes': gt_box[:num_gt_box],
-                'groundtruth_classes': gt_class[:num_gt_box],
-                'groundtruth_is_crowd': gt_is_crowd[:num_gt_box]
-            })
-        self.add_single_detected_image_info(
-            image_id,
-            {'detection_boxes': det_box[:num_det_box],
-             'detection_scores': det_score[:num_det_box],
-             'detection_classes': det_class[:num_det_box]})
+               detection_classes_batched, num_det_boxes_per_image,
+               is_annotated_batched):
+        if is_annotated:
+          self.add_single_ground_truth_image_info(
+              image_id, {
+                  'groundtruth_boxes': gt_box[:num_gt_box],
+                  'groundtruth_classes': gt_class[:num_gt_box],
+                  'groundtruth_is_crowd': gt_is_crowd[:num_gt_box]
+              })
+          self.add_single_detected_image_info(
+              image_id,
+              {'detection_boxes': det_box[:num_det_box],
+               'detection_scores': det_score[:num_det_box],
+               'detection_classes': det_class[:num_det_box]})

    # Unpack items from the evaluation dictionary.
    input_data_fields = standard_fields.InputDataFields
...
@@ -284,6 +287,7 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
    num_gt_boxes_per_image = eval_dict.get(
        'num_groundtruth_boxes_per_image', None)
    num_det_boxes_per_image = eval_dict.get('num_det_boxes_per_image', None)
+    is_annotated = eval_dict.get('is_annotated', None)
    if groundtruth_is_crowd is None:
      groundtruth_is_crowd = tf.zeros_like(groundtruth_classes, dtype=tf.bool)
...
@@ -306,6 +310,11 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
        num_det_boxes_per_image = tf.shape(detection_boxes)[1:2]
      else:
        num_det_boxes_per_image = tf.expand_dims(num_det_boxes_per_image, 0)
+
+      if is_annotated is None:
+        is_annotated = tf.constant([True])
+      else:
+        is_annotated = tf.expand_dims(is_annotated, 0)
    else:
      if num_gt_boxes_per_image is None:
        num_gt_boxes_per_image = tf.tile(
...
@@ -315,6 +324,8 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
        num_det_boxes_per_image = tf.tile(
            tf.shape(detection_boxes)[1:2],
            multiples=tf.shape(detection_boxes)[0:1])
+      if is_annotated is None:
+        is_annotated = tf.ones_like(image_id, dtype=tf.bool)

    update_op = tf.py_func(update_op, [image_id,
                                       groundtruth_boxes,
...
@@ -324,7 +335,8 @@ class CocoDetectionEvaluator(object_detection_evaluation.DetectionEvaluator):
                                       detection_boxes,
                                       detection_scores,
                                       detection_classes,
-                                       num_det_boxes_per_image], [])
+                                       num_det_boxes_per_image,
+                                       is_annotated], [])

    metric_names = ['DetectionBoxes_Precision/mAP',
                    'DetectionBoxes_Precision/mAP@.50IOU',
                    'DetectionBoxes_Precision/mAP@.75IOU',
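The effect of the new is_annotated flag is simply to skip accumulation for unannotated images while the update op still runs for the whole batch. A compact sketch of that gating, independent of the TensorFlow plumbing (all names illustrative):

def update(evaluator, batch, is_annotated_batched):
    """Adds only annotated images to a COCO-style evaluator (sketch)."""
    for example, is_annotated in zip(batch, is_annotated_batched):
        if not is_annotated:
            continue  # unannotated images contribute nothing to the metrics
        evaluator['groundtruth'].append(example['gt'])
        evaluator['detections'].append(example['det'])

evaluator = {'groundtruth': [], 'detections': []}
batch = [{'gt': 'g1', 'det': 'd1'}, {'gt': None, 'det': 'd2'}]
update(evaluator, batch, [True, False])
print(evaluator)  # only the first image was accumulated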
...
@@ -581,8 +593,11 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
    Args:
      eval_dict: A dictionary that holds tensors for evaluating object detection
-        performance. This dictionary may be produced from
-        eval_util.result_dict_for_single_example().
+        performance. For single-image evaluation, this dictionary may be
+        produced from eval_util.result_dict_for_single_example(). For
+        multi-image evaluation, `eval_dict` should contain the fields
+        'num_groundtruth_boxes_per_image' and 'num_det_boxes_per_image' to
+        properly unpad the tensors from the batch.

    Returns:
      a dictionary of metric names to tuple of value_op and update_op that can
...
@@ -590,27 +605,41 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
      update ops must be run together and similarly all value ops must be run
      together to guarantee correct behaviour.
    """
-    def update_op(
-        image_id,
-        groundtruth_boxes,
-        groundtruth_classes,
-        groundtruth_instance_masks,
-        groundtruth_is_crowd,
-        detection_scores,
-        detection_classes,
-        detection_masks):
+    def update_op(
+        image_id_batched,
+        groundtruth_boxes_batched,
+        groundtruth_classes_batched,
+        groundtruth_instance_masks_batched,
+        groundtruth_is_crowd_batched,
+        num_gt_boxes_per_image,
+        detection_scores_batched,
+        detection_classes_batched,
+        detection_masks_batched,
+        num_det_boxes_per_image):
      """Update op for metrics."""
-      self.add_single_ground_truth_image_info(
-          image_id,
-          {'groundtruth_boxes': groundtruth_boxes,
-           'groundtruth_classes': groundtruth_classes,
-           'groundtruth_instance_masks': groundtruth_instance_masks,
-           'groundtruth_is_crowd': groundtruth_is_crowd})
-      self.add_single_detected_image_info(
-          image_id,
-          {'detection_scores': detection_scores,
-           'detection_classes': detection_classes,
-           'detection_masks': detection_masks})
+      for (image_id, groundtruth_boxes, groundtruth_classes,
+           groundtruth_instance_masks, groundtruth_is_crowd, num_gt_box,
+           detection_scores, detection_classes,
+           detection_masks, num_det_box) in zip(
+               image_id_batched, groundtruth_boxes_batched,
+               groundtruth_classes_batched, groundtruth_instance_masks_batched,
+               groundtruth_is_crowd_batched, num_gt_boxes_per_image,
+               detection_scores_batched, detection_classes_batched,
+               detection_masks_batched, num_det_boxes_per_image):
+        self.add_single_ground_truth_image_info(
+            image_id, {
+                'groundtruth_boxes': groundtruth_boxes[:num_gt_box],
+                'groundtruth_classes': groundtruth_classes[:num_gt_box],
+                'groundtruth_instance_masks':
+                    groundtruth_instance_masks[:num_gt_box],
+                'groundtruth_is_crowd': groundtruth_is_crowd[:num_gt_box]
+            })
+        self.add_single_detected_image_info(
+            image_id, {
+                'detection_scores': detection_scores[:num_det_box],
+                'detection_classes': detection_classes[:num_det_box],
+                'detection_masks': detection_masks[:num_det_box]
+            })

    # Unpack items from the evaluation dictionary.
    input_data_fields = standard_fields.InputDataFields
...
@@ -622,20 +651,54 @@ class CocoMaskEvaluator(object_detection_evaluation.DetectionEvaluator):
        input_data_fields.groundtruth_instance_masks]
    groundtruth_is_crowd = eval_dict.get(
        input_data_fields.groundtruth_is_crowd, None)
+    num_gt_boxes_per_image = eval_dict.get(
+        input_data_fields.num_groundtruth_boxes, None)
    detection_scores = eval_dict[detection_fields.detection_scores]
    detection_classes = eval_dict[detection_fields.detection_classes]
    detection_masks = eval_dict[detection_fields.detection_masks]
+    num_det_boxes_per_image = eval_dict.get(detection_fields.num_detections,
+                                            None)

    if groundtruth_is_crowd is None:
      groundtruth_is_crowd = tf.zeros_like(groundtruth_classes, dtype=tf.bool)
-    update_op = tf.py_func(update_op, [image_id,
-                                       groundtruth_boxes,
-                                       groundtruth_classes,
-                                       groundtruth_instance_masks,
-                                       groundtruth_is_crowd,
-                                       detection_scores,
-                                       detection_classes,
-                                       detection_masks], [])
+
+    if not image_id.shape.as_list():
+      # Apply a batch dimension to all tensors.
+      image_id = tf.expand_dims(image_id, 0)
+      groundtruth_boxes = tf.expand_dims(groundtruth_boxes, 0)
+      groundtruth_classes = tf.expand_dims(groundtruth_classes, 0)
+      groundtruth_instance_masks = tf.expand_dims(groundtruth_instance_masks, 0)
+      groundtruth_is_crowd = tf.expand_dims(groundtruth_is_crowd, 0)
+      detection_scores = tf.expand_dims(detection_scores, 0)
+      detection_classes = tf.expand_dims(detection_classes, 0)
+      detection_masks = tf.expand_dims(detection_masks, 0)
+
+      if num_gt_boxes_per_image is None:
+        num_gt_boxes_per_image = tf.shape(groundtruth_boxes)[1:2]
+      else:
+        num_gt_boxes_per_image = tf.expand_dims(num_gt_boxes_per_image, 0)
+
+      if num_det_boxes_per_image is None:
+        num_det_boxes_per_image = tf.shape(detection_scores)[1:2]
+      else:
+        num_det_boxes_per_image = tf.expand_dims(num_det_boxes_per_image, 0)
+    else:
+      if num_gt_boxes_per_image is None:
+        num_gt_boxes_per_image = tf.tile(
+            tf.shape(groundtruth_boxes)[1:2],
+            multiples=tf.shape(groundtruth_boxes)[0:1])
+      if num_det_boxes_per_image is None:
+        num_det_boxes_per_image = tf.tile(
+            tf.shape(detection_scores)[1:2],
+            multiples=tf.shape(detection_scores)[0:1])
+
+    update_op = tf.py_func(update_op, [
+        image_id,
+        groundtruth_boxes,
+        groundtruth_classes,
+        groundtruth_instance_masks,
+        groundtruth_is_crowd,
+        num_gt_boxes_per_image,
+        detection_scores,
+        detection_classes,
+        detection_masks,
+        num_det_boxes_per_image
+    ], [])

    metric_names = ['DetectionMasks_Precision/mAP',
                    'DetectionMasks_Precision/mAP@.50IOU',
                    'DetectionMasks_Precision/mAP@.75IOU',
...
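The batching change above hinges on unpadding: batched tensors are padded to the longest image in the batch, and the per-image counts (num_gt_boxes_per_image, num_det_boxes_per_image) record the valid prefix to slice back out. A NumPy illustration:

import numpy as np

# Two images padded to 3 boxes each; per-image counts give the valid rows.
groundtruth_boxes = np.array([
    [[0., 0., 1., 1.], [0., 0., .5, .5], [0., 0., 0., 0.]],  # 2 real + 1 pad
    [[.1, .1, .9, .9], [0., 0., 0., 0.], [0., 0., 0., 0.]],  # 1 real + 2 pad
])
num_gt_boxes_per_image = np.array([2, 1])

for boxes, num_boxes in zip(groundtruth_boxes, num_gt_boxes_per_image):
    print(boxes[:num_boxes])  # exactly the unpadded boxes for this image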
research/object_detection/metrics/coco_evaluation_test.py  View file @ e00e0e13
...
@@ -308,6 +308,99 @@ class CocoEvaluationPyFuncTest(tf.test.TestCase):
     self.assertFalse(coco_evaluator._detection_boxes_list)
     self.assertFalse(coco_evaluator._image_ids)
+  def testGetOneMAPWithMatchingGroundtruthAndDetectionsIsAnnotated(self):
+    coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
+        _get_categories_list())
+    image_id = tf.placeholder(tf.string, shape=())
+    groundtruth_boxes = tf.placeholder(tf.float32, shape=(None, 4))
+    groundtruth_classes = tf.placeholder(tf.float32, shape=(None))
+    is_annotated = tf.placeholder(tf.bool, shape=())
+    detection_boxes = tf.placeholder(tf.float32, shape=(None, 4))
+    detection_scores = tf.placeholder(tf.float32, shape=(None))
+    detection_classes = tf.placeholder(tf.float32, shape=(None))
+
+    input_data_fields = standard_fields.InputDataFields
+    detection_fields = standard_fields.DetectionResultFields
+    eval_dict = {
+        input_data_fields.key: image_id,
+        input_data_fields.groundtruth_boxes: groundtruth_boxes,
+        input_data_fields.groundtruth_classes: groundtruth_classes,
+        'is_annotated': is_annotated,
+        detection_fields.detection_boxes: detection_boxes,
+        detection_fields.detection_scores: detection_scores,
+        detection_fields.detection_classes: detection_classes
+    }
+
+    eval_metric_ops = coco_evaluator.get_estimator_eval_metric_ops(eval_dict)
+
+    _, update_op = eval_metric_ops['DetectionBoxes_Precision/mAP']
+
+    with self.test_session() as sess:
+      sess.run(update_op,
+               feed_dict={
+                   image_id: 'image1',
+                   groundtruth_boxes: np.array([[100., 100., 200., 200.]]),
+                   groundtruth_classes: np.array([1]),
+                   is_annotated: True,
+                   detection_boxes: np.array([[100., 100., 200., 200.]]),
+                   detection_scores: np.array([.8]),
+                   detection_classes: np.array([1])
+               })
+      sess.run(update_op,
+               feed_dict={
+                   image_id: 'image2',
+                   groundtruth_boxes: np.array([[50., 50., 100., 100.]]),
+                   groundtruth_classes: np.array([3]),
+                   is_annotated: True,
+                   detection_boxes: np.array([[50., 50., 100., 100.]]),
+                   detection_scores: np.array([.7]),
+                   detection_classes: np.array([3])
+               })
+      sess.run(update_op,
+               feed_dict={
+                   image_id: 'image3',
+                   groundtruth_boxes: np.array([[25., 25., 50., 50.]]),
+                   groundtruth_classes: np.array([2]),
+                   is_annotated: True,
+                   detection_boxes: np.array([[25., 25., 50., 50.]]),
+                   detection_scores: np.array([.9]),
+                   detection_classes: np.array([2])
+               })
+      sess.run(update_op,
+               feed_dict={
+                   image_id: 'image4',
+                   groundtruth_boxes: np.zeros((0, 4)),
+                   groundtruth_classes: np.zeros((0)),
+                   is_annotated: False,  # Note that this image isn't annotated.
+                   detection_boxes: np.array([[25., 25., 50., 50.],
+                                              [25., 25., 70., 50.],
+                                              [25., 25., 80., 50.],
+                                              [25., 25., 90., 50.]]),
+                   detection_scores: np.array([0.6, 0.7, 0.8, 0.9]),
+                   detection_classes: np.array([1, 2, 2, 3])
+               })
+
+      metrics = {}
+      for key, (value_op, _) in eval_metric_ops.iteritems():
+        metrics[key] = value_op
+      metrics = sess.run(metrics)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.50IOU'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP@.75IOU'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (large)'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (medium)'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Precision/mAP (small)'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@1'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@10'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (large)'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (medium)'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionBoxes_Recall/AR@100 (small)'], 1.0)
+      self.assertFalse(coco_evaluator._groundtruth_list)
+      self.assertFalse(coco_evaluator._detection_boxes_list)
+      self.assertFalse(coco_evaluator._image_ids)
+
   def testGetOneMAPWithMatchingGroundtruthAndDetectionsPadded(self):
     coco_evaluator = coco_evaluation.CocoDetectionEvaluator(
         _get_categories_list())
...
@@ -665,22 +758,40 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase):
     _, update_op = eval_metric_ops['DetectionMasks_Precision/mAP']

     with self.test_session() as sess:
-      sess.run(update_op,
-               feed_dict={
-                   image_id: 'image1',
-                   groundtruth_boxes: np.array([[100., 100., 200., 200.]]),
-                   groundtruth_classes: np.array([1]),
-                   groundtruth_masks: np.pad(np.ones([1, 100, 100],
-                                                     dtype=np.uint8),
-                                             ((0, 0), (10, 10), (10, 10)),
-                                             mode='constant'),
-                   detection_scores: np.array([.8]),
-                   detection_classes: np.array([1]),
-                   detection_masks: np.pad(np.ones([1, 100, 100],
-                                                   dtype=np.uint8),
-                                           ((0, 0), (10, 10), (10, 10)),
-                                           mode='constant')
-               })
+      sess.run(
+          update_op,
+          feed_dict={
+              image_id:
+                  'image1',
+              groundtruth_boxes:
+                  np.array([[100., 100., 200., 200.], [50., 50., 100., 100.]]),
+              groundtruth_classes:
+                  np.array([1, 2]),
+              groundtruth_masks:
+                  np.stack([
+                      np.pad(
+                          np.ones([100, 100], dtype=np.uint8), ((10, 10),
+                                                                (10, 10)),
+                          mode='constant'),
+                      np.pad(
+                          np.ones([50, 50], dtype=np.uint8), ((0, 70), (0, 70)),
+                          mode='constant')
+                  ]),
+              detection_scores:
+                  np.array([.9, .8]),
+              detection_classes:
+                  np.array([2, 1]),
+              detection_masks:
+                  np.stack([
+                      np.pad(
+                          np.ones([50, 50], dtype=np.uint8), ((0, 70), (0, 70)),
+                          mode='constant'),
+                      np.pad(
+                          np.ones([100, 100], dtype=np.uint8), ((10, 10),
+                                                                (10, 10)),
+                          mode='constant'),
+                  ])
+          })
       sess.run(update_op,
                feed_dict={
                    image_id: 'image2',
...
@@ -735,6 +846,106 @@ class CocoMaskEvaluationPyFuncTest(tf.test.TestCase):
     self.assertFalse(coco_evaluator._image_id_to_mask_shape_map)
     self.assertFalse(coco_evaluator._detection_masks_list)
+  def testGetOneMAPWithMatchingGroundtruthAndDetectionsBatched(self):
+    coco_evaluator = coco_evaluation.CocoMaskEvaluator(_get_categories_list())
+    batch_size = 3
+    image_id = tf.placeholder(tf.string, shape=(batch_size))
+    groundtruth_boxes = tf.placeholder(tf.float32, shape=(batch_size, None, 4))
+    groundtruth_classes = tf.placeholder(tf.float32, shape=(batch_size, None))
+    groundtruth_masks = tf.placeholder(
+        tf.uint8, shape=(batch_size, None, None, None))
+    detection_scores = tf.placeholder(tf.float32, shape=(batch_size, None))
+    detection_classes = tf.placeholder(tf.float32, shape=(batch_size, None))
+    detection_masks = tf.placeholder(
+        tf.uint8, shape=(batch_size, None, None, None))
+
+    input_data_fields = standard_fields.InputDataFields
+    detection_fields = standard_fields.DetectionResultFields
+    eval_dict = {
+        input_data_fields.key: image_id,
+        input_data_fields.groundtruth_boxes: groundtruth_boxes,
+        input_data_fields.groundtruth_classes: groundtruth_classes,
+        input_data_fields.groundtruth_instance_masks: groundtruth_masks,
+        detection_fields.detection_scores: detection_scores,
+        detection_fields.detection_classes: detection_classes,
+        detection_fields.detection_masks: detection_masks,
+    }
+
+    eval_metric_ops = coco_evaluator.get_estimator_eval_metric_ops(eval_dict)
+
+    _, update_op = eval_metric_ops['DetectionMasks_Precision/mAP']
+
+    with self.test_session() as sess:
+      sess.run(
+          update_op,
+          feed_dict={
+              image_id: ['image1', 'image2', 'image3'],
+              groundtruth_boxes:
+                  np.array([[[100., 100., 200., 200.]],
+                            [[50., 50., 100., 100.]],
+                            [[25., 25., 50., 50.]]]),
+              groundtruth_classes:
+                  np.array([[1], [1], [1]]),
+              groundtruth_masks:
+                  np.stack([
+                      np.pad(
+                          np.ones([1, 100, 100], dtype=np.uint8),
+                          ((0, 0), (0, 0), (0, 0)),
+                          mode='constant'),
+                      np.pad(
+                          np.ones([1, 50, 50], dtype=np.uint8),
+                          ((0, 0), (25, 25), (25, 25)),
+                          mode='constant'),
+                      np.pad(
+                          np.ones([1, 25, 25], dtype=np.uint8),
+                          ((0, 0), (37, 38), (37, 38)),
+                          mode='constant')
+                  ],
+                           axis=0),
+              detection_scores:
+                  np.array([[.8], [.8], [.8]]),
+              detection_classes:
+                  np.array([[1], [1], [1]]),
+              detection_masks:
+                  np.stack([
+                      np.pad(
+                          np.ones([1, 100, 100], dtype=np.uint8),
+                          ((0, 0), (0, 0), (0, 0)),
+                          mode='constant'),
+                      np.pad(
+                          np.ones([1, 50, 50], dtype=np.uint8),
+                          ((0, 0), (25, 25), (25, 25)),
+                          mode='constant'),
+                      np.pad(
+                          np.ones([1, 25, 25], dtype=np.uint8),
+                          ((0, 0), (37, 38), (37, 38)),
+                          mode='constant')
+                  ],
+                           axis=0)
+          })
+      metrics = {}
+      for key, (value_op, _) in eval_metric_ops.iteritems():
+        metrics[key] = value_op
+      metrics = sess.run(metrics)
+      self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP@.50IOU'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP@.75IOU'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP (large)'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP (medium)'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionMasks_Precision/mAP (small)'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionMasks_Recall/AR@1'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionMasks_Recall/AR@10'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionMasks_Recall/AR@100'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionMasks_Recall/AR@100 (large)'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionMasks_Recall/AR@100 (medium)'], 1.0)
+      self.assertAlmostEqual(metrics['DetectionMasks_Recall/AR@100 (small)'], 1.0)
+      self.assertFalse(coco_evaluator._groundtruth_list)
+      self.assertFalse(coco_evaluator._image_ids_with_detections)
+      self.assertFalse(coco_evaluator._image_id_to_mask_shape_map)
+      self.assertFalse(coco_evaluator._detection_masks_list)
+
 if __name__ == '__main__':
   tf.test.main()
research/object_detection/model_lib.py  View file @ e00e0e13
...
@@ -25,6 +25,7 @@ import os
 import tensorflow as tf

 from object_detection import eval_util
+from object_detection import exporter as exporter_lib
 from object_detection import inputs
 from object_detection.builders import graph_rewriter_builder
 from object_detection.builders import model_builder
...
@@ -306,8 +307,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
           prediction_dict, features[fields.InputDataFields.true_image_shape])
       losses = [loss_tensor for loss_tensor in losses_dict.values()]
       if train_config.add_regularization_loss:
-        regularization_losses = tf.get_collection(
-            tf.GraphKeys.REGULARIZATION_LOSSES)
+        regularization_losses = detection_model.regularization_losses()
         if regularization_losses:
           regularization_loss = tf.add_n(
               regularization_losses, name='regularization_loss')
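For context on this change: the model method now gathers both slim-collection and Keras-layer regularizers, so the caller only has to sum whatever list comes back. A sketch of that summation step, with a stand-in list of loss tensors:

import tensorflow as tf

# Stand-in for detection_model.regularization_losses(): any list of scalars.
regularization_losses = [tf.constant(0.1), tf.constant(0.25)]

if regularization_losses:
  regularization_loss = tf.add_n(regularization_losses,
                                 name='regularization_loss')

with tf.Session() as sess:
  print(sess.run(regularization_loss))  # 0.35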
...
@@ -353,20 +353,24 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
       for var in optimizer_summary_vars:
         tf.summary.scalar(var.op.name, var)
       summaries = [] if use_tpu else None
+      if train_config.summarize_gradients:
+        summaries = ['gradients', 'gradient_norm', 'global_gradient_norm']
       train_op = tf.contrib.layers.optimize_loss(
           loss=total_loss,
           global_step=global_step,
           learning_rate=None,
           clip_gradients=clip_gradients_value,
           optimizer=training_optimizer,
+          update_ops=detection_model.updates(),
           variables=trainable_variables,
           summaries=summaries,
           name='')  # Preventing scope prefix on all variables.

     if mode == tf.estimator.ModeKeys.PREDICT:
+      exported_output = exporter_lib.add_output_tensor_nodes(detections)
       export_outputs = {
           tf.saved_model.signature_constants.PREDICT_METHOD_NAME:
-              tf.estimator.export.PredictOutput(detections)
+              tf.estimator.export.PredictOutput(exported_output)
       }

     eval_metric_ops = None
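Regarding the PREDICT change: tf.estimator.export.PredictOutput accepts a dict of named tensors, so wrapping the post-processed detections in named output nodes fixes the exported signature's keys. A toy signature with illustrative tensor names standing in for what exporter_lib.add_output_tensor_nodes produces:

import tensorflow as tf

# Illustrative named outputs; the real code derives these from `detections`.
exported_output = {
    'detection_boxes': tf.zeros([1, 100, 4]),
    'detection_scores': tf.zeros([1, 100]),
    'num_detections': tf.constant([100.0]),
}
export_outputs = {
    tf.saved_model.signature_constants.PREDICT_METHOD_NAME:
        tf.estimator.export.PredictOutput(exported_output)
}
print(sorted(export_outputs))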
...
@@ -456,6 +460,7 @@ def create_model_fn(detection_model_fn, configs, hparams, use_tpu=False):
 def create_estimator_and_inputs(run_config,
                                 hparams,
                                 pipeline_config_path,
+                                config_override=None,
                                 train_steps=None,
                                 sample_1_of_n_eval_examples=1,
                                 sample_1_of_n_eval_on_train_examples=1,
...
@@ -465,6 +470,7 @@ def create_estimator_and_inputs(run_config,
                                 num_shards=1,
                                 params=None,
                                 override_eval_num_epochs=True,
+                                save_final_config=False,
                                 **kwargs):
   """Creates `Estimator`, input functions, and steps.
...
@@ -472,6 +478,8 @@ def create_estimator_and_inputs(run_config,
     run_config: A `RunConfig`.
     hparams: A `HParams`.
     pipeline_config_path: A path to a pipeline config file.
+    config_override: A pipeline_pb2.TrainEvalPipelineConfig text proto to
+      override the config from `pipeline_config_path`.
     train_steps: Number of training steps. If None, the number of training steps
       is set from the `TrainConfig` proto.
     sample_1_of_n_eval_examples: Integer representing how often an eval example
...
@@ -499,6 +507,8 @@ def create_estimator_and_inputs(run_config,
       `use_tpu_estimator` is True.
     override_eval_num_epochs: Whether to overwrite the number of epochs to
       1 for eval_input.
+    save_final_config: Whether to save final config (obtained after applying
+      overrides) to `estimator.model_dir`.
     **kwargs: Additional keyword arguments for configuration override.

   Returns:
...
@@ -522,7 +532,8 @@ def create_estimator_and_inputs(run_config,
   create_eval_input_fn = MODEL_BUILD_UTIL_MAP['create_eval_input_fn']
   create_predict_input_fn = MODEL_BUILD_UTIL_MAP['create_predict_input_fn']

-  configs = get_configs_from_pipeline_file(pipeline_config_path)
+  configs = get_configs_from_pipeline_file(pipeline_config_path,
+                                           config_override=config_override)
   kwargs.update({
       'train_steps': train_steps,
       'sample_1_of_n_eval_examples': sample_1_of_n_eval_examples
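One way to read the new config_override parameter: it is a text-format TrainEvalPipelineConfig merged over the file's config before anything else happens, so override values win where set. The real merge uses protobuf text merging; the dict-based stand-in below only mirrors that override-wins semantics for illustration:

# A minimal stand-in showing the merge-over semantics of config_override:
# values present in the override win over the file's values.
def apply_override(file_config, override):
    merged = dict(file_config)
    merged.update({k: v for k, v in override.items() if v is not None})
    return merged

file_config = {'batch_size': 24, 'num_steps': 200000}
override = {'batch_size': 8, 'num_steps': None}
print(apply_override(file_config, override))
# {'batch_size': 8, 'num_steps': 200000}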
...
@@ -595,7 +606,7 @@ def create_estimator_and_inputs(run_config,
     estimator = tf.estimator.Estimator(model_fn=model_fn, config=run_config)

   # Write the as-run pipeline config to disk.
-  if run_config.is_chief:
+  if run_config.is_chief and save_final_config:
     pipeline_config_final = create_pipeline_proto_from_configs(configs)
     config_util.save_pipeline_config(pipeline_config_final, estimator.model_dir)
...
@@ -641,11 +652,17 @@ def create_train_and_eval_specs(train_input_fn,
       input_fn=train_input_fn, max_steps=train_steps)
   if eval_spec_names is None:
-    eval_spec_names = [str(i) for i in range(len(eval_input_fns))
-                      ]
+    eval_spec_names = [str(i) for i in range(len(eval_input_fns))]

   eval_specs = []
-  for eval_spec_name, eval_input_fn in zip(eval_spec_names, eval_input_fns):
-    exporter_name = '{}_{}'.format(final_exporter_name, eval_spec_name)
+  for index, (eval_spec_name, eval_input_fn) in enumerate(
+      zip(eval_spec_names, eval_input_fns)):
+    # Uses final_exporter_name as exporter_name for the first eval spec for
+    # backward compatibility.
+    if index == 0:
+      exporter_name = final_exporter_name
+    else:
+      exporter_name = '{}_{}'.format(final_exporter_name, eval_spec_name)
     exporter = tf.estimator.FinalExporter(
         name=exporter_name, serving_input_receiver_fn=predict_input_fn)
     eval_specs.append(
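The loop rewrite above preserves the old export directory name for the first eval spec. A standalone sketch of just the naming rule:

def exporter_names(final_exporter_name, eval_spec_names):
    """First eval spec keeps the bare name for backward compatibility."""
    names = []
    for index, eval_spec_name in enumerate(eval_spec_names):
        if index == 0:
            names.append(final_exporter_name)
        else:
            names.append('{}_{}'.format(final_exporter_name, eval_spec_name))
    return names

print(exporter_names('exporter', ['holdout', 'eval_on_train']))
# ['exporter', 'exporter_eval_on_train'] -- matching the updated unit test.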
...
@@ -747,6 +764,7 @@ def populate_experiment(run_config,
       train_steps=train_steps,
       eval_steps=eval_steps,
       model_fn_creator=model_fn_creator,
+      save_final_config=True,
       **kwargs)
   estimator = train_and_eval_dict['estimator']
   train_input_fn = train_and_eval_dict['train_input_fn']
...
research/object_detection/model_lib_test.py  View file @ e00e0e13
...
@@ -310,7 +310,7 @@ class ModelLibTest(tf.test.TestCase):
     self.assertEqual(2, len(eval_specs))
     self.assertEqual(None, eval_specs[0].steps)
     self.assertEqual('holdout', eval_specs[0].name)
-    self.assertEqual('exporter_holdout', eval_specs[0].exporters[0].name)
+    self.assertEqual('exporter', eval_specs[0].exporters[0].name)
     self.assertEqual(None, eval_specs[1].steps)
     self.assertEqual('eval_on_train', eval_specs[1].name)
...
research/object_detection/model_tpu_main.py  View file @ e00e0e13
...
@@ -114,6 +114,7 @@ def main(unused_argv):
       use_tpu_estimator=True,
       use_tpu=FLAGS.use_tpu,
       num_shards=FLAGS.num_shards,
+      save_final_config=FLAGS.mode == 'train',
       **kwargs)
   estimator = train_and_eval_dict['estimator']
   train_input_fn = train_and_eval_dict['train_input_fn']
...
research/object_detection/models/faster_rcnn_resnet_v1_feature_extractor.py  View file @ e00e0e13
...
@@ -72,6 +72,8 @@ class FasterRCNNResnetV1FeatureExtractor(
     VGG style channel mean subtraction as described here:
     https://gist.github.com/ksimonyan/211839e770f7b538e2d8#file-readme-md
+    Note that if the number of channels is not equal to 3, the mean subtraction
+    will be skipped and the original resized_inputs will be returned.

     Args:
       resized_inputs: A [batch, height_in, width_in, channels] float32 tensor
...
@@ -82,8 +84,11 @@ class FasterRCNNResnetV1FeatureExtractor(
         tensor representing a batch of images.
     """
-    channel_means = [123.68, 116.779, 103.939]
-    return resized_inputs - [[channel_means]]
+    if resized_inputs.shape.as_list()[3] == 3:
+      channel_means = [123.68, 116.779, 103.939]
+      return resized_inputs - [[channel_means]]
+    else:
+      return resized_inputs

   def _extract_proposal_features(self, preprocessed_inputs, scope):
     """Extracts first stage RPN features.
...
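The guard added here reflects that mean subtraction only makes sense for three-channel inputs; anything else passes through untouched. A NumPy sketch of the same check:

import numpy as np

def preprocess(resized_inputs):
    """Subtracts VGG channel means only when the input has 3 channels."""
    if resized_inputs.shape[3] == 3:
        channel_means = [123.68, 116.779, 103.939]
        return resized_inputs - [[channel_means]]
    return resized_inputs  # e.g. grayscale or multispectral inputs pass through

rgb = np.zeros([1, 4, 4, 3], dtype=np.float32)
gray = np.zeros([1, 4, 4, 1], dtype=np.float32)
print(preprocess(rgb)[0, 0, 0])   # [-123.68 -116.779 -103.939]
print(preprocess(gray).shape)     # (1, 4, 4, 1), untouched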
research/object_detection/models/feature_map_generators.py  View file @ e00e0e13
...
@@ -146,7 +146,6 @@ class KerasMultiResolutionFeatureMaps(tf.keras.Model):
       use_depthwise = feature_map_layout['use_depthwise']
     for index, from_layer in enumerate(feature_map_layout['from_layer']):
       net = []
-      self.convolutions.append(net)
       layer_depth = feature_map_layout['layer_depth'][index]
       conv_kernel_size = 3
       if 'conv_kernel_size' in feature_map_layout:
...
@@ -231,6 +230,10 @@ class KerasMultiResolutionFeatureMaps(tf.keras.Model):
                 conv_hyperparams.build_activation_layer(
                     name=layer_name))

+      # Until certain bugs are fixed in checkpointable lists,
+      # this net must be appended only once it's been filled with layers
+      self.convolutions.append(net)
+
   def call(self, image_features):
     """Generate the multi-resolution feature maps.
...
@@ -263,7 +266,8 @@ class KerasMultiResolutionFeatureMaps(tf.keras.Model):
 def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
-                                  min_depth, insert_1x1_conv, image_features):
+                                  min_depth, insert_1x1_conv, image_features,
+                                  pool_residual=False):
   """Generates multi resolution feature maps from input image features.

   Generates multi-scale feature maps for detection as in the SSD papers by
...
@@ -317,6 +321,13 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
       should be inserted before shrinking the feature map.
     image_features: A dictionary of handles to activation tensors from the
       base feature extractor.
+    pool_residual: Whether to add an average pooling layer followed by a
+      residual connection between subsequent feature maps when the channel
+      depths match. For example, with option 'layer_depth': [-1, 512, 256, 256],
+      a pooling and residual layer is added between the third and fourth feature
+      maps. This option is best used with the Weight Shared Convolution Box
+      Predictor when all feature maps have the same channel depth, to encourage
+      more consistent features across multi-scale feature maps.

   Returns:
     feature_maps: an OrderedDict mapping keys (feature map names) to
...
@@ -350,6 +361,7 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
       feature_map_keys.append(from_layer)
     else:
       pre_layer = feature_maps[-1]
+      pre_layer_depth = pre_layer.get_shape().as_list()[3]
       intermediate_layer = pre_layer
       if insert_1x1_conv:
         layer_name = '{}_1_Conv2d_{}_1x1_{}'.format(
...
@@ -383,6 +395,12 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
             padding='SAME',
             stride=1,
             scope=layer_name)
+        if pool_residual and pre_layer_depth == depth_fn(layer_depth):
+          feature_map += slim.avg_pool2d(
+              pre_layer, [3, 3],
+              padding='SAME',
+              stride=2,
+              scope=layer_name + '_pool')
       else:
         feature_map = slim.conv2d(
             intermediate_layer,
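To see what the new pool_residual branch computes: a stride-2 average pool of the previous feature map has the same spatial size as the new stride-2 feature map, so when channel depths match it can be added as a residual. A small TensorFlow sketch with illustrative shapes:

import tensorflow as tf

pre_layer = tf.random_uniform([1, 8, 8, 256])      # previous feature map
feature_map = tf.random_uniform([1, 4, 4, 256])    # stride-2 conv output

# Average pooling with stride 2 halves the spatial size, matching feature_map,
# so the residual addition below is shape-compatible when depths agree.
pooled = tf.nn.avg_pool(pre_layer, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                        padding='SAME')
feature_map += pooled

with tf.Session() as sess:
    print(sess.run(tf.shape(feature_map)))  # [1 4 4 256]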
...
@@ -399,6 +417,7 @@ def multi_resolution_feature_maps(feature_map_layout, depth_multiplier,
 def fpn_top_down_feature_maps(image_features,
                               depth,
                               use_depthwise=False,
+                              use_explicit_padding=False,
                               scope=None):
   """Generates `top-down` feature maps for Feature Pyramid Networks.
...
@@ -409,7 +428,9 @@ def fpn_top_down_feature_maps(image_features,
       Spatial resolutions of successive tensors must reduce exactly by a factor
       of 2.
     depth: depth of output feature maps.
-    use_depthwise: use depthwise separable conv instead of regular conv.
+    use_depthwise: whether to use depthwise separable conv instead of regular
+      conv.
+    use_explicit_padding: whether to use explicit padding.
     scope: A scope name to wrap this op under.

   Returns:
...
@@ -420,8 +441,10 @@ def fpn_top_down_feature_maps(image_features,
     num_levels = len(image_features)
     output_feature_maps_list = []
     output_feature_map_keys = []
+    padding = 'VALID' if use_explicit_padding else 'SAME'
+    kernel_size = 3
     with slim.arg_scope(
-        [slim.conv2d, slim.separable_conv2d], padding='SAME', stride=1):
+        [slim.conv2d, slim.separable_conv2d], padding=padding, stride=1):
       top_down = slim.conv2d(
           image_features[-1][1],
           depth, [1, 1], activation_fn=None, normalizer_fn=None,
...
@@ -436,14 +459,20 @@ def fpn_top_down_feature_maps(image_features,
             image_features[level][1], depth, [1, 1],
             activation_fn=None, normalizer_fn=None,
             scope='projection_%d' % (level + 1))
+        if use_explicit_padding:
+          # slice top_down to the same shape as residual
+          residual_shape = tf.shape(residual)
+          top_down = top_down[:, :residual_shape[1], :residual_shape[2], :]
         top_down += residual
         if use_depthwise:
          conv_op = functools.partial(slim.separable_conv2d, depth_multiplier=1)
         else:
           conv_op = slim.conv2d
+        if use_explicit_padding:
+          top_down = ops.fixed_padding(top_down, kernel_size)
         output_feature_maps_list.append(conv_op(
             top_down,
-            depth, [3, 3],
+            depth, [kernel_size, kernel_size],
             scope='smoothing_%d' % (level + 1)))
         output_feature_map_keys.append('top_down_%s' % image_features[level][0])
   return collections.OrderedDict(reversed(
...
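The explicit-padding path above pads before each 3x3 smoothing conv (so the conv can run with VALID padding) and crops the upsampled map back to the residual's shape. A sketch of the fixed-padding step it relies on; the helper below is an illustrative reimplementation of what ops.fixed_padding does, not the library function itself:

import tensorflow as tf

def fixed_padding(inputs, kernel_size):
    """Pads the spatial dims so a VALID conv keeps the input size (sketch)."""
    pad_total = kernel_size - 1
    pad_beg = pad_total // 2
    pad_end = pad_total - pad_beg
    return tf.pad(inputs,
                  [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]])

top_down = tf.zeros([1, 16, 16, 64])
padded = fixed_padding(top_down, kernel_size=3)
with tf.Session() as sess:
    print(sess.run(tf.shape(padded)))  # [1 18 18 64] -> VALID 3x3 gives 16x16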
research/object_detection/models/feature_map_generators_test.py  View file @ e00e0e13
...
@@ -45,6 +45,11 @@ EMBEDDED_SSD_MOBILENET_V1_LAYOUT = {
     'conv_kernel_size': [-1, -1, 3, 3, 2],
 }

+SSD_MOBILENET_V1_WEIGHT_SHARED_LAYOUT = {
+    'from_layer': ['Conv2d_13_pointwise', '', '', ''],
+    'layer_depth': [-1, 256, 256, 256],
+}
+

 @parameterized.parameters(
     {'use_keras': False},
...
@@ -67,7 +72,8 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
     text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
     return hyperparams_builder.KerasLayerHyperparams(conv_hyperparams)

-  def _build_feature_map_generator(self, feature_map_layout, use_keras):
+  def _build_feature_map_generator(self, feature_map_layout, use_keras,
+                                   pool_residual=False):
     if use_keras:
       return feature_map_generators.KerasMultiResolutionFeatureMaps(
           feature_map_layout=feature_map_layout,
...
@@ -86,7 +92,8 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
         depth_multiplier=1,
         min_depth=32,
         insert_1x1_conv=True,
-        image_features=image_features)
+        image_features=image_features,
+        pool_residual=pool_residual)
     return feature_map_generator

   def test_get_expected_feature_map_shapes_with_inception_v2(self, use_keras):
...
@@ -209,6 +216,34 @@ class MultiResolutionFeatureMapGeneratorTest(tf.test.TestCase):
         (key, value.shape) for key, value in out_feature_maps.items())
     self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)
def
test_feature_map_shapes_with_pool_residual_ssd_mobilenet_v1
(
self
,
use_keras
):
image_features
=
{
'Conv2d_13_pointwise'
:
tf
.
random_uniform
([
4
,
8
,
8
,
1024
],
dtype
=
tf
.
float32
),
}
feature_map_generator
=
self
.
_build_feature_map_generator
(
feature_map_layout
=
SSD_MOBILENET_V1_WEIGHT_SHARED_LAYOUT
,
use_keras
=
use_keras
,
pool_residual
=
True
)
feature_maps
=
feature_map_generator
(
image_features
)
expected_feature_map_shapes
=
{
'Conv2d_13_pointwise'
:
(
4
,
8
,
8
,
1024
),
'Conv2d_13_pointwise_2_Conv2d_1_3x3_s2_256'
:
(
4
,
4
,
4
,
256
),
'Conv2d_13_pointwise_2_Conv2d_2_3x3_s2_256'
:
(
4
,
2
,
2
,
256
),
'Conv2d_13_pointwise_2_Conv2d_3_3x3_s2_256'
:
(
4
,
1
,
1
,
256
)}
init_op
=
tf
.
global_variables_initializer
()
with
self
.
test_session
()
as
sess
:
sess
.
run
(
init_op
)
out_feature_maps
=
sess
.
run
(
feature_maps
)
out_feature_map_shapes
=
dict
(
(
key
,
value
.
shape
)
for
key
,
value
in
out_feature_maps
.
items
())
self
.
assertDictEqual
(
expected_feature_map_shapes
,
out_feature_map_shapes
)
def
test_get_expected_variable_names_with_inception_v2
(
self
,
use_keras
):
def
test_get_expected_variable_names_with_inception_v2
(
self
,
use_keras
):
image_features
=
{
image_features
=
{
'Mixed_3c'
:
tf
.
random_uniform
([
4
,
28
,
28
,
256
],
dtype
=
tf
.
float32
),
'Mixed_3c'
:
tf
.
random_uniform
([
4
,
28
,
28
,
256
],
dtype
=
tf
.
float32
),
...
...
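The diff itself never defines pool_residual, but the test pins down the observable behavior: with a weight-shared layout of equal 256-deep extra layers, the output shapes are unchanged, which is consistent with pool_residual adding an average-pooled shortcut from each feature map to the next stride-2 layer of matching depth. A sketch of that reading (an assumption inferred from the test, not the library's code):

import tensorflow as tf
slim = tf.contrib.slim

def extra_layer_with_pool_residual(prev_map, depth):
  """Hypothetical extra SSD layer with a pooled residual shortcut."""
  new_map = slim.conv2d(prev_map, depth, [3, 3], stride=2, padding='SAME')
  # A 2x2/s2 average pool halves the grid, matching the conv output; the
  # addition only type-checks when prev_map already has `depth` channels,
  # which is presumably why the test layout uses 256 for every extra layer.
  new_map += slim.avg_pool2d(prev_map, [2, 2], stride=2, padding='SAME')
  return new_map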
research/object_detection/models/keras_applications/mobilenet_v2.py
View file @ e00e0e13
...
@@ -82,6 +82,8 @@ class _LayersOverride(object):
     self._conv_hyperparams = conv_hyperparams
     self._use_explicit_padding = use_explicit_padding
     self._min_depth = min_depth
+    self.regularizer = tf.keras.regularizers.l2(0.00004 * 0.5)
+    self.initializer = tf.truncated_normal_initializer(stddev=0.09)

   def _FixedPaddingLayer(self, kernel_size):
     return tf.keras.layers.Lambda(lambda x: ops.fixed_padding(x, kernel_size))
...
@@ -114,6 +116,9 @@ class _LayersOverride(object):
     if self._conv_hyperparams:
       kwargs = self._conv_hyperparams.params(**kwargs)
+    else:
+      kwargs['kernel_regularizer'] = self.regularizer
+      kwargs['kernel_initializer'] = self.initializer
     kwargs['padding'] = 'same'
     kernel_size = kwargs.get('kernel_size')
...
@@ -144,6 +149,8 @@ class _LayersOverride(object):
     """
     if self._conv_hyperparams:
       kwargs = self._conv_hyperparams.params(**kwargs)
+    else:
+      kwargs['depthwise_initializer'] = self.initializer
     kwargs['padding'] = 'same'
     kernel_size = kwargs.get('kernel_size')
...
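The `0.00004 * 0.5` factor looks odd but lines up with the two libraries' conventions: Keras computes l2(l)(w) = l * sum(w**2), while slim's l2_regularizer(scale)(w) = scale * sum(w**2) / 2, so halving the Keras factor reproduces the usual slim weight decay of 4e-5. A quick check of that equivalence (my inference; the diff does not state it):

import numpy as np
import tensorflow as tf

w = tf.constant(np.random.randn(3, 3, 8, 8).astype(np.float32))
keras_loss = tf.keras.regularizers.l2(0.00004 * 0.5)(w)
slim_loss = tf.contrib.layers.l2_regularizer(0.00004)(w)
with tf.Session() as sess:
  a, b = sess.run([keras_loss, slim_loss])
  print(np.isclose(a, b))  # True: both equal 2e-5 * sum(w ** 2)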
research/object_detection/models/ssd_mobilenet_v1_fpn_feature_extractor.py
View file @ e00e0e13
...
@@ -31,11 +31,10 @@ slim = tf.contrib.slim
 # A modified config of mobilenet v1 that makes it more detection friendly,
 def _create_modified_mobilenet_config():
-  conv_defs = copy.copy(mobilenet_v1.MOBILENETV1_CONV_DEFS)
+  conv_defs = copy.deepcopy(mobilenet_v1.MOBILENETV1_CONV_DEFS)
   conv_defs[-2] = mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=2, depth=512)
   conv_defs[-1] = mobilenet_v1.DepthSepConv(kernel=[3, 3], stride=1, depth=256)
   return conv_defs

-_CONV_DEFS = _create_modified_mobilenet_config()
-
 class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
...
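The copy.copy to copy.deepcopy switch matters most for the nested V2_DEF dict changed the same way further down: a shallow copy of a dict still shares its inner 'spec' list, so mutating the copy silently rewrites the module-level default. For the flat MOBILENETV1_CONV_DEFS list the shallow copy happened to be safe, but deepcopy makes both helpers uniformly side-effect free. A stripped-down illustration (stand-in data, not the real conv defs):

import copy

V2_DEF = {'spec': [dict(op='conv', num_outputs=1280)]}
shallow = copy.copy(V2_DEF)
shallow['spec'][-1] = dict(op='conv', num_outputs=256)
print(V2_DEF['spec'][-1]['num_outputs'])  # 256 -- shared inner list mutated

V2_DEF = {'spec': [dict(op='conv', num_outputs=1280)]}
deep = copy.deepcopy(V2_DEF)
deep['spec'][-1] = dict(op='conv', num_outputs=256)
print(V2_DEF['spec'][-1]['num_outputs'])  # 1280 -- original left intact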
@@ -98,6 +97,9 @@ class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
     self._fpn_min_level = fpn_min_level
     self._fpn_max_level = fpn_max_level
     self._additional_layer_depth = additional_layer_depth
+    self._conv_defs = None
+    if self._use_depthwise:
+      self._conv_defs = _create_modified_mobilenet_config()

   def preprocess(self, resized_inputs):
     """SSD preprocessing.
...
@@ -141,7 +143,7 @@ class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
           final_endpoint='Conv2d_13_pointwise',
           min_depth=self._min_depth,
           depth_multiplier=self._depth_multiplier,
-          conv_defs=_CONV_DEFS if self._use_depthwise else None,
+          conv_defs=self._conv_defs,
           use_explicit_padding=self._use_explicit_padding,
           scope=scope)
...
@@ -159,7 +161,8 @@ class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
       fpn_features = feature_map_generators.fpn_top_down_feature_maps(
           [(key, image_features[key]) for key in feature_block_list],
           depth=depth_fn(self._additional_layer_depth),
-          use_depthwise=self._use_depthwise)
+          use_depthwise=self._use_depthwise,
+          use_explicit_padding=self._use_explicit_padding)
       feature_maps = []
       for level in range(self._fpn_min_level, base_fpn_max_level + 1):
         feature_maps.append(fpn_features['top_down_{}'.format(
...
@@ -167,18 +170,23 @@ class SSDMobileNetV1FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
       last_feature_map = fpn_features['top_down_{}'.format(
           feature_blocks[base_fpn_max_level - 2])]
       # Construct coarse features
+      padding = 'VALID' if self._use_explicit_padding else 'SAME'
+      kernel_size = 3
       for i in range(base_fpn_max_level + 1, self._fpn_max_level + 1):
         if self._use_depthwise:
           conv_op = functools.partial(
               slim.separable_conv2d, depth_multiplier=1)
         else:
           conv_op = slim.conv2d
+        if self._use_explicit_padding:
+          last_feature_map = ops.fixed_padding(last_feature_map, kernel_size)
         last_feature_map = conv_op(
             last_feature_map,
             num_outputs=depth_fn(self._additional_layer_depth),
-            kernel_size=[3, 3],
+            kernel_size=[kernel_size, kernel_size],
             stride=2,
-            padding='SAME',
+            padding=padding,
             scope='bottom_up_Conv2d_{}'.format(i - base_fpn_max_level + 13))
         feature_maps.append(last_feature_map)
       return feature_maps
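For reference, ops.fixed_padding is the standard ResNet/MobileNet helper that pads kernel_size - 1 pixels split across both spatial sides, so the following VALID convolution keeps SAME-like geometry without input-size-dependent pad placement. A self-contained sketch of that behavior (assumed to match the helper; not copied from this commit):

import tensorflow as tf

def fixed_padding(inputs, kernel_size):
  """Pads NHWC `inputs` so a VALID conv mimics SAME deterministically."""
  pad_total = kernel_size - 1
  pad_beg = pad_total // 2
  pad_end = pad_total - pad_beg
  return tf.pad(inputs,
                [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]])

x = tf.zeros([1, 8, 8, 256])
y = fixed_padding(x, 3)  # -> [1, 10, 10, 256]
# A 3x3/s2 VALID conv on y gives floor((10 - 3) / 2) + 1 = 4, the same 4x4
# grid a SAME conv would give on x, but with deterministic pad placement.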
research/object_detection/models/ssd_mobilenet_v2_fpn_feature_extractor.py
View file @ e00e0e13
...
@@ -30,17 +30,14 @@ from nets.mobilenet import mobilenet_v2
 slim = tf.contrib.slim

-# A modified config of mobilenet v2 that makes it more detection friendly,
+# A modified config of mobilenet v2 that makes it more detection friendly.
 def _create_modified_mobilenet_config():
-  conv_defs = copy.copy(mobilenet_v2.V2_DEF)
+  conv_defs = copy.deepcopy(mobilenet_v2.V2_DEF)
   conv_defs['spec'][-1] = mobilenet.op(
       slim.conv2d, stride=1, kernel_size=[1, 1], num_outputs=256)
   return conv_defs

-_CONV_DEFS = _create_modified_mobilenet_config()
-
 class SSDMobileNetV2FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
   """SSD Feature Extractor using MobilenetV2 FPN features."""
...
@@ -100,6 +97,9 @@ class SSDMobileNetV2FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
     self._fpn_min_level = fpn_min_level
     self._fpn_max_level = fpn_max_level
     self._additional_layer_depth = additional_layer_depth
+    self._conv_defs = None
+    if self._use_depthwise:
+      self._conv_defs = _create_modified_mobilenet_config()

   def preprocess(self, resized_inputs):
     """SSD preprocessing.
...
@@ -142,7 +142,7 @@ class SSDMobileNetV2FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
           ops.pad_to_multiple(preprocessed_inputs, self._pad_to_multiple),
           final_endpoint='layer_19',
           depth_multiplier=self._depth_multiplier,
-          conv_defs=_CONV_DEFS if self._use_depthwise else None,
+          conv_defs=self._conv_defs,
           use_explicit_padding=self._use_explicit_padding,
           scope=scope)
       depth_fn = lambda d: max(int(d * self._depth_multiplier), self._min_depth)
...
@@ -158,7 +158,8 @@ class SSDMobileNetV2FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
       fpn_features = feature_map_generators.fpn_top_down_feature_maps(
           [(key, image_features[key]) for key in feature_block_list],
           depth=depth_fn(self._additional_layer_depth),
-          use_depthwise=self._use_depthwise)
+          use_depthwise=self._use_depthwise,
+          use_explicit_padding=self._use_explicit_padding)
       feature_maps = []
       for level in range(self._fpn_min_level, base_fpn_max_level + 1):
         feature_maps.append(fpn_features['top_down_{}'.format(
...
@@ -166,18 +167,23 @@ class SSDMobileNetV2FpnFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
       last_feature_map = fpn_features['top_down_{}'.format(
           feature_blocks[base_fpn_max_level - 2])]
       # Construct coarse features
+      padding = 'VALID' if self._use_explicit_padding else 'SAME'
+      kernel_size = 3
       for i in range(base_fpn_max_level + 1, self._fpn_max_level + 1):
         if self._use_depthwise:
           conv_op = functools.partial(
               slim.separable_conv2d, depth_multiplier=1)
         else:
           conv_op = slim.conv2d
+        if self._use_explicit_padding:
+          last_feature_map = ops.fixed_padding(last_feature_map, kernel_size)
         last_feature_map = conv_op(
             last_feature_map,
             num_outputs=depth_fn(self._additional_layer_depth),
-            kernel_size=[3, 3],
+            kernel_size=[kernel_size, kernel_size],
             stride=2,
-            padding='SAME',
+            padding=padding,
             scope='bottom_up_Conv2d_{}'.format(i - base_fpn_max_level + 19))
         feature_maps.append(last_feature_map)
       return feature_maps
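As a sanity check on the scope naming in the loop above: with the defaults this extractor is usually built with (fpn_min_level=3, fpn_max_level=7, an assumption since the constructor signature sits outside this diff) and 'layer_19' as the stride-32 level-5 endpoint, base_fpn_max_level is 5 and the two coarse layers come out as:

base_fpn_max_level, fpn_max_level = 5, 7
for i in range(base_fpn_max_level + 1, fpn_max_level + 1):
  print('bottom_up_Conv2d_{}'.format(i - base_fpn_max_level + 19))
# bottom_up_Conv2d_20  (level 6, stride 64)
# bottom_up_Conv2d_21  (level 7, stride 128)

The v1 variant uses +13 for the same reason: its last base endpoint is 'Conv2d_13_pointwise', so its coarse layers are named bottom_up_Conv2d_14 and bottom_up_Conv2d_15.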