ModelZoo / ResNet50_tensorflow

Commit 12714f88, authored Oct 28, 2017 by Vivek Rathod

update post_processing module, builders, and meta architectures.

parent c46caa56
Showing 12 changed files with 1027 additions and 264 deletions:
  research/object_detection/builders/post_processing_builder.py                   +19   -7
  research/object_detection/builders/post_processing_builder_test.py              +37   -3
  research/object_detection/core/post_processing.py                               +106  -23
  research/object_detection/core/post_processing_test.py                          +192  -18
  research/object_detection/meta_architectures/BUILD                              +1    -0
  research/object_detection/meta_architectures/faster_rcnn_meta_arch.py           +220  -76
  research/object_detection/meta_architectures/faster_rcnn_meta_arch_test.py      +23   -11
  research/object_detection/meta_architectures/faster_rcnn_meta_arch_test_lib.py  +268  -81
  research/object_detection/meta_architectures/rfcn_meta_arch.py                  +19   -3
  research/object_detection/meta_architectures/rfcn_meta_arch_test.py             +9    -0
  research/object_detection/meta_architectures/ssd_meta_arch.py                   +124  -38
  research/object_detection/meta_architectures/ssd_meta_arch_test.py              +9    -4
research/object_detection/builders/post_processing_builder.py

@@ -28,8 +28,8 @@ def build(post_processing_config):
     configuration.
 
   Non-max suppression callable takes `boxes`, `scores`, and optionally
-  `clip_window`, `parallel_iterations` and `scope` as inputs. It returns
-  `nms_boxes`, `nms_scores`, `nms_classes` and `num_detections`. See
+  `clip_window`, `parallel_iterations`, `masks`, and `scope` as inputs. It
+  returns `nms_boxes`, `nms_scores`, `nms_classes`, `nms_masks` and
+  `num_detections`. See
   post_processing.batch_multiclass_non_max_suppression for the type and shape
   of these tensors.

@@ -55,7 +55,8 @@ def build(post_processing_config):
   non_max_suppressor_fn = _build_non_max_suppressor(
       post_processing_config.batch_non_max_suppression)
-  score_converter_fn = _build_score_converter(
-      post_processing_config.score_converter)
+  score_converter_fn = _build_score_converter(
+      post_processing_config.score_converter,
+      post_processing_config.logit_scale)
   return non_max_suppressor_fn, score_converter_fn

@@ -87,7 +88,17 @@ def _build_non_max_suppressor(nms_config):
   return non_max_suppressor_fn
 
-def _build_score_converter(score_converter_config):
+def _score_converter_fn_with_logit_scale(tf_score_converter_fn, logit_scale):
+  """Create a function to scale logits then apply a Tensorflow function."""
+  def score_converter_fn(logits):
+    scaled_logits = tf.divide(logits, logit_scale, name='scale_logits')
+    return tf_score_converter_fn(scaled_logits, name='convert_scores')
+  score_converter_fn.__name__ = '%s_with_logit_scale' % (
+      tf_score_converter_fn.__name__)
+  return score_converter_fn
+
+
+def _build_score_converter(score_converter_config, logit_scale):
   """Builds score converter based on the config.
 
   Builds one of [tf.identity, tf.sigmoid, tf.softmax] score converters based on

@@ -95,6 +106,7 @@ def _build_score_converter(score_converter_config):
   Args:
     score_converter_config: post_processing_pb2.PostProcessing.score_converter.
+    logit_scale: temperature to use for SOFTMAX score_converter.
 
   Returns:
     Callable score converter op.

@@ -103,9 +115,9 @@ def _build_score_converter(score_converter_config):
     ValueError: On unknown score converter.
   """
   if score_converter_config == post_processing_pb2.PostProcessing.IDENTITY:
-    return tf.identity
+    return _score_converter_fn_with_logit_scale(tf.identity, logit_scale)
   if score_converter_config == post_processing_pb2.PostProcessing.SIGMOID:
-    return tf.sigmoid
+    return _score_converter_fn_with_logit_scale(tf.sigmoid, logit_scale)
   if score_converter_config == post_processing_pb2.PostProcessing.SOFTMAX:
-    return tf.nn.softmax
+    return _score_converter_fn_with_logit_scale(tf.nn.softmax, logit_scale)
   raise ValueError('Unknown score converter.')
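For readers skimming the diff: _score_converter_fn_with_logit_scale simply divides the logits by `logit_scale` (a temperature) before handing them to the chosen converter. A minimal, framework-free sketch of the effect (illustrative only, not code from the commit):

  import numpy as np

  def softmax(x):
    # Numerically stable softmax.
    e = np.exp(x - np.max(x))
    return e / e.sum()

  logits = np.array([2.0, 1.0, 0.1])
  for logit_scale in (1.0, 2.0):
    print(logit_scale, softmax(logits / logit_scale))

A larger `logit_scale` flattens the softmax distribution. With the IDENTITY converter the wrapper reduces to a plain division, which is why the test below feeds [1, 1] through a converter built with logit_scale: 2.0 and expects [.5, .5].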
research/object_detection/builders/post_processing_builder_test.py

@@ -48,7 +48,31 @@ class PostProcessingBuilderTest(tf.test.TestCase):
    post_processing_config = post_processing_pb2.PostProcessing()
    text_format.Merge(post_processing_text_proto, post_processing_config)
    _, score_converter = post_processing_builder.build(post_processing_config)
-    self.assertEqual(score_converter, tf.identity)
+    self.assertEqual(score_converter.__name__, 'identity_with_logit_scale')
+
+    inputs = tf.constant([1, 1], tf.float32)
+    outputs = score_converter(inputs)
+    with self.test_session() as sess:
+      converted_scores = sess.run(outputs)
+      expected_converted_scores = sess.run(inputs)
+      self.assertAllClose(converted_scores, expected_converted_scores)
+
+  def test_build_identity_score_converter_with_logit_scale(self):
+    post_processing_text_proto = """
+      score_converter: IDENTITY
+      logit_scale: 2.0
+    """
+    post_processing_config = post_processing_pb2.PostProcessing()
+    text_format.Merge(post_processing_text_proto, post_processing_config)
+    _, score_converter = post_processing_builder.build(post_processing_config)
+    self.assertEqual(score_converter.__name__, 'identity_with_logit_scale')
+
+    inputs = tf.constant([1, 1], tf.float32)
+    outputs = score_converter(inputs)
+    with self.test_session() as sess:
+      converted_scores = sess.run(outputs)
+      expected_converted_scores = sess.run(tf.constant([.5, .5], tf.float32))
+      self.assertAllClose(converted_scores, expected_converted_scores)
+
  def test_build_sigmoid_score_converter(self):
    post_processing_text_proto = """

@@ -57,7 +81,7 @@ class PostProcessingBuilderTest(tf.test.TestCase):
    post_processing_config = post_processing_pb2.PostProcessing()
    text_format.Merge(post_processing_text_proto, post_processing_config)
    _, score_converter = post_processing_builder.build(post_processing_config)
-    self.assertEqual(score_converter, tf.sigmoid)
+    self.assertEqual(score_converter.__name__, 'sigmoid_with_logit_scale')
 
  def test_build_softmax_score_converter(self):
    post_processing_text_proto = """

@@ -66,7 +90,17 @@ class PostProcessingBuilderTest(tf.test.TestCase):
    post_processing_config = post_processing_pb2.PostProcessing()
    text_format.Merge(post_processing_text_proto, post_processing_config)
    _, score_converter = post_processing_builder.build(post_processing_config)
-    self.assertEqual(score_converter, tf.nn.softmax)
+    self.assertEqual(score_converter.__name__, 'softmax_with_logit_scale')
+
+  def test_build_softmax_score_converter_with_temperature(self):
+    post_processing_text_proto = """
+      score_converter: SOFTMAX
+      logit_scale: 2.0
+    """
+    post_processing_config = post_processing_pb2.PostProcessing()
+    text_format.Merge(post_processing_text_proto, post_processing_config)
+    _, score_converter = post_processing_builder.build(post_processing_config)
+    self.assertEqual(score_converter.__name__, 'softmax_with_logit_scale')
 
 if __name__ == '__main__':
research/object_detection/core/post_processing.py

@@ -76,8 +76,6 @@ def multiclass_non_max_suppression(boxes,
     a BoxList holding M boxes with a rank-1 scores field representing
     corresponding scores for each box with scores sorted in decreasing order
     and a rank-1 classes field representing a class label for each box.
-    If masks, keypoints, keypoint_heatmaps is not None, the boxlist will
-    contain masks, keypoints, keypoint_heatmaps corresponding to boxes.
 
   Raises:
     ValueError: if iou_thresh is not in [0, 1] or if input boxlist does not have

@@ -174,6 +172,7 @@ def batch_multiclass_non_max_suppression(boxes,
                                          change_coordinate_frame=False,
                                          num_valid_boxes=None,
                                          masks=None,
+                                         additional_fields=None,
                                          scope=None,
                                          parallel_iterations=32):
   """Multi-class version of non maximum suppression that operates on a batch.

@@ -208,6 +207,8 @@ def batch_multiclass_non_max_suppression(boxes,
     masks: (optional) a [batch_size, num_anchors, q, mask_height, mask_width]
       float32 tensor containing box masks. `q` can be either number of classes
       or 1 depending on whether a separate mask is predicted per class.
+    additional_fields: (optional) If not None, a dictionary that maps keys to
+      tensors whose dimensions are [batch_size, num_anchors, ...].
     scope: tf scope name.
     parallel_iterations: (optional) number of batch items to process in
       parallel.

@@ -223,9 +224,13 @@ def batch_multiclass_non_max_suppression(boxes,
      [batch_size, max_detections, mask_height, mask_width] float32 tensor
      containing masks for each selected box. This is set to None if input
      `masks` is None.
+    'nmsed_additional_fields': (optional) a dictionary of
+      [batch_size, max_detections, ...] float32 tensors corresponding to the
+      tensors specified in the input `additional_fields`. This is not returned
+      if input `additional_fields` is None.
    'num_detections': A [batch_size] int32 tensor indicating the number of
-      valid detections per batch item. Only the top num_detections[i] entries in
-      nms_boxes[i], nms_scores[i] and nms_class[i] are valid. the rest of the
+      valid detections per batch item. Only the top num_detections[i] entries in
+      nms_boxes[i], nms_scores[i] and nms_class[i] are valid. The rest of the
      entries are zero paddings.
 
   Raises:

@@ -239,6 +244,7 @@ def batch_multiclass_non_max_suppression(boxes,
                      'to the third dimension of scores')
 
   original_masks = masks
+  original_additional_fields = additional_fields
   with tf.name_scope(scope, 'BatchMultiClassNonMaxSuppression'):
     boxes_shape = boxes.shape
     batch_size = boxes_shape[0].value

@@ -255,15 +261,61 @@ def batch_multiclass_non_max_suppression(boxes,
      num_valid_boxes = tf.ones([batch_size], dtype=tf.int32) * num_anchors
 
    # If masks aren't provided, create dummy masks so we can only have one copy
-    # of single_image_nms_fn and discard the dummy masks after map_fn.
+    # of _single_image_nms_fn and discard the dummy masks after map_fn.
    if masks is None:
      masks_shape = tf.stack([batch_size, num_anchors, 1, 0, 0])
      masks = tf.zeros(masks_shape)
 
-    def single_image_nms_fn(args):
-      """Runs NMS on a single image and returns padded output."""
-      (per_image_boxes, per_image_scores, per_image_masks,
-       per_image_num_valid_boxes) = args
+    if additional_fields is None:
+      additional_fields = {}
+
+    def _single_image_nms_fn(args):
+      """Runs NMS on a single image and returns padded output.
+
+      Args:
+        args: A list of tensors consisting of the following:
+          per_image_boxes - A [num_anchors, q, 4] float32 tensor containing
+            detections. If `q` is 1 then same boxes are used for all classes
+            otherwise, if `q` is equal to number of classes, class-specific
+            boxes are used.
+          per_image_scores - A [num_anchors, num_classes] float32 tensor
+            containing the scores for each of the `num_anchors` detections.
+          per_image_masks - A [num_anchors, q, mask_height, mask_width] float32
+            tensor containing box masks. `q` can be either number of classes
+            or 1 depending on whether a separate mask is predicted per class.
+          per_image_additional_fields - (optional) A variable number of float32
+            tensors each with size [num_anchors, ...].
+          per_image_num_valid_boxes - A tensor of type `int32`. A 1-D tensor of
+            shape [batch_size] representing the number of valid boxes to be
+            considered for each image in the batch. This parameter allows for
+            ignoring zero paddings.
+
+      Returns:
+        'nmsed_boxes': A [max_detections, 4] float32 tensor containing the
+          non-max suppressed boxes.
+        'nmsed_scores': A [max_detections] float32 tensor containing the scores
+          for the boxes.
+        'nmsed_classes': A [max_detections] float32 tensor containing the class
+          for boxes.
+        'nmsed_masks': (optional) a [max_detections, mask_height, mask_width]
+          float32 tensor containing masks for each selected box. This is set to
+          None if input `masks` is None.
+        'nmsed_additional_fields': (optional) A variable number of float32
+          tensors each with size [max_detections, ...] corresponding to the
+          input `per_image_additional_fields`.
+        'num_detections': A [batch_size] int32 tensor indicating the number of
+          valid detections per batch item. Only the top num_detections[i]
+          entries in nms_boxes[i], nms_scores[i] and nms_class[i] are valid. The
+          rest of the entries are zero paddings.
+      """
+      per_image_boxes = args[0]
+      per_image_scores = args[1]
+      per_image_masks = args[2]
+      per_image_additional_fields = {
+          key: value
+          for key, value in zip(additional_fields, args[3:-1])
+      }
+      per_image_num_valid_boxes = args[-1]
      per_image_boxes = tf.reshape(
          tf.slice(per_image_boxes, 3 * [0],
                   tf.stack([per_image_num_valid_boxes, -1, -1])), [-1, q, 4])

@@ -271,12 +323,21 @@ def batch_multiclass_non_max_suppression(boxes,
          tf.slice(per_image_scores, [0, 0],
                   tf.stack([per_image_num_valid_boxes, -1])),
          [-1, num_classes])
      per_image_masks = tf.reshape(
          tf.slice(per_image_masks, 4 * [0],
                   tf.stack([per_image_num_valid_boxes, -1, -1, -1])),
          [-1, q, per_image_masks.shape[2].value,
           per_image_masks.shape[3].value])
+      if per_image_additional_fields is not None:
+        for key, tensor in per_image_additional_fields.items():
+          additional_field_shape = tensor.get_shape()
+          additional_field_dim = len(additional_field_shape)
+          per_image_additional_fields[key] = tf.reshape(
+              tf.slice(per_image_additional_fields[key],
+                       additional_field_dim * [0],
+                       tf.stack([per_image_num_valid_boxes] +
+                                (additional_field_dim - 1) * [-1])),
+              [-1] + [dim.value for dim in additional_field_shape[1:]])
      nmsed_boxlist = multiclass_non_max_suppression(
          per_image_boxes,
          per_image_scores,

@@ -284,9 +345,10 @@ def batch_multiclass_non_max_suppression(boxes,
          iou_thresh,
          max_size_per_class,
          max_total_size,
-          masks=per_image_masks,
          clip_window=clip_window,
-          change_coordinate_frame=change_coordinate_frame)
+          change_coordinate_frame=change_coordinate_frame,
+          masks=per_image_masks,
+          additional_fields=per_image_additional_fields)
      padded_boxlist = box_list_ops.pad_or_clip_box_list(nmsed_boxlist,
                                                         max_total_size)
      num_detections = nmsed_boxlist.num_boxes()

@@ -294,19 +356,40 @@ def batch_multiclass_non_max_suppression(boxes,
      nmsed_scores = padded_boxlist.get_field(fields.BoxListFields.scores)
      nmsed_classes = padded_boxlist.get_field(fields.BoxListFields.classes)
      nmsed_masks = padded_boxlist.get_field(fields.BoxListFields.masks)
-      return [nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
-              num_detections]
-
-    (batch_nmsed_boxes, batch_nmsed_scores, batch_nmsed_classes,
-     batch_nmsed_masks, batch_num_detections) = tf.map_fn(
-         single_image_nms_fn,
-         elems=[boxes, scores, masks, num_valid_boxes],
-         dtype=[tf.float32, tf.float32, tf.float32, tf.float32, tf.int32],
-         parallel_iterations=parallel_iterations)
+      nmsed_additional_fields = [
+          padded_boxlist.get_field(key) for key in per_image_additional_fields
+      ]
+      return ([nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks] +
+              nmsed_additional_fields + [num_detections])
+
+    num_additional_fields = 0
+    if additional_fields is not None:
+      num_additional_fields = len(additional_fields)
+    num_nmsed_outputs = 4 + num_additional_fields
+
+    batch_outputs = tf.map_fn(
+        _single_image_nms_fn,
+        elems=([boxes, scores, masks] + list(additional_fields.values()) +
+               [num_valid_boxes]),
+        dtype=(num_nmsed_outputs * [tf.float32] + [tf.int32]),
+        parallel_iterations=parallel_iterations)
+
+    batch_nmsed_boxes = batch_outputs[0]
+    batch_nmsed_scores = batch_outputs[1]
+    batch_nmsed_classes = batch_outputs[2]
+    batch_nmsed_masks = batch_outputs[3]
+    batch_nmsed_additional_fields = {
+        key: value
+        for key, value in zip(additional_fields, batch_outputs[4:-1])
+    }
+    batch_num_detections = batch_outputs[-1]
+
    if original_masks is None:
      batch_nmsed_masks = None
 
+    if original_additional_fields is None:
+      batch_nmsed_additional_fields = None
+
    return (batch_nmsed_boxes, batch_nmsed_scores, batch_nmsed_classes,
-            batch_nmsed_masks, batch_num_detections)
+            batch_nmsed_masks, batch_nmsed_additional_fields,
+            batch_num_detections)
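The constraint the new _single_image_nms_fn works around is that tf.map_fn maps over a flat list of tensors, not dicts: `additional_fields` is flattened into `elems` in key order, and the per-image dict is rebuilt from the positional slice args[3:-1]. A small framework-free sketch of that pack/unpack convention (illustrative only; the helper names here are hypothetical):

  def pack_elems(boxes, scores, masks, additional_fields, num_valid_boxes):
    # Flatten the dict values into the positional elems list, in key order.
    return ([boxes, scores, masks] + list(additional_fields.values()) +
            [num_valid_boxes])

  def unpack_args(args, additional_fields):
    # Rebuild the dict by zipping the same key order against the middle slice.
    per_image_additional_fields = dict(zip(additional_fields, args[3:-1]))
    return args[0], args[1], args[2], per_image_additional_fields, args[-1]

  elems = pack_elems('B', 'S', 'M', {'keypoints': 'K'}, 7)
  print(unpack_args(elems, {'keypoints': None}))
  # ('B', 'S', 'M', {'keypoints': 'K'}, 7)

The same convention fixes the `dtype` argument above: num_nmsed_outputs * [tf.float32] + [tf.int32] declares four float outputs plus one per additional field, followed by the int32 detection count.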
research/object_detection/core/post_processing_test.py

@@ -497,11 +497,13 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
    exp_nms_classes = [[0, 0, 1, 0]]
 
    (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
-     num_detections) = post_processing.batch_multiclass_non_max_suppression(
+     nmsed_additional_fields,
+     num_detections) = post_processing.batch_multiclass_non_max_suppression(
        boxes, scores, score_thresh, iou_thresh,
        max_size_per_class=max_output_size, max_total_size=max_output_size)
 
    self.assertIsNone(nmsed_masks)
+    self.assertIsNone(nmsed_additional_fields)
 
    with self.test_session() as sess:
      (nmsed_boxes, nmsed_scores, nmsed_classes,

@@ -544,11 +546,13 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
                                [1, 0, 0, 0]])
 
    (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
-     num_detections) = post_processing.batch_multiclass_non_max_suppression(
+     nmsed_additional_fields,
+     num_detections) = post_processing.batch_multiclass_non_max_suppression(
        boxes, scores, score_thresh, iou_thresh,
        max_size_per_class=max_output_size, max_total_size=max_output_size)
 
    self.assertIsNone(nmsed_masks)
+    self.assertIsNone(nmsed_additional_fields)
    # Check static shapes
    self.assertAllEqual(nmsed_boxes.shape.as_list(), exp_nms_corners.shape)

@@ -616,11 +620,13 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
                            [[0, 0], [0, 0]]]])
 
    (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
-     num_detections) = post_processing.batch_multiclass_non_max_suppression(
+     nmsed_additional_fields,
+     num_detections) = post_processing.batch_multiclass_non_max_suppression(
        boxes, scores, score_thresh, iou_thresh,
        max_size_per_class=max_output_size, max_total_size=max_output_size,
        masks=masks)
 
+    self.assertIsNone(nmsed_additional_fields)
    # Check static shapes
    self.assertAllEqual(nmsed_boxes.shape.as_list(), exp_nms_corners.shape)
    self.assertAllEqual(nmsed_scores.shape.as_list(), exp_nms_scores.shape)

@@ -639,6 +645,91 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
      self.assertAllClose(num_detections, [2, 3])
      self.assertAllClose(nmsed_masks, exp_nms_masks)
 
+  def test_batch_multiclass_nms_with_additional_fields(self):
+    boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
+                          [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
+                          [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
+                          [[0, 10, 1, 11], [0, 10, 1, 11]]],
+                         [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
+                          [[0, 100, 1, 101], [0, 100, 1, 101]],
+                          [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
+                          [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
+                        tf.float32)
+    scores = tf.constant([[[.9, 0.01], [.75, 0.05],
+                           [.6, 0.01], [.95, 0]],
+                          [[.5, 0.01], [.3, 0.01],
+                           [.01, .85], [.01, .5]]])
+    additional_fields = {
+        'keypoints': tf.constant(
+            [[[[6, 7], [8, 9]],
+              [[0, 1], [2, 3]],
+              [[0, 0], [0, 0]],
+              [[0, 0], [0, 0]]],
+             [[[13, 14], [15, 16]],
+              [[8, 9], [10, 11]],
+              [[10, 11], [12, 13]],
+              [[0, 0], [0, 0]]]],
+            tf.float32)
+    }
+    score_thresh = 0.1
+    iou_thresh = .5
+    max_output_size = 4
+    exp_nms_corners = np.array([[[0, 10, 1, 11],
+                                 [0, 0, 1, 1],
+                                 [0, 0, 0, 0],
+                                 [0, 0, 0, 0]],
+                                [[0, 999, 2, 1004],
+                                 [0, 10.1, 1, 11.1],
+                                 [0, 100, 1, 101],
+                                 [0, 0, 0, 0]]])
+    exp_nms_scores = np.array([[.95, .9, 0, 0],
+                               [.85, .5, .3, 0]])
+    exp_nms_classes = np.array([[0, 0, 0, 0],
+                                [1, 0, 0, 0]])
+    exp_nms_additional_fields = {
+        'keypoints': np.array([[[[0, 0], [0, 0]],
+                                [[6, 7], [8, 9]],
+                                [[0, 0], [0, 0]],
+                                [[0, 0], [0, 0]]],
+                               [[[10, 11], [12, 13]],
+                                [[13, 14], [15, 16]],
+                                [[8, 9], [10, 11]],
+                                [[0, 0], [0, 0]]]])
+    }
+
+    (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
+     nmsed_additional_fields,
+     num_detections) = post_processing.batch_multiclass_non_max_suppression(
+         boxes, scores, score_thresh, iou_thresh,
+         max_size_per_class=max_output_size, max_total_size=max_output_size,
+         additional_fields=additional_fields)
+
+    self.assertIsNone(nmsed_masks)
+    # Check static shapes
+    self.assertAllEqual(nmsed_boxes.shape.as_list(), exp_nms_corners.shape)
+    self.assertAllEqual(nmsed_scores.shape.as_list(), exp_nms_scores.shape)
+    self.assertAllEqual(nmsed_classes.shape.as_list(), exp_nms_classes.shape)
+    self.assertEqual(len(nmsed_additional_fields),
+                     len(exp_nms_additional_fields))
+    for key in exp_nms_additional_fields:
+      self.assertAllEqual(nmsed_additional_fields[key].shape.as_list(),
+                          exp_nms_additional_fields[key].shape)
+    self.assertEqual(num_detections.shape.as_list(), [2])
+
+    with self.test_session() as sess:
+      (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_additional_fields,
+       num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,
+                                   nmsed_additional_fields, num_detections])
+
+      self.assertAllClose(nmsed_boxes, exp_nms_corners)
+      self.assertAllClose(nmsed_scores, exp_nms_scores)
+      self.assertAllClose(nmsed_classes, exp_nms_classes)
+      for key in exp_nms_additional_fields:
+        self.assertAllClose(nmsed_additional_fields[key],
+                            exp_nms_additional_fields[key])
+      self.assertAllClose(num_detections, [2, 3])
+
  def test_batch_multiclass_nms_with_dynamic_batch_size(self):
    boxes_placeholder = tf.placeholder(tf.float32, shape=(None, None, 2, 4))
    scores_placeholder = tf.placeholder(tf.float32, shape=(None, None, 2))

@@ -690,11 +781,13 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
                            [[0, 0], [0, 0]]]])
 
    (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
-     num_detections) = post_processing.batch_multiclass_non_max_suppression(
+     nmsed_additional_fields,
+     num_detections) = post_processing.batch_multiclass_non_max_suppression(
        boxes_placeholder, scores_placeholder, score_thresh, iou_thresh,
        max_size_per_class=max_output_size, max_total_size=max_output_size,
        masks=masks_placeholder)
 
+    self.assertIsNone(nmsed_additional_fields)
    # Check static shapes
    self.assertAllEqual(nmsed_boxes.shape.as_list(), [None, 4, 4])
    self.assertAllEqual(nmsed_scores.shape.as_list(), [None, 4])

@@ -765,11 +858,14 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
                        [[0, 0], [0, 0]]]]
 
    (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
-     num_detections) = post_processing.batch_multiclass_non_max_suppression(
+     nmsed_additional_fields,
+     num_detections) = post_processing.batch_multiclass_non_max_suppression(
        boxes, scores, score_thresh, iou_thresh,
        max_size_per_class=max_output_size, max_total_size=max_output_size,
        num_valid_boxes=num_valid_boxes, masks=masks)
 
+    self.assertIsNone(nmsed_additional_fields)
+
    with self.test_session() as sess:
      (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
       num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,

@@ -780,6 +876,84 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
      self.assertAllClose(num_detections, [1, 1])
      self.assertAllClose(nmsed_masks, exp_nms_masks)
 
+  def test_batch_multiclass_nms_with_additional_fields_and_num_valid_boxes(
+      self):
+    boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
+                          [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
+                          [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
+                          [[0, 10, 1, 11], [0, 10, 1, 11]]],
+                         [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
+                          [[0, 100, 1, 101], [0, 100, 1, 101]],
+                          [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
+                          [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
+                        tf.float32)
+    scores = tf.constant([[[.9, 0.01], [.75, 0.05],
+                           [.6, 0.01], [.95, 0]],
+                          [[.5, 0.01], [.3, 0.01],
+                           [.01, .85], [.01, .5]]])
+    additional_fields = {
+        'keypoints': tf.constant(
+            [[[[6, 7], [8, 9]],
+              [[0, 1], [2, 3]],
+              [[0, 0], [0, 0]],
+              [[0, 0], [0, 0]]],
+             [[[13, 14], [15, 16]],
+              [[8, 9], [10, 11]],
+              [[10, 11], [12, 13]],
+              [[0, 0], [0, 0]]]],
+            tf.float32)
+    }
+    num_valid_boxes = tf.constant([1, 1], tf.int32)
+    score_thresh = 0.1
+    iou_thresh = .5
+    max_output_size = 4
+    exp_nms_corners = [[[0, 0, 1, 1],
+                        [0, 0, 0, 0],
+                        [0, 0, 0, 0],
+                        [0, 0, 0, 0]],
+                       [[0, 10.1, 1, 11.1],
+                        [0, 0, 0, 0],
+                        [0, 0, 0, 0],
+                        [0, 0, 0, 0]]]
+    exp_nms_scores = [[.9, 0, 0, 0],
+                      [.5, 0, 0, 0]]
+    exp_nms_classes = [[0, 0, 0, 0],
+                       [0, 0, 0, 0]]
+    exp_nms_additional_fields = {
+        'keypoints': np.array([[[[6, 7], [8, 9]],
+                                [[0, 0], [0, 0]],
+                                [[0, 0], [0, 0]],
+                                [[0, 0], [0, 0]]],
+                               [[[13, 14], [15, 16]],
+                                [[0, 0], [0, 0]],
+                                [[0, 0], [0, 0]],
+                                [[0, 0], [0, 0]]]])
+    }
+
+    (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
+     nmsed_additional_fields,
+     num_detections) = post_processing.batch_multiclass_non_max_suppression(
+         boxes, scores, score_thresh, iou_thresh,
+         max_size_per_class=max_output_size, max_total_size=max_output_size,
+         num_valid_boxes=num_valid_boxes,
+         additional_fields=additional_fields)
+
+    self.assertIsNone(nmsed_masks)
+
+    with self.test_session() as sess:
+      (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_additional_fields,
+       num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,
+                                   nmsed_additional_fields, num_detections])
+
+      self.assertAllClose(nmsed_boxes, exp_nms_corners)
+      self.assertAllClose(nmsed_scores, exp_nms_scores)
+      self.assertAllClose(nmsed_classes, exp_nms_classes)
+      for key in exp_nms_additional_fields:
+        self.assertAllClose(nmsed_additional_fields[key],
+                            exp_nms_additional_fields[key])
+      self.assertAllClose(num_detections, [1, 1])
+
 if __name__ == '__main__':
   tf.test.main()
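One way to read exp_nms_additional_fields in the tests above: NMS keeps boxes in descending score order and gathers every additional field with the same kept indices, so fields stay aligned with their boxes and share the zero padding. A small NumPy sketch (illustrative only) for the first image of test_batch_multiclass_nms_with_additional_fields, where anchors 3 (score .95) and 0 (score .9) survive:

  import numpy as np

  keypoints = np.array([[[6, 7], [8, 9]],   # anchor 0
                        [[0, 1], [2, 3]],   # anchor 1
                        [[0, 0], [0, 0]],   # anchor 2
                        [[0, 0], [0, 0]]])  # anchor 3
  keep = [3, 0]  # indices kept by NMS, sorted by score (.95, then .9)
  padded = np.zeros((4, 2, 2))
  padded[:len(keep)] = keypoints[keep]
  print(padded)  # matches exp_nms_additional_fields['keypoints'][0]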
research/object_detection/meta_architectures/BUILD

@@ -18,6 +18,7 @@ py_library(
        "//tensorflow_models/object_detection/core:model",
        "//tensorflow_models/object_detection/core:target_assigner",
        "//tensorflow_models/object_detection/utils:shape_utils",
+        "//tensorflow_models/object_detection/utils:visualization_utils",
    ],
)
research/object_detection/meta_architectures/faster_rcnn_meta_arch.py  (+220, -76)

(This diff is collapsed.)
research/object_detection/meta_architectures/faster_rcnn_meta_arch_test.py

@@ -15,6 +15,7 @@
 """Tests for object_detection.meta_architectures.faster_rcnn_meta_arch."""
 
+import numpy as np
 import tensorflow as tf
 
 from object_detection.meta_architectures import faster_rcnn_meta_arch_test_lib

@@ -46,10 +47,10 @@ class FasterRCNNMetaArchTest(
    mask_height = 2
    mask_width = 2
-    mask_predictions = .6 * tf.ones(
+    mask_predictions = 30. * tf.ones(
        [total_num_padded_proposals, model.num_classes,
         mask_height, mask_width], dtype=tf.float32)
-    exp_detection_masks = [[[[1, 1], [1, 1]],
+    exp_detection_masks = np.array([[[[1, 1], [1, 1]],
                            [[1, 1], [1, 1]],
                            [[1, 1], [1, 1]],
                            [[1, 1], [1, 1]],

@@ -58,7 +59,7 @@ class FasterRCNNMetaArchTest(
                            [[1, 1], [1, 1]],
                            [[1, 1], [1, 1]],
                            [[1, 1], [1, 1]],
-                            [[0, 0], [0, 0]]]]
+                            [[0, 0], [0, 0]]]])
    detections = model.postprocess({
        'refined_box_encodings': refined_box_encodings,

@@ -79,6 +80,17 @@ class FasterRCNNMetaArchTest(
    self.assertAllClose(detections_out['detection_masks'],
                        exp_detection_masks)
 
+  def _get_box_classifier_features_shape(self,
+                                         image_size,
+                                         batch_size,
+                                         max_num_proposals,
+                                         initial_crop_size,
+                                         maxpool_stride,
+                                         num_features):
+    return (batch_size * max_num_proposals,
+            initial_crop_size / maxpool_stride,
+            initial_crop_size / maxpool_stride,
+            num_features)
+
 if __name__ == '__main__':
   tf.test.main()
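A hedged reading of the .6 to 30. change in mask_predictions above (it assumes, based on the expected output, that postprocess squashes mask logits with a sigmoid): a logit of 30 saturates the converted mask values at 1.0, letting the test compare against an exact array of ones, while .6 would not.

  import math

  # Hypothetical saturation check, not code from the commit.
  for logit in (.6, 30.):
    print(logit, 1. / (1. + math.exp(-logit)))
  # .6 -> ~0.646, 30. -> ~1.0 (within float tolerance)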
research/object_detection/meta_architectures/faster_rcnn_meta_arch_test_lib.py  (+268, -81)

(This diff is collapsed.)
research/object_detection/meta_architectures/rfcn_meta_arch.py

@@ -73,6 +73,7 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
               second_stage_score_conversion_fn,
               second_stage_localization_loss_weight,
               second_stage_classification_loss_weight,
+               second_stage_classification_loss,
               hard_example_miner,
               parallel_iterations=16):
    """RFCNMetaArch Constructor.

@@ -149,6 +150,8 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
        used to convert logits to probabilities.
      second_stage_localization_loss_weight: A float
      second_stage_classification_loss_weight: A float
+      second_stage_classification_loss: A string indicating which loss function
+        to use, supports 'softmax' and 'sigmoid'.
      hard_example_miner: A losses.HardExampleMiner object (can be None).
      parallel_iterations: (Optional) The number of iterations allowed to run
        in parallel for calls to tf.map_fn.

@@ -185,6 +188,8 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
        second_stage_score_conversion_fn,
        second_stage_localization_loss_weight,
        second_stage_classification_loss_weight,
+        second_stage_classification_loss,
+        1.0,  # second stage mask prediction loss weight isn't used in R-FCN.
        hard_example_miner,
        parallel_iterations)

@@ -198,10 +203,10 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
    """Predicts the output tensors from 2nd stage of FasterRCNN.
 
    Args:
-      rpn_box_encodings: 3-D float tensor of shape
+      rpn_box_encodings: 4-D float tensor of shape
        [batch_size, num_valid_anchors, self._box_coder.code_size] containing
        predicted boxes.
-      rpn_objectness_predictions_with_background: 3-D float tensor of shape
+      rpn_objectness_predictions_with_background: 2-D float tensor of shape
        [batch_size, num_valid_anchors, 2] containing class
        predictions (logits) for each of the anchors. Note that this
        tensor *includes* background class predictions (at class index 0).

@@ -232,6 +237,15 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
        4) proposal_boxes: A float32 tensor of shape
          [batch_size, self.max_num_proposals, 4] representing
          decoded proposal bounding boxes (in absolute coordinates).
+        5) proposal_boxes_normalized: A float32 tensor of shape
+          [batch_size, self.max_num_proposals, 4] representing decoded proposal
+          bounding boxes (in normalized coordinates). Can be used to override
+          the boxes proposed by the RPN, thus enabling one to extract box
+          classification and prediction for externally selected areas of the
+          image.
+        6) box_classifier_features: a 4-D float32 tensor, of shape
+          [batch_size, feature_map_height, feature_map_width, depth],
+          representing the box classifier features.
    """
    proposal_boxes_normalized, _, num_proposals = self._postprocess_rpn(
        rpn_box_encodings, rpn_objectness_predictions_with_background,

@@ -263,5 +277,7 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
            class_predictions_with_background,
        'num_proposals': num_proposals,
        'proposal_boxes': absolute_proposal_boxes,
+        'box_classifier_features': box_classifier_features,
+        'proposal_boxes_normalized': proposal_boxes_normalized,
    }
    return prediction_dict
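A hypothetical usage sketch of the two new prediction_dict entries (the `model` and `preprocessed_inputs` names are assumed for illustration): they expose the RPN's normalized proposals and the shared feature map, so a caller can run box classification over externally selected regions.

  # prediction_dict as returned by the R-FCN second stage.
  prediction_dict = model.predict(preprocessed_inputs)  # hypothetical call site

  # [batch_size, self.max_num_proposals, 4], normalized coordinates.
  proposals = prediction_dict['proposal_boxes_normalized']

  # [batch_size, feature_map_height, feature_map_width, depth].
  features = prediction_dict['box_classifier_features']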
research/object_detection/meta_architectures/rfcn_meta_arch_test.py

@@ -51,6 +51,15 @@ class RFCNMetaArchTest(
    return rfcn_meta_arch.RFCNMetaArch(
        second_stage_rfcn_box_predictor=box_predictor, **common_kwargs)
 
+  def _get_box_classifier_features_shape(self,
+                                         image_size,
+                                         batch_size,
+                                         max_num_proposals,
+                                         initial_crop_size,
+                                         maxpool_stride,
+                                         num_features):
+    return (batch_size, image_size, image_size, num_features)
+
 if __name__ == '__main__':
   tf.test.main()
research/object_detection/meta_architectures/ssd_meta_arch.py

@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-
 """SSD Meta-architecture definition.
 
 General tensorflow implementation of convolutional Multibox/SSD detection

@@ -29,6 +28,7 @@ from object_detection.core import model
 from object_detection.core import standard_fields as fields
 from object_detection.core import target_assigner
 from object_detection.utils import shape_utils
+from object_detection.utils import visualization_utils
 
 slim = tf.contrib.slim

@@ -37,13 +37,34 @@ class SSDFeatureExtractor(object):
   """SSD Feature Extractor definition."""
 
-  def __init__(self, depth_multiplier, min_depth, conv_hyperparams,
-               reuse_weights=None):
+  def __init__(self,
+               is_training,
+               depth_multiplier,
+               min_depth,
+               pad_to_multiple,
+               conv_hyperparams,
+               batch_norm_trainable=True,
+               reuse_weights=None):
+    """Constructor.
+
+    Args:
+      is_training: whether the network is in training mode.
+      depth_multiplier: float depth multiplier for feature extractor.
+      min_depth: minimum feature extractor depth.
+      pad_to_multiple: the nearest multiple to zero pad the input height and
+        width dimensions to.
+      conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
+      batch_norm_trainable: Whether to update batch norm parameters during
+        training or not. When training with a small batch size
+        (e.g. 1), it is desirable to disable batch norm update and use
+        pretrained batch norm params.
+      reuse_weights: whether to reuse variables. Default is None.
+    """
+    self._is_training = is_training
    self._depth_multiplier = depth_multiplier
    self._min_depth = min_depth
+    self._pad_to_multiple = pad_to_multiple
    self._conv_hyperparams = conv_hyperparams
+    self._batch_norm_trainable = batch_norm_trainable
    self._reuse_weights = reuse_weights
 
   @abstractmethod

@@ -101,9 +122,9 @@ class SSDMetaArch(model.DetectionModel):
               add_summaries=True):
    """SSDMetaArch Constructor.
 
    TODO: group NMS parameters + score converter into a class and loss
    parameters into a class and write config protos for postprocessing
    and losses.
 
    Args:
      is_training: A boolean indicating whether the training version of the

@@ -204,8 +225,8 @@ class SSDMetaArch(model.DetectionModel):
    if inputs.dtype is not tf.float32:
      raise ValueError('`preprocess` expects a tf.float32 tensor')
    with tf.name_scope('Preprocessor'):
-      # TODO: revisit whether to always use batch size as the number of
-      # parallel iterations vs allow for dynamic batching.
+      # TODO: revisit whether to always use batch size as the number of parallel
+      # iterations vs allow for dynamic batching.
      resized_inputs = tf.map_fn(self._image_resizer_fn,
                                 elems=inputs,
                                 dtype=tf.float32)

@@ -226,7 +247,7 @@ class SSDMetaArch(model.DetectionModel):
    Returns:
      prediction_dict: a dictionary holding "raw" prediction tensors:
-        1) box_encodings: 3-D float tensor of shape [batch_size, num_anchors,
+        1) box_encodings: 4-D float tensor of shape [batch_size, num_anchors,
          box_code_dimension] containing predicted boxes.
        2) class_predictions_with_background: 3-D float tensor of shape
          [batch_size, num_anchors, num_classes+1] containing class predictions

@@ -234,19 +255,26 @@ class SSDMetaArch(model.DetectionModel):
          background class predictions (at class index 0).
        3) feature_maps: a list of tensors where the ith tensor has shape
          [batch, height_i, width_i, depth_i].
+        4) anchors: 2-D float tensor of shape [num_anchors, 4] containing
+          the generated anchors in normalized coordinates.
    """
    with tf.variable_scope(None, self._extract_features_scope,
                           [preprocessed_inputs]):
      feature_maps = self._feature_extractor.extract_features(
          preprocessed_inputs)
    feature_map_spatial_dims = self._get_feature_map_spatial_dims(feature_maps)
-    self._anchors = self._anchor_generator.generate(feature_map_spatial_dims)
+    image_shape = tf.shape(preprocessed_inputs)
+    self._anchors = self._anchor_generator.generate(
+        feature_map_spatial_dims,
+        im_height=image_shape[1],
+        im_width=image_shape[2])
    (box_encodings, class_predictions_with_background
    ) = self._add_box_predictions_to_feature_maps(feature_maps)
    predictions_dict = {
        'box_encodings': box_encodings,
        'class_predictions_with_background': class_predictions_with_background,
-        'feature_maps': feature_maps
+        'feature_maps': feature_maps,
+        'anchors': self._anchors.get()
    }
    return predictions_dict

@@ -351,9 +379,11 @@ class SSDMetaArch(model.DetectionModel):
    Returns:
      detections: a dictionary containing the following fields
-        detection_boxes: [batch, max_detection, 4]
+        detection_boxes: [batch, max_detections, 4]
        detection_scores: [batch, max_detections]
        detection_classes: [batch, max_detections]
+        detection_keypoints: [batch, max_detections, num_keypoints, 2] (if
+          encoded in the prediction_dict 'box_encodings')
        num_detections: [batch]
 
    Raises:
      ValueError: if prediction_dict does not contain `box_encodings` or

@@ -365,7 +395,7 @@ class SSDMetaArch(model.DetectionModel):
    with tf.name_scope('Postprocessor'):
      box_encodings = prediction_dict['box_encodings']
      class_predictions = prediction_dict['class_predictions_with_background']
-      detection_boxes = self._batch_decode(box_encodings)
+      detection_boxes, detection_keypoints = self._batch_decode(box_encodings)
      detection_boxes = tf.expand_dims(detection_boxes, axis=2)
 
      class_predictions_without_background = tf.slice(class_predictions,

@@ -374,14 +404,25 @@ class SSDMetaArch(model.DetectionModel):
      detection_scores = self._score_conversion_fn(
          class_predictions_without_background)
      clip_window = tf.constant([0, 0, 1, 1], tf.float32)
-      (nmsed_boxes, nmsed_scores, nmsed_classes, _,
-       num_detections) = self._non_max_suppression_fn(detection_boxes,
-                                                      detection_scores,
-                                                      clip_window=clip_window)
-      return {'detection_boxes': nmsed_boxes,
-              'detection_scores': nmsed_scores,
-              'detection_classes': nmsed_classes,
-              'num_detections': tf.to_float(num_detections)}
+      additional_fields = None
+      if detection_keypoints is not None:
+        additional_fields = {
+            fields.BoxListFields.keypoints: detection_keypoints}
+      (nmsed_boxes, nmsed_scores, nmsed_classes, _, nmsed_additional_fields,
+       num_detections) = self._non_max_suppression_fn(
+           detection_boxes,
+           detection_scores,
+           clip_window=clip_window,
+           additional_fields=additional_fields)
+      detection_dict = {'detection_boxes': nmsed_boxes,
+                        'detection_scores': nmsed_scores,
+                        'detection_classes': nmsed_classes,
+                        'num_detections': tf.to_float(num_detections)}
+      if (nmsed_additional_fields is not None and
+          fields.BoxListFields.keypoints in nmsed_additional_fields):
+        detection_dict['detection_keypoints'] = nmsed_additional_fields[
+            fields.BoxListFields.keypoints]
+      return detection_dict
 
  def loss(self, prediction_dict, scope=None):
    """Compute scalar loss tensors with respect to provided groundtruth.

@@ -405,10 +446,14 @@ class SSDMetaArch(model.DetectionModel):
      values.
    """
    with tf.name_scope(scope, 'Loss', prediction_dict.values()):
+      keypoints = None
+      if self.groundtruth_has_field(fields.BoxListFields.keypoints):
+        keypoints = self.groundtruth_lists(fields.BoxListFields.keypoints)
      (batch_cls_targets, batch_cls_weights, batch_reg_targets,
       batch_reg_weights, match_list) = self._assign_targets(
           self.groundtruth_lists(fields.BoxListFields.boxes),
-           self.groundtruth_lists(fields.BoxListFields.classes))
+           self.groundtruth_lists(fields.BoxListFields.classes),
+           keypoints)
 
      if self._add_summaries:
        self._summarize_input(
            self.groundtruth_lists(fields.BoxListFields.boxes), match_list)

@@ -417,35 +462,60 @@ class SSDMetaArch(model.DetectionModel):
      location_losses = self._localization_loss(
          prediction_dict['box_encodings'],
          batch_reg_targets,
          ignore_nan_targets=True,
          weights=batch_reg_weights)
      cls_losses = self._classification_loss(
          prediction_dict['class_predictions_with_background'],
          batch_cls_targets,
          weights=batch_cls_weights)
 
      # Optionally apply hard mining on top of loss values
-      localization_loss = tf.reduce_sum(location_losses)
-      classification_loss = tf.reduce_sum(cls_losses)
      if self._hard_example_miner:
        (localization_loss, classification_loss) = self._apply_hard_mining(
            location_losses, cls_losses, prediction_dict, match_list)
        if self._add_summaries:
          self._hard_example_miner.summarize()
+      else:
+        if self._add_summaries:
+          class_ids = tf.argmax(batch_cls_targets, axis=2)
+          flattened_class_ids = tf.reshape(class_ids, [-1])
+          flattened_classification_losses = tf.reshape(cls_losses, [-1])
+          self._summarize_anchor_classification_loss(
+              flattened_class_ids, flattened_classification_losses)
+        localization_loss = tf.reduce_sum(location_losses)
+        classification_loss = tf.reduce_sum(cls_losses)
 
      # Optionally normalize by number of positive matches
      normalizer = tf.constant(1.0, dtype=tf.float32)
      if self._normalize_loss_by_num_matches:
        normalizer = tf.maximum(tf.to_float(tf.reduce_sum(num_matches)), 1.0)
 
+      with tf.name_scope('localization_loss'):
+        localization_loss = ((self._localization_loss_weight / normalizer) *
+                             localization_loss)
+      with tf.name_scope('classification_loss'):
+        classification_loss = ((self._classification_loss_weight / normalizer)
+                               * classification_loss)
+
      loss_dict = {
-          'localization_loss': (self._localization_loss_weight / normalizer) *
-                               localization_loss,
-          'classification_loss': (self._classification_loss_weight /
-                                  normalizer) * classification_loss
+          'localization_loss': localization_loss,
+          'classification_loss': classification_loss
      }
    return loss_dict
 
+  def _summarize_anchor_classification_loss(self, class_ids, cls_losses):
+    positive_indices = tf.where(tf.greater(class_ids, 0))
+    positive_anchor_cls_loss = tf.squeeze(
+        tf.gather(cls_losses, positive_indices), axis=1)
+    visualization_utils.add_cdf_image_summary(positive_anchor_cls_loss,
+                                              'PositiveAnchorLossCDF')
+    negative_indices = tf.where(tf.equal(class_ids, 0))
+    negative_anchor_cls_loss = tf.squeeze(
+        tf.gather(cls_losses, negative_indices), axis=1)
+    visualization_utils.add_cdf_image_summary(negative_anchor_cls_loss,
+                                              'NegativeAnchorLossCDF')
+
-  def _assign_targets(self, groundtruth_boxes_list, groundtruth_classes_list):
+  def _assign_targets(self, groundtruth_boxes_list, groundtruth_classes_list,
+                      groundtruth_keypoints_list=None):
    """Assign groundtruth targets.
 
    Adds a background class to each one-hot encoding of groundtruth classes

@@ -460,6 +530,8 @@ class SSDMetaArch(model.DetectionModel):
      groundtruth_classes_list: a list of 2-D one-hot (or k-hot) tensors of
        shape [num_boxes, num_classes] containing the class targets with the 0th
        index assumed to map to the first non-background class.
+      groundtruth_keypoints_list: (optional) a list of 3-D tensors of shape
+        [num_boxes, num_keypoints, 2]
 
    Returns:
      batch_cls_targets: a tensor with shape [batch_size, num_anchors,

@@ -480,6 +552,10 @@ class SSDMetaArch(model.DetectionModel):
        tf.pad(one_hot_encoding, [[0, 0], [1, 0]], mode='CONSTANT')
        for one_hot_encoding in groundtruth_classes_list
    ]
+    if groundtruth_keypoints_list is not None:
+      for boxlist, keypoints in zip(
+          groundtruth_boxlists, groundtruth_keypoints_list):
+        boxlist.add_field(fields.BoxListFields.keypoints, keypoints)
    return target_assigner.batch_assign_targets(
        self._target_assigner, self.anchors, groundtruth_boxlists,
        groundtruth_classes_with_background_list)

@@ -544,12 +620,11 @@ class SSDMetaArch(model.DetectionModel):
      mined_cls_loss: a float scalar with sum of classification losses from
        selected hard examples.
    """
-    class_pred_shape = [-1, self.anchors.num_boxes_static(), self.num_classes]
-    class_predictions = tf.reshape(
-        tf.slice(prediction_dict['class_predictions_with_background'],
-                 [0, 0, 1], class_pred_shape), class_pred_shape)
+    class_predictions = tf.slice(
+        prediction_dict['class_predictions_with_background'], [0, 0, 1],
+        [-1, -1, -1])
 
-    decoded_boxes = self._batch_decode(prediction_dict['box_encodings'])
+    decoded_boxes, _ = self._batch_decode(prediction_dict['box_encodings'])
    decoded_box_tensors_list = tf.unstack(decoded_boxes)
    class_prediction_list = tf.unstack(class_predictions)
    decoded_boxlist_list = []

@@ -574,6 +649,9 @@ class SSDMetaArch(model.DetectionModel):
    Returns:
      decoded_boxes: A float32 tensor of shape
        [batch_size, num_anchors, 4] containing the decoded boxes.
+      decoded_keypoints: A float32 tensor of shape
+        [batch_size, num_anchors, num_keypoints, 2] containing the decoded
+        keypoints if present in the input `box_encodings`, None otherwise.
    """
    combined_shape = shape_utils.combined_static_and_dynamic_shape(
        box_encodings)

@@ -581,13 +659,21 @@ class SSDMetaArch(model.DetectionModel):
    tiled_anchor_boxes = tf.tile(
        tf.expand_dims(self.anchors.get(), 0), [batch_size, 1, 1])
    tiled_anchors_boxlist = box_list.BoxList(
-        tf.reshape(tiled_anchor_boxes, [-1, self._box_coder.code_size]))
+        tf.reshape(tiled_anchor_boxes, [-1, 4]))
    decoded_boxes = self._box_coder.decode(
        tf.reshape(box_encodings, [-1, self._box_coder.code_size]),
        tiled_anchors_boxlist)
-    return tf.reshape(decoded_boxes.get(),
-                      tf.stack([combined_shape[0], combined_shape[1], 4]))
+    decoded_keypoints = None
+    if decoded_boxes.has_field(fields.BoxListFields.keypoints):
+      decoded_keypoints = decoded_boxes.get_field(
+          fields.BoxListFields.keypoints)
+      num_keypoints = decoded_keypoints.get_shape()[1]
+      decoded_keypoints = tf.reshape(
+          decoded_keypoints,
+          tf.stack([combined_shape[0], combined_shape[1], num_keypoints, 2]))
+    decoded_boxes = tf.reshape(decoded_boxes.get(), tf.stack(
+        [combined_shape[0], combined_shape[1], 4]))
+    return decoded_boxes, decoded_keypoints
 
  def restore_map(self, from_detection_checkpoint=True):
    """Returns a map of variables to load from a foreign checkpoint.
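To make the reshapes at the end of _batch_decode concrete: the box coder returns keypoints flattened over the batch, [batch_size * num_anchors, num_keypoints, 2], and they are folded back into batch layout. A minimal NumPy sketch (shapes assumed for illustration):

  import numpy as np

  batch_size, num_anchors, num_keypoints = 2, 3, 4
  flat = np.arange(batch_size * num_anchors * num_keypoints * 2,
                   dtype=np.float32)
  decoded_keypoints = flat.reshape(batch_size * num_anchors, num_keypoints, 2)
  # The tf.reshape in _batch_decode, with combined_shape[:2] = [batch, anchors]:
  decoded_keypoints = decoded_keypoints.reshape(batch_size, num_anchors,
                                                num_keypoints, 2)
  print(decoded_keypoints.shape)  # (2, 3, 4, 2)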
research/object_detection/meta_architectures/ssd_meta_arch_test.py

@@ -18,7 +18,6 @@ import functools
 import numpy as np
 import tensorflow as tf
-from tensorflow.python.training import saver as tf_saver
 
 from object_detection.core import anchor_generator
 from object_detection.core import box_list
 from object_detection.core import losses

@@ -34,7 +33,12 @@ class FakeSSDFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
  def __init__(self):
    super(FakeSSDFeatureExtractor, self).__init__(
-        depth_multiplier=0, min_depth=0, conv_hyperparams=None)
+        is_training=True,
+        depth_multiplier=0,
+        min_depth=0,
+        pad_to_multiple=1,
+        batch_norm_trainable=True,
+        conv_hyperparams=None)
 
  def preprocess(self, resized_inputs):
    return tf.identity(resized_inputs)

@@ -55,7 +59,7 @@ class MockAnchorGenerator2x2(anchor_generator.AnchorGenerator):
  def num_anchors_per_location(self):
    return [1]
 
-  def _generate(self, feature_map_shape_list):
+  def _generate(self, feature_map_shape_list, im_height, im_width):
    return box_list.BoxList(
        tf.constant([[0, 0, .5, .5],
                     [0, .5, .5, 1],

@@ -147,6 +151,7 @@ class SsdMetaArchTest(tf.test.TestCase):
      self.assertTrue('box_encodings' in prediction_dict)
      self.assertTrue('class_predictions_with_background' in prediction_dict)
      self.assertTrue('feature_maps' in prediction_dict)
+      self.assertTrue('anchors' in prediction_dict)
 
      init_op = tf.global_variables_initializer()
      with self.test_session(graph=tf_graph) as sess:

@@ -242,7 +247,7 @@ class SsdMetaArchTest(tf.test.TestCase):
  def test_restore_map_for_detection_ckpt(self):
    init_op = tf.global_variables_initializer()
-    saver = tf_saver.Saver()
+    saver = tf.train.Saver()
    save_path = self.get_temp_dir()
    with self.test_session() as sess:
      sess.run(init_op)