Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
ff88581a
Unverified
Commit
ff88581a
authored
Oct 29, 2017
by
vivek rathod
Committed by
GitHub
Oct 29, 2017
Browse files
Merge pull request #2629 from tombstone/meta_arch_update
update post_processing module, builders, and meta architectures.
parents
018e62f0
aeeaf9a3
Changes
12
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
1026 additions
and
265 deletions
+1026
-265
research/object_detection/builders/post_processing_builder.py
...arch/object_detection/builders/post_processing_builder.py
+19
-7
research/object_detection/builders/post_processing_builder_test.py
...object_detection/builders/post_processing_builder_test.py
+37
-3
research/object_detection/core/post_processing.py
research/object_detection/core/post_processing.py
+106
-23
research/object_detection/core/post_processing_test.py
research/object_detection/core/post_processing_test.py
+192
-18
research/object_detection/meta_architectures/BUILD
research/object_detection/meta_architectures/BUILD
+1
-0
research/object_detection/meta_architectures/faster_rcnn_meta_arch.py
...ect_detection/meta_architectures/faster_rcnn_meta_arch.py
+219
-77
research/object_detection/meta_architectures/faster_rcnn_meta_arch_test.py
...etection/meta_architectures/faster_rcnn_meta_arch_test.py
+23
-11
research/object_detection/meta_architectures/faster_rcnn_meta_arch_test_lib.py
...tion/meta_architectures/faster_rcnn_meta_arch_test_lib.py
+268
-81
research/object_detection/meta_architectures/rfcn_meta_arch.py
...rch/object_detection/meta_architectures/rfcn_meta_arch.py
+19
-3
research/object_detection/meta_architectures/rfcn_meta_arch_test.py
...bject_detection/meta_architectures/rfcn_meta_arch_test.py
+9
-0
research/object_detection/meta_architectures/ssd_meta_arch.py
...arch/object_detection/meta_architectures/ssd_meta_arch.py
+124
-38
research/object_detection/meta_architectures/ssd_meta_arch_test.py
...object_detection/meta_architectures/ssd_meta_arch_test.py
+9
-4
No files found.
research/object_detection/builders/post_processing_builder.py
View file @
ff88581a
...
@@ -28,8 +28,8 @@ def build(post_processing_config):
...
@@ -28,8 +28,8 @@ def build(post_processing_config):
configuration.
configuration.
Non-max suppression callable takes `boxes`, `scores`, and optionally
Non-max suppression callable takes `boxes`, `scores`, and optionally
`clip_window`, `parallel_iterations` and `scope` as inputs. It returns
`clip_window`, `parallel_iterations`, `masks`, and `scope` as inputs. It returns
`nms_boxes`, `nms_scores`, `nms_
nms_
classes` and `num_detections`. See
`nms_boxes`, `nms_scores`, `nms_classes`, `nms_masks` and `num_detections`. See
post_processing.batch_multiclass_non_max_suppression for the type and shape
post_processing.batch_multiclass_non_max_suppression for the type and shape
of these tensors.
of these tensors.
...
@@ -55,7 +55,8 @@ def build(post_processing_config):
...
@@ -55,7 +55,8 @@ def build(post_processing_config):
non_max_suppressor_fn
=
_build_non_max_suppressor
(
non_max_suppressor_fn
=
_build_non_max_suppressor
(
post_processing_config
.
batch_non_max_suppression
)
post_processing_config
.
batch_non_max_suppression
)
score_converter_fn
=
_build_score_converter
(
score_converter_fn
=
_build_score_converter
(
post_processing_config
.
score_converter
)
post_processing_config
.
score_converter
,
post_processing_config
.
logit_scale
)
return
non_max_suppressor_fn
,
score_converter_fn
return
non_max_suppressor_fn
,
score_converter_fn
...
@@ -87,7 +88,17 @@ def _build_non_max_suppressor(nms_config):
...
@@ -87,7 +88,17 @@ def _build_non_max_suppressor(nms_config):
return
non_max_suppressor_fn
return
non_max_suppressor_fn
def
_build_score_converter
(
score_converter_config
):
def _score_converter_fn_with_logit_scale(tf_score_converter_fn, logit_scale):
  """Wraps a score converter so that logits are rescaled before conversion.

  Args:
    tf_score_converter_fn: callable that accepts a tensor as its first
      argument and a `name` keyword argument, and exposes `__name__`.
    logit_scale: value the logits are divided by before
      `tf_score_converter_fn` is applied.

  Returns:
    A callable taking `logits`. Its `__name__` is the wrapped function's
    `__name__` followed by `_with_logit_scale`.
  """
  wrapped_name = '%s_with_logit_scale' % (tf_score_converter_fn.__name__)

  def score_converter_fn(logits):
    # Divide first, then hand the rescaled logits to the wrapped converter.
    return tf_score_converter_fn(
        tf.divide(logits, logit_scale, name='scale_logits'),
        name='convert_scores')

  # Expose a descriptive name so callers can tell which converter was built.
  score_converter_fn.__name__ = wrapped_name
  return score_converter_fn
def
_build_score_converter
(
score_converter_config
,
logit_scale
):
"""Builds score converter based on the config.
"""Builds score converter based on the config.
Builds one of [tf.identity, tf.sigmoid, tf.softmax] score converters based on
Builds one of [tf.identity, tf.sigmoid, tf.softmax] score converters based on
...
@@ -95,6 +106,7 @@ def _build_score_converter(score_converter_config):
...
@@ -95,6 +106,7 @@ def _build_score_converter(score_converter_config):
Args:
Args:
score_converter_config: post_processing_pb2.PostProcessing.score_converter.
score_converter_config: post_processing_pb2.PostProcessing.score_converter.
logit_scale: temperature by which the logits are divided before the selected score converter (IDENTITY, SIGMOID or SOFTMAX) is applied.
Returns:
Returns:
Callable score converter op.
Callable score converter op.
...
@@ -103,9 +115,9 @@ def _build_score_converter(score_converter_config):
...
@@ -103,9 +115,9 @@ def _build_score_converter(score_converter_config):
ValueError: On unknown score converter.
ValueError: On unknown score converter.
"""
"""
if
score_converter_config
==
post_processing_pb2
.
PostProcessing
.
IDENTITY
:
if
score_converter_config
==
post_processing_pb2
.
PostProcessing
.
IDENTITY
:
return
tf
.
identity
return
_score_converter_fn_with_logit_scale
(
tf
.
identity
,
logit_scale
)
if
score_converter_config
==
post_processing_pb2
.
PostProcessing
.
SIGMOID
:
if
score_converter_config
==
post_processing_pb2
.
PostProcessing
.
SIGMOID
:
return
tf
.
sigmoid
return
_score_converter_fn_with_logit_scale
(
tf
.
sigmoid
,
logit_scale
)
if
score_converter_config
==
post_processing_pb2
.
PostProcessing
.
SOFTMAX
:
if
score_converter_config
==
post_processing_pb2
.
PostProcessing
.
SOFTMAX
:
return
tf
.
nn
.
softmax
return
_score_converter_fn_with_logit_scale
(
tf
.
nn
.
softmax
,
logit_scale
)
raise
ValueError
(
'Unknown score converter.'
)
raise
ValueError
(
'Unknown score converter.'
)
research/object_detection/builders/post_processing_builder_test.py
View file @
ff88581a
...
@@ -48,7 +48,31 @@ class PostProcessingBuilderTest(tf.test.TestCase):
...
@@ -48,7 +48,31 @@ class PostProcessingBuilderTest(tf.test.TestCase):
post_processing_config
=
post_processing_pb2
.
PostProcessing
()
post_processing_config
=
post_processing_pb2
.
PostProcessing
()
text_format
.
Merge
(
post_processing_text_proto
,
post_processing_config
)
text_format
.
Merge
(
post_processing_text_proto
,
post_processing_config
)
_
,
score_converter
=
post_processing_builder
.
build
(
post_processing_config
)
_
,
score_converter
=
post_processing_builder
.
build
(
post_processing_config
)
self
.
assertEqual
(
score_converter
,
tf
.
identity
)
self
.
assertEqual
(
score_converter
.
__name__
,
'identity_with_logit_scale'
)
inputs
=
tf
.
constant
([
1
,
1
],
tf
.
float32
)
outputs
=
score_converter
(
inputs
)
with
self
.
test_session
()
as
sess
:
converted_scores
=
sess
.
run
(
outputs
)
expected_converted_scores
=
sess
.
run
(
inputs
)
self
.
assertAllClose
(
converted_scores
,
expected_converted_scores
)
def test_build_identity_score_converter_with_logit_scale(self):
  """IDENTITY converter with logit_scale 2.0 must halve its inputs."""
  config_text = """
score_converter: IDENTITY
logit_scale: 2.0
"""
  config = post_processing_pb2.PostProcessing()
  text_format.Merge(config_text, config)
  # build() returns (non_max_suppressor_fn, score_converter_fn).
  score_converter = post_processing_builder.build(config)[1]
  self.assertEqual(score_converter.__name__, 'identity_with_logit_scale')

  logits = tf.constant([1, 1], tf.float32)
  scaled = score_converter(logits)
  halved = tf.constant([.5, .5], tf.float32)
  with self.test_session() as sess:
    converted_scores, expected_converted_scores = sess.run([scaled, halved])
    self.assertAllClose(converted_scores, expected_converted_scores)
def
test_build_sigmoid_score_converter
(
self
):
def
test_build_sigmoid_score_converter
(
self
):
post_processing_text_proto
=
"""
post_processing_text_proto
=
"""
...
@@ -57,7 +81,7 @@ class PostProcessingBuilderTest(tf.test.TestCase):
...
@@ -57,7 +81,7 @@ class PostProcessingBuilderTest(tf.test.TestCase):
post_processing_config
=
post_processing_pb2
.
PostProcessing
()
post_processing_config
=
post_processing_pb2
.
PostProcessing
()
text_format
.
Merge
(
post_processing_text_proto
,
post_processing_config
)
text_format
.
Merge
(
post_processing_text_proto
,
post_processing_config
)
_
,
score_converter
=
post_processing_builder
.
build
(
post_processing_config
)
_
,
score_converter
=
post_processing_builder
.
build
(
post_processing_config
)
self
.
assertEqual
(
score_converter
,
tf
.
sigmoid
)
self
.
assertEqual
(
score_converter
.
__name__
,
'sigmoid_with_logit_scale'
)
def
test_build_softmax_score_converter
(
self
):
def
test_build_softmax_score_converter
(
self
):
post_processing_text_proto
=
"""
post_processing_text_proto
=
"""
...
@@ -66,7 +90,17 @@ class PostProcessingBuilderTest(tf.test.TestCase):
...
@@ -66,7 +90,17 @@ class PostProcessingBuilderTest(tf.test.TestCase):
post_processing_config
=
post_processing_pb2
.
PostProcessing
()
post_processing_config
=
post_processing_pb2
.
PostProcessing
()
text_format
.
Merge
(
post_processing_text_proto
,
post_processing_config
)
text_format
.
Merge
(
post_processing_text_proto
,
post_processing_config
)
_
,
score_converter
=
post_processing_builder
.
build
(
post_processing_config
)
_
,
score_converter
=
post_processing_builder
.
build
(
post_processing_config
)
self
.
assertEqual
(
score_converter
,
tf
.
nn
.
softmax
)
self
.
assertEqual
(
score_converter
.
__name__
,
'softmax_with_logit_scale'
)
def test_build_softmax_score_converter_with_temperature(self):
  """SOFTMAX converter with logit_scale 2.0 must apply the temperature.

  Checks both the name of the built converter and its numeric output, so
  that a converter which ignores `logit_scale` fails the test.
  """
  post_processing_text_proto = """
score_converter: SOFTMAX
logit_scale: 2.0
"""
  post_processing_config = post_processing_pb2.PostProcessing()
  text_format.Merge(post_processing_text_proto, post_processing_config)
  _, score_converter = post_processing_builder.build(post_processing_config)
  self.assertEqual(score_converter.__name__, 'softmax_with_logit_scale')

  # The name alone does not prove the scale is used: compare against a plain
  # softmax of logits that were already divided by 2.0.
  inputs = tf.constant([[0., 2.]], tf.float32)
  outputs = score_converter(inputs)
  expected_outputs = tf.nn.softmax(tf.constant([[0., 1.]], tf.float32))
  with self.test_session() as sess:
    converted_scores, expected_converted_scores = sess.run(
        [outputs, expected_outputs])
    self.assertAllClose(converted_scores, expected_converted_scores)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
...
...
research/object_detection/core/post_processing.py
View file @
ff88581a
...
@@ -76,8 +76,6 @@ def multiclass_non_max_suppression(boxes,
...
@@ -76,8 +76,6 @@ def multiclass_non_max_suppression(boxes,
a BoxList holding M boxes with a rank-1 scores field representing
a BoxList holding M boxes with a rank-1 scores field representing
corresponding scores for each box with scores sorted in decreasing order
corresponding scores for each box with scores sorted in decreasing order
and a rank-1 classes field representing a class label for each box.
and a rank-1 classes field representing a class label for each box.
If masks, keypoints, keypoint_heatmaps is not None, the boxlist will
contain masks, keypoints, keypoint_heatmaps corresponding to boxes.
Raises:
Raises:
ValueError: if iou_thresh is not in [0, 1] or if input boxlist does not have
ValueError: if iou_thresh is not in [0, 1] or if input boxlist does not have
...
@@ -174,6 +172,7 @@ def batch_multiclass_non_max_suppression(boxes,
...
@@ -174,6 +172,7 @@ def batch_multiclass_non_max_suppression(boxes,
change_coordinate_frame
=
False
,
change_coordinate_frame
=
False
,
num_valid_boxes
=
None
,
num_valid_boxes
=
None
,
masks
=
None
,
masks
=
None
,
additional_fields
=
None
,
scope
=
None
,
scope
=
None
,
parallel_iterations
=
32
):
parallel_iterations
=
32
):
"""Multi-class version of non maximum suppression that operates on a batch.
"""Multi-class version of non maximum suppression that operates on a batch.
...
@@ -203,11 +202,13 @@ def batch_multiclass_non_max_suppression(boxes,
...
@@ -203,11 +202,13 @@ def batch_multiclass_non_max_suppression(boxes,
is provided)
is provided)
num_valid_boxes: (optional) a Tensor of type `int32`. A 1-D tensor of shape
num_valid_boxes: (optional) a Tensor of type `int32`. A 1-D tensor of shape
[batch_size] representing the number of valid boxes to be considered
[batch_size] representing the number of valid boxes to be considered
for each image in the batch. This parameter allows for ignoring zero
for each image in the batch. This parameter allows for ignoring zero
paddings.
paddings.
masks: (optional) a [batch_size, num_anchors, q, mask_height, mask_width]
masks: (optional) a [batch_size, num_anchors, q, mask_height, mask_width]
float32 tensor containing box masks. `q` can be either number of classes
float32 tensor containing box masks. `q` can be either number of classes
or 1 depending on whether a separate mask is predicted per class.
or 1 depending on whether a separate mask is predicted per class.
additional_fields: (optional) If not None, a dictionary that maps keys to
tensors whose dimensions are [batch_size, num_anchors, ...].
scope: tf scope name.
scope: tf scope name.
parallel_iterations: (optional) number of batch items to process in
parallel_iterations: (optional) number of batch items to process in
parallel.
parallel.
...
@@ -223,9 +224,13 @@ def batch_multiclass_non_max_suppression(boxes,
...
@@ -223,9 +224,13 @@ def batch_multiclass_non_max_suppression(boxes,
[batch_size, max_detections, mask_height, mask_width] float32 tensor
[batch_size, max_detections, mask_height, mask_width] float32 tensor
containing masks for each selected box. This is set to None if input
containing masks for each selected box. This is set to None if input
`masks` is None.
`masks` is None.
'nmsed_additional_fields': (optional) a dictionary of
[batch_size, max_detections, ...] float32 tensors corresponding to the
tensors specified in the input `additional_fields`. This is set to None
if input `additional_fields` is None.
'num_detections': A [batch_size] int32 tensor indicating the number of
'num_detections': A [batch_size] int32 tensor indicating the number of
valid detections per batch item. Only the top num_detections[i] entries in
valid detections per batch item. Only the top num_detections[i] entries in
nms_boxes[i], nms_scores[i] and nms_class[i] are valid.
t
he rest of the
nmsed_boxes[i], nmsed_scores[i] and nmsed_classes[i] are valid. The rest of the
entries are zero paddings.
entries are zero paddings.
Raises:
Raises:
...
@@ -239,6 +244,7 @@ def batch_multiclass_non_max_suppression(boxes,
...
@@ -239,6 +244,7 @@ def batch_multiclass_non_max_suppression(boxes,
'to the third dimension of scores'
)
'to the third dimension of scores'
)
original_masks
=
masks
original_masks
=
masks
original_additional_fields
=
additional_fields
with
tf
.
name_scope
(
scope
,
'BatchMultiClassNonMaxSuppression'
):
with
tf
.
name_scope
(
scope
,
'BatchMultiClassNonMaxSuppression'
):
boxes_shape
=
boxes
.
shape
boxes_shape
=
boxes
.
shape
batch_size
=
boxes_shape
[
0
].
value
batch_size
=
boxes_shape
[
0
].
value
...
@@ -255,15 +261,61 @@ def batch_multiclass_non_max_suppression(boxes,
...
@@ -255,15 +261,61 @@ def batch_multiclass_non_max_suppression(boxes,
num_valid_boxes
=
tf
.
ones
([
batch_size
],
dtype
=
tf
.
int32
)
*
num_anchors
num_valid_boxes
=
tf
.
ones
([
batch_size
],
dtype
=
tf
.
int32
)
*
num_anchors
# If masks aren't provided, create dummy masks so we can only have one copy
# If masks aren't provided, create dummy masks so we can only have one copy
# of single_image_nms_fn and discard the dummy masks after map_fn.
# of _single_image_nms_fn and discard the dummy masks after map_fn.
if
masks
is
None
:
if
masks
is
None
:
masks_shape
=
tf
.
stack
([
batch_size
,
num_anchors
,
1
,
0
,
0
])
masks_shape
=
tf
.
stack
([
batch_size
,
num_anchors
,
1
,
0
,
0
])
masks
=
tf
.
zeros
(
masks_shape
)
masks
=
tf
.
zeros
(
masks_shape
)
def
single_image_nms_fn
(
args
):
if
additional_fields
is
None
:
"""Runs NMS on a single image and returns padded output."""
additional_fields
=
{}
(
per_image_boxes
,
per_image_scores
,
per_image_masks
,
per_image_num_valid_boxes
)
=
args
def
_single_image_nms_fn
(
args
):
"""Runs NMS on a single image and returns padded output.
Args:
args: A list of tensors consisting of the following:
per_image_boxes - A [num_anchors, q, 4] float32 tensor containing
detections. If `q` is 1 then same boxes are used for all classes
otherwise, if `q` is equal to number of classes, class-specific
boxes are used.
per_image_scores - A [num_anchors, num_classes] float32 tensor
containing the scores for each of the `num_anchors` detections.
per_image_masks - A [num_anchors, q, mask_height, mask_width] float32
tensor containing box masks. `q` can be either number of classes
or 1 depending on whether a separate mask is predicted per class.
per_image_additional_fields - (optional) A variable number of float32
tensors each with size [num_anchors, ...].
per_image_num_valid_boxes - A scalar tensor of type `int32` giving the
number of valid boxes to be considered for this image. This parameter
allows for ignoring zero paddings.
Returns:
'nmsed_boxes': A [max_detections, 4] float32 tensor containing the
non-max suppressed boxes.
'nmsed_scores': A [max_detections] float32 tensor containing the scores
for the boxes.
'nmsed_classes': A [max_detections] float32 tensor containing the class
for boxes.
'nmsed_masks': (optional) a [max_detections, mask_height, mask_width]
float32 tensor containing masks for each selected box. This is set to
None if input `masks` is None.
'nmsed_additional_fields': (optional) A variable number of float32
tensors each with size [max_detections, ...] corresponding to the
input `per_image_additional_fields`.
'num_detections': A scalar int32 tensor indicating the number of valid
detections for this image. Only the top num_detections entries in
nmsed_boxes, nmsed_scores and nmsed_classes are valid. The rest of the
entries are zero paddings.
"""
per_image_boxes
=
args
[
0
]
per_image_scores
=
args
[
1
]
per_image_masks
=
args
[
2
]
per_image_additional_fields
=
{
key
:
value
for
key
,
value
in
zip
(
additional_fields
,
args
[
3
:
-
1
])
}
per_image_num_valid_boxes
=
args
[
-
1
]
per_image_boxes
=
tf
.
reshape
(
per_image_boxes
=
tf
.
reshape
(
tf
.
slice
(
per_image_boxes
,
3
*
[
0
],
tf
.
slice
(
per_image_boxes
,
3
*
[
0
],
tf
.
stack
([
per_image_num_valid_boxes
,
-
1
,
-
1
])),
[
-
1
,
q
,
4
])
tf
.
stack
([
per_image_num_valid_boxes
,
-
1
,
-
1
])),
[
-
1
,
q
,
4
])
...
@@ -271,12 +323,21 @@ def batch_multiclass_non_max_suppression(boxes,
...
@@ -271,12 +323,21 @@ def batch_multiclass_non_max_suppression(boxes,
tf
.
slice
(
per_image_scores
,
[
0
,
0
],
tf
.
slice
(
per_image_scores
,
[
0
,
0
],
tf
.
stack
([
per_image_num_valid_boxes
,
-
1
])),
tf
.
stack
([
per_image_num_valid_boxes
,
-
1
])),
[
-
1
,
num_classes
])
[
-
1
,
num_classes
])
per_image_masks
=
tf
.
reshape
(
per_image_masks
=
tf
.
reshape
(
tf
.
slice
(
per_image_masks
,
4
*
[
0
],
tf
.
slice
(
per_image_masks
,
4
*
[
0
],
tf
.
stack
([
per_image_num_valid_boxes
,
-
1
,
-
1
,
-
1
])),
tf
.
stack
([
per_image_num_valid_boxes
,
-
1
,
-
1
,
-
1
])),
[
-
1
,
q
,
per_image_masks
.
shape
[
2
].
value
,
[
-
1
,
q
,
per_image_masks
.
shape
[
2
].
value
,
per_image_masks
.
shape
[
3
].
value
])
per_image_masks
.
shape
[
3
].
value
])
if
per_image_additional_fields
is
not
None
:
for
key
,
tensor
in
per_image_additional_fields
.
items
():
additional_field_shape
=
tensor
.
get_shape
()
additional_field_dim
=
len
(
additional_field_shape
)
per_image_additional_fields
[
key
]
=
tf
.
reshape
(
tf
.
slice
(
per_image_additional_fields
[
key
],
additional_field_dim
*
[
0
],
tf
.
stack
([
per_image_num_valid_boxes
]
+
(
additional_field_dim
-
1
)
*
[
-
1
])),
[
-
1
]
+
[
dim
.
value
for
dim
in
additional_field_shape
[
1
:]])
nmsed_boxlist
=
multiclass_non_max_suppression
(
nmsed_boxlist
=
multiclass_non_max_suppression
(
per_image_boxes
,
per_image_boxes
,
per_image_scores
,
per_image_scores
,
...
@@ -284,9 +345,10 @@ def batch_multiclass_non_max_suppression(boxes,
...
@@ -284,9 +345,10 @@ def batch_multiclass_non_max_suppression(boxes,
iou_thresh
,
iou_thresh
,
max_size_per_class
,
max_size_per_class
,
max_total_size
,
max_total_size
,
masks
=
per_image_masks
,
clip_window
=
clip_window
,
clip_window
=
clip_window
,
change_coordinate_frame
=
change_coordinate_frame
)
change_coordinate_frame
=
change_coordinate_frame
,
masks
=
per_image_masks
,
additional_fields
=
per_image_additional_fields
)
padded_boxlist
=
box_list_ops
.
pad_or_clip_box_list
(
nmsed_boxlist
,
padded_boxlist
=
box_list_ops
.
pad_or_clip_box_list
(
nmsed_boxlist
,
max_total_size
)
max_total_size
)
num_detections
=
nmsed_boxlist
.
num_boxes
()
num_detections
=
nmsed_boxlist
.
num_boxes
()
...
@@ -294,19 +356,40 @@ def batch_multiclass_non_max_suppression(boxes,
...
@@ -294,19 +356,40 @@ def batch_multiclass_non_max_suppression(boxes,
nmsed_scores
=
padded_boxlist
.
get_field
(
fields
.
BoxListFields
.
scores
)
nmsed_scores
=
padded_boxlist
.
get_field
(
fields
.
BoxListFields
.
scores
)
nmsed_classes
=
padded_boxlist
.
get_field
(
fields
.
BoxListFields
.
classes
)
nmsed_classes
=
padded_boxlist
.
get_field
(
fields
.
BoxListFields
.
classes
)
nmsed_masks
=
padded_boxlist
.
get_field
(
fields
.
BoxListFields
.
masks
)
nmsed_masks
=
padded_boxlist
.
get_field
(
fields
.
BoxListFields
.
masks
)
return
[
nmsed_boxes
,
nmsed_scores
,
nmsed_classes
,
nmsed_masks
,
nmsed_additional_fields
=
[
num_detections
]
padded_boxlist
.
get_field
(
key
)
for
key
in
per_image_additional_fields
]
return
([
nmsed_boxes
,
nmsed_scores
,
nmsed_classes
,
nmsed_masks
]
+
nmsed_additional_fields
+
[
num_detections
])
num_additional_fields
=
0
if
additional_fields
is
not
None
:
num_additional_fields
=
len
(
additional_fields
)
num_nmsed_outputs
=
4
+
num_additional_fields
(
batch_nmsed_boxes
,
batch_nmsed_scores
,
batch_outputs
=
tf
.
map_fn
(
batch_nmsed_classes
,
batch_nmsed_masks
,
_single_image_nms_fn
,
batch_num_detections
)
=
tf
.
map_fn
(
elems
=
([
boxes
,
scores
,
masks
]
+
list
(
additional_fields
.
values
())
+
single_image_nms_fn
,
[
num_valid_boxes
]),
elems
=
[
boxes
,
scores
,
masks
,
num_valid_boxes
],
dtype
=
(
num_nmsed_outputs
*
[
tf
.
float32
]
+
[
tf
.
int32
]),
dtype
=
[
tf
.
float32
,
tf
.
float32
,
tf
.
float32
,
tf
.
float32
,
tf
.
int32
],
parallel_iterations
=
parallel_iterations
)
parallel_iterations
=
parallel_iterations
)
batch_nmsed_boxes
=
batch_outputs
[
0
]
batch_nmsed_scores
=
batch_outputs
[
1
]
batch_nmsed_classes
=
batch_outputs
[
2
]
batch_nmsed_masks
=
batch_outputs
[
3
]
batch_nmsed_additional_fields
=
{
key
:
value
for
key
,
value
in
zip
(
additional_fields
,
batch_outputs
[
4
:
-
1
])
}
batch_num_detections
=
batch_outputs
[
-
1
]
if
original_masks
is
None
:
if
original_masks
is
None
:
batch_nmsed_masks
=
None
batch_nmsed_masks
=
None
if
original_additional_fields
is
None
:
batch_nmsed_additional_fields
=
None
return
(
batch_nmsed_boxes
,
batch_nmsed_scores
,
batch_nmsed_classes
,
return
(
batch_nmsed_boxes
,
batch_nmsed_scores
,
batch_nmsed_classes
,
batch_nmsed_masks
,
batch_num_detections
)
batch_nmsed_masks
,
batch_nmsed_additional_fields
,
batch_num_detections
)
research/object_detection/core/post_processing_test.py
View file @
ff88581a
...
@@ -497,11 +497,13 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
...
@@ -497,11 +497,13 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
exp_nms_classes
=
[[
0
,
0
,
1
,
0
]]
exp_nms_classes
=
[[
0
,
0
,
1
,
0
]]
(
nmsed_boxes
,
nmsed_scores
,
nmsed_classes
,
nmsed_masks
,
(
nmsed_boxes
,
nmsed_scores
,
nmsed_classes
,
nmsed_masks
,
num_detections
)
=
post_processing
.
batch_multiclass_non_max_suppression
(
nmsed_additional_fields
,
num_detections
boxes
,
scores
,
score_thresh
,
iou_thresh
,
)
=
post_processing
.
batch_multiclass_non_max_suppression
(
max_size_per_class
=
max_output_size
,
max_total_size
=
max_output_size
)
boxes
,
scores
,
score_thresh
,
iou_thresh
,
max_size_per_class
=
max_output_size
,
max_total_size
=
max_output_size
)
self
.
assertIsNone
(
nmsed_masks
)
self
.
assertIsNone
(
nmsed_masks
)
self
.
assertIsNone
(
nmsed_additional_fields
)
with
self
.
test_session
()
as
sess
:
with
self
.
test_session
()
as
sess
:
(
nmsed_boxes
,
nmsed_scores
,
nmsed_classes
,
(
nmsed_boxes
,
nmsed_scores
,
nmsed_classes
,
...
@@ -544,11 +546,13 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
...
@@ -544,11 +546,13 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
[
1
,
0
,
0
,
0
]])
[
1
,
0
,
0
,
0
]])
(
nmsed_boxes
,
nmsed_scores
,
nmsed_classes
,
nmsed_masks
,
(
nmsed_boxes
,
nmsed_scores
,
nmsed_classes
,
nmsed_masks
,
num_detections
)
=
post_processing
.
batch_multiclass_non_max_suppression
(
nmsed_additional_fields
,
num_detections
boxes
,
scores
,
score_thresh
,
iou_thresh
,
)
=
post_processing
.
batch_multiclass_non_max_suppression
(
max_size_per_class
=
max_output_size
,
max_total_size
=
max_output_size
)
boxes
,
scores
,
score_thresh
,
iou_thresh
,
max_size_per_class
=
max_output_size
,
max_total_size
=
max_output_size
)
self
.
assertIsNone
(
nmsed_masks
)
self
.
assertIsNone
(
nmsed_masks
)
self
.
assertIsNone
(
nmsed_additional_fields
)
# Check static shapes
# Check static shapes
self
.
assertAllEqual
(
nmsed_boxes
.
shape
.
as_list
(),
self
.
assertAllEqual
(
nmsed_boxes
.
shape
.
as_list
(),
exp_nms_corners
.
shape
)
exp_nms_corners
.
shape
)
...
@@ -616,11 +620,13 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
...
@@ -616,11 +620,13 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
[[
0
,
0
],
[
0
,
0
]]]])
[[
0
,
0
],
[
0
,
0
]]]])
(
nmsed_boxes
,
nmsed_scores
,
nmsed_classes
,
nmsed_masks
,
(
nmsed_boxes
,
nmsed_scores
,
nmsed_classes
,
nmsed_masks
,
num_detections
)
=
post_processing
.
batch_multiclass_non_max_suppression
(
nmsed_additional_fields
,
num_detections
boxes
,
scores
,
score_thresh
,
iou_thresh
,
)
=
post_processing
.
batch_multiclass_non_max_suppression
(
max_size_per_class
=
max_output_size
,
max_total_size
=
max_output_size
,
boxes
,
scores
,
score_thresh
,
iou_thresh
,
masks
=
masks
)
max_size_per_class
=
max_output_size
,
max_total_size
=
max_output_size
,
masks
=
masks
)
self
.
assertIsNone
(
nmsed_additional_fields
)
# Check static shapes
# Check static shapes
self
.
assertAllEqual
(
nmsed_boxes
.
shape
.
as_list
(),
exp_nms_corners
.
shape
)
self
.
assertAllEqual
(
nmsed_boxes
.
shape
.
as_list
(),
exp_nms_corners
.
shape
)
self
.
assertAllEqual
(
nmsed_scores
.
shape
.
as_list
(),
exp_nms_scores
.
shape
)
self
.
assertAllEqual
(
nmsed_scores
.
shape
.
as_list
(),
exp_nms_scores
.
shape
)
...
@@ -639,6 +645,91 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
...
@@ -639,6 +645,91 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
self
.
assertAllClose
(
num_detections
,
[
2
,
3
])
self
.
assertAllClose
(
num_detections
,
[
2
,
3
])
self
.
assertAllClose
(
nmsed_masks
,
exp_nms_masks
)
self
.
assertAllClose
(
nmsed_masks
,
exp_nms_masks
)
def test_batch_multiclass_nms_with_additional_fields(self):
  """Batch NMS must carry `additional_fields` through selection and padding.

  Runs batch NMS on two images with four anchors and two classes each, with
  a 'keypoints' additional field, and checks static shapes and values of the
  padded outputs.
  """
  boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
                        [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
                        [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
                        [[0, 10, 1, 11], [0, 10, 1, 11]]],
                       [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
                        [[0, 100, 1, 101], [0, 100, 1, 101]],
                        [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
                        [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
                      tf.float32)
  scores = tf.constant([[[.9, 0.01], [.75, 0.05],
                         [.6, 0.01], [.95, 0]],
                        [[.5, 0.01], [.3, 0.01],
                         [.01, .85], [.01, .5]]])
  additional_fields = {
      'keypoints': tf.constant(
          [[[[6, 7], [8, 9]],
            [[0, 1], [2, 3]],
            [[0, 0], [0, 0]],
            [[0, 0], [0, 0]]],
           [[[13, 14], [15, 16]],
            [[8, 9], [10, 11]],
            [[10, 11], [12, 13]],
            [[0, 0], [0, 0]]]],
          tf.float32)
  }
  score_thresh = 0.1
  iou_thresh = .5
  max_output_size = 4

  # Expected outputs, zero-padded up to max_output_size detections per image.
  exp_nms_corners = np.array([[[0, 10, 1, 11],
                               [0, 0, 1, 1],
                               [0, 0, 0, 0],
                               [0, 0, 0, 0]],
                              [[0, 999, 2, 1004],
                               [0, 10.1, 1, 11.1],
                               [0, 100, 1, 101],
                               [0, 0, 0, 0]]])
  exp_nms_scores = np.array([[.95, .9, 0, 0],
                             [.85, .5, .3, 0]])
  exp_nms_classes = np.array([[0, 0, 0, 0],
                              [1, 0, 0, 0]])
  exp_nms_additional_fields = {
      'keypoints': np.array([[[[0, 0], [0, 0]],
                              [[6, 7], [8, 9]],
                              [[0, 0], [0, 0]],
                              [[0, 0], [0, 0]]],
                             [[[10, 11], [12, 13]],
                              [[13, 14], [15, 16]],
                              [[8, 9], [10, 11]],
                              [[0, 0], [0, 0]]]])
  }

  nms_outputs = post_processing.batch_multiclass_non_max_suppression(
      boxes, scores, score_thresh, iou_thresh,
      max_size_per_class=max_output_size, max_total_size=max_output_size,
      additional_fields=additional_fields)
  (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
   nmsed_additional_fields, num_detections) = nms_outputs

  # No masks were passed in, so none may come back.
  self.assertIsNone(nmsed_masks)

  # Check static shapes
  static_shape_checks = [(nmsed_boxes, exp_nms_corners),
                         (nmsed_scores, exp_nms_scores),
                         (nmsed_classes, exp_nms_classes)]
  for tensor, expected in static_shape_checks:
    self.assertAllEqual(tensor.shape.as_list(), expected.shape)
  self.assertEqual(len(nmsed_additional_fields),
                   len(exp_nms_additional_fields))
  for key in exp_nms_additional_fields:
    self.assertAllEqual(nmsed_additional_fields[key].shape.as_list(),
                        exp_nms_additional_fields[key].shape)
  self.assertEqual(num_detections.shape.as_list(), [2])

  with self.test_session() as sess:
    (boxes_out, scores_out, classes_out, fields_out,
     num_detections_out) = sess.run([nmsed_boxes, nmsed_scores,
                                     nmsed_classes, nmsed_additional_fields,
                                     num_detections])
    self.assertAllClose(boxes_out, exp_nms_corners)
    self.assertAllClose(scores_out, exp_nms_scores)
    self.assertAllClose(classes_out, exp_nms_classes)
    for key in exp_nms_additional_fields:
      self.assertAllClose(fields_out[key], exp_nms_additional_fields[key])
    self.assertAllClose(num_detections_out, [2, 3])
def
test_batch_multiclass_nms_with_dynamic_batch_size
(
self
):
def
test_batch_multiclass_nms_with_dynamic_batch_size
(
self
):
boxes_placeholder
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
(
None
,
None
,
2
,
4
))
boxes_placeholder
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
(
None
,
None
,
2
,
4
))
scores_placeholder
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
(
None
,
None
,
2
))
scores_placeholder
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
(
None
,
None
,
2
))
...
@@ -690,11 +781,13 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
...
@@ -690,11 +781,13 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
[[
0
,
0
],
[
0
,
0
]]]])
[[
0
,
0
],
[
0
,
0
]]]])
(
nmsed_boxes
,
nmsed_scores
,
nmsed_classes
,
nmsed_masks
,
(
nmsed_boxes
,
nmsed_scores
,
nmsed_classes
,
nmsed_masks
,
num_detections
)
=
post_processing
.
batch_multiclass_non_max_suppression
(
nmsed_additional_fields
,
num_detections
boxes_placeholder
,
scores_placeholder
,
score_thresh
,
iou_thresh
,
)
=
post_processing
.
batch_multiclass_non_max_suppression
(
max_size_per_class
=
max_output_size
,
max_total_size
=
max_output_size
,
boxes_placeholder
,
scores_placeholder
,
score_thresh
,
iou_thresh
,
masks
=
masks_placeholder
)
max_size_per_class
=
max_output_size
,
max_total_size
=
max_output_size
,
masks
=
masks_placeholder
)
self
.
assertIsNone
(
nmsed_additional_fields
)
# Check static shapes
# Check static shapes
self
.
assertAllEqual
(
nmsed_boxes
.
shape
.
as_list
(),
[
None
,
4
,
4
])
self
.
assertAllEqual
(
nmsed_boxes
.
shape
.
as_list
(),
[
None
,
4
,
4
])
self
.
assertAllEqual
(
nmsed_scores
.
shape
.
as_list
(),
[
None
,
4
])
self
.
assertAllEqual
(
nmsed_scores
.
shape
.
as_list
(),
[
None
,
4
])
...
@@ -765,10 +858,13 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
...
@@ -765,10 +858,13 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
[[
0
,
0
],
[
0
,
0
]]]]
[[
0
,
0
],
[
0
,
0
]]]]
(
nmsed_boxes
,
nmsed_scores
,
nmsed_classes
,
nmsed_masks
,
(
nmsed_boxes
,
nmsed_scores
,
nmsed_classes
,
nmsed_masks
,
num_detections
)
=
post_processing
.
batch_multiclass_non_max_suppression
(
nmsed_additional_fields
,
num_detections
boxes
,
scores
,
score_thresh
,
iou_thresh
,
)
=
post_processing
.
batch_multiclass_non_max_suppression
(
max_size_per_class
=
max_output_size
,
max_total_size
=
max_output_size
,
boxes
,
scores
,
score_thresh
,
iou_thresh
,
num_valid_boxes
=
num_valid_boxes
,
masks
=
masks
)
max_size_per_class
=
max_output_size
,
max_total_size
=
max_output_size
,
num_valid_boxes
=
num_valid_boxes
,
masks
=
masks
)
self
.
assertIsNone
(
nmsed_additional_fields
)
with
self
.
test_session
()
as
sess
:
with
self
.
test_session
()
as
sess
:
(
nmsed_boxes
,
nmsed_scores
,
nmsed_classes
,
nmsed_masks
,
(
nmsed_boxes
,
nmsed_scores
,
nmsed_classes
,
nmsed_masks
,
...
@@ -780,6 +876,84 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
...
@@ -780,6 +876,84 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
self
.
assertAllClose
(
num_detections
,
[
1
,
1
])
self
.
assertAllClose
(
num_detections
,
[
1
,
1
])
self
.
assertAllClose
(
nmsed_masks
,
exp_nms_masks
)
self
.
assertAllClose
(
nmsed_masks
,
exp_nms_masks
)
def
test_batch_multiclass_nms_with_additional_fields_and_num_valid_boxes
(
self
):
boxes
=
tf
.
constant
([[[[
0
,
0
,
1
,
1
],
[
0
,
0
,
4
,
5
]],
[[
0
,
0.1
,
1
,
1.1
],
[
0
,
0.1
,
2
,
1.1
]],
[[
0
,
-
0.1
,
1
,
0.9
],
[
0
,
-
0.1
,
1
,
0.9
]],
[[
0
,
10
,
1
,
11
],
[
0
,
10
,
1
,
11
]]],
[[[
0
,
10.1
,
1
,
11.1
],
[
0
,
10.1
,
1
,
11.1
]],
[[
0
,
100
,
1
,
101
],
[
0
,
100
,
1
,
101
]],
[[
0
,
1000
,
1
,
1002
],
[
0
,
999
,
2
,
1004
]],
[[
0
,
1000
,
1
,
1002.1
],
[
0
,
999
,
2
,
1002.7
]]]],
tf
.
float32
)
scores
=
tf
.
constant
([[[.
9
,
0.01
],
[.
75
,
0.05
],
[.
6
,
0.01
],
[.
95
,
0
]],
[[.
5
,
0.01
],
[.
3
,
0.01
],
[.
01
,
.
85
],
[.
01
,
.
5
]]])
additional_fields
=
{
'keypoints'
:
tf
.
constant
(
[[[[
6
,
7
],
[
8
,
9
]],
[[
0
,
1
],
[
2
,
3
]],
[[
0
,
0
],
[
0
,
0
]],
[[
0
,
0
],
[
0
,
0
]]],
[[[
13
,
14
],
[
15
,
16
]],
[[
8
,
9
],
[
10
,
11
]],
[[
10
,
11
],
[
12
,
13
]],
[[
0
,
0
],
[
0
,
0
]]]],
tf
.
float32
)
}
num_valid_boxes
=
tf
.
constant
([
1
,
1
],
tf
.
int32
)
score_thresh
=
0.1
iou_thresh
=
.
5
max_output_size
=
4
exp_nms_corners
=
[[[
0
,
0
,
1
,
1
],
[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
]],
[[
0
,
10.1
,
1
,
11.1
],
[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
]]]
exp_nms_scores
=
[[.
9
,
0
,
0
,
0
],
[.
5
,
0
,
0
,
0
]]
exp_nms_classes
=
[[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
]]
exp_nms_additional_fields
=
{
'keypoints'
:
np
.
array
([[[[
6
,
7
],
[
8
,
9
]],
[[
0
,
0
],
[
0
,
0
]],
[[
0
,
0
],
[
0
,
0
]],
[[
0
,
0
],
[
0
,
0
]]],
[[[
13
,
14
],
[
15
,
16
]],
[[
0
,
0
],
[
0
,
0
]],
[[
0
,
0
],
[
0
,
0
]],
[[
0
,
0
],
[
0
,
0
]]]])
}
(
nmsed_boxes
,
nmsed_scores
,
nmsed_classes
,
nmsed_masks
,
nmsed_additional_fields
,
num_detections
)
=
post_processing
.
batch_multiclass_non_max_suppression
(
boxes
,
scores
,
score_thresh
,
iou_thresh
,
max_size_per_class
=
max_output_size
,
max_total_size
=
max_output_size
,
num_valid_boxes
=
num_valid_boxes
,
additional_fields
=
additional_fields
)
self
.
assertIsNone
(
nmsed_masks
)
with
self
.
test_session
()
as
sess
:
(
nmsed_boxes
,
nmsed_scores
,
nmsed_classes
,
nmsed_additional_fields
,
num_detections
)
=
sess
.
run
([
nmsed_boxes
,
nmsed_scores
,
nmsed_classes
,
nmsed_additional_fields
,
num_detections
])
self
.
assertAllClose
(
nmsed_boxes
,
exp_nms_corners
)
self
.
assertAllClose
(
nmsed_scores
,
exp_nms_scores
)
self
.
assertAllClose
(
nmsed_classes
,
exp_nms_classes
)
for
key
in
exp_nms_additional_fields
:
self
.
assertAllClose
(
nmsed_additional_fields
[
key
],
exp_nms_additional_fields
[
key
])
self
.
assertAllClose
(
num_detections
,
[
1
,
1
])
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
tf
.
test
.
main
()
tf
.
test
.
main
()
research/object_detection/meta_architectures/BUILD
View file @
ff88581a
...
@@ -18,6 +18,7 @@ py_library(
...
@@ -18,6 +18,7 @@ py_library(
"//tensorflow_models/object_detection/core:model"
,
"//tensorflow_models/object_detection/core:model"
,
"//tensorflow_models/object_detection/core:target_assigner"
,
"//tensorflow_models/object_detection/core:target_assigner"
,
"//tensorflow_models/object_detection/utils:shape_utils"
,
"//tensorflow_models/object_detection/utils:shape_utils"
,
"//tensorflow_models/object_detection/utils:visualization_utils"
,
],
],
)
)
...
...
research/object_detection/meta_architectures/faster_rcnn_meta_arch.py
View file @
ff88581a
...
@@ -62,8 +62,6 @@ Following the API (see model.DetectionModel definition), our outputs after
...
@@ -62,8 +62,6 @@ Following the API (see model.DetectionModel definition), our outputs after
postprocessing operations are always normalized boxes however, internally, we
postprocessing operations are always normalized boxes however, internally, we
sometimes convert to absolute --- e.g. for loss computation. In particular,
sometimes convert to absolute --- e.g. for loss computation. In particular,
anchors and proposal_boxes are both represented as absolute coordinates.
anchors and proposal_boxes are both represented as absolute coordinates.
TODO: Support TPU implementations and sigmoid loss.
"""
"""
from
abc
import
abstractmethod
from
abc
import
abstractmethod
from
functools
import
partial
from
functools
import
partial
...
@@ -91,6 +89,7 @@ class FasterRCNNFeatureExtractor(object):
...
@@ -91,6 +89,7 @@ class FasterRCNNFeatureExtractor(object):
def
__init__
(
self
,
def
__init__
(
self
,
is_training
,
is_training
,
first_stage_features_stride
,
first_stage_features_stride
,
batch_norm_trainable
=
False
,
reuse_weights
=
None
,
reuse_weights
=
None
,
weight_decay
=
0.0
):
weight_decay
=
0.0
):
"""Constructor.
"""Constructor.
...
@@ -99,11 +98,15 @@ class FasterRCNNFeatureExtractor(object):
...
@@ -99,11 +98,15 @@ class FasterRCNNFeatureExtractor(object):
is_training: A boolean indicating whether the training version of the
is_training: A boolean indicating whether the training version of the
computation graph should be constructed.
computation graph should be constructed.
first_stage_features_stride: Output stride of extracted RPN feature map.
first_stage_features_stride: Output stride of extracted RPN feature map.
batch_norm_trainable: Whether to update batch norm parameters during
training or not. When training with a relative large batch size
(e.g. 8), it could be desirable to enable batch norm update.
reuse_weights: Whether to reuse variables. Default is None.
reuse_weights: Whether to reuse variables. Default is None.
weight_decay: float weight decay for feature extractor (default: 0.0).
weight_decay: float weight decay for feature extractor (default: 0.0).
"""
"""
self
.
_is_training
=
is_training
self
.
_is_training
=
is_training
self
.
_first_stage_features_stride
=
first_stage_features_stride
self
.
_first_stage_features_stride
=
first_stage_features_stride
self
.
_train_batch_norm
=
(
batch_norm_trainable
and
is_training
)
self
.
_reuse_weights
=
reuse_weights
self
.
_reuse_weights
=
reuse_weights
self
.
_weight_decay
=
weight_decay
self
.
_weight_decay
=
weight_decay
...
@@ -214,7 +217,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
...
@@ -214,7 +217,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
second_stage_score_conversion_fn
,
second_stage_score_conversion_fn
,
second_stage_localization_loss_weight
,
second_stage_localization_loss_weight
,
second_stage_classification_loss_weight
,
second_stage_classification_loss_weight
,
hard_example_miner
,
second_stage_classification_loss
,
second_stage_mask_prediction_loss_weight
=
1.0
,
hard_example_miner
=
None
,
parallel_iterations
=
16
):
parallel_iterations
=
16
):
"""FasterRCNNMetaArch Constructor.
"""FasterRCNNMetaArch Constructor.
...
@@ -225,10 +230,11 @@ class FasterRCNNMetaArch(model.DetectionModel):
...
@@ -225,10 +230,11 @@ class FasterRCNNMetaArch(model.DetectionModel):
include the background category, so if groundtruth labels take values
include the background category, so if groundtruth labels take values
in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
assigned classification targets can range from {0,... K}).
assigned classification targets can range from {0,... K}).
image_resizer_fn: A callable for image resizing. This callable always
image_resizer_fn: A callable for image resizing. This callable
takes a rank-3 image tensor (corresponding to a single image) and
takes a rank-3 image tensor of shape [height, width, channels]
returns a rank-3 image tensor, possibly with new spatial dimensions.
(corresponding to a single image) and returns a rank-3 image tensor,
See builders/image_resizer_builder.py.
possibly with new spatial dimensions. See
builders/image_resizer_builder.py.
feature_extractor: A FasterRCNNFeatureExtractor object.
feature_extractor: A FasterRCNNFeatureExtractor object.
first_stage_only: Whether to construct only the Region Proposal Network
first_stage_only: Whether to construct only the Region Proposal Network
(RPN) part of the model.
(RPN) part of the model.
...
@@ -295,19 +301,28 @@ class FasterRCNNMetaArch(model.DetectionModel):
...
@@ -295,19 +301,28 @@ class FasterRCNNMetaArch(model.DetectionModel):
second_stage_score_conversion_fn: Callable elementwise nonlinearity
second_stage_score_conversion_fn: Callable elementwise nonlinearity
(that takes tensors as inputs and returns tensors). This is usually
(that takes tensors as inputs and returns tensors). This is usually
used to convert logits to probabilities.
used to convert logits to probabilities.
second_stage_localization_loss_weight: A float
second_stage_localization_loss_weight: A float indicating the scale factor
second_stage_classification_loss_weight: A float
for second stage localization loss.
second_stage_classification_loss_weight: A float indicating the scale
factor for second stage classification loss.
second_stage_classification_loss: Classification loss used by the second
stage classifier. Either losses.WeightedSigmoidClassificationLoss or
losses.WeightedSoftmaxClassificationLoss.
second_stage_mask_prediction_loss_weight: A float indicating the scale
factor for second stage mask prediction loss. This is applicable only if
second stage box predictor is configured to predict masks.
hard_example_miner: A losses.HardExampleMiner object (can be None).
hard_example_miner: A losses.HardExampleMiner object (can be None).
parallel_iterations: (Optional) The number of iterations allowed to run
parallel_iterations: (Optional) The number of iterations allowed to run
in parallel for calls to tf.map_fn.
in parallel for calls to tf.map_fn.
Raises:
Raises:
ValueError: If `second_stage_batch_size` > `first_stage_max_proposals`
ValueError: If `second_stage_batch_size` > `first_stage_max_proposals` at
training time.
ValueError: If first_stage_anchor_generator is not of type
ValueError: If first_stage_anchor_generator is not of type
grid_anchor_generator.GridAnchorGenerator.
grid_anchor_generator.GridAnchorGenerator.
"""
"""
super
(
FasterRCNNMetaArch
,
self
).
__init__
(
num_classes
=
num_classes
)
super
(
FasterRCNNMetaArch
,
self
).
__init__
(
num_classes
=
num_classes
)
if
second_stage_batch_size
>
first_stage_max_proposals
:
if
is_training
and
second_stage_batch_size
>
first_stage_max_proposals
:
raise
ValueError
(
'second_stage_batch_size should be no greater than '
raise
ValueError
(
'second_stage_batch_size should be no greater than '
'first_stage_max_proposals.'
)
'first_stage_max_proposals.'
)
if
not
isinstance
(
first_stage_anchor_generator
,
if
not
isinstance
(
first_stage_anchor_generator
,
...
@@ -375,10 +390,13 @@ class FasterRCNNMetaArch(model.DetectionModel):
...
@@ -375,10 +390,13 @@ class FasterRCNNMetaArch(model.DetectionModel):
self
.
_second_stage_localization_loss
=
(
self
.
_second_stage_localization_loss
=
(
losses
.
WeightedSmoothL1LocalizationLoss
(
anchorwise_output
=
True
))
losses
.
WeightedSmoothL1LocalizationLoss
(
anchorwise_output
=
True
))
self
.
_second_stage_classification_loss
=
(
self
.
_second_stage_classification_loss
=
second_stage_classification_loss
losses
.
WeightedSoftmaxClassificationLoss
(
anchorwise_output
=
True
))
self
.
_second_stage_mask_loss
=
(
losses
.
WeightedSigmoidClassificationLoss
(
anchorwise_output
=
True
))
self
.
_second_stage_loc_loss_weight
=
second_stage_localization_loss_weight
self
.
_second_stage_loc_loss_weight
=
second_stage_localization_loss_weight
self
.
_second_stage_cls_loss_weight
=
second_stage_classification_loss_weight
self
.
_second_stage_cls_loss_weight
=
second_stage_classification_loss_weight
self
.
_second_stage_mask_loss_weight
=
(
second_stage_mask_prediction_loss_weight
)
self
.
_hard_example_miner
=
hard_example_miner
self
.
_hard_example_miner
=
hard_example_miner
self
.
_parallel_iterations
=
parallel_iterations
self
.
_parallel_iterations
=
parallel_iterations
...
@@ -491,7 +509,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
...
@@ -491,7 +509,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
[total_num_proposals, num_classes, 4] representing predicted
[total_num_proposals, num_classes, 4] representing predicted
(final) refined box encodings, where
(final) refined box encodings, where
total_num_proposals=batch_size*self._max_num_proposals
total_num_proposals=batch_size*self._max_num_proposals
8) class_predictions_with_background: a
2
-D tensor with shape
8) class_predictions_with_background: a
3
-D tensor with shape
[total_num_proposals, num_classes + 1] containing class
[total_num_proposals, num_classes + 1] containing class
predictions (logits) for each of the anchors, where
predictions (logits) for each of the anchors, where
total_num_proposals=batch_size*self._max_num_proposals.
total_num_proposals=batch_size*self._max_num_proposals.
...
@@ -504,7 +522,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
...
@@ -504,7 +522,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
`self.max_num_proposals` for each image.
`self.max_num_proposals` for each image.
10) proposal_boxes: A float32 tensor of shape
10) proposal_boxes: A float32 tensor of shape
[batch_size, self.max_num_proposals, 4] representing
[batch_size, self.max_num_proposals, 4] representing
decoded proposal bounding boxes
(
in absolute coordinates
)
.
decoded proposal bounding boxes in absolute coordinates.
11) mask_predictions: (optional) a 4-D tensor with shape
11) mask_predictions: (optional) a 4-D tensor with shape
[total_num_padded_proposals, num_classes, mask_height, mask_width]
[total_num_padded_proposals, num_classes, mask_height, mask_width]
containing instance mask predictions.
containing instance mask predictions.
...
@@ -553,10 +571,10 @@ class FasterRCNNMetaArch(model.DetectionModel):
...
@@ -553,10 +571,10 @@ class FasterRCNNMetaArch(model.DetectionModel):
"""Predicts the output tensors from second stage of Faster R-CNN.
"""Predicts the output tensors from second stage of Faster R-CNN.
Args:
Args:
rpn_box_encodings:
3
-D float tensor of shape
rpn_box_encodings:
4
-D float tensor of shape
[batch_size, num_valid_anchors, self._box_coder.code_size] containing
[batch_size, num_valid_anchors, self._box_coder.code_size] containing
predicted boxes.
predicted boxes.
rpn_objectness_predictions_with_background:
3
-D float tensor of shape
rpn_objectness_predictions_with_background:
2
-D float tensor of shape
[batch_size, num_valid_anchors, 2] containing class
[batch_size, num_valid_anchors, 2] containing class
predictions (logits) for each of the anchors. Note that this
predictions (logits) for each of the anchors. Note that this
tensor *includes* background class predictions (at class index 0).
tensor *includes* background class predictions (at class index 0).
...
@@ -573,7 +591,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
...
@@ -573,7 +591,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
[total_num_proposals, num_classes, 4] representing predicted
[total_num_proposals, num_classes, 4] representing predicted
(final) refined box encodings, where
(final) refined box encodings, where
total_num_proposals=batch_size*self._max_num_proposals
total_num_proposals=batch_size*self._max_num_proposals
2) class_predictions_with_background: a
2
-D tensor with shape
2) class_predictions_with_background: a
3
-D tensor with shape
[total_num_proposals, num_classes + 1] containing class
[total_num_proposals, num_classes + 1] containing class
predictions (logits) for each of the anchors, where
predictions (logits) for each of the anchors, where
total_num_proposals=batch_size*self._max_num_proposals.
total_num_proposals=batch_size*self._max_num_proposals.
...
@@ -586,8 +604,16 @@ class FasterRCNNMetaArch(model.DetectionModel):
...
@@ -586,8 +604,16 @@ class FasterRCNNMetaArch(model.DetectionModel):
`self.max_num_proposals` for each image.
`self.max_num_proposals` for each image.
4) proposal_boxes: A float32 tensor of shape
4) proposal_boxes: A float32 tensor of shape
[batch_size, self.max_num_proposals, 4] representing
[batch_size, self.max_num_proposals, 4] representing
decoded proposal bounding boxes (in absolute coordinates).
decoded proposal bounding boxes in absolute coordinates.
5) mask_predictions: (optional) a 4-D tensor with shape
5) proposal_boxes_normalized: A float32 tensor of shape
[batch_size, self.max_num_proposals, 4] representing decoded proposal
bounding boxes in normalized coordinates. Can be used to override the
boxes proposed by the RPN, thus enabling one to extract features and
get box classification and prediction for externally selected areas
of the image.
6) box_classifier_features: a 4-D float32 tensor representing the
features for each proposal.
7) mask_predictions: (optional) a 4-D tensor with shape
[total_num_padded_proposals, num_classes, mask_height, mask_width]
[total_num_padded_proposals, num_classes, mask_height, mask_width]
containing instance mask predictions.
containing instance mask predictions.
"""
"""
...
@@ -622,7 +648,14 @@ class FasterRCNNMetaArch(model.DetectionModel):
...
@@ -622,7 +648,14 @@ class FasterRCNNMetaArch(model.DetectionModel):
class_predictions_with_background
,
class_predictions_with_background
,
'num_proposals'
:
num_proposals
,
'num_proposals'
:
num_proposals
,
'proposal_boxes'
:
absolute_proposal_boxes
,
'proposal_boxes'
:
absolute_proposal_boxes
,
'box_classifier_features'
:
box_classifier_features
,
'proposal_boxes_normalized'
:
proposal_boxes_normalized
,
}
}
if
box_predictor
.
MASK_PREDICTIONS
in
box_predictions
:
mask_predictions
=
tf
.
squeeze
(
box_predictions
[
box_predictor
.
MASK_PREDICTIONS
],
axis
=
1
)
prediction_dict
[
'mask_predictions'
]
=
mask_predictions
return
prediction_dict
return
prediction_dict
def
_extract_rpn_feature_maps
(
self
,
preprocessed_inputs
):
def
_extract_rpn_feature_maps
(
self
,
preprocessed_inputs
):
...
@@ -729,10 +762,10 @@ class FasterRCNNMetaArch(model.DetectionModel):
...
@@ -729,10 +762,10 @@ class FasterRCNNMetaArch(model.DetectionModel):
extent of the window to clip/prune to.
extent of the window to clip/prune to.
Returns:
Returns:
box_encodings:
3
-D float tensor of shape
box_encodings:
4
-D float tensor of shape
[batch_size, num_valid_anchors, self._box_coder.code_size] containing
[batch_size, num_valid_anchors, self._box_coder.code_size] containing
predicted boxes, where num_valid_anchors <= num_anchors
predicted boxes, where num_valid_anchors <= num_anchors
objectness_predictions_with_background:
3
-D float tensor of shape
objectness_predictions_with_background:
2
-D float tensor of shape
[batch_size, num_valid_anchors, 2] containing class
[batch_size, num_valid_anchors, 2] containing class
predictions (logits) for each of the anchors, where
predictions (logits) for each of the anchors, where
num_valid_anchors <= num_anchors. Note that this
num_valid_anchors <= num_anchors. Note that this
...
@@ -813,7 +846,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
...
@@ -813,7 +846,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
return
{
return
{
'detection_boxes'
:
proposal_boxes
,
'detection_boxes'
:
proposal_boxes
,
'detection_scores'
:
proposal_scores
,
'detection_scores'
:
proposal_scores
,
'num_detections'
:
num_proposals
'num_detections'
:
tf
.
to_float
(
num_proposals
)
}
}
with
tf
.
name_scope
(
'SecondStagePostprocessor'
):
with
tf
.
name_scope
(
'SecondStagePostprocessor'
):
mask_predictions
=
prediction_dict
.
get
(
box_predictor
.
MASK_PREDICTIONS
)
mask_predictions
=
prediction_dict
.
get
(
box_predictor
.
MASK_PREDICTIONS
)
...
@@ -877,7 +910,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
...
@@ -877,7 +910,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
rpn_objectness_softmax_without_background
=
tf
.
nn
.
softmax
(
rpn_objectness_softmax_without_background
=
tf
.
nn
.
softmax
(
rpn_objectness_predictions_with_background_batch
)[:,
:,
1
]
rpn_objectness_predictions_with_background_batch
)[:,
:,
1
]
clip_window
=
tf
.
to_float
(
tf
.
stack
([
0
,
0
,
image_shape
[
1
],
image_shape
[
2
]]))
clip_window
=
tf
.
to_float
(
tf
.
stack
([
0
,
0
,
image_shape
[
1
],
image_shape
[
2
]]))
(
proposal_boxes
,
proposal_scores
,
_
,
_
,
(
proposal_boxes
,
proposal_scores
,
_
,
_
,
_
,
num_proposals
)
=
post_processing
.
batch_multiclass_non_max_suppression
(
num_proposals
)
=
post_processing
.
batch_multiclass_non_max_suppression
(
tf
.
expand_dims
(
proposal_boxes
,
axis
=
2
),
tf
.
expand_dims
(
proposal_boxes
,
axis
=
2
),
tf
.
expand_dims
(
rpn_objectness_softmax_without_background
,
tf
.
expand_dims
(
rpn_objectness_softmax_without_background
,
...
@@ -891,7 +924,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
...
@@ -891,7 +924,7 @@ class FasterRCNNMetaArch(model.DetectionModel):
proposal_boxes
=
tf
.
stop_gradient
(
proposal_boxes
)
proposal_boxes
=
tf
.
stop_gradient
(
proposal_boxes
)
if
not
self
.
_hard_example_miner
:
if
not
self
.
_hard_example_miner
:
(
groundtruth_boxlists
,
groundtruth_classes_with_background_list
,
(
groundtruth_boxlists
,
groundtruth_classes_with_background_list
,
)
=
self
.
_format_groundtruth_data
(
image_shape
)
_
)
=
self
.
_format_groundtruth_data
(
image_shape
)
(
proposal_boxes
,
proposal_scores
,
(
proposal_boxes
,
proposal_scores
,
num_proposals
)
=
self
.
_unpad_proposals_and_sample_box_classifier_batch
(
num_proposals
)
=
self
.
_unpad_proposals_and_sample_box_classifier_batch
(
proposal_boxes
,
proposal_scores
,
num_proposals
,
proposal_boxes
,
proposal_scores
,
num_proposals
,
...
@@ -998,6 +1031,8 @@ class FasterRCNNMetaArch(model.DetectionModel):
...
@@ -998,6 +1031,8 @@ class FasterRCNNMetaArch(model.DetectionModel):
for target assignment, we:
for target assignment, we:
1) convert boxes to absolute coordinates,
1) convert boxes to absolute coordinates,
2) add a background class at class index 0
2) add a background class at class index 0
3) groundtruth instance masks, if available, are resized to match
image_shape.
Args:
Args:
image_shape: A 1-D int32 tensor of shape [4] representing the shape of the
image_shape: A 1-D int32 tensor of shape [4] representing the shape of the
...
@@ -1009,6 +1044,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
...
@@ -1009,6 +1044,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
groundtruth_classes_with_background_list: A list of 2-D one-hot
groundtruth_classes_with_background_list: A list of 2-D one-hot
(or k-hot) tensors of shape [num_boxes, num_classes+1] containing the
(or k-hot) tensors of shape [num_boxes, num_classes+1] containing the
class targets with the 0th index assumed to map to the background class.
class targets with the 0th index assumed to map to the background class.
groundtruth_masks_list: If present, a list of 3-D tf.float32 tensors of
shape [num_boxes, image_height, image_width] containing instance masks.
This is set to None if no masks exist in the provided groundtruth.
"""
"""
groundtruth_boxlists
=
[
groundtruth_boxlists
=
[
box_list_ops
.
to_absolute_coordinates
(
box_list_ops
.
to_absolute_coordinates
(
...
@@ -1019,7 +1057,22 @@ class FasterRCNNMetaArch(model.DetectionModel):
...
@@ -1019,7 +1057,22 @@ class FasterRCNNMetaArch(model.DetectionModel):
tf
.
pad
(
one_hot_encoding
,
[[
0
,
0
],
[
1
,
0
]],
mode
=
'CONSTANT'
))
tf
.
pad
(
one_hot_encoding
,
[[
0
,
0
],
[
1
,
0
]],
mode
=
'CONSTANT'
))
for
one_hot_encoding
in
self
.
groundtruth_lists
(
for
one_hot_encoding
in
self
.
groundtruth_lists
(
fields
.
BoxListFields
.
classes
)]
fields
.
BoxListFields
.
classes
)]
return
groundtruth_boxlists
,
groundtruth_classes_with_background_list
groundtruth_masks_list
=
self
.
_groundtruth_lists
.
get
(
fields
.
BoxListFields
.
masks
)
if
groundtruth_masks_list
is
not
None
:
resized_masks_list
=
[]
for
mask
in
groundtruth_masks_list
:
resized_4d_mask
=
tf
.
image
.
resize_images
(
tf
.
expand_dims
(
mask
,
axis
=
3
),
image_shape
[
1
:
3
],
method
=
tf
.
image
.
ResizeMethod
.
NEAREST_NEIGHBOR
,
align_corners
=
True
)
resized_masks_list
.
append
(
tf
.
squeeze
(
resized_4d_mask
,
axis
=
3
))
groundtruth_masks_list
=
resized_masks_list
return
(
groundtruth_boxlists
,
groundtruth_classes_with_background_list
,
groundtruth_masks_list
)
def
_sample_box_classifier_minibatch
(
self
,
def
_sample_box_classifier_minibatch
(
self
,
proposal_boxlist
,
proposal_boxlist
,
...
@@ -1100,29 +1153,26 @@ class FasterRCNNMetaArch(model.DetectionModel):
...
@@ -1100,29 +1153,26 @@ class FasterRCNNMetaArch(model.DetectionModel):
proposal_boxes
,
proposal_boxes
,
num_proposals
,
num_proposals
,
image_shape
,
image_shape
,
mask_predictions
=
None
,
mask_predictions
=
None
):
mask_threshold
=
0.5
):
"""Converts predictions from the second stage box classifier to detections.
"""Converts predictions from the second stage box classifier to detections.
Args:
Args:
refined_box_encodings: a 3-D tensor with shape
refined_box_encodings: a 3-D
float
tensor with shape
[total_num_padded_proposals, num_classes, 4] representing predicted
[total_num_padded_proposals, num_classes, 4] representing predicted
(final) refined box encodings.
(final) refined box encodings.
class_predictions_with_background: a 3-D tensor with shape
class_predictions_with_background: a 3-D tensor
float
with shape
[total_num_padded_proposals, num_classes + 1] containing class
[total_num_padded_proposals, num_classes + 1] containing class
predictions (logits) for each of the proposals. Note that this tensor
predictions (logits) for each of the proposals. Note that this tensor
*includes* background class predictions (at class index 0).
*includes* background class predictions (at class index 0).
proposal_boxes:
[batch_size, self.max_num_proposals, 4] representing
proposal_boxes:
a 3-D float tensor with shape
decoded proposal
bounding boxes.
[batch_size, self.max_num_proposals, 4] representing
decoded proposal
num_proposals: A Tensor of type `int32`. A 1-D tensor of shape [batch]
bounding boxes in absolute coordinates.
representing the number of proposals predicted for each image in
num_proposals: a 1-D int32 tensor of shape [batch] representing the number
the batch.
of proposals predicted for each image in
the batch.
image_shape: a 1-D tensor representing the input image shape.
image_shape: a 1-D
int32
tensor representing the input image shape.
mask_predictions: (optional) a 4-D tensor with shape
mask_predictions: (optional) a 4-D
float
tensor with shape
[total_num_padded_proposals, num_classes, mask_height, mask_width]
[total_num_padded_proposals, num_classes, mask_height, mask_width]
containing instance mask predictions.
containing instance mask prediction logits.
mask_threshold: a scalar threshold determining which mask values are
rounded to 0 or 1.
Returns:
Returns:
A dictionary containing:
A dictionary containing:
...
@@ -1131,7 +1181,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
...
@@ -1131,7 +1181,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
`detection_classes`: [batch, max_detections]
`detection_classes`: [batch, max_detections]
`num_detections`: [batch]
`num_detections`: [batch]
`detection_masks`:
`detection_masks`:
(optional) [batch, max_detections, mask_height, mask_width]
(optional) [batch, max_detections, mask_height, mask_width]. Note
that a pixel-wise sigmoid score converter is applied to the detection
masks.
"""
"""
refined_box_encodings_batch
=
tf
.
reshape
(
refined_box_encodings
,
refined_box_encodings_batch
=
tf
.
reshape
(
refined_box_encodings
,
[
-
1
,
self
.
max_num_proposals
,
[
-
1
,
self
.
max_num_proposals
,
...
@@ -1156,10 +1208,11 @@ class FasterRCNNMetaArch(model.DetectionModel):
...
@@ -1156,10 +1208,11 @@ class FasterRCNNMetaArch(model.DetectionModel):
if
mask_predictions
is
not
None
:
if
mask_predictions
is
not
None
:
mask_height
=
mask_predictions
.
shape
[
2
].
value
mask_height
=
mask_predictions
.
shape
[
2
].
value
mask_width
=
mask_predictions
.
shape
[
3
].
value
mask_width
=
mask_predictions
.
shape
[
3
].
value
mask_predictions
=
tf
.
sigmoid
(
mask_predictions
)
mask_predictions_batch
=
tf
.
reshape
(
mask_predictions_batch
=
tf
.
reshape
(
mask_predictions
,
[
-
1
,
self
.
max_num_proposals
,
mask_predictions
,
[
-
1
,
self
.
max_num_proposals
,
self
.
num_classes
,
mask_height
,
mask_width
])
self
.
num_classes
,
mask_height
,
mask_width
])
(
nmsed_boxes
,
nmsed_scores
,
nmsed_classes
,
nmsed_masks
,
(
nmsed_boxes
,
nmsed_scores
,
nmsed_classes
,
nmsed_masks
,
_
,
num_detections
)
=
self
.
_second_stage_nms_fn
(
num_detections
)
=
self
.
_second_stage_nms_fn
(
refined_decoded_boxes_batch
,
refined_decoded_boxes_batch
,
class_predictions_batch
,
class_predictions_batch
,
...
@@ -1173,26 +1226,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
...
@@ -1173,26 +1226,9 @@ class FasterRCNNMetaArch(model.DetectionModel):
'num_detections'
:
tf
.
to_float
(
num_detections
)}
'num_detections'
:
tf
.
to_float
(
num_detections
)}
if
nmsed_masks
is
not
None
:
if
nmsed_masks
is
not
None
:
detections
[
'detection_masks'
]
=
nmsed_masks
detections
[
'detection_masks'
]
=
nmsed_masks
if
mask_predictions
is
not
None
:
detections
[
'detection_masks'
]
=
tf
.
to_float
(
tf
.
greater_equal
(
detections
[
'detection_masks'
],
mask_threshold
))
return
detections
return
detections
def
_batch_decode_boxes
(
self
,
box_encodings
,
anchor_boxes
):
def
_batch_decode_boxes
(
self
,
box_encodings
,
anchor_boxes
):
"""Decode tensor of refined box encodings.
Args:
refined_box_encodings: a 4-D tensor with shape
[batch_size, max_num_proposals, num_classes, self._box_coder.code_size]
representing predicted (final) refined box encodings.
proposal_boxes: [batch_size, self.max_num_proposals, 4] representing
decoded proposal bounding boxes.
Returns:
refined_box_predictions: a [batch_size, max_num_proposals, num_classes, 4]
float tensor representing (padded) refined bounding box predictions
(for each image in batch, proposal and class).
"""
"""Decodes box encodings with respect to the anchor boxes.
"""Decodes box encodings with respect to the anchor boxes.
Args:
Args:
...
@@ -1246,7 +1282,8 @@ class FasterRCNNMetaArch(model.DetectionModel):
...
@@ -1246,7 +1282,8 @@ class FasterRCNNMetaArch(model.DetectionModel):
corresponding loss values.
corresponding loss values.
"""
"""
with
tf
.
name_scope
(
scope
,
'Loss'
,
prediction_dict
.
values
()):
with
tf
.
name_scope
(
scope
,
'Loss'
,
prediction_dict
.
values
()):
(
groundtruth_boxlists
,
groundtruth_classes_with_background_list
(
groundtruth_boxlists
,
groundtruth_classes_with_background_list
,
groundtruth_masks_list
)
=
self
.
_format_groundtruth_data
(
prediction_dict
[
'image_shape'
])
)
=
self
.
_format_groundtruth_data
(
prediction_dict
[
'image_shape'
])
loss_dict
=
self
.
_loss_rpn
(
loss_dict
=
self
.
_loss_rpn
(
prediction_dict
[
'rpn_box_encodings'
],
prediction_dict
[
'rpn_box_encodings'
],
...
@@ -1262,7 +1299,11 @@ class FasterRCNNMetaArch(model.DetectionModel):
...
@@ -1262,7 +1299,11 @@ class FasterRCNNMetaArch(model.DetectionModel):
prediction_dict
[
'proposal_boxes'
],
prediction_dict
[
'proposal_boxes'
],
prediction_dict
[
'num_proposals'
],
prediction_dict
[
'num_proposals'
],
groundtruth_boxlists
,
groundtruth_boxlists
,
groundtruth_classes_with_background_list
))
groundtruth_classes_with_background_list
,
prediction_dict
[
'image_shape'
],
prediction_dict
.
get
(
'mask_predictions'
),
groundtruth_masks_list
,
))
return
loss_dict
return
loss_dict
def
_loss_rpn
(
self
,
def
_loss_rpn
(
self
,
...
@@ -1278,10 +1319,10 @@ class FasterRCNNMetaArch(model.DetectionModel):
...
@@ -1278,10 +1319,10 @@ class FasterRCNNMetaArch(model.DetectionModel):
participate in the loss computation, and returns the RPN losses.
participate in the loss computation, and returns the RPN losses.
Args:
Args:
rpn_box_encodings: A
3
-D float tensor of shape
rpn_box_encodings: A
4
-D float tensor of shape
[batch_size, num_anchors, self._box_coder.code_size] containing
[batch_size, num_anchors, self._box_coder.code_size] containing
predicted proposal box encodings.
predicted proposal box encodings.
rpn_objectness_predictions_with_background: A
3
-D float tensor of shape
rpn_objectness_predictions_with_background: A
2
-D float tensor of shape
[batch_size, num_anchors, 2] containing objectness predictions
[batch_size, num_anchors, 2] containing objectness predictions
(logits) for each of the anchors with 0 corresponding to background
(logits) for each of the anchors with 0 corresponding to background
and 1 corresponding to object.
and 1 corresponding to object.
...
@@ -1334,12 +1375,14 @@ class FasterRCNNMetaArch(model.DetectionModel):
...
@@ -1334,12 +1375,14 @@ class FasterRCNNMetaArch(model.DetectionModel):
tf
.
reduce_sum
(
localization_losses
,
axis
=
1
)
/
normalizer
)
tf
.
reduce_sum
(
localization_losses
,
axis
=
1
)
/
normalizer
)
objectness_loss
=
tf
.
reduce_mean
(
objectness_loss
=
tf
.
reduce_mean
(
tf
.
reduce_sum
(
objectness_losses
,
axis
=
1
)
/
normalizer
)
tf
.
reduce_sum
(
objectness_losses
,
axis
=
1
)
/
normalizer
)
loss_dict
=
{
loss_dict
=
{}
'first_stage_localization_loss'
:
self
.
_first_stage_loc_loss_weight
*
localization_loss
,
with
tf
.
name_scope
(
'localization_loss'
):
'first_stage_objectness_loss'
:
loss_dict
[
'first_stage_localization_loss'
]
=
(
self
.
_first_stage_obj_loss_weight
*
objectness_loss
,
self
.
_first_stage_loc_loss_weight
*
localization_loss
)
}
with
tf
.
name_scope
(
'objectness_loss'
):
loss_dict
[
'first_stage_objectness_loss'
]
=
(
self
.
_first_stage_obj_loss_weight
*
objectness_loss
)
return
loss_dict
return
loss_dict
def
_loss_box_classifier
(
self
,
def
_loss_box_classifier
(
self
,
...
@@ -1348,17 +1391,23 @@ class FasterRCNNMetaArch(model.DetectionModel):
...
@@ -1348,17 +1391,23 @@ class FasterRCNNMetaArch(model.DetectionModel):
proposal_boxes
,
proposal_boxes
,
num_proposals
,
num_proposals
,
groundtruth_boxlists
,
groundtruth_boxlists
,
groundtruth_classes_with_background_list
):
groundtruth_classes_with_background_list
,
image_shape
,
prediction_masks
=
None
,
groundtruth_masks_list
=
None
):
"""Computes scalar box classifier loss tensors.
"""Computes scalar box classifier loss tensors.
Uses self._detector_target_assigner to obtain regression and classification
Uses self._detector_target_assigner to obtain regression and classification
targets for the second stage box classifier, optionally performs
targets for the second stage box classifier, optionally performs
hard mining, and returns losses. All losses are computed independently
hard mining, and returns losses. All losses are computed independently
for each image and then averaged across the batch.
for each image and then averaged across the batch.
Please note that for boxes and masks with multiple labels, the box
regression and mask prediction losses are only computed for one label.
This function assumes that the proposal boxes in the "padded" regions are
This function assumes that the proposal boxes in the "padded" regions are
actually zero (and thus should not be matched to).
actually zero (and thus should not be matched to).
Args:
Args:
refined_box_encodings: a 3-D tensor with shape
refined_box_encodings: a 3-D tensor with shape
[total_num_proposals, num_classes, box_coder.code_size] representing
[total_num_proposals, num_classes, box_coder.code_size] representing
...
@@ -1377,11 +1426,23 @@ class FasterRCNNMetaArch(model.DetectionModel):
...
@@ -1377,11 +1426,23 @@ class FasterRCNNMetaArch(model.DetectionModel):
groundtruth_classes_with_background_list: a list of 2-D one-hot
groundtruth_classes_with_background_list: a list of 2-D one-hot
(or k-hot) tensors of shape [num_boxes, num_classes + 1] containing the
(or k-hot) tensors of shape [num_boxes, num_classes + 1] containing the
class targets with the 0th index assumed to map to the background class.
class targets with the 0th index assumed to map to the background class.
image_shape: a 1-D tensor of shape [4] representing the image shape.
prediction_masks: an optional 4-D tensor with shape [total_num_proposals,
num_classes, mask_height, mask_width] containing the instance masks for
each box.
groundtruth_masks_list: an optional list of 3-D tensors of shape
[num_boxes, image_height, image_width] containing the instance masks for
each of the boxes.
Returns:
Returns:
a dictionary mapping loss keys ('second_stage_localization_loss',
a dictionary mapping loss keys ('second_stage_localization_loss',
'second_stage_classification_loss') to scalar tensors representing
'second_stage_classification_loss') to scalar tensors representing
corresponding loss values.
corresponding loss values.
Raises:
ValueError: if `predict_instance_masks` in
second_stage_mask_rcnn_box_predictor is True and
`groundtruth_masks_list` is not provided.
"""
"""
with
tf
.
name_scope
(
'BoxClassifierLoss'
):
with
tf
.
name_scope
(
'BoxClassifierLoss'
):
paddings_indicator
=
self
.
_padded_batched_proposals_indicator
(
paddings_indicator
=
self
.
_padded_batched_proposals_indicator
(
...
@@ -1409,9 +1470,20 @@ class FasterRCNNMetaArch(model.DetectionModel):
...
@@ -1409,9 +1470,20 @@ class FasterRCNNMetaArch(model.DetectionModel):
[
batch_size
*
self
.
max_num_proposals
,
-
1
])
[
batch_size
*
self
.
max_num_proposals
,
-
1
])
refined_box_encodings_with_background
=
tf
.
pad
(
refined_box_encodings_with_background
=
tf
.
pad
(
refined_box_encodings
,
[[
0
,
0
],
[
1
,
0
],
[
0
,
0
]])
refined_box_encodings
,
[[
0
,
0
],
[
1
,
0
],
[
0
,
0
]])
# For anchors with multiple labels, picks refined_location_encodings
# for just one class to avoid over-counting for regression loss and
# (optionally) mask loss.
one_hot_flat_cls_targets_with_background
=
tf
.
argmax
(
flat_cls_targets_with_background
,
axis
=
1
)
one_hot_flat_cls_targets_with_background
=
tf
.
one_hot
(
one_hot_flat_cls_targets_with_background
,
flat_cls_targets_with_background
.
get_shape
()[
1
])
refined_box_encodings_masked_by_class_targets
=
tf
.
boolean_mask
(
refined_box_encodings_masked_by_class_targets
=
tf
.
boolean_mask
(
refined_box_encodings_with_background
,
refined_box_encodings_with_background
,
tf
.
greater
(
flat_cls_targets_with_background
,
0
))
tf
.
greater
(
one_hot_flat_cls_targets_with_background
,
0
))
class_predictions_with_background
=
tf
.
reshape
(
class_predictions_with_background
,
[
batch_size
,
self
.
max_num_proposals
,
-
1
])
reshaped_refined_box_encodings
=
tf
.
reshape
(
reshaped_refined_box_encodings
=
tf
.
reshape
(
refined_box_encodings_masked_by_class_targets
,
refined_box_encodings_masked_by_class_targets
,
[
batch_size
,
-
1
,
4
])
[
batch_size
,
-
1
,
4
])
...
@@ -1433,12 +1505,82 @@ class FasterRCNNMetaArch(model.DetectionModel):
...
@@ -1433,12 +1505,82 @@ class FasterRCNNMetaArch(model.DetectionModel):
)
=
self
.
_unpad_proposals_and_apply_hard_mining
(
)
=
self
.
_unpad_proposals_and_apply_hard_mining
(
proposal_boxlists
,
second_stage_loc_losses
,
proposal_boxlists
,
second_stage_loc_losses
,
second_stage_cls_losses
,
num_proposals
)
second_stage_cls_losses
,
num_proposals
)
loss_dict
=
{
loss_dict
=
{}
'second_stage_localization_loss'
:
with
tf
.
name_scope
(
'localization_loss'
):
(
self
.
_second_stage_loc_loss_weight
*
second_stage_loc_loss
),
loss_dict
[
'second_stage_localization_loss'
]
=
(
'second_stage_classification_loss'
:
self
.
_second_stage_loc_loss_weight
*
second_stage_loc_loss
)
(
self
.
_second_stage_cls_loss_weight
*
second_stage_cls_loss
),
}
with
tf
.
name_scope
(
'classification_loss'
):
loss_dict
[
'second_stage_classification_loss'
]
=
(
self
.
_second_stage_cls_loss_weight
*
second_stage_cls_loss
)
second_stage_mask_loss
=
None
if
prediction_masks
is
not
None
:
if
groundtruth_masks_list
is
None
:
raise
ValueError
(
'Groundtruth instance masks not provided. '
'Please configure input reader.'
)
# Create a new target assigner that matches the proposals to groundtruth
# and returns the mask targets.
# TODO: Move `unmatched_cls_target` from constructor to assign function.
# This will enable reuse of a single target assigner for both class
# targets and mask targets.
mask_target_assigner
=
target_assigner
.
create_target_assigner
(
'FasterRCNN'
,
'detection'
,
unmatched_cls_target
=
tf
.
zeros
(
image_shape
[
1
:
3
],
dtype
=
tf
.
float32
))
(
batch_mask_targets
,
_
,
_
,
batch_mask_target_weights
,
_
)
=
target_assigner
.
batch_assign_targets
(
mask_target_assigner
,
proposal_boxlists
,
groundtruth_boxlists
,
groundtruth_masks_list
)
# Pad the prediction_masks with to add zeros for background class to be
# consistent with class predictions.
prediction_masks_with_background
=
tf
.
pad
(
prediction_masks
,
[[
0
,
0
],
[
1
,
0
],
[
0
,
0
],
[
0
,
0
]])
prediction_masks_masked_by_class_targets
=
tf
.
boolean_mask
(
prediction_masks_with_background
,
tf
.
greater
(
one_hot_flat_cls_targets_with_background
,
0
))
mask_height
=
prediction_masks
.
shape
[
2
].
value
mask_width
=
prediction_masks
.
shape
[
3
].
value
reshaped_prediction_masks
=
tf
.
reshape
(
prediction_masks_masked_by_class_targets
,
[
batch_size
,
-
1
,
mask_height
*
mask_width
])
batch_mask_targets_shape
=
tf
.
shape
(
batch_mask_targets
)
flat_gt_masks
=
tf
.
reshape
(
batch_mask_targets
,
[
-
1
,
batch_mask_targets_shape
[
2
],
batch_mask_targets_shape
[
3
]])
# Use normalized proposals to crop mask targets from image masks.
flat_normalized_proposals
=
box_list_ops
.
to_normalized_coordinates
(
box_list
.
BoxList
(
tf
.
reshape
(
proposal_boxes
,
[
-
1
,
4
])),
image_shape
[
1
],
image_shape
[
2
]).
get
()
flat_cropped_gt_mask
=
tf
.
image
.
crop_and_resize
(
tf
.
expand_dims
(
flat_gt_masks
,
-
1
),
flat_normalized_proposals
,
tf
.
range
(
flat_normalized_proposals
.
shape
[
0
].
value
),
[
mask_height
,
mask_width
])
batch_cropped_gt_mask
=
tf
.
reshape
(
flat_cropped_gt_mask
,
[
batch_size
,
-
1
,
mask_height
*
mask_width
])
second_stage_mask_losses
=
self
.
_second_stage_mask_loss
(
reshaped_prediction_masks
,
batch_cropped_gt_mask
,
weights
=
batch_mask_target_weights
)
/
(
mask_height
*
mask_width
*
tf
.
maximum
(
tf
.
reduce_sum
(
batch_mask_target_weights
,
axis
=
1
,
keep_dims
=
True
),
tf
.
ones
((
batch_size
,
1
))))
second_stage_mask_loss
=
tf
.
reduce_sum
(
tf
.
boolean_mask
(
second_stage_mask_losses
,
paddings_indicator
))
if
second_stage_mask_loss
is
not
None
:
with
tf
.
name_scope
(
'mask_loss'
):
loss_dict
[
'second_stage_mask_loss'
]
=
(
self
.
_second_stage_mask_loss_weight
*
second_stage_mask_loss
)
return
loss_dict
return
loss_dict
def
_padded_batched_proposals_indicator
(
self
,
def
_padded_batched_proposals_indicator
(
self
,
...
...
research/object_detection/meta_architectures/faster_rcnn_meta_arch_test.py
View file @
ff88581a
...
@@ -15,6 +15,7 @@
...
@@ -15,6 +15,7 @@
"""Tests for object_detection.meta_architectures.faster_rcnn_meta_arch."""
"""Tests for object_detection.meta_architectures.faster_rcnn_meta_arch."""
import
numpy
as
np
import
tensorflow
as
tf
import
tensorflow
as
tf
from
object_detection.meta_architectures
import
faster_rcnn_meta_arch_test_lib
from
object_detection.meta_architectures
import
faster_rcnn_meta_arch_test_lib
...
@@ -46,19 +47,19 @@ class FasterRCNNMetaArchTest(
...
@@ -46,19 +47,19 @@ class FasterRCNNMetaArchTest(
mask_height
=
2
mask_height
=
2
mask_width
=
2
mask_width
=
2
mask_predictions
=
.
6
*
tf
.
ones
(
mask_predictions
=
30
.
*
tf
.
ones
(
[
total_num_padded_proposals
,
model
.
num_classes
,
[
total_num_padded_proposals
,
model
.
num_classes
,
mask_height
,
mask_width
],
dtype
=
tf
.
float32
)
mask_height
,
mask_width
],
dtype
=
tf
.
float32
)
exp_detection_masks
=
[[[[
1
,
1
],
[
1
,
1
]],
exp_detection_masks
=
np
.
array
(
[[[[
1
,
1
],
[
1
,
1
]],
[[
1
,
1
],
[
1
,
1
]],
[[
1
,
1
],
[
1
,
1
]],
[[
1
,
1
],
[
1
,
1
]],
[[
1
,
1
],
[
1
,
1
]],
[[
1
,
1
],
[
1
,
1
]],
[[
1
,
1
],
[
1
,
1
]],
[[
1
,
1
],
[
1
,
1
]]],
[[
1
,
1
],
[
1
,
1
]]],
[[[
1
,
1
],
[
1
,
1
]],
[[[
1
,
1
],
[
1
,
1
]],
[[
1
,
1
],
[
1
,
1
]],
[[
1
,
1
],
[
1
,
1
]],
[[
1
,
1
],
[
1
,
1
]],
[[
1
,
1
],
[
1
,
1
]],
[[
1
,
1
],
[
1
,
1
]],
[[
1
,
1
],
[
1
,
1
]],
[[
0
,
0
],
[
0
,
0
]]]]
[[
0
,
0
],
[
0
,
0
]]]]
)
detections
=
model
.
postprocess
({
detections
=
model
.
postprocess
({
'refined_box_encodings'
:
refined_box_encodings
,
'refined_box_encodings'
:
refined_box_encodings
,
...
@@ -79,6 +80,17 @@ class FasterRCNNMetaArchTest(
...
@@ -79,6 +80,17 @@ class FasterRCNNMetaArchTest(
self
.
assertAllClose
(
detections_out
[
'detection_masks'
],
self
.
assertAllClose
(
detections_out
[
'detection_masks'
],
exp_detection_masks
)
exp_detection_masks
)
def
_get_box_classifier_features_shape
(
self
,
image_size
,
batch_size
,
max_num_proposals
,
initial_crop_size
,
maxpool_stride
,
num_features
):
return
(
batch_size
*
max_num_proposals
,
initial_crop_size
/
maxpool_stride
,
initial_crop_size
/
maxpool_stride
,
num_features
)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
tf
.
test
.
main
()
tf
.
test
.
main
()
research/object_detection/meta_architectures/faster_rcnn_meta_arch_test_lib.py
View file @
ff88581a
...
@@ -113,7 +113,8 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
...
@@ -113,7 +113,8 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
second_stage_batch_size
,
second_stage_batch_size
,
first_stage_max_proposals
=
8
,
first_stage_max_proposals
=
8
,
num_classes
=
2
,
num_classes
=
2
,
hard_mining
=
False
):
hard_mining
=
False
,
softmax_second_stage_classification_loss
=
True
):
def
image_resizer_fn
(
image
):
def
image_resizer_fn
(
image
):
return
tf
.
identity
(
image
)
return
tf
.
identity
(
image
)
...
@@ -178,6 +179,12 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
...
@@ -178,6 +179,12 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
second_stage_score_conversion_fn
=
tf
.
identity
second_stage_score_conversion_fn
=
tf
.
identity
second_stage_localization_loss_weight
=
1.0
second_stage_localization_loss_weight
=
1.0
second_stage_classification_loss_weight
=
1.0
second_stage_classification_loss_weight
=
1.0
if
softmax_second_stage_classification_loss
:
second_stage_classification_loss
=
(
losses
.
WeightedSoftmaxClassificationLoss
(
anchorwise_output
=
True
))
else
:
second_stage_classification_loss
=
(
losses
.
WeightedSigmoidClassificationLoss
(
anchorwise_output
=
True
))
hard_example_miner
=
None
hard_example_miner
=
None
if
hard_mining
:
if
hard_mining
:
...
@@ -221,52 +228,68 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
...
@@ -221,52 +228,68 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
second_stage_localization_loss_weight
,
second_stage_localization_loss_weight
,
'second_stage_classification_loss_weight'
:
'second_stage_classification_loss_weight'
:
second_stage_classification_loss_weight
,
second_stage_classification_loss_weight
,
'second_stage_classification_loss'
:
second_stage_classification_loss
,
'hard_example_miner'
:
hard_example_miner
}
'hard_example_miner'
:
hard_example_miner
}
return
self
.
_get_model
(
self
.
_get_second_stage_box_predictor
(
return
self
.
_get_model
(
self
.
_get_second_stage_box_predictor
(
num_classes
=
num_classes
,
is_training
=
is_training
),
**
common_kwargs
)
num_classes
=
num_classes
,
is_training
=
is_training
),
**
common_kwargs
)
def
test_predict_correct_shapes_in_inference_mode_
both
_stage
s
(
def
test_predict_
gives_
correct_shapes_in_inference_mode_
first
_stage
_only
(
self
):
self
):
batch_size
=
2
test_graph
=
tf
.
Graph
()
image_size
=
10
with
test_graph
.
as_default
():
input_shapes
=
[(
batch_size
,
image_size
,
image_size
,
3
),
model
=
self
.
_build_model
(
(
None
,
image_size
,
image_size
,
3
),
is_training
=
False
,
first_stage_only
=
True
,
second_stage_batch_size
=
2
)
(
batch_size
,
None
,
None
,
3
),
batch_size
=
2
(
None
,
None
,
None
,
3
)]
height
=
10
expected_num_anchors
=
image_size
*
image_size
*
3
*
3
width
=
12
expected_shapes
=
{
input_image_shape
=
(
batch_size
,
height
,
width
,
3
)
'rpn_box_predictor_features'
:
(
2
,
image_size
,
image_size
,
512
),
preprocessed_inputs
=
tf
.
placeholder
(
dtype
=
tf
.
float32
,
'rpn_features_to_crop'
:
(
2
,
image_size
,
image_size
,
3
),
shape
=
(
batch_size
,
None
,
None
,
3
))
'image_shape'
:
(
4
,),
prediction_dict
=
model
.
predict
(
preprocessed_inputs
)
'rpn_box_encodings'
:
(
2
,
expected_num_anchors
,
4
),
'rpn_objectness_predictions_with_background'
:
# In inference mode, anchors are clipped to the image window, but not
(
2
,
expected_num_anchors
,
2
),
# pruned. Since MockFasterRCNN.extract_proposal_features returns a
'anchors'
:
(
expected_num_anchors
,
4
),
# tensor with the same shape as its input, the expected number of anchors
'refined_box_encodings'
:
(
2
*
8
,
2
,
4
),
# is height * width * the number of anchors per location (i.e. 3x3).
'class_predictions_with_background'
:
(
2
*
8
,
2
+
1
),
expected_num_anchors
=
height
*
width
*
3
*
3
'num_proposals'
:
(
2
,),
expected_output_keys
=
set
([
'proposal_boxes'
:
(
2
,
8
,
4
),
'rpn_box_predictor_features'
,
'rpn_features_to_crop'
,
'image_shape'
,
}
'rpn_box_encodings'
,
'rpn_objectness_predictions_with_background'
,
for
input_shape
in
input_shapes
:
'anchors'
])
test_graph
=
tf
.
Graph
()
expected_output_shapes
=
{
with
test_graph
.
as_default
():
'rpn_box_predictor_features'
:
(
batch_size
,
height
,
width
,
512
),
model
=
self
.
_build_model
(
'rpn_features_to_crop'
:
(
batch_size
,
height
,
width
,
3
),
is_training
=
False
,
first_stage_only
=
False
,
'rpn_box_encodings'
:
(
batch_size
,
expected_num_anchors
,
4
),
second_stage_batch_size
=
2
)
'rpn_objectness_predictions_with_background'
:
preprocessed_inputs
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
input_shape
)
(
batch_size
,
expected_num_anchors
,
2
),
result_tensor_dict
=
model
.
predict
(
preprocessed_inputs
)
'anchors'
:
(
expected_num_anchors
,
4
)
init_op
=
tf
.
global_variables_initializer
()
}
with
self
.
test_session
(
graph
=
test_graph
)
as
sess
:
init_op
=
tf
.
global_variables_initializer
()
with
self
.
test_session
()
as
sess
:
sess
.
run
(
init_op
)
sess
.
run
(
init_op
)
tensor_dict_out
=
sess
.
run
(
result_tensor_dict
,
feed_dict
=
{
prediction_out
=
sess
.
run
(
prediction_dict
,
preprocessed_inputs
:
feed_dict
=
{
np
.
zeros
((
batch_size
,
image_size
,
image_size
,
3
))})
preprocessed_inputs
:
self
.
assertEqual
(
set
(
tensor_dict_out
.
keys
()),
np
.
zeros
(
input_image_shape
)
set
(
expected_shapes
.
keys
()))
})
for
key
in
expected_shapes
:
self
.
assertAllEqual
(
tensor_dict_out
[
key
].
shape
,
expected_shapes
[
key
])
self
.
assertEqual
(
set
(
prediction_out
.
keys
()),
expected_output_keys
)
self
.
assertAllEqual
(
prediction_out
[
'image_shape'
],
input_image_shape
)
for
output_key
,
expected_shape
in
expected_output_shapes
.
items
():
self
.
assertAllEqual
(
prediction_out
[
output_key
].
shape
,
expected_shape
)
# Check that anchors are clipped to window.
anchors
=
prediction_out
[
'anchors'
]
self
.
assertTrue
(
np
.
all
(
np
.
greater_equal
(
anchors
,
0
)))
self
.
assertTrue
(
np
.
all
(
np
.
less_equal
(
anchors
[:,
0
],
height
)))
self
.
assertTrue
(
np
.
all
(
np
.
less_equal
(
anchors
[:,
1
],
width
)))
self
.
assertTrue
(
np
.
all
(
np
.
less_equal
(
anchors
[:,
2
],
height
)))
self
.
assertTrue
(
np
.
all
(
np
.
less_equal
(
anchors
[:,
3
],
width
)))
def
test_predict_gives_valid_anchors_in_training_mode_first_stage_only
(
self
):
def
test_predict_gives_valid_anchors_in_training_mode_first_stage_only
(
self
):
test_graph
=
tf
.
Graph
()
test_graph
=
tf
.
Graph
()
...
@@ -321,48 +344,73 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
...
@@ -321,48 +344,73 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
prediction_out
[
'rpn_objectness_predictions_with_background'
].
shape
,
prediction_out
[
'rpn_objectness_predictions_with_background'
].
shape
,
(
batch_size
,
num_anchors_out
,
2
))
(
batch_size
,
num_anchors_out
,
2
))
def
test_predict_gives_correct_shapes_in_inference_mode_both_stages
(
self
):
def
test_predict_correct_shapes_in_inference_mode_both_stages
(
test_graph
=
tf
.
Graph
()
self
):
with
test_graph
.
as_default
():
batch_size
=
2
model
=
self
.
_build_model
(
image_size
=
10
is_training
=
False
,
first_stage_only
=
False
,
second_stage_batch_size
=
2
)
max_num_proposals
=
8
batch_size
=
2
initial_crop_size
=
3
image_size
=
10
maxpool_stride
=
1
image_shape
=
(
batch_size
,
image_size
,
image_size
,
3
)
preprocessed_inputs
=
tf
.
zeros
(
image_shape
,
dtype
=
tf
.
float32
)
result_tensor_dict
=
model
.
predict
(
preprocessed_inputs
)
expected_num_anchors
=
image_size
*
image_size
*
3
*
3
expected_shapes
=
{
input_shapes
=
[(
batch_size
,
image_size
,
image_size
,
3
),
'rpn_box_predictor_features'
:
(
None
,
image_size
,
image_size
,
3
),
(
2
,
image_size
,
image_size
,
512
),
(
batch_size
,
None
,
None
,
3
),
'rpn_features_to_crop'
:
(
2
,
image_size
,
image_size
,
3
),
(
None
,
None
,
None
,
3
)]
'image_shape'
:
(
4
,),
expected_num_anchors
=
image_size
*
image_size
*
3
*
3
'rpn_box_encodings'
:
(
2
,
expected_num_anchors
,
4
),
expected_shapes
=
{
'rpn_objectness_predictions_with_background'
:
'rpn_box_predictor_features'
:
(
2
,
expected_num_anchors
,
2
),
(
2
,
image_size
,
image_size
,
512
),
'anchors'
:
(
expected_num_anchors
,
4
),
'rpn_features_to_crop'
:
(
2
,
image_size
,
image_size
,
3
),
'refined_box_encodings'
:
(
2
*
8
,
2
,
4
),
'image_shape'
:
(
4
,),
'class_predictions_with_background'
:
(
2
*
8
,
2
+
1
),
'rpn_box_encodings'
:
(
2
,
expected_num_anchors
,
4
),
'num_proposals'
:
(
2
,),
'rpn_objectness_predictions_with_background'
:
'proposal_boxes'
:
(
2
,
8
,
4
),
(
2
,
expected_num_anchors
,
2
),
}
'anchors'
:
(
expected_num_anchors
,
4
),
init_op
=
tf
.
global_variables_initializer
()
'refined_box_encodings'
:
(
2
*
max_num_proposals
,
2
,
4
),
with
self
.
test_session
()
as
sess
:
'class_predictions_with_background'
:
(
2
*
max_num_proposals
,
2
+
1
),
'num_proposals'
:
(
2
,),
'proposal_boxes'
:
(
2
,
max_num_proposals
,
4
),
'proposal_boxes_normalized'
:
(
2
,
max_num_proposals
,
4
),
'box_classifier_features'
:
self
.
_get_box_classifier_features_shape
(
image_size
,
batch_size
,
max_num_proposals
,
initial_crop_size
,
maxpool_stride
,
3
)
}
for
input_shape
in
input_shapes
:
test_graph
=
tf
.
Graph
()
with
test_graph
.
as_default
():
model
=
self
.
_build_model
(
is_training
=
False
,
first_stage_only
=
False
,
second_stage_batch_size
=
2
)
preprocessed_inputs
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
input_shape
)
result_tensor_dict
=
model
.
predict
(
preprocessed_inputs
)
init_op
=
tf
.
global_variables_initializer
()
with
self
.
test_session
(
graph
=
test_graph
)
as
sess
:
sess
.
run
(
init_op
)
sess
.
run
(
init_op
)
tensor_dict_out
=
sess
.
run
(
result_tensor_dict
)
tensor_dict_out
=
sess
.
run
(
result_tensor_dict
,
feed_dict
=
{
self
.
assertEqual
(
set
(
tensor_dict_out
.
keys
()),
preprocessed_inputs
:
set
(
expected_shapes
.
keys
()))
np
.
zeros
((
batch_size
,
image_size
,
image_size
,
3
))})
for
key
in
expected_shapes
:
self
.
assertEqual
(
set
(
tensor_dict_out
.
keys
()),
self
.
assertAllEqual
(
tensor_dict_out
[
key
].
shape
,
expected_shapes
[
key
])
set
(
expected_shapes
.
keys
()))
for
key
in
expected_shapes
:
self
.
assertAllEqual
(
tensor_dict_out
[
key
].
shape
,
expected_shapes
[
key
])
def
test_predict_gives_correct_shapes_in_train_mode_both_stages
(
self
):
def
test_predict_gives_correct_shapes_in_train_mode_both_stages
(
self
):
test_graph
=
tf
.
Graph
()
test_graph
=
tf
.
Graph
()
with
test_graph
.
as_default
():
with
test_graph
.
as_default
():
model
=
self
.
_build_model
(
model
=
self
.
_build_model
(
is_training
=
True
,
first_stage_only
=
False
,
second_stage_batch_size
=
7
)
is_training
=
True
,
first_stage_only
=
False
,
second_stage_batch_size
=
7
)
batch_size
=
2
batch_size
=
2
image_size
=
10
image_size
=
10
max_num_proposals
=
7
initial_crop_size
=
3
maxpool_stride
=
1
image_shape
=
(
batch_size
,
image_size
,
image_size
,
3
)
image_shape
=
(
batch_size
,
image_size
,
image_size
,
3
)
preprocessed_inputs
=
tf
.
zeros
(
image_shape
,
dtype
=
tf
.
float32
)
preprocessed_inputs
=
tf
.
zeros
(
image_shape
,
dtype
=
tf
.
float32
)
groundtruth_boxes_list
=
[
groundtruth_boxes_list
=
[
...
@@ -381,11 +429,20 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
...
@@ -381,11 +429,20 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
(
2
,
image_size
,
image_size
,
512
),
(
2
,
image_size
,
image_size
,
512
),
'rpn_features_to_crop'
:
(
2
,
image_size
,
image_size
,
3
),
'rpn_features_to_crop'
:
(
2
,
image_size
,
image_size
,
3
),
'image_shape'
:
(
4
,),
'image_shape'
:
(
4
,),
'refined_box_encodings'
:
(
2
*
7
,
2
,
4
),
'refined_box_encodings'
:
(
2
*
max_num_proposals
,
2
,
4
),
'class_predictions_with_background'
:
(
2
*
7
,
2
+
1
),
'class_predictions_with_background'
:
(
2
*
max_num_proposals
,
2
+
1
),
'num_proposals'
:
(
2
,),
'num_proposals'
:
(
2
,),
'proposal_boxes'
:
(
2
,
7
,
4
),
'proposal_boxes'
:
(
2
,
max_num_proposals
,
4
),
'proposal_boxes_normalized'
:
(
2
,
max_num_proposals
,
4
),
'box_classifier_features'
:
self
.
_get_box_classifier_features_shape
(
image_size
,
batch_size
,
max_num_proposals
,
initial_crop_size
,
maxpool_stride
,
3
)
}
}
init_op
=
tf
.
global_variables_initializer
()
init_op
=
tf
.
global_variables_initializer
()
with
self
.
test_session
()
as
sess
:
with
self
.
test_session
()
as
sess
:
sess
.
run
(
init_op
)
sess
.
run
(
init_op
)
...
@@ -600,6 +657,7 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
...
@@ -600,6 +657,7 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
preprocessed_inputs
=
model
.
preprocess
(
image_placeholder
)
preprocessed_inputs
=
model
.
preprocess
(
image_placeholder
)
self
.
assertAllEqual
(
preprocessed_inputs
.
shape
.
as_list
(),
image_shape
)
self
.
assertAllEqual
(
preprocessed_inputs
.
shape
.
as_list
(),
image_shape
)
# TODO: Split test into two - with and without masks.
def
test_loss_first_stage_only_mode
(
self
):
def
test_loss_first_stage_only_mode
(
self
):
model
=
self
.
_build_model
(
model
=
self
.
_build_model
(
is_training
=
True
,
first_stage_only
=
True
,
second_stage_batch_size
=
6
)
is_training
=
True
,
first_stage_only
=
True
,
second_stage_batch_size
=
6
)
...
@@ -650,6 +708,7 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
...
@@ -650,6 +708,7 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
self
.
assertTrue
(
'second_stage_localization_loss'
not
in
loss_dict_out
)
self
.
assertTrue
(
'second_stage_localization_loss'
not
in
loss_dict_out
)
self
.
assertTrue
(
'second_stage_classification_loss'
not
in
loss_dict_out
)
self
.
assertTrue
(
'second_stage_classification_loss'
not
in
loss_dict_out
)
# TODO: Split test into two - with and without masks.
def
test_loss_full
(
self
):
def
test_loss_full
(
self
):
model
=
self
.
_build_model
(
model
=
self
.
_build_model
(
is_training
=
True
,
first_stage_only
=
False
,
second_stage_batch_size
=
6
)
is_training
=
True
,
first_stage_only
=
False
,
second_stage_batch_size
=
6
)
...
@@ -702,12 +761,26 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
...
@@ -702,12 +761,26 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
[
10
,
-
10
,
-
10
],
[
10
,
-
10
,
-
10
],
[
-
10
,
10
,
-
10
]],
dtype
=
tf
.
float32
)
[
-
10
,
10
,
-
10
]],
dtype
=
tf
.
float32
)
mask_predictions_logits
=
20
*
tf
.
ones
((
batch_size
*
model
.
max_num_proposals
,
model
.
num_classes
,
14
,
14
),
dtype
=
tf
.
float32
)
groundtruth_boxes_list
=
[
groundtruth_boxes_list
=
[
tf
.
constant
([[
0
,
0
,
.
5
,
.
5
],
[.
5
,
.
5
,
1
,
1
]],
dtype
=
tf
.
float32
),
tf
.
constant
([[
0
,
0
,
.
5
,
.
5
],
[.
5
,
.
5
,
1
,
1
]],
dtype
=
tf
.
float32
),
tf
.
constant
([[
0
,
.
5
,
.
5
,
1
],
[.
5
,
0
,
1
,
.
5
]],
dtype
=
tf
.
float32
)]
tf
.
constant
([[
0
,
.
5
,
.
5
,
1
],
[.
5
,
0
,
1
,
.
5
]],
dtype
=
tf
.
float32
)]
groundtruth_classes_list
=
[
tf
.
constant
([[
1
,
0
],
[
0
,
1
]],
dtype
=
tf
.
float32
),
groundtruth_classes_list
=
[
tf
.
constant
([[
1
,
0
],
[
0
,
1
]],
dtype
=
tf
.
float32
),
tf
.
constant
([[
1
,
0
],
[
1
,
0
]],
dtype
=
tf
.
float32
)]
tf
.
constant
([[
1
,
0
],
[
1
,
0
]],
dtype
=
tf
.
float32
)]
# Set all elements of groundtruth mask to 1.0. In this case all proposal
# crops of the groundtruth masks should return a mask that covers the entire
# proposal. Thus, if mask_predictions_logits element values are all greater
# than 20, the loss should be zero.
groundtruth_masks_list
=
[
tf
.
convert_to_tensor
(
np
.
ones
((
2
,
32
,
32
)),
dtype
=
tf
.
float32
),
tf
.
convert_to_tensor
(
np
.
ones
((
2
,
32
,
32
)),
dtype
=
tf
.
float32
)]
prediction_dict
=
{
prediction_dict
=
{
'rpn_box_encodings'
:
rpn_box_encodings
,
'rpn_box_encodings'
:
rpn_box_encodings
,
'rpn_objectness_predictions_with_background'
:
'rpn_objectness_predictions_with_background'
:
...
@@ -717,10 +790,12 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
...
@@ -717,10 +790,12 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
'refined_box_encodings'
:
refined_box_encodings
,
'refined_box_encodings'
:
refined_box_encodings
,
'class_predictions_with_background'
:
class_predictions_with_background
,
'class_predictions_with_background'
:
class_predictions_with_background
,
'proposal_boxes'
:
proposal_boxes
,
'proposal_boxes'
:
proposal_boxes
,
'num_proposals'
:
num_proposals
'num_proposals'
:
num_proposals
,
'mask_predictions'
:
mask_predictions_logits
}
}
model
.
provide_groundtruth
(
groundtruth_boxes_list
,
model
.
provide_groundtruth
(
groundtruth_boxes_list
,
groundtruth_classes_list
)
groundtruth_classes_list
,
groundtruth_masks_list
)
loss_dict
=
model
.
loss
(
prediction_dict
)
loss_dict
=
model
.
loss
(
prediction_dict
)
with
self
.
test_session
()
as
sess
:
with
self
.
test_session
()
as
sess
:
...
@@ -729,6 +804,7 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
...
@@ -729,6 +804,7 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
self
.
assertAllClose
(
loss_dict_out
[
'first_stage_objectness_loss'
],
0
)
self
.
assertAllClose
(
loss_dict_out
[
'first_stage_objectness_loss'
],
0
)
self
.
assertAllClose
(
loss_dict_out
[
'second_stage_localization_loss'
],
0
)
self
.
assertAllClose
(
loss_dict_out
[
'second_stage_localization_loss'
],
0
)
self
.
assertAllClose
(
loss_dict_out
[
'second_stage_classification_loss'
],
0
)
self
.
assertAllClose
(
loss_dict_out
[
'second_stage_classification_loss'
],
0
)
self
.
assertAllClose
(
loss_dict_out
[
'second_stage_mask_loss'
],
0
)
def
test_loss_full_zero_padded_proposals
(
self
):
def
test_loss_full_zero_padded_proposals
(
self
):
model
=
self
.
_build_model
(
model
=
self
.
_build_model
(
...
@@ -775,10 +851,23 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
...
@@ -775,10 +851,23 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
[
0
,
0
,
0
],
[
0
,
0
,
0
],
[
0
,
0
,
0
]],
dtype
=
tf
.
float32
)
[
0
,
0
,
0
]],
dtype
=
tf
.
float32
)
mask_predictions_logits
=
20
*
tf
.
ones
((
batch_size
*
model
.
max_num_proposals
,
model
.
num_classes
,
14
,
14
),
dtype
=
tf
.
float32
)
groundtruth_boxes_list
=
[
groundtruth_boxes_list
=
[
tf
.
constant
([[
0
,
0
,
.
5
,
.
5
]],
dtype
=
tf
.
float32
)]
tf
.
constant
([[
0
,
0
,
.
5
,
.
5
]],
dtype
=
tf
.
float32
)]
groundtruth_classes_list
=
[
tf
.
constant
([[
1
,
0
]],
dtype
=
tf
.
float32
)]
groundtruth_classes_list
=
[
tf
.
constant
([[
1
,
0
]],
dtype
=
tf
.
float32
)]
# Set all elements of groundtruth mask to 1.0. In this case all proposal
# crops of the groundtruth masks should return a mask that covers the entire
# proposal. Thus, if mask_predictions_logits element values are all greater
# than 20, the loss should be zero.
groundtruth_masks_list
=
[
tf
.
convert_to_tensor
(
np
.
ones
((
1
,
32
,
32
)),
dtype
=
tf
.
float32
)]
prediction_dict
=
{
prediction_dict
=
{
'rpn_box_encodings'
:
rpn_box_encodings
,
'rpn_box_encodings'
:
rpn_box_encodings
,
'rpn_objectness_predictions_with_background'
:
'rpn_objectness_predictions_with_background'
:
...
@@ -788,10 +877,12 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
...
@@ -788,10 +877,12 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
'refined_box_encodings'
:
refined_box_encodings
,
'refined_box_encodings'
:
refined_box_encodings
,
'class_predictions_with_background'
:
class_predictions_with_background
,
'class_predictions_with_background'
:
class_predictions_with_background
,
'proposal_boxes'
:
proposal_boxes
,
'proposal_boxes'
:
proposal_boxes
,
'num_proposals'
:
num_proposals
'num_proposals'
:
num_proposals
,
'mask_predictions'
:
mask_predictions_logits
}
}
model
.
provide_groundtruth
(
groundtruth_boxes_list
,
model
.
provide_groundtruth
(
groundtruth_boxes_list
,
groundtruth_classes_list
)
groundtruth_classes_list
,
groundtruth_masks_list
)
loss_dict
=
model
.
loss
(
prediction_dict
)
loss_dict
=
model
.
loss
(
prediction_dict
)
with
self
.
test_session
()
as
sess
:
with
self
.
test_session
()
as
sess
:
...
@@ -800,6 +891,102 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
...
@@ -800,6 +891,102 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
self
.
assertAllClose
(
loss_dict_out
[
'first_stage_objectness_loss'
],
0
)
self
.
assertAllClose
(
loss_dict_out
[
'first_stage_objectness_loss'
],
0
)
self
.
assertAllClose
(
loss_dict_out
[
'second_stage_localization_loss'
],
0
)
self
.
assertAllClose
(
loss_dict_out
[
'second_stage_localization_loss'
],
0
)
self
.
assertAllClose
(
loss_dict_out
[
'second_stage_classification_loss'
],
0
)
self
.
assertAllClose
(
loss_dict_out
[
'second_stage_classification_loss'
],
0
)
self
.
assertAllClose
(
loss_dict_out
[
'second_stage_mask_loss'
],
0
)
def
test_loss_full_multiple_label_groundtruth
(
self
):
model
=
self
.
_build_model
(
is_training
=
True
,
first_stage_only
=
False
,
second_stage_batch_size
=
6
,
softmax_second_stage_classification_loss
=
False
)
batch_size
=
1
anchors
=
tf
.
constant
(
[[
0
,
0
,
16
,
16
],
[
0
,
16
,
16
,
32
],
[
16
,
0
,
32
,
16
],
[
16
,
16
,
32
,
32
]],
dtype
=
tf
.
float32
)
rpn_box_encodings
=
tf
.
zeros
(
[
batch_size
,
anchors
.
get_shape
().
as_list
()[
0
],
BOX_CODE_SIZE
],
dtype
=
tf
.
float32
)
# use different numbers for the objectness category to break ties in
# order of boxes returned by NMS
rpn_objectness_predictions_with_background
=
tf
.
constant
([
[[
-
10
,
13
],
[
10
,
-
10
],
[
10
,
-
11
],
[
10
,
-
12
]],],
dtype
=
tf
.
float32
)
image_shape
=
tf
.
constant
([
batch_size
,
32
,
32
,
3
],
dtype
=
tf
.
int32
)
# box_classifier_batch_size is 6, but here we assume that the number of
# actual proposals (not counting zero paddings) is fewer (3).
num_proposals
=
tf
.
constant
([
3
],
dtype
=
tf
.
int32
)
proposal_boxes
=
tf
.
constant
(
[[[
0
,
0
,
16
,
16
],
[
0
,
16
,
16
,
32
],
[
16
,
0
,
32
,
16
],
[
0
,
0
,
0
,
0
],
# begin paddings
[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
]]],
dtype
=
tf
.
float32
)
# second_stage_localization_loss should only be computed for predictions
# that match groundtruth. For multiple label groundtruth boxes, the loss
# should only be computed once for the label with the smaller index.
refined_box_encodings
=
tf
.
constant
(
[[[
0
,
0
,
0
,
0
],
[
1
,
1
,
-
1
,
-
1
]],
[[
1
,
1
,
-
1
,
-
1
],
[
1
,
1
,
1
,
1
]],
[[
1
,
1
,
-
1
,
-
1
],
[
1
,
1
,
1
,
1
]],
[[
1
,
1
,
-
1
,
-
1
],
[
1
,
1
,
1
,
1
]],
[[
1
,
1
,
-
1
,
-
1
],
[
1
,
1
,
1
,
1
]],
[[
1
,
1
,
-
1
,
-
1
],
[
1
,
1
,
1
,
1
]]],
dtype
=
tf
.
float32
)
class_predictions_with_background
=
tf
.
constant
(
[[
-
100
,
100
,
100
],
[
100
,
-
100
,
-
100
],
[
100
,
-
100
,
-
100
],
[
0
,
0
,
0
],
# begin paddings
[
0
,
0
,
0
],
[
0
,
0
,
0
]],
dtype
=
tf
.
float32
)
mask_predictions_logits
=
20
*
tf
.
ones
((
batch_size
*
model
.
max_num_proposals
,
model
.
num_classes
,
14
,
14
),
dtype
=
tf
.
float32
)
groundtruth_boxes_list
=
[
tf
.
constant
([[
0
,
0
,
.
5
,
.
5
]],
dtype
=
tf
.
float32
)]
# Box contains two ground truth labels.
groundtruth_classes_list
=
[
tf
.
constant
([[
1
,
1
]],
dtype
=
tf
.
float32
)]
# Set all elements of groundtruth mask to 1.0. In this case all proposal
# crops of the groundtruth masks should return a mask that covers the entire
# proposal. Thus, if mask_predictions_logits element values are all greater
# than 20, the loss should be zero.
groundtruth_masks_list
=
[
tf
.
convert_to_tensor
(
np
.
ones
((
1
,
32
,
32
)),
dtype
=
tf
.
float32
)]
prediction_dict
=
{
'rpn_box_encodings'
:
rpn_box_encodings
,
'rpn_objectness_predictions_with_background'
:
rpn_objectness_predictions_with_background
,
'image_shape'
:
image_shape
,
'anchors'
:
anchors
,
'refined_box_encodings'
:
refined_box_encodings
,
'class_predictions_with_background'
:
class_predictions_with_background
,
'proposal_boxes'
:
proposal_boxes
,
'num_proposals'
:
num_proposals
,
'mask_predictions'
:
mask_predictions_logits
}
model
.
provide_groundtruth
(
groundtruth_boxes_list
,
groundtruth_classes_list
,
groundtruth_masks_list
)
loss_dict
=
model
.
loss
(
prediction_dict
)
with
self
.
test_session
()
as
sess
:
loss_dict_out
=
sess
.
run
(
loss_dict
)
self
.
assertAllClose
(
loss_dict_out
[
'first_stage_localization_loss'
],
0
)
self
.
assertAllClose
(
loss_dict_out
[
'first_stage_objectness_loss'
],
0
)
self
.
assertAllClose
(
loss_dict_out
[
'second_stage_localization_loss'
],
0
)
self
.
assertAllClose
(
loss_dict_out
[
'second_stage_classification_loss'
],
0
)
self
.
assertAllClose
(
loss_dict_out
[
'second_stage_mask_loss'
],
0
)
def
test_loss_full_zero_padded_proposals_nonzero_loss_with_two_images
(
self
):
def
test_loss_full_zero_padded_proposals_nonzero_loss_with_two_images
(
self
):
model
=
self
.
_build_model
(
model
=
self
.
_build_model
(
...
@@ -828,7 +1015,7 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
...
@@ -828,7 +1015,7 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
image_shape
=
tf
.
constant
([
batch_size
,
32
,
32
,
3
],
dtype
=
tf
.
int32
)
image_shape
=
tf
.
constant
([
batch_size
,
32
,
32
,
3
],
dtype
=
tf
.
int32
)
# box_classifier_batch_size is 6, but here we assume that the number of
# box_classifier_batch_size is 6, but here we assume that the number of
# actual proposals (not counting zero paddings) is fewer
(3)
.
# actual proposals (not counting zero paddings) is fewer.
num_proposals
=
tf
.
constant
([
3
,
2
],
dtype
=
tf
.
int32
)
num_proposals
=
tf
.
constant
([
3
,
2
],
dtype
=
tf
.
int32
)
proposal_boxes
=
tf
.
constant
(
proposal_boxes
=
tf
.
constant
(
[[[
0
,
0
,
16
,
16
],
[[[
0
,
0
,
16
,
16
],
...
@@ -839,9 +1026,9 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
...
@@ -839,9 +1026,9 @@ class FasterRCNNMetaArchTestBase(tf.test.TestCase):
[
0
,
0
,
0
,
0
]],
[
0
,
0
,
0
,
0
]],
[[
0
,
0
,
16
,
16
],
[[
0
,
0
,
16
,
16
],
[
0
,
16
,
16
,
32
],
[
0
,
16
,
16
,
32
],
[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
],
# begin paddings
[
0
,
0
,
0
,
0
],
# begin paddings
[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
]]],
dtype
=
tf
.
float32
)
[
0
,
0
,
0
,
0
]]],
dtype
=
tf
.
float32
)
refined_box_encodings
=
tf
.
zeros
(
refined_box_encodings
=
tf
.
zeros
(
...
...
research/object_detection/meta_architectures/rfcn_meta_arch.py
View file @
ff88581a
...
@@ -73,6 +73,7 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
...
@@ -73,6 +73,7 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
second_stage_score_conversion_fn
,
second_stage_score_conversion_fn
,
second_stage_localization_loss_weight
,
second_stage_localization_loss_weight
,
second_stage_classification_loss_weight
,
second_stage_classification_loss_weight
,
second_stage_classification_loss
,
hard_example_miner
,
hard_example_miner
,
parallel_iterations
=
16
):
parallel_iterations
=
16
):
"""RFCNMetaArch Constructor.
"""RFCNMetaArch Constructor.
...
@@ -149,6 +150,8 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
...
@@ -149,6 +150,8 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
used to convert logits to probabilities.
used to convert logits to probabilities.
second_stage_localization_loss_weight: A float
second_stage_localization_loss_weight: A float
second_stage_classification_loss_weight: A float
second_stage_classification_loss_weight: A float
second_stage_classification_loss: A string indicating which loss function
to use, supports 'softmax' and 'sigmoid'.
hard_example_miner: A losses.HardExampleMiner object (can be None).
hard_example_miner: A losses.HardExampleMiner object (can be None).
parallel_iterations: (Optional) The number of iterations allowed to run
parallel_iterations: (Optional) The number of iterations allowed to run
in parallel for calls to tf.map_fn.
in parallel for calls to tf.map_fn.
...
@@ -185,6 +188,8 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
...
@@ -185,6 +188,8 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
second_stage_score_conversion_fn
,
second_stage_score_conversion_fn
,
second_stage_localization_loss_weight
,
second_stage_localization_loss_weight
,
second_stage_classification_loss_weight
,
second_stage_classification_loss_weight
,
second_stage_classification_loss
,
1.0
,
# second stage mask prediction loss weight isn't used in R-FCN.
hard_example_miner
,
hard_example_miner
,
parallel_iterations
)
parallel_iterations
)
...
@@ -198,10 +203,10 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
...
@@ -198,10 +203,10 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
"""Predicts the output tensors from 2nd stage of FasterRCNN.
"""Predicts the output tensors from 2nd stage of FasterRCNN.
Args:
Args:
rpn_box_encodings:
3
-D float tensor of shape
rpn_box_encodings:
4
-D float tensor of shape
[batch_size, num_valid_anchors, self._box_coder.code_size] containing
[batch_size, num_valid_anchors, self._box_coder.code_size] containing
predicted boxes.
predicted boxes.
rpn_objectness_predictions_with_background:
3
-D float tensor of shape
rpn_objectness_predictions_with_background:
2
-D float tensor of shape
[batch_size, num_valid_anchors, 2] containing class
[batch_size, num_valid_anchors, 2] containing class
predictions (logits) for each of the anchors. Note that this
predictions (logits) for each of the anchors. Note that this
tensor *includes* background class predictions (at class index 0).
tensor *includes* background class predictions (at class index 0).
...
@@ -225,13 +230,22 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
...
@@ -225,13 +230,22 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
Note that this tensor *includes* background class predictions
Note that this tensor *includes* background class predictions
(at class index 0).
(at class index 0).
3) num_proposals: An int32 tensor of shape [batch_size] representing the
3) num_proposals: An int32 tensor of shape [batch_size] representing the
number of proposals generated by the RPN.
`num_proposals` allows us
number of proposals generated by the RPN. `num_proposals` allows us
to keep track of which entries are to be treated as zero paddings and
to keep track of which entries are to be treated as zero paddings and
which are not since we always pad the number of proposals to be
which are not since we always pad the number of proposals to be
`self.max_num_proposals` for each image.
`self.max_num_proposals` for each image.
4) proposal_boxes: A float32 tensor of shape
4) proposal_boxes: A float32 tensor of shape
[batch_size, self.max_num_proposals, 4] representing
[batch_size, self.max_num_proposals, 4] representing
decoded proposal bounding boxes (in absolute coordinates).
decoded proposal bounding boxes (in absolute coordinates).
5) proposal_boxes_normalized: A float32 tensor of shape
[batch_size, self.max_num_proposals, 4] representing decoded proposal
bounding boxes (in normalized coordinates). Can be used to override
the boxes proposed by the RPN, thus enabling one to extract box
classification and prediction for externally selected areas of the
image.
6) box_classifier_features: a 4-D float32 tensor, of shape
[batch_size, feature_map_height, feature_map_width, depth],
representing the box classifier features.
"""
"""
proposal_boxes_normalized
,
_
,
num_proposals
=
self
.
_postprocess_rpn
(
proposal_boxes_normalized
,
_
,
num_proposals
=
self
.
_postprocess_rpn
(
rpn_box_encodings
,
rpn_objectness_predictions_with_background
,
rpn_box_encodings
,
rpn_objectness_predictions_with_background
,
...
@@ -263,5 +277,7 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
...
@@ -263,5 +277,7 @@ class RFCNMetaArch(faster_rcnn_meta_arch.FasterRCNNMetaArch):
class_predictions_with_background
,
class_predictions_with_background
,
'num_proposals'
:
num_proposals
,
'num_proposals'
:
num_proposals
,
'proposal_boxes'
:
absolute_proposal_boxes
,
'proposal_boxes'
:
absolute_proposal_boxes
,
'box_classifier_features'
:
box_classifier_features
,
'proposal_boxes_normalized'
:
proposal_boxes_normalized
,
}
}
return
prediction_dict
return
prediction_dict
research/object_detection/meta_architectures/rfcn_meta_arch_test.py
View file @
ff88581a
...
@@ -51,6 +51,15 @@ class RFCNMetaArchTest(
...
@@ -51,6 +51,15 @@ class RFCNMetaArchTest(
return
rfcn_meta_arch
.
RFCNMetaArch
(
return
rfcn_meta_arch
.
RFCNMetaArch
(
second_stage_rfcn_box_predictor
=
box_predictor
,
**
common_kwargs
)
second_stage_rfcn_box_predictor
=
box_predictor
,
**
common_kwargs
)
def
_get_box_classifier_features_shape
(
self
,
image_size
,
batch_size
,
max_num_proposals
,
initial_crop_size
,
maxpool_stride
,
num_features
):
return
(
batch_size
,
image_size
,
image_size
,
num_features
)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
tf
.
test
.
main
()
tf
.
test
.
main
()
research/object_detection/meta_architectures/ssd_meta_arch.py
View file @
ff88581a
...
@@ -12,7 +12,6 @@
...
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
# ==============================================================================
# ==============================================================================
"""SSD Meta-architecture definition.
"""SSD Meta-architecture definition.
General tensorflow implementation of convolutional Multibox/SSD detection
General tensorflow implementation of convolutional Multibox/SSD detection
...
@@ -29,6 +28,7 @@ from object_detection.core import model
...
@@ -29,6 +28,7 @@ from object_detection.core import model
from
object_detection.core
import
standard_fields
as
fields
from
object_detection.core
import
standard_fields
as
fields
from
object_detection.core
import
target_assigner
from
object_detection.core
import
target_assigner
from
object_detection.utils
import
shape_utils
from
object_detection.utils
import
shape_utils
from
object_detection.utils
import
visualization_utils
slim
=
tf
.
contrib
.
slim
slim
=
tf
.
contrib
.
slim
...
@@ -37,13 +37,34 @@ class SSDFeatureExtractor(object):
...
@@ -37,13 +37,34 @@ class SSDFeatureExtractor(object):
"""SSD Feature Extractor definition."""
"""SSD Feature Extractor definition."""
def
__init__
(
self
,
def
__init__
(
self
,
is_training
,
depth_multiplier
,
depth_multiplier
,
min_depth
,
min_depth
,
pad_to_multiple
,
conv_hyperparams
,
conv_hyperparams
,
batch_norm_trainable
=
True
,
reuse_weights
=
None
):
reuse_weights
=
None
):
"""Constructor.
Args:
is_training: whether the network is in training mode.
depth_multiplier: float depth multiplier for feature extractor.
min_depth: minimum feature extractor depth.
pad_to_multiple: the nearest multiple to zero pad the input height and
width dimensions to.
conv_hyperparams: tf slim arg_scope for conv2d and separable_conv2d ops.
batch_norm_trainable: Whether to update batch norm parameters during
training or not. When training with a small batch size
(e.g. 1), it is desirable to disable batch norm update and use
pretrained batch norm params.
reuse_weights: whether to reuse variables. Default is None.
"""
self
.
_is_training
=
is_training
self
.
_depth_multiplier
=
depth_multiplier
self
.
_depth_multiplier
=
depth_multiplier
self
.
_min_depth
=
min_depth
self
.
_min_depth
=
min_depth
self
.
_pad_to_multiple
=
pad_to_multiple
self
.
_conv_hyperparams
=
conv_hyperparams
self
.
_conv_hyperparams
=
conv_hyperparams
self
.
_batch_norm_trainable
=
batch_norm_trainable
self
.
_reuse_weights
=
reuse_weights
self
.
_reuse_weights
=
reuse_weights
@
abstractmethod
@
abstractmethod
...
@@ -101,9 +122,9 @@ class SSDMetaArch(model.DetectionModel):
...
@@ -101,9 +122,9 @@ class SSDMetaArch(model.DetectionModel):
add_summaries
=
True
):
add_summaries
=
True
):
"""SSDMetaArch Constructor.
"""SSDMetaArch Constructor.
TODO: group NMS parameters + score converter into
TODO: group NMS parameters + score converter into
a class and loss
a class and loss
parameters into a class and write config protos for
parameters into a class and write config protos for
postprocessing
postprocessing
and losses.
and losses.
Args:
Args:
is_training: A boolean indicating whether the training version of the
is_training: A boolean indicating whether the training version of the
...
@@ -204,8 +225,8 @@ class SSDMetaArch(model.DetectionModel):
...
@@ -204,8 +225,8 @@ class SSDMetaArch(model.DetectionModel):
if
inputs
.
dtype
is
not
tf
.
float32
:
if
inputs
.
dtype
is
not
tf
.
float32
:
raise
ValueError
(
'`preprocess` expects a tf.float32 tensor'
)
raise
ValueError
(
'`preprocess` expects a tf.float32 tensor'
)
with
tf
.
name_scope
(
'Preprocessor'
):
with
tf
.
name_scope
(
'Preprocessor'
):
# TODO: revisit whether to always use batch size as
the number of
# TODO: revisit whether to always use batch size as the number of
parallel
#
parallel
iterations vs allow for dynamic batching.
# iterations vs allow for dynamic batching.
resized_inputs
=
tf
.
map_fn
(
self
.
_image_resizer_fn
,
resized_inputs
=
tf
.
map_fn
(
self
.
_image_resizer_fn
,
elems
=
inputs
,
elems
=
inputs
,
dtype
=
tf
.
float32
)
dtype
=
tf
.
float32
)
...
@@ -226,7 +247,7 @@ class SSDMetaArch(model.DetectionModel):
...
@@ -226,7 +247,7 @@ class SSDMetaArch(model.DetectionModel):
Returns:
Returns:
prediction_dict: a dictionary holding "raw" prediction tensors:
prediction_dict: a dictionary holding "raw" prediction tensors:
1) box_encodings:
3
-D float tensor of shape [batch_size, num_anchors,
1) box_encodings:
4
-D float tensor of shape [batch_size, num_anchors,
box_code_dimension] containing predicted boxes.
box_code_dimension] containing predicted boxes.
2) class_predictions_with_background: 3-D float tensor of shape
2) class_predictions_with_background: 3-D float tensor of shape
[batch_size, num_anchors, num_classes+1] containing class predictions
[batch_size, num_anchors, num_classes+1] containing class predictions
...
@@ -234,19 +255,26 @@ class SSDMetaArch(model.DetectionModel):
...
@@ -234,19 +255,26 @@ class SSDMetaArch(model.DetectionModel):
background class predictions (at class index 0).
background class predictions (at class index 0).
3) feature_maps: a list of tensors where the ith tensor has shape
3) feature_maps: a list of tensors where the ith tensor has shape
[batch, height_i, width_i, depth_i].
[batch, height_i, width_i, depth_i].
4) anchors: 2-D float tensor of shape [num_anchors, 4] containing
the generated anchors in normalized coordinates.
"""
"""
with
tf
.
variable_scope
(
None
,
self
.
_extract_features_scope
,
with
tf
.
variable_scope
(
None
,
self
.
_extract_features_scope
,
[
preprocessed_inputs
]):
[
preprocessed_inputs
]):
feature_maps
=
self
.
_feature_extractor
.
extract_features
(
feature_maps
=
self
.
_feature_extractor
.
extract_features
(
preprocessed_inputs
)
preprocessed_inputs
)
feature_map_spatial_dims
=
self
.
_get_feature_map_spatial_dims
(
feature_maps
)
feature_map_spatial_dims
=
self
.
_get_feature_map_spatial_dims
(
feature_maps
)
self
.
_anchors
=
self
.
_anchor_generator
.
generate
(
feature_map_spatial_dims
)
image_shape
=
tf
.
shape
(
preprocessed_inputs
)
self
.
_anchors
=
self
.
_anchor_generator
.
generate
(
feature_map_spatial_dims
,
im_height
=
image_shape
[
1
],
im_width
=
image_shape
[
2
])
(
box_encodings
,
class_predictions_with_background
(
box_encodings
,
class_predictions_with_background
)
=
self
.
_add_box_predictions_to_feature_maps
(
feature_maps
)
)
=
self
.
_add_box_predictions_to_feature_maps
(
feature_maps
)
predictions_dict
=
{
predictions_dict
=
{
'box_encodings'
:
box_encodings
,
'box_encodings'
:
box_encodings
,
'class_predictions_with_background'
:
class_predictions_with_background
,
'class_predictions_with_background'
:
class_predictions_with_background
,
'feature_maps'
:
feature_maps
'feature_maps'
:
feature_maps
,
'anchors'
:
self
.
_anchors
.
get
()
}
}
return
predictions_dict
return
predictions_dict
...
@@ -351,9 +379,11 @@ class SSDMetaArch(model.DetectionModel):
...
@@ -351,9 +379,11 @@ class SSDMetaArch(model.DetectionModel):
Returns:
Returns:
detections: a dictionary containing the following fields
detections: a dictionary containing the following fields
detection_boxes: [batch, max_detection, 4]
detection_boxes: [batch, max_detection
s
, 4]
detection_scores: [batch, max_detections]
detection_scores: [batch, max_detections]
detection_classes: [batch, max_detections]
detection_classes: [batch, max_detections]
detection_keypoints: [batch, max_detections, num_keypoints, 2] (if
encoded in the prediction_dict 'box_encodings')
num_detections: [batch]
num_detections: [batch]
Raises:
Raises:
ValueError: if prediction_dict does not contain `box_encodings` or
ValueError: if prediction_dict does not contain `box_encodings` or
...
@@ -365,7 +395,7 @@ class SSDMetaArch(model.DetectionModel):
...
@@ -365,7 +395,7 @@ class SSDMetaArch(model.DetectionModel):
with
tf
.
name_scope
(
'Postprocessor'
):
with
tf
.
name_scope
(
'Postprocessor'
):
box_encodings
=
prediction_dict
[
'box_encodings'
]
box_encodings
=
prediction_dict
[
'box_encodings'
]
class_predictions
=
prediction_dict
[
'class_predictions_with_background'
]
class_predictions
=
prediction_dict
[
'class_predictions_with_background'
]
detection_boxes
=
self
.
_batch_decode
(
box_encodings
)
detection_boxes
,
detection_keypoints
=
self
.
_batch_decode
(
box_encodings
)
detection_boxes
=
tf
.
expand_dims
(
detection_boxes
,
axis
=
2
)
detection_boxes
=
tf
.
expand_dims
(
detection_boxes
,
axis
=
2
)
class_predictions_without_background
=
tf
.
slice
(
class_predictions
,
class_predictions_without_background
=
tf
.
slice
(
class_predictions
,
...
@@ -374,14 +404,25 @@ class SSDMetaArch(model.DetectionModel):
...
@@ -374,14 +404,25 @@ class SSDMetaArch(model.DetectionModel):
detection_scores
=
self
.
_score_conversion_fn
(
detection_scores
=
self
.
_score_conversion_fn
(
class_predictions_without_background
)
class_predictions_without_background
)
clip_window
=
tf
.
constant
([
0
,
0
,
1
,
1
],
tf
.
float32
)
clip_window
=
tf
.
constant
([
0
,
0
,
1
,
1
],
tf
.
float32
)
(
nmsed_boxes
,
nmsed_scores
,
nmsed_classes
,
_
,
additional_fields
=
None
num_detections
)
=
self
.
_non_max_suppression_fn
(
detection_boxes
,
if
detection_keypoints
is
not
None
:
detection_scores
,
additional_fields
=
{
clip_window
=
clip_window
)
fields
.
BoxListFields
.
keypoints
:
detection_keypoints
}
return
{
'detection_boxes'
:
nmsed_boxes
,
(
nmsed_boxes
,
nmsed_scores
,
nmsed_classes
,
_
,
nmsed_additional_fields
,
'detection_scores'
:
nmsed_scores
,
num_detections
)
=
self
.
_non_max_suppression_fn
(
'detection_classes'
:
nmsed_classes
,
detection_boxes
,
'num_detections'
:
tf
.
to_float
(
num_detections
)}
detection_scores
,
clip_window
=
clip_window
,
additional_fields
=
additional_fields
)
detection_dict
=
{
'detection_boxes'
:
nmsed_boxes
,
'detection_scores'
:
nmsed_scores
,
'detection_classes'
:
nmsed_classes
,
'num_detections'
:
tf
.
to_float
(
num_detections
)}
if
(
nmsed_additional_fields
is
not
None
and
fields
.
BoxListFields
.
keypoints
in
nmsed_additional_fields
):
detection_dict
[
'detection_keypoints'
]
=
nmsed_additional_fields
[
fields
.
BoxListFields
.
keypoints
]
return
detection_dict
def
loss
(
self
,
prediction_dict
,
scope
=
None
):
def
loss
(
self
,
prediction_dict
,
scope
=
None
):
"""Compute scalar loss tensors with respect to provided groundtruth.
"""Compute scalar loss tensors with respect to provided groundtruth.
...
@@ -395,7 +436,7 @@ class SSDMetaArch(model.DetectionModel):
...
@@ -395,7 +436,7 @@ class SSDMetaArch(model.DetectionModel):
box_code_dimension] containing predicted boxes.
box_code_dimension] containing predicted boxes.
2) class_predictions_with_background: 3-D float tensor of shape
2) class_predictions_with_background: 3-D float tensor of shape
[batch_size, num_anchors, num_classes+1] containing class predictions
[batch_size, num_anchors, num_classes+1] containing class predictions
(logits) for each of the anchors.
Note that this tensor *includes*
(logits) for each of the anchors. Note that this tensor *includes*
background class predictions.
background class predictions.
scope: Optional scope name.
scope: Optional scope name.
...
@@ -405,10 +446,14 @@ class SSDMetaArch(model.DetectionModel):
...
@@ -405,10 +446,14 @@ class SSDMetaArch(model.DetectionModel):
values.
values.
"""
"""
with
tf
.
name_scope
(
scope
,
'Loss'
,
prediction_dict
.
values
()):
with
tf
.
name_scope
(
scope
,
'Loss'
,
prediction_dict
.
values
()):
keypoints
=
None
if
self
.
groundtruth_has_field
(
fields
.
BoxListFields
.
keypoints
):
keypoints
=
self
.
groundtruth_lists
(
fields
.
BoxListFields
.
keypoints
)
(
batch_cls_targets
,
batch_cls_weights
,
batch_reg_targets
,
(
batch_cls_targets
,
batch_cls_weights
,
batch_reg_targets
,
batch_reg_weights
,
match_list
)
=
self
.
_assign_targets
(
batch_reg_weights
,
match_list
)
=
self
.
_assign_targets
(
self
.
groundtruth_lists
(
fields
.
BoxListFields
.
boxes
),
self
.
groundtruth_lists
(
fields
.
BoxListFields
.
boxes
),
self
.
groundtruth_lists
(
fields
.
BoxListFields
.
classes
))
self
.
groundtruth_lists
(
fields
.
BoxListFields
.
classes
),
keypoints
)
if
self
.
_add_summaries
:
if
self
.
_add_summaries
:
self
.
_summarize_input
(
self
.
_summarize_input
(
self
.
groundtruth_lists
(
fields
.
BoxListFields
.
boxes
),
match_list
)
self
.
groundtruth_lists
(
fields
.
BoxListFields
.
boxes
),
match_list
)
...
@@ -417,35 +462,60 @@ class SSDMetaArch(model.DetectionModel):
...
@@ -417,35 +462,60 @@ class SSDMetaArch(model.DetectionModel):
location_losses
=
self
.
_localization_loss
(
location_losses
=
self
.
_localization_loss
(
prediction_dict
[
'box_encodings'
],
prediction_dict
[
'box_encodings'
],
batch_reg_targets
,
batch_reg_targets
,
ignore_nan_targets
=
True
,
weights
=
batch_reg_weights
)
weights
=
batch_reg_weights
)
cls_losses
=
self
.
_classification_loss
(
cls_losses
=
self
.
_classification_loss
(
prediction_dict
[
'class_predictions_with_background'
],
prediction_dict
[
'class_predictions_with_background'
],
batch_cls_targets
,
batch_cls_targets
,
weights
=
batch_cls_weights
)
weights
=
batch_cls_weights
)
# Optionally apply hard mining on top of loss values
localization_loss
=
tf
.
reduce_sum
(
location_losses
)
classification_loss
=
tf
.
reduce_sum
(
cls_losses
)
if
self
.
_hard_example_miner
:
if
self
.
_hard_example_miner
:
(
localization_loss
,
classification_loss
)
=
self
.
_apply_hard_mining
(
(
localization_loss
,
classification_loss
)
=
self
.
_apply_hard_mining
(
location_losses
,
cls_losses
,
prediction_dict
,
match_list
)
location_losses
,
cls_losses
,
prediction_dict
,
match_list
)
if
self
.
_add_summaries
:
if
self
.
_add_summaries
:
self
.
_hard_example_miner
.
summarize
()
self
.
_hard_example_miner
.
summarize
()
else
:
if
self
.
_add_summaries
:
class_ids
=
tf
.
argmax
(
batch_cls_targets
,
axis
=
2
)
flattened_class_ids
=
tf
.
reshape
(
class_ids
,
[
-
1
])
flattened_classification_losses
=
tf
.
reshape
(
cls_losses
,
[
-
1
])
self
.
_summarize_anchor_classification_loss
(
flattened_class_ids
,
flattened_classification_losses
)
localization_loss
=
tf
.
reduce_sum
(
location_losses
)
classification_loss
=
tf
.
reduce_sum
(
cls_losses
)
# Optionally normalize by number of positive matches
# Optionally normalize by number of positive matches
normalizer
=
tf
.
constant
(
1.0
,
dtype
=
tf
.
float32
)
normalizer
=
tf
.
constant
(
1.0
,
dtype
=
tf
.
float32
)
if
self
.
_normalize_loss_by_num_matches
:
if
self
.
_normalize_loss_by_num_matches
:
normalizer
=
tf
.
maximum
(
tf
.
to_float
(
tf
.
reduce_sum
(
num_matches
)),
1.0
)
normalizer
=
tf
.
maximum
(
tf
.
to_float
(
tf
.
reduce_sum
(
num_matches
)),
1.0
)
with
tf
.
name_scope
(
'localization_loss'
):
localization_loss
=
((
self
.
_localization_loss_weight
/
normalizer
)
*
localization_loss
)
with
tf
.
name_scope
(
'classification_loss'
):
classification_loss
=
((
self
.
_classification_loss_weight
/
normalizer
)
*
classification_loss
)
loss_dict
=
{
loss_dict
=
{
'localization_loss'
:
(
self
.
_localization_loss_weight
/
normalizer
)
*
'localization_loss'
:
localization_loss
,
localization_loss
,
'classification_loss'
:
classification_loss
'classification_loss'
:
(
self
.
_classification_loss_weight
/
normalizer
)
*
classification_loss
}
}
return
loss_dict
return
loss_dict
def
_assign_targets
(
self
,
groundtruth_boxes_list
,
groundtruth_classes_list
):
def
_summarize_anchor_classification_loss
(
self
,
class_ids
,
cls_losses
):
positive_indices
=
tf
.
where
(
tf
.
greater
(
class_ids
,
0
))
positive_anchor_cls_loss
=
tf
.
squeeze
(
tf
.
gather
(
cls_losses
,
positive_indices
),
axis
=
1
)
visualization_utils
.
add_cdf_image_summary
(
positive_anchor_cls_loss
,
'PositiveAnchorLossCDF'
)
negative_indices
=
tf
.
where
(
tf
.
equal
(
class_ids
,
0
))
negative_anchor_cls_loss
=
tf
.
squeeze
(
tf
.
gather
(
cls_losses
,
negative_indices
),
axis
=
1
)
visualization_utils
.
add_cdf_image_summary
(
negative_anchor_cls_loss
,
'NegativeAnchorLossCDF'
)
def
_assign_targets
(
self
,
groundtruth_boxes_list
,
groundtruth_classes_list
,
groundtruth_keypoints_list
=
None
):
"""Assign groundtruth targets.
"""Assign groundtruth targets.
Adds a background class to each one-hot encoding of groundtruth classes
Adds a background class to each one-hot encoding of groundtruth classes
...
@@ -460,6 +530,8 @@ class SSDMetaArch(model.DetectionModel):
...
@@ -460,6 +530,8 @@ class SSDMetaArch(model.DetectionModel):
groundtruth_classes_list: a list of 2-D one-hot (or k-hot) tensors of
groundtruth_classes_list: a list of 2-D one-hot (or k-hot) tensors of
shape [num_boxes, num_classes] containing the class targets with the 0th
shape [num_boxes, num_classes] containing the class targets with the 0th
index assumed to map to the first non-background class.
index assumed to map to the first non-background class.
groundtruth_keypoints_list: (optional) a list of 3-D tensors of shape
[num_boxes, num_keypoints, 2]
Returns:
Returns:
batch_cls_targets: a tensor with shape [batch_size, num_anchors,
batch_cls_targets: a tensor with shape [batch_size, num_anchors,
...
@@ -480,6 +552,10 @@ class SSDMetaArch(model.DetectionModel):
...
@@ -480,6 +552,10 @@ class SSDMetaArch(model.DetectionModel):
tf
.
pad
(
one_hot_encoding
,
[[
0
,
0
],
[
1
,
0
]],
mode
=
'CONSTANT'
)
tf
.
pad
(
one_hot_encoding
,
[[
0
,
0
],
[
1
,
0
]],
mode
=
'CONSTANT'
)
for
one_hot_encoding
in
groundtruth_classes_list
for
one_hot_encoding
in
groundtruth_classes_list
]
]
if
groundtruth_keypoints_list
is
not
None
:
for
boxlist
,
keypoints
in
zip
(
groundtruth_boxlists
,
groundtruth_keypoints_list
):
boxlist
.
add_field
(
fields
.
BoxListFields
.
keypoints
,
keypoints
)
return
target_assigner
.
batch_assign_targets
(
return
target_assigner
.
batch_assign_targets
(
self
.
_target_assigner
,
self
.
anchors
,
groundtruth_boxlists
,
self
.
_target_assigner
,
self
.
anchors
,
groundtruth_boxlists
,
groundtruth_classes_with_background_list
)
groundtruth_classes_with_background_list
)
...
@@ -544,12 +620,11 @@ class SSDMetaArch(model.DetectionModel):
...
@@ -544,12 +620,11 @@ class SSDMetaArch(model.DetectionModel):
mined_cls_loss: a float scalar with sum of classification losses from
mined_cls_loss: a float scalar with sum of classification losses from
selected hard examples.
selected hard examples.
"""
"""
class_pred_shape
=
[
-
1
,
self
.
anchors
.
num_boxes_static
(),
self
.
num_classes
]
class_predictions
=
tf
.
slice
(
class_predictions
=
tf
.
reshape
(
prediction_dict
[
'class_predictions_with_background'
],
[
0
,
0
,
tf
.
slice
(
prediction_dict
[
'class_predictions_with_background'
],
1
],
[
-
1
,
-
1
,
-
1
])
[
0
,
0
,
1
],
class_pred_shape
),
class_pred_shape
)
decoded_boxes
=
self
.
_batch_decode
(
prediction_dict
[
'box_encodings'
])
decoded_boxes
,
_
=
self
.
_batch_decode
(
prediction_dict
[
'box_encodings'
])
decoded_box_tensors_list
=
tf
.
unstack
(
decoded_boxes
)
decoded_box_tensors_list
=
tf
.
unstack
(
decoded_boxes
)
class_prediction_list
=
tf
.
unstack
(
class_predictions
)
class_prediction_list
=
tf
.
unstack
(
class_predictions
)
decoded_boxlist_list
=
[]
decoded_boxlist_list
=
[]
...
@@ -574,6 +649,9 @@ class SSDMetaArch(model.DetectionModel):
...
@@ -574,6 +649,9 @@ class SSDMetaArch(model.DetectionModel):
Returns:
Returns:
decoded_boxes: A float32 tensor of shape
decoded_boxes: A float32 tensor of shape
[batch_size, num_anchors, 4] containing the decoded boxes.
[batch_size, num_anchors, 4] containing the decoded boxes.
decoded_keypoints: A float32 tensor of shape
[batch_size, num_anchors, num_keypoints, 2] containing the decoded
keypoints if present in the input `box_encodings`, None otherwise.
"""
"""
combined_shape
=
shape_utils
.
combined_static_and_dynamic_shape
(
combined_shape
=
shape_utils
.
combined_static_and_dynamic_shape
(
box_encodings
)
box_encodings
)
...
@@ -581,13 +659,21 @@ class SSDMetaArch(model.DetectionModel):
...
@@ -581,13 +659,21 @@ class SSDMetaArch(model.DetectionModel):
tiled_anchor_boxes
=
tf
.
tile
(
tiled_anchor_boxes
=
tf
.
tile
(
tf
.
expand_dims
(
self
.
anchors
.
get
(),
0
),
[
batch_size
,
1
,
1
])
tf
.
expand_dims
(
self
.
anchors
.
get
(),
0
),
[
batch_size
,
1
,
1
])
tiled_anchors_boxlist
=
box_list
.
BoxList
(
tiled_anchors_boxlist
=
box_list
.
BoxList
(
tf
.
reshape
(
tiled_anchor_boxes
,
[
-
1
,
self
.
_box_coder
.
code_size
]))
tf
.
reshape
(
tiled_anchor_boxes
,
[
-
1
,
4
]))
decoded_boxes
=
self
.
_box_coder
.
decode
(
decoded_boxes
=
self
.
_box_coder
.
decode
(
tf
.
reshape
(
box_encodings
,
[
-
1
,
self
.
_box_coder
.
code_size
]),
tf
.
reshape
(
box_encodings
,
[
-
1
,
self
.
_box_coder
.
code_size
]),
tiled_anchors_boxlist
)
tiled_anchors_boxlist
)
return
tf
.
reshape
(
decoded_boxes
.
get
(),
decoded_keypoints
=
None
tf
.
stack
([
combined_shape
[
0
],
combined_shape
[
1
],
if
decoded_boxes
.
has_field
(
fields
.
BoxListFields
.
keypoints
):
4
]))
decoded_keypoints
=
decoded_boxes
.
get_field
(
fields
.
BoxListFields
.
keypoints
)
num_keypoints
=
decoded_keypoints
.
get_shape
()[
1
]
decoded_keypoints
=
tf
.
reshape
(
decoded_keypoints
,
tf
.
stack
([
combined_shape
[
0
],
combined_shape
[
1
],
num_keypoints
,
2
]))
decoded_boxes
=
tf
.
reshape
(
decoded_boxes
.
get
(),
tf
.
stack
(
[
combined_shape
[
0
],
combined_shape
[
1
],
4
]))
return
decoded_boxes
,
decoded_keypoints
def
restore_map
(
self
,
from_detection_checkpoint
=
True
):
def
restore_map
(
self
,
from_detection_checkpoint
=
True
):
"""Returns a map of variables to load from a foreign checkpoint.
"""Returns a map of variables to load from a foreign checkpoint.
...
...
research/object_detection/meta_architectures/ssd_meta_arch_test.py
View file @
ff88581a
...
@@ -18,7 +18,6 @@ import functools
...
@@ -18,7 +18,6 @@ import functools
import
numpy
as
np
import
numpy
as
np
import
tensorflow
as
tf
import
tensorflow
as
tf
from
tensorflow.python.training
import
saver
as
tf_saver
from
object_detection.core
import
anchor_generator
from
object_detection.core
import
anchor_generator
from
object_detection.core
import
box_list
from
object_detection.core
import
box_list
from
object_detection.core
import
losses
from
object_detection.core
import
losses
...
@@ -34,7 +33,12 @@ class FakeSSDFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
...
@@ -34,7 +33,12 @@ class FakeSSDFeatureExtractor(ssd_meta_arch.SSDFeatureExtractor):
def
__init__
(
self
):
def
__init__
(
self
):
super
(
FakeSSDFeatureExtractor
,
self
).
__init__
(
super
(
FakeSSDFeatureExtractor
,
self
).
__init__
(
depth_multiplier
=
0
,
min_depth
=
0
,
conv_hyperparams
=
None
)
is_training
=
True
,
depth_multiplier
=
0
,
min_depth
=
0
,
pad_to_multiple
=
1
,
batch_norm_trainable
=
True
,
conv_hyperparams
=
None
)
def
preprocess
(
self
,
resized_inputs
):
def
preprocess
(
self
,
resized_inputs
):
return
tf
.
identity
(
resized_inputs
)
return
tf
.
identity
(
resized_inputs
)
...
@@ -55,7 +59,7 @@ class MockAnchorGenerator2x2(anchor_generator.AnchorGenerator):
...
@@ -55,7 +59,7 @@ class MockAnchorGenerator2x2(anchor_generator.AnchorGenerator):
def
num_anchors_per_location
(
self
):
def
num_anchors_per_location
(
self
):
return
[
1
]
return
[
1
]
def
_generate
(
self
,
feature_map_shape_list
):
def
_generate
(
self
,
feature_map_shape_list
,
im_height
,
im_width
):
return
box_list
.
BoxList
(
return
box_list
.
BoxList
(
tf
.
constant
([[
0
,
0
,
.
5
,
.
5
],
tf
.
constant
([[
0
,
0
,
.
5
,
.
5
],
[
0
,
.
5
,
.
5
,
1
],
[
0
,
.
5
,
.
5
,
1
],
...
@@ -147,6 +151,7 @@ class SsdMetaArchTest(tf.test.TestCase):
...
@@ -147,6 +151,7 @@ class SsdMetaArchTest(tf.test.TestCase):
self
.
assertTrue
(
'box_encodings'
in
prediction_dict
)
self
.
assertTrue
(
'box_encodings'
in
prediction_dict
)
self
.
assertTrue
(
'class_predictions_with_background'
in
prediction_dict
)
self
.
assertTrue
(
'class_predictions_with_background'
in
prediction_dict
)
self
.
assertTrue
(
'feature_maps'
in
prediction_dict
)
self
.
assertTrue
(
'feature_maps'
in
prediction_dict
)
self
.
assertTrue
(
'anchors'
in
prediction_dict
)
init_op
=
tf
.
global_variables_initializer
()
init_op
=
tf
.
global_variables_initializer
()
with
self
.
test_session
(
graph
=
tf_graph
)
as
sess
:
with
self
.
test_session
(
graph
=
tf_graph
)
as
sess
:
...
@@ -242,7 +247,7 @@ class SsdMetaArchTest(tf.test.TestCase):
...
@@ -242,7 +247,7 @@ class SsdMetaArchTest(tf.test.TestCase):
def
test_restore_map_for_detection_ckpt
(
self
):
def
test_restore_map_for_detection_ckpt
(
self
):
init_op
=
tf
.
global_variables_initializer
()
init_op
=
tf
.
global_variables_initializer
()
saver
=
tf
_saver
.
Saver
()
saver
=
tf
.
train
.
Saver
()
save_path
=
self
.
get_temp_dir
()
save_path
=
self
.
get_temp_dir
()
with
self
.
test_session
()
as
sess
:
with
self
.
test_session
()
as
sess
:
sess
.
run
(
init_op
)
sess
.
run
(
init_op
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment