Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
f282f6ef
Commit
f282f6ef
authored
Jul 05, 2017
by
Alexander Gorban
Browse files
Merge branch 'master' of github.com:tensorflow/models
parents
58a5da7b
a2970b03
Changes
302
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
6620 additions
and
0 deletions
+6620
-0
object_detection/core/box_list.py
object_detection/core/box_list.py
+207
-0
object_detection/core/box_list_ops.py
object_detection/core/box_list_ops.py
+975
-0
object_detection/core/box_list_ops_test.py
object_detection/core/box_list_ops_test.py
+962
-0
object_detection/core/box_list_test.py
object_detection/core/box_list_test.py
+134
-0
object_detection/core/box_predictor.py
object_detection/core/box_predictor.py
+546
-0
object_detection/core/box_predictor_test.py
object_detection/core/box_predictor_test.py
+323
-0
object_detection/core/data_decoder.py
object_detection/core/data_decoder.py
+41
-0
object_detection/core/keypoint_ops.py
object_detection/core/keypoint_ops.py
+231
-0
object_detection/core/keypoint_ops_test.py
object_detection/core/keypoint_ops_test.py
+168
-0
object_detection/core/losses.py
object_detection/core/losses.py
+551
-0
object_detection/core/losses_test.py
object_detection/core/losses_test.py
+562
-0
object_detection/core/matcher.py
object_detection/core/matcher.py
+213
-0
object_detection/core/matcher_test.py
object_detection/core/matcher_test.py
+150
-0
object_detection/core/minibatch_sampler.py
object_detection/core/minibatch_sampler.py
+90
-0
object_detection/core/minibatch_sampler_test.py
object_detection/core/minibatch_sampler_test.py
+82
-0
object_detection/core/model.py
object_detection/core/model.py
+252
-0
object_detection/core/post_processing.py
object_detection/core/post_processing.py
+298
-0
object_detection/core/post_processing_test.py
object_detection/core/post_processing_test.py
+673
-0
object_detection/core/prefetcher.py
object_detection/core/prefetcher.py
+61
-0
object_detection/core/prefetcher_test.py
object_detection/core/prefetcher_test.py
+101
-0
No files found.
Too many changes to show.
To preserve performance only
302 of 302+
files are displayed.
Plain diff
Email patch
object_detection/core/box_list.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Bounding Box List definition.
BoxList represents a list of bounding boxes as tensorflow
tensors, where each bounding box is represented as a row of 4 numbers,
[y_min, x_min, y_max, x_max]. It is assumed that all bounding boxes
within a given list correspond to a single image. See also
box_list_ops.py for common box related operations (such as area, iou, etc).
Optionally, users can add additional related fields (such as weights).
We assume the following things to be true about fields:
* they correspond to boxes in the box_list along the 0th dimension
* they have inferrable rank at graph construction time
* all dimensions except for possibly the 0th can be inferred
(i.e., not None) at graph construction time.
Some other notes:
* Following tensorflow conventions, we use height, width ordering,
and correspondingly, y,x (or ymin, xmin, ymax, xmax) ordering
* Tensors are always provided as (flat) [N, 4] tensors.
"""
import
tensorflow
as
tf
class BoxList(object):
  """Box collection.

  Holds an [N, 4] float32 tensor of box corners under the key 'boxes' in
  `self.data`, plus any number of per-box extra fields (e.g. scores, labels)
  keyed by name. All fields are assumed to align with boxes along dim 0.
  """

  def __init__(self, boxes):
    """Constructs box collection.

    Args:
      boxes: a tensor of shape [N, 4] representing box corners

    Raises:
      ValueError: if invalid dimensions for bbox data or if bbox data is not in
        float32 format.
    """
    # Rank must be exactly 2 and the last dimension exactly 4
    # ([y_min, x_min, y_max, x_max] per row).
    if len(boxes.get_shape()) != 2 or boxes.get_shape()[-1] != 4:
      raise ValueError('Invalid dimensions for box data.')
    if boxes.dtype != tf.float32:
      raise ValueError('Invalid tensor type: should be tf.float32')
    self.data = {'boxes': boxes}

  def num_boxes(self):
    """Returns number of boxes held in collection.

    Returns:
      a tensor representing the number of boxes held in the collection.
    """
    # Dynamic (run-time) count: first dimension of the boxes tensor.
    return tf.shape(self.data['boxes'])[0]

  def num_boxes_static(self):
    """Returns number of boxes held in collection.

    This number is inferred at graph construction time rather than run-time.

    Returns:
      Number of boxes held in collection (integer) or None if this is not
      inferrable at graph construction time.
    """
    # TF1-style Dimension.value access; yields None when the static shape
    # is unknown.
    return self.data['boxes'].get_shape()[0].value

  def get_all_fields(self):
    """Returns all fields."""
    # NOTE(review): returns dict.keys() directly — a list on Python 2, a view
    # on Python 3; callers should not rely on list semantics.
    return self.data.keys()

  def get_extra_fields(self):
    """Returns all non-box fields (i.e., everything not named 'boxes')."""
    return [k for k in self.data.keys() if k != 'boxes']

  def add_field(self, field, field_data):
    """Add field to box list.

    This method can be used to add related box data such as
    weights/labels, etc.

    Args:
      field: a string key to access the data via `get`
      field_data: a tensor containing the data to store in the BoxList
    """
    # No validation is performed here; an existing field is silently
    # overwritten.
    self.data[field] = field_data

  def has_field(self, field):
    """Returns whether the box list contains the given field."""
    return field in self.data

  def get(self):
    """Convenience function for accessing box coordinates.

    Returns:
      a tensor with shape [N, 4] representing box coordinates.
    """
    return self.get_field('boxes')

  def set(self, boxes):
    """Convenience function for setting box coordinates.

    Args:
      boxes: a tensor of shape [N, 4] representing box corners

    Raises:
      ValueError: if invalid dimensions for bbox data
    """
    # Unlike __init__, this does not re-check the dtype — only the shape.
    if len(boxes.get_shape()) != 2 or boxes.get_shape()[-1] != 4:
      raise ValueError('Invalid dimensions for box data.')
    self.data['boxes'] = boxes

  def get_field(self, field):
    """Accesses a box collection and associated fields.

    This function returns specified field with object; if no field is specified,
    it returns the box coordinates.

    Args:
      field: this optional string parameter can be used to specify
        a related field to be accessed.

    Returns:
      a tensor representing the box collection or an associated field.

    Raises:
      ValueError: if invalid field
    """
    if not self.has_field(field):
      raise ValueError('field ' + str(field) + ' does not exist')
    return self.data[field]

  def set_field(self, field, value):
    """Sets the value of a field.

    Updates the field of a box_list with a given value.

    Args:
      field: (string) name of the field to set value.
      value: the value to assign to the field.

    Raises:
      ValueError: if the box_list does not have specified field.
    """
    # In contrast to add_field, the field must already exist.
    if not self.has_field(field):
      raise ValueError('field %s does not exist' % field)
    self.data[field] = value

  def get_center_coordinates_and_sizes(self, scope=None):
    """Computes the center coordinates, height and width of the boxes.

    Args:
      scope: name scope of the function.

    Returns:
      a list of 4 1-D tensors [ycenter, xcenter, height, width].
    """
    with tf.name_scope(scope, 'get_center_coordinates_and_sizes'):
      box_corners = self.get()
      # Transpose to [4, N] then unstack into four [N] coordinate vectors.
      ymin, xmin, ymax, xmax = tf.unstack(tf.transpose(box_corners))
      width = xmax - xmin
      height = ymax - ymin
      ycenter = ymin + height / 2.
      xcenter = xmin + width / 2.
      return [ycenter, xcenter, height, width]

  def transpose_coordinates(self, scope=None):
    """Transpose the coordinate representation in a boxlist.

    Swaps y/x coordinates in place, i.e. [ymin, xmin, ymax, xmax] becomes
    [xmin, ymin, xmax, ymax].

    Args:
      scope: name scope of the function.
    """
    with tf.name_scope(scope, 'transpose_coordinates'):
      y_min, x_min, y_max, x_max = tf.split(
          value=self.get(), num_or_size_splits=4, axis=1)
      self.set(tf.concat([x_min, y_min, x_max, y_max], 1))

  def as_tensor_dict(self, fields=None):
    """Retrieves specified fields as a dictionary of tensors.

    Args:
      fields: (optional) list of fields to return in the dictionary.
        If None (default), all fields are returned.

    Returns:
      tensor_dict: A dictionary of tensors specified by fields.

    Raises:
      ValueError: if specified field is not contained in boxlist.
    """
    tensor_dict = {}
    if fields is None:
      fields = self.get_all_fields()
    for field in fields:
      if not self.has_field(field):
        raise ValueError('boxlist must contain all specified fields')
      tensor_dict[field] = self.get_field(field)
    return tensor_dict
object_detection/core/box_list_ops.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Bounding Box List operations.
Example box operations that are supported:
* areas: compute bounding box areas
* iou: pairwise intersection-over-union scores
* sq_dist: pairwise distances between bounding boxes
Whenever box_list_ops functions output a BoxList, the fields of the incoming
BoxList are retained unless documented otherwise.
"""
import
tensorflow
as
tf
from
object_detection.core
import
box_list
from
object_detection.utils
import
shape_utils
class SortOrder(object):
  """Enum class for sort order.

  Attributes:
    ascend: ascend order.
    descend: descend order.
  """
  # Plain class attributes acting as an enum (pre-dates use of enum.Enum
  # in this codebase).
  ascend = 1
  descend = 2
def area(boxlist, scope=None):
  """Computes the area of every box in a boxlist.

  Args:
    boxlist: BoxList holding N boxes
    scope: name scope.

  Returns:
    a tensor with shape [N] representing box areas.
  """
  with tf.name_scope(scope, 'Area'):
    ymin, xmin, ymax, xmax = tf.split(
        value=boxlist.get(), num_or_size_splits=4, axis=1)
    heights = ymax - ymin
    widths = xmax - xmin
    # Each split piece is [N, 1]; squeeze back down to [N].
    return tf.squeeze(heights * widths, [1])
def height_width(boxlist, scope=None):
  """Computes per-box heights and widths.

  Args:
    boxlist: BoxList holding N boxes
    scope: name scope.

  Returns:
    Height: A tensor with shape [N] representing box heights.
    Width: A tensor with shape [N] representing box widths.
  """
  with tf.name_scope(scope, 'HeightWidth'):
    ymin, xmin, ymax, xmax = tf.split(
        value=boxlist.get(), num_or_size_splits=4, axis=1)
    heights = tf.squeeze(ymax - ymin, [1])
    widths = tf.squeeze(xmax - xmin, [1])
    return heights, widths
def scale(boxlist, y_scale, x_scale, scope=None):
  """Rescales box coordinates along the y and x axes.

  Args:
    boxlist: BoxList holding N boxes
    y_scale: (float) scalar tensor
    x_scale: (float) scalar tensor
    scope: name scope.

  Returns:
    boxlist: BoxList holding N boxes
  """
  with tf.name_scope(scope, 'Scale'):
    # Scales may arrive as Python floats or integer tensors; normalize them.
    y_scale = tf.cast(y_scale, tf.float32)
    x_scale = tf.cast(x_scale, tf.float32)
    ymin, xmin, ymax, xmax = tf.split(
        value=boxlist.get(), num_or_size_splits=4, axis=1)
    scaled_corners = tf.concat(
        [y_scale * ymin, x_scale * xmin, y_scale * ymax, x_scale * xmax], 1)
    scaled_boxlist = box_list.BoxList(scaled_corners)
    # Extra fields are per-box metadata and survive scaling unchanged.
    return _copy_extra_fields(scaled_boxlist, boxlist)
def clip_to_window(boxlist, window, filter_nonoverlapping=True, scope=None):
  """Clip bounding boxes to a window.

  This op clips any input bounding boxes (represented by bounding box
  corners) to a window, optionally filtering out boxes that do not
  overlap at all with the window.

  Args:
    boxlist: BoxList holding M_in boxes
    window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
      window to which the op should clip boxes.
    filter_nonoverlapping: whether to filter out boxes that do not overlap at
      all with the window.
    scope: name scope.

  Returns:
    a BoxList holding M_out boxes where M_out <= M_in
  """
  with tf.name_scope(scope, 'ClipToWindow'):
    y_min, x_min, y_max, x_max = tf.split(
        value=boxlist.get(), num_or_size_splits=4, axis=1)
    win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
    # Clamp every coordinate into [win_min, win_max] on its axis.
    y_min_clipped = tf.maximum(tf.minimum(y_min, win_y_max), win_y_min)
    y_max_clipped = tf.maximum(tf.minimum(y_max, win_y_max), win_y_min)
    x_min_clipped = tf.maximum(tf.minimum(x_min, win_x_max), win_x_min)
    x_max_clipped = tf.maximum(tf.minimum(x_max, win_x_max), win_x_min)
    clipped = box_list.BoxList(
        tf.concat([y_min_clipped, x_min_clipped, y_max_clipped, x_max_clipped],
                  1))
    clipped = _copy_extra_fields(clipped, boxlist)
    if filter_nonoverlapping:
      # A box fully outside the window clips to zero area; drop those rows.
      areas = area(clipped)
      nonzero_area_indices = tf.cast(
          tf.reshape(tf.where(tf.greater(areas, 0.0)), [-1]), tf.int32)
      clipped = gather(clipped, nonzero_area_indices)
    return clipped
def prune_outside_window(boxlist, window, scope=None):
  """Prunes bounding boxes that fall outside a given window.

  This function prunes bounding boxes that even partially fall outside the given
  window. See also clip_to_window which only prunes bounding boxes that fall
  completely outside the window, and clips any bounding boxes that partially
  overflow.

  Args:
    boxlist: a BoxList holding M_in boxes.
    window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax]
      of the window
    scope: name scope.

  Returns:
    pruned_corners: a tensor with shape [M_out, 4] where M_out <= M_in
    valid_indices: a tensor with shape [M_out] indexing the valid bounding boxes
      in the input tensor.
  """
  with tf.name_scope(scope, 'PruneOutsideWindow'):
    y_min, x_min, y_max, x_max = tf.split(
        value=boxlist.get(), num_or_size_splits=4, axis=1)
    win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
    # [M_in, 4] boolean tensor: one column per possible boundary violation.
    coordinate_violations = tf.concat([
        tf.less(y_min, win_y_min), tf.less(x_min, win_x_min),
        tf.greater(y_max, win_y_max), tf.greater(x_max, win_x_max)
    ], 1)
    # Keep rows with no violation in any column.
    valid_indices = tf.reshape(
        tf.where(tf.logical_not(tf.reduce_any(coordinate_violations, 1))),
        [-1])
    return gather(boxlist, valid_indices), valid_indices
def prune_completely_outside_window(boxlist, window, scope=None):
  """Prunes bounding boxes that fall completely outside of the given window.

  The function clip_to_window prunes bounding boxes that fall
  completely outside the window, but also clips any bounding boxes that
  partially overflow. This function does not clip partially overflowing boxes.

  Args:
    boxlist: a BoxList holding M_in boxes.
    window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax]
      of the window
    scope: name scope.

  Returns:
    pruned_corners: a tensor with shape [M_out, 4] where M_out <= M_in
    valid_indices: a tensor with shape [M_out] indexing the valid bounding boxes
      in the input tensor.
  """
  # NOTE(review): the default scope string below misspells "Completely".
  # Left as-is deliberately — changing it would rename graph nodes, which
  # may matter for serialized graphs; confirm before fixing.
  with tf.name_scope(scope, 'PruneCompleteleyOutsideWindow'):
    y_min, x_min, y_max, x_max = tf.split(
        value=boxlist.get(), num_or_size_splits=4, axis=1)
    win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
    # A box is completely outside iff it lies entirely past any one edge.
    coordinate_violations = tf.concat([
        tf.greater_equal(y_min, win_y_max), tf.greater_equal(x_min, win_x_max),
        tf.less_equal(y_max, win_y_min), tf.less_equal(x_max, win_x_min)
    ], 1)
    valid_indices = tf.reshape(
        tf.where(tf.logical_not(tf.reduce_any(coordinate_violations, 1))),
        [-1])
    return gather(boxlist, valid_indices), valid_indices
def intersection(boxlist1, boxlist2, scope=None):
  """Computes the [N, M] matrix of pairwise intersection areas.

  Args:
    boxlist1: BoxList holding N boxes
    boxlist2: BoxList holding M boxes
    scope: name scope.

  Returns:
    a tensor with shape [N, M] representing pairwise intersections
  """
  with tf.name_scope(scope, 'Intersection'):
    y_min1, x_min1, y_max1, x_max1 = tf.split(
        value=boxlist1.get(), num_or_size_splits=4, axis=1)
    y_min2, x_min2, y_max2, x_max2 = tf.split(
        value=boxlist2.get(), num_or_size_splits=4, axis=1)
    # Broadcasting an [N, 1] column against a [1, M] row yields the full
    # [N, M] pairwise grid for each coordinate comparison.
    pairwise_min_ymax = tf.minimum(y_max1, tf.transpose(y_max2))
    pairwise_max_ymin = tf.maximum(y_min1, tf.transpose(y_min2))
    overlap_heights = tf.maximum(0.0, pairwise_min_ymax - pairwise_max_ymin)
    pairwise_min_xmax = tf.minimum(x_max1, tf.transpose(x_max2))
    pairwise_max_xmin = tf.maximum(x_min1, tf.transpose(x_min2))
    overlap_widths = tf.maximum(0.0, pairwise_min_xmax - pairwise_max_xmin)
    return overlap_heights * overlap_widths
def matched_intersection(boxlist1, boxlist2, scope=None):
  """Computes intersection areas between aligned pairs of boxes.

  The i-th box of boxlist1 is matched against the i-th box of boxlist2;
  both boxlists must hold the same number of boxes.

  Args:
    boxlist1: BoxList holding N boxes
    boxlist2: BoxList holding N boxes
    scope: name scope.

  Returns:
    a tensor with shape [N] representing pairwise intersections
  """
  with tf.name_scope(scope, 'MatchedIntersection'):
    y_min1, x_min1, y_max1, x_max1 = tf.split(
        value=boxlist1.get(), num_or_size_splits=4, axis=1)
    y_min2, x_min2, y_max2, x_max2 = tf.split(
        value=boxlist2.get(), num_or_size_splits=4, axis=1)
    overlap_heights = tf.maximum(
        0.0, tf.minimum(y_max1, y_max2) - tf.maximum(y_min1, y_min2))
    overlap_widths = tf.maximum(
        0.0, tf.minimum(x_max1, x_max2) - tf.maximum(x_min1, x_min2))
    # Flatten the [N, 1] product down to [N].
    return tf.reshape(overlap_heights * overlap_widths, [-1])
def iou(boxlist1, boxlist2, scope=None):
  """Computes the matrix of pairwise intersection-over-union scores.

  Args:
    boxlist1: BoxList holding N boxes
    boxlist2: BoxList holding M boxes
    scope: name scope.

  Returns:
    a tensor with shape [N, M] representing pairwise iou scores.
  """
  with tf.name_scope(scope, 'IOU'):
    intersect_areas = intersection(boxlist1, boxlist2)
    areas1 = area(boxlist1)
    areas2 = area(boxlist2)
    union_areas = (
        tf.expand_dims(areas1, 1) + tf.expand_dims(areas2, 0) -
        intersect_areas)
    # Guard against 0/0: pairs with no overlap get an IOU of exactly zero.
    return tf.where(
        tf.equal(intersect_areas, 0.0), tf.zeros_like(intersect_areas),
        tf.truediv(intersect_areas, union_areas))
def matched_iou(boxlist1, boxlist2, scope=None):
  """Computes intersection-over-union between aligned pairs of boxes.

  Args:
    boxlist1: BoxList holding N boxes
    boxlist2: BoxList holding N boxes
    scope: name scope.

  Returns:
    a tensor with shape [N] representing pairwise iou scores.
  """
  with tf.name_scope(scope, 'MatchedIOU'):
    intersect_areas = matched_intersection(boxlist1, boxlist2)
    areas1 = area(boxlist1)
    areas2 = area(boxlist2)
    union_areas = areas1 + areas2 - intersect_areas
    # Non-overlapping pairs map to 0 rather than dividing 0 by the union.
    return tf.where(
        tf.equal(intersect_areas, 0.0), tf.zeros_like(intersect_areas),
        tf.truediv(intersect_areas, union_areas))
def ioa(boxlist1, boxlist2, scope=None):
  """Computes the matrix of pairwise intersection-over-area scores.

  intersection-over-area (IOA) between two boxes box1 and box2 is defined as
  their intersection area over box2's area. Note that ioa is not symmetric,
  that is, ioa(box1, box2) != ioa(box2, box1).

  Args:
    boxlist1: BoxList holding N boxes
    boxlist2: BoxList holding M boxes
    scope: name scope.

  Returns:
    a tensor with shape [N, M] representing pairwise ioa scores.
  """
  with tf.name_scope(scope, 'IOA'):
    intersect_areas = intersection(boxlist1, boxlist2)
    # [1, M] row of boxlist2 areas, broadcast over all N rows.
    areas2 = tf.expand_dims(area(boxlist2), 0)
    return tf.truediv(intersect_areas, areas2)
def prune_non_overlapping_boxes(boxlist1, boxlist2, min_overlap=0.0,
                                scope=None):
  """Prunes the boxes in boxlist1 that overlap less than thresh with boxlist2.

  For each box in boxlist1, we want its IOA to be more than minoverlap with
  at least one of the boxes in boxlist2. If it does not, we remove it.

  Args:
    boxlist1: BoxList holding N boxes.
    boxlist2: BoxList holding M boxes.
    min_overlap: Minimum required overlap between boxes, to count them as
      overlapping.
    scope: name scope.

  Returns:
    new_boxlist1: A pruned boxlist with size [N', 4].
    keep_inds: A tensor with shape [N'] indexing kept bounding boxes in the
      first input BoxList `boxlist1`.
  """
  with tf.name_scope(scope, 'PruneNonOverlappingBoxes'):
    ioa_ = ioa(boxlist2, boxlist1)  # [M, N] tensor
    # Best achievable IOA for each boxlist1 box over all boxlist2 boxes.
    # Uses `axis=` instead of the deprecated `reduction_indices=` /
    # `squeeze_dims=` keyword aliases (deprecated since TF 1.0).
    ioa_ = tf.reduce_max(ioa_, axis=[0])  # [N] tensor
    keep_bool = tf.greater_equal(ioa_, tf.constant(min_overlap))
    keep_inds = tf.squeeze(tf.where(keep_bool), axis=[1])
    new_boxlist1 = gather(boxlist1, keep_inds)
    return new_boxlist1, keep_inds
def prune_small_boxes(boxlist, min_side, scope=None):
  """Removes boxes whose height or width is below min_side.

  Args:
    boxlist: BoxList holding N boxes.
    min_side: Minimum width AND height of box to survive pruning.
    scope: name scope.

  Returns:
    A pruned boxlist.
  """
  with tf.name_scope(scope, 'PruneSmallBoxes'):
    heights, widths = height_width(boxlist)
    wide_enough = tf.greater_equal(widths, min_side)
    tall_enough = tf.greater_equal(heights, min_side)
    keep_mask = tf.logical_and(wide_enough, tall_enough)
    keep_indices = tf.reshape(tf.where(keep_mask), [-1])
    return gather(boxlist, keep_indices)
def change_coordinate_frame(boxlist, window, scope=None):
  """Change coordinate frame of the boxlist to be relative to window's frame.

  Given a window of the form [ymin, xmin, ymax, xmax],
  changes bounding box coordinates from boxlist to be relative to this window
  (e.g., the min corner maps to (0,0) and the max corner maps to (1,1)).

  An example use case is data augmentation: where we are given groundtruth
  boxes (boxlist) and would like to randomly crop the image to some
  window (window). In this case we need to change the coordinate frame of
  each groundtruth box to be relative to this new window.

  Args:
    boxlist: A BoxList object holding N boxes.
    window: A rank 1 tensor [4].
    scope: name scope.

  Returns:
    Returns a BoxList object with N boxes.
  """
  with tf.name_scope(scope, 'ChangeCoordinateFrame'):
    win_height = window[2] - window[0]
    win_width = window[3] - window[1]
    # Translate so the window's min corner is the origin, then rescale so
    # the window spans [0, 1] on each axis.
    boxlist_new = scale(box_list.BoxList(
        boxlist.get() - [window[0], window[1], window[0], window[1]]),
                        1.0 / win_height, 1.0 / win_width)
    boxlist_new = _copy_extra_fields(boxlist_new, boxlist)
    return boxlist_new
def sq_dist(boxlist1, boxlist2, scope=None):
  """Computes the pairwise squared distances between box corners.

  This op treats each box as if it were a point in a 4d Euclidean space and
  computes pairwise squared distances.

  Mathematically, we are given two matrices of box coordinates X and Y,
  where X(i,:) is the i'th row of X, containing the 4 numbers defining the
  corners of the i'th box in boxlist1. Similarly Y(j,:) corresponds to
  boxlist2.  We compute
  Z(i,j) = ||X(i,:) - Y(j,:)||^2
         = ||X(i,:)||^2 + ||Y(j,:)||^2 - 2 X(i,:)' * Y(j,:),

  Args:
    boxlist1: BoxList holding N boxes
    boxlist2: BoxList holding M boxes
    scope: name scope.

  Returns:
    a tensor with shape [N, M] representing pairwise distances
  """
  with tf.name_scope(scope, 'SqDist'):
    # Row-wise squared norms, kept 2-D ([N, 1] / [M, 1]) so they broadcast
    # against the [N, M] inner-product matrix below.
    sqnorm1 = tf.reduce_sum(tf.square(boxlist1.get()), 1, keep_dims=True)
    sqnorm2 = tf.reduce_sum(tf.square(boxlist2.get()), 1, keep_dims=True)
    innerprod = tf.matmul(boxlist1.get(), boxlist2.get(),
                          transpose_a=False, transpose_b=True)
    return sqnorm1 + tf.transpose(sqnorm2) - 2.0 * innerprod
def boolean_mask(boxlist, indicator, fields=None, scope=None):
  """Select boxes from BoxList according to indicator and return new BoxList.

  `boolean_mask` returns the subset of boxes that are marked as "True" by the
  indicator tensor. By default, `boolean_mask` returns boxes corresponding to
  the input index list, as well as all additional fields stored in the boxlist
  (indexing into the first dimension).  However one can optionally only draw
  from a subset of fields.

  Args:
    boxlist: BoxList holding N boxes
    indicator: a rank-1 boolean tensor
    fields: (optional) list of fields to also gather from.  If None (default),
      all fields are gathered from.  Pass an empty fields list to only gather
      the box coordinates.
    scope: name scope.

  Returns:
    subboxlist: a BoxList corresponding to the subset of the input BoxList
      specified by indicator
  Raises:
    ValueError: if `indicator` is not a rank-1 boolean tensor.
  """
  with tf.name_scope(scope, 'BooleanMask'):
    # Validate the indicator statically before building any ops.
    if indicator.shape.ndims != 1:
      raise ValueError('indicator should have rank 1')
    if indicator.dtype != tf.bool:
      raise ValueError('indicator should be a boolean tensor')
    subboxlist = box_list.BoxList(tf.boolean_mask(boxlist.get(), indicator))
    if fields is None:
      fields = boxlist.get_extra_fields()
    for field in fields:
      if not boxlist.has_field(field):
        raise ValueError('boxlist must contain all specified fields')
      # Each field aligns with boxes along dim 0, so the same mask applies.
      subfieldlist = tf.boolean_mask(boxlist.get_field(field), indicator)
      subboxlist.add_field(field, subfieldlist)
    return subboxlist
def gather(boxlist, indices, fields=None, scope=None):
  """Gather boxes from BoxList according to indices and return new BoxList.

  Returns the boxes selected by `indices`; extra fields are gathered along
  their first dimension the same way. Pass an explicit `fields` list to
  restrict this, or an empty list to gather only box coordinates.

  Args:
    boxlist: BoxList holding N boxes
    indices: a rank-1 tensor of type int32 / int64
    fields: (optional) list of fields to also gather from. If None (default),
      all fields are gathered from. Pass an empty fields list to only gather
      the box coordinates.
    scope: name scope.

  Returns:
    subboxlist: a BoxList corresponding to the subset of the input BoxList
      specified by indices

  Raises:
    ValueError: if specified field is not contained in boxlist or if the
      indices are not of type int32
  """
  with tf.name_scope(scope, 'Gather'):
    if len(indices.shape.as_list()) != 1:
      raise ValueError('indices should have rank 1')
    if indices.dtype not in (tf.int32, tf.int64):
      raise ValueError('indices should be an int32 / int64 tensor')
    gathered = box_list.BoxList(tf.gather(boxlist.get(), indices))
    field_names = boxlist.get_extra_fields() if fields is None else fields
    for field_name in field_names:
      if not boxlist.has_field(field_name):
        raise ValueError('boxlist must contain all specified fields')
      gathered_field = tf.gather(boxlist.get_field(field_name), indices)
      gathered.add_field(field_name, gathered_field)
    return gathered
def concatenate(boxlists, fields=None, scope=None):
  """Concatenate list of BoxLists.

  Concatenates the input BoxLists into one larger BoxList. Field tensors are
  concatenated too, provided their shapes agree in every dimension except
  the first.

  Args:
    boxlists: list of BoxList objects
    fields: optional list of fields to also concatenate. By default, all
      fields from the first BoxList in the list are included in the
      concatenation.
    scope: name scope.

  Returns:
    a BoxList with number of boxes equal to
      sum([boxlist.num_boxes() for boxlist in BoxList])

  Raises:
    ValueError: if boxlists is invalid (i.e., is not a list, is empty, or
      contains non BoxList objects), or if requested fields are not contained
      in all boxlists
  """
  with tf.name_scope(scope, 'Concatenate'):
    if not isinstance(boxlists, list):
      raise ValueError('boxlists should be a list')
    if not boxlists:
      raise ValueError('boxlists should have nonzero length')
    for element in boxlists:
      if not isinstance(element, box_list.BoxList):
        raise ValueError('all elements of boxlists should be BoxList objects')
    merged = box_list.BoxList(
        tf.concat([element.get() for element in boxlists], 0))
    if fields is None:
      fields = boxlists[0].get_extra_fields()
    for field in fields:
      # The first boxlist's field shape is the reference; dim 0 is free.
      reference_shape = boxlists[0].get_field(field).get_shape().as_list()
      reference_shape[0] = -1
      if None in reference_shape:
        raise ValueError('field %s must have fully defined shape except for the'
                         ' 0th dimension.' % field)
      for element in boxlists:
        if not element.has_field(field):
          raise ValueError('boxlist must contain all requested fields')
        candidate_shape = element.get_field(field).get_shape().as_list()
        candidate_shape[0] = -1
        if candidate_shape != reference_shape:
          raise ValueError('field %s must have same shape for all boxlists '
                           'except for the 0th dimension.' % field)
      merged.add_field(
          field, tf.concat([element.get_field(field) for element in boxlists],
                           0))
    return merged
def sort_by_field(boxlist, field, order=SortOrder.descend, scope=None):
  """Sort boxes and associated fields according to a scalar field.

  A common use case is reordering the boxes according to descending scores.

  Args:
    boxlist: BoxList holding N boxes.
    field: A BoxList field for sorting and reordering the BoxList.
    order: (Optional) descend or ascend. Default is descend.
    scope: name scope.

  Returns:
    sorted_boxlist: A sorted BoxList with the field in the specified order.

  Raises:
    ValueError: if specified field does not exist
    ValueError: if the order is not either descend or ascend
  """
  with tf.name_scope(scope, 'SortByField'):
    if order != SortOrder.descend and order != SortOrder.ascend:
      raise ValueError('Invalid sort order')

    sort_keys = boxlist.get_field(field)
    if len(sort_keys.shape.as_list()) != 1:
      raise ValueError('Field should have rank 1')

    num_boxes = boxlist.num_boxes()
    num_entries = tf.size(sort_keys)
    # Runtime check that the sort field has exactly one entry per box.
    length_assert = tf.Assert(
        tf.equal(num_boxes, num_entries),
        ['Incorrect field size: actual vs expected.', num_entries, num_boxes])

    with tf.control_dependencies([length_assert]):
      # TODO: Remove with tf.device when top_k operation runs correctly on GPU.
      with tf.device('/cpu:0'):
        # top_k over the full length yields a descending ordering.
        _, sorted_indices = tf.nn.top_k(sort_keys, num_boxes, sorted=True)

    if order == SortOrder.ascend:
      sorted_indices = tf.reverse_v2(sorted_indices, [0])
    return gather(boxlist, sorted_indices)
def visualize_boxes_in_image(image, boxlist, normalized=False, scope=None):
  """Overlay bounding box list on image.

  Currently this visualization plots a 1 pixel thick red bounding box on top
  of the image. Note that tf.image.draw_bounding_boxes essentially is
  1 indexed.

  Args:
    image: an image tensor with shape [height, width, 3]
    boxlist: a BoxList
    normalized: (boolean) specify whether corners are to be interpreted
      as absolute coordinates in image space or normalized with respect to the
      image size.
    scope: name scope.

  Returns:
    image_and_boxes: an image tensor with shape [height, width, 3]
  """
  with tf.name_scope(scope, 'VisualizeBoxesInImage'):
    if not normalized:
      # draw_bounding_boxes expects normalized coordinates, so rescale by the
      # dynamic image size first.
      height, width, _ = tf.unstack(tf.shape(image))
      boxlist = scale(boxlist,
                      1.0 / tf.cast(height, tf.float32),
                      1.0 / tf.cast(width, tf.float32))
    # Add a batch dimension, draw, then strip the batch dimension again.
    batched_corners = tf.expand_dims(boxlist.get(), 0)
    batched_image = tf.expand_dims(image, 0)
    drawn = tf.image.draw_bounding_boxes(batched_image, batched_corners)
    return tf.squeeze(drawn, [0])
def filter_field_value_equals(boxlist, field, value, scope=None):
  """Filter to keep only boxes with field entries equal to the given value.

  Args:
    boxlist: BoxList holding N boxes.
    field: field name for filtering.
    value: scalar value.
    scope: name scope.

  Returns:
    a BoxList holding M boxes where M <= N

  Raises:
    ValueError: if boxlist not a BoxList object or if it does not have
      the specified field.
  """
  with tf.name_scope(scope, 'FilterFieldValueEquals'):
    if not isinstance(boxlist, box_list.BoxList):
      raise ValueError('boxlist must be a BoxList')
    if not boxlist.has_field(field):
      raise ValueError('boxlist must contain the specified field')
    matches = tf.equal(boxlist.get_field(field), value)
    # tf.where yields [M, 1] indices; flatten to rank 1 for gather.
    keep_indices = tf.reshape(tf.where(matches), [-1])
    return gather(boxlist, keep_indices)
def filter_greater_than(boxlist, thresh, scope=None):
  """Filter to keep only boxes with score exceeding a given threshold.

  This op keeps the collection of boxes whose corresponding scores are
  greater than the input threshold.

  TODO: Change function name to FilterScoresGreaterThan

  Args:
    boxlist: BoxList holding N boxes. Must contain a 'scores' field
      representing detection scores.
    thresh: scalar threshold
    scope: name scope.

  Returns:
    a BoxList holding M boxes where M <= N

  Raises:
    ValueError: if boxlist not a BoxList object or if it does not
      have a scores field
  """
  with tf.name_scope(scope, 'FilterGreaterThan'):
    if not isinstance(boxlist, box_list.BoxList):
      raise ValueError('boxlist must be a BoxList')
    if not boxlist.has_field('scores'):
      raise ValueError('input boxlist must have \'scores\' field')
    scores = boxlist.get_field('scores')
    score_rank = len(scores.shape.as_list())
    if score_rank > 2:
      raise ValueError('Scores should have rank 1 or 2')
    if score_rank == 2 and scores.shape.as_list()[1] != 1:
      raise ValueError('Scores should have rank 1 or have shape '
                       'consistent with [None, 1]')
    keep = tf.where(tf.greater(scores, thresh))
    # Flatten [M, rank] where-indices and convert to int32 for gather.
    high_score_indices = tf.cast(tf.reshape(keep, [-1]), tf.int32)
    return gather(boxlist, high_score_indices)
def non_max_suppression(boxlist, thresh, max_output_size, scope=None):
  """Non maximum suppression.

  This op greedily selects a subset of detection bounding boxes, pruning
  away boxes that have high IOU (intersection over union) overlap (> thresh)
  with already selected boxes. Note that this only works for a single class ---
  to apply NMS to multi-class predictions, use MultiClassNonMaxSuppression.

  Args:
    boxlist: BoxList holding N boxes. Must contain a 'scores' field
      representing detection scores.
    thresh: scalar threshold
    max_output_size: maximum number of retained boxes
    scope: name scope.

  Returns:
    a BoxList holding M boxes where M <= max_output_size

  Raises:
    ValueError: if thresh is not in [0, 1]
  """
  with tf.name_scope(scope, 'NonMaxSuppression'):
    if not 0 <= thresh <= 1.0:
      raise ValueError('thresh must be between 0 and 1')
    if not isinstance(boxlist, box_list.BoxList):
      raise ValueError('boxlist must be a BoxList')
    if not boxlist.has_field('scores'):
      raise ValueError('input boxlist must have \'scores\' field')
    # Delegate the greedy selection to the built-in TF kernel.
    keep = tf.image.non_max_suppression(
        boxlist.get(), boxlist.get_field('scores'),
        max_output_size, iou_threshold=thresh)
    return gather(boxlist, keep)
def _copy_extra_fields(boxlist_to_copy_to, boxlist_to_copy_from):
  """Copies the extra fields of boxlist_to_copy_from to boxlist_to_copy_to.

  Args:
    boxlist_to_copy_to: BoxList to which extra fields are copied.
    boxlist_to_copy_from: BoxList from which fields are copied.

  Returns:
    boxlist_to_copy_to with extra fields.
  """
  for field_name in boxlist_to_copy_from.get_extra_fields():
    field_value = boxlist_to_copy_from.get_field(field_name)
    boxlist_to_copy_to.add_field(field_name, field_value)
  return boxlist_to_copy_to
def to_normalized_coordinates(boxlist, height, width, check_range=True,
                              scope=None):
  """Converts absolute box coordinates to normalized coordinates in [0, 1].

  Usually one uses the dynamic shape of the image or conv-layer tensor:
    boxlist = box_list_ops.to_normalized_coordinates(boxlist,
                                                     tf.shape(images)[1],
                                                     tf.shape(images)[2]),

  This function raises an assertion failed error at graph execution time when
  the maximum coordinate is smaller than 1.01 (which means that coordinates are
  already normalized). The value 1.01 is to deal with small rounding errors.

  Args:
    boxlist: BoxList with coordinates in terms of pixel-locations.
    height: Maximum value for height of absolute box coordinates.
    width: Maximum value for width of absolute box coordinates.
    check_range: If True, checks if the coordinates are normalized or not.
    scope: name scope.

  Returns:
    boxlist with normalized coordinates in [0, 1].
  """
  with tf.name_scope(scope, 'ToNormalizedCoordinates'):
    height = tf.cast(height, tf.float32)
    width = tf.cast(width, tf.float32)
    if check_range:
      coordinate_max = tf.reduce_max(boxlist.get())
      max_assert = tf.Assert(tf.greater(coordinate_max, 1.01),
                             ['max value is lower than 1.01: ', coordinate_max])
      # Threading width through tf.identity makes the scale op depend on the
      # assertion, so the check runs before the result is produced.
      with tf.control_dependencies([max_assert]):
        width = tf.identity(width)
    return scale(boxlist, 1 / height, 1 / width)
def to_absolute_coordinates(boxlist, height, width, check_range=True,
                            scope=None):
  """Converts normalized box coordinates to absolute pixel coordinates.

  This function raises an assertion failed error when the maximum box
  coordinate value is larger than 1.01 (in which case coordinates are already
  absolute).

  Args:
    boxlist: BoxList with coordinates in range [0, 1].
    height: Maximum value for height of absolute box coordinates.
    width: Maximum value for width of absolute box coordinates.
    check_range: If True, checks if the coordinates are normalized or not.
    scope: name scope.

  Returns:
    boxlist with absolute coordinates in terms of the image size.
  """
  with tf.name_scope(scope, 'ToAbsoluteCoordinates'):
    height = tf.cast(height, tf.float32)
    width = tf.cast(width, tf.float32)
    # Ensure range of input boxes is correct.
    if check_range:
      box_maximum = tf.reduce_max(boxlist.get())
      max_assert = tf.Assert(tf.greater_equal(1.01, box_maximum),
                             ['maximum box coordinate value is larger '
                              'than 1.01: ', box_maximum])
      # Threading width through tf.identity makes the scale op depend on the
      # assertion, so the check runs before the result is produced.
      with tf.control_dependencies([max_assert]):
        width = tf.identity(width)
    return scale(boxlist, height, width)
def refine_boxes_multi_class(pool_boxes,
                             num_classes,
                             nms_iou_thresh,
                             nms_max_detections,
                             voting_iou_thresh=0.5):
  """Refines a pool of boxes using non max suppression and box voting.

  Box refinement is done independently for each class.

  Args:
    pool_boxes: (BoxList) A collection of boxes to be refined. pool_boxes must
      have a rank 1 'scores' field and a rank 1 'classes' field.
    num_classes: (int scalar) Number of classes.
    nms_iou_thresh: (float scalar) iou threshold for non max suppression (NMS).
    nms_max_detections: (int scalar) maximum output size for NMS.
    voting_iou_thresh: (float scalar) iou threshold for box voting.

  Returns:
    BoxList of refined boxes.

  Raises:
    ValueError: if
      a) nms_iou_thresh or voting_iou_thresh is not in [0, 1].
      b) pool_boxes is not a BoxList.
      c) pool_boxes does not have a scores and classes field.
  """
  if not 0.0 <= nms_iou_thresh <= 1.0:
    raise ValueError('nms_iou_thresh must be between 0 and 1')
  if not 0.0 <= voting_iou_thresh <= 1.0:
    raise ValueError('voting_iou_thresh must be between 0 and 1')
  if not isinstance(pool_boxes, box_list.BoxList):
    raise ValueError('pool_boxes must be a BoxList')
  if not pool_boxes.has_field('scores'):
    raise ValueError('pool_boxes must have a \'scores\' field')
  if not pool_boxes.has_field('classes'):
    raise ValueError('pool_boxes must have a \'classes\' field')

  # Refine each class independently, then merge and re-sort by score.
  per_class_refined = []
  for class_id in range(num_classes):
    class_boxes = filter_field_value_equals(pool_boxes, 'classes', class_id)
    per_class_refined.append(
        refine_boxes(class_boxes, nms_iou_thresh, nms_max_detections,
                     voting_iou_thresh))
  return sort_by_field(concatenate(per_class_refined), 'scores')
def refine_boxes(pool_boxes,
                 nms_iou_thresh,
                 nms_max_detections,
                 voting_iou_thresh=0.5):
  """Refines a pool of boxes using non max suppression and box voting.

  Args:
    pool_boxes: (BoxList) A collection of boxes to be refined. pool_boxes must
      have a rank 1 'scores' field.
    nms_iou_thresh: (float scalar) iou threshold for non max suppression (NMS).
    nms_max_detections: (int scalar) maximum output size for NMS.
    voting_iou_thresh: (float scalar) iou threshold for box voting.

  Returns:
    BoxList of refined boxes.

  Raises:
    ValueError: if
      a) nms_iou_thresh or voting_iou_thresh is not in [0, 1].
      b) pool_boxes is not a BoxList.
      c) pool_boxes does not have a scores field.
  """
  if not 0.0 <= nms_iou_thresh <= 1.0:
    raise ValueError('nms_iou_thresh must be between 0 and 1')
  if not 0.0 <= voting_iou_thresh <= 1.0:
    raise ValueError('voting_iou_thresh must be between 0 and 1')
  if not isinstance(pool_boxes, box_list.BoxList):
    raise ValueError('pool_boxes must be a BoxList')
  if not pool_boxes.has_field('scores'):
    raise ValueError('pool_boxes must have a \'scores\' field')

  # NMS picks representatives; voting then averages each representative with
  # all pool boxes it overlaps.
  nms_boxes = non_max_suppression(pool_boxes, nms_iou_thresh,
                                  nms_max_detections)
  return box_voting(nms_boxes, pool_boxes, voting_iou_thresh)
def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5):
  """Performs box voting as described in S. Gidaris and N. Komodakis, ICCV 2015.

  Performs box voting as described in 'Object detection via a multi-region &
  semantic segmentation-aware CNN model', Gidaris and Komodakis, ICCV 2015. For
  each box 'B' in selected_boxes, we find the set 'S' of boxes in pool_boxes
  with iou overlap >= iou_thresh. The location of B is set to the weighted
  average location of boxes in S (scores are used for weighting). And the score
  of B is set to the average score of boxes in S.

  Args:
    selected_boxes: BoxList containing a subset of boxes in pool_boxes. These
      boxes are usually selected from pool_boxes using non max suppression.
    pool_boxes: BoxList containing a set of (possibly redundant) boxes.
    iou_thresh: (float scalar) iou threshold for matching boxes in
      selected_boxes and pool_boxes.

  Returns:
    BoxList containing averaged locations and scores for each box in
    selected_boxes.

  Raises:
    ValueError: if
      a) selected_boxes or pool_boxes is not a BoxList.
      b) if iou_thresh is not in [0, 1].
      c) pool_boxes does not have a scores field.
  """
  if not 0.0 <= iou_thresh <= 1.0:
    raise ValueError('iou_thresh must be between 0 and 1')
  if not isinstance(selected_boxes, box_list.BoxList):
    raise ValueError('selected_boxes must be a BoxList')
  if not isinstance(pool_boxes, box_list.BoxList):
    raise ValueError('pool_boxes must be a BoxList')
  if not pool_boxes.has_field('scores'):
    raise ValueError('pool_boxes must have a \'scores\' field')

  # [N, M] pairwise IOU between selected boxes and pool boxes; the 0/1
  # indicator marks which pool boxes vote for each selected box.
  iou_ = iou(selected_boxes, pool_boxes)
  match_indicator = tf.to_float(tf.greater(iou_, iou_thresh))
  # Number of pool boxes voting for each selected box, shape [N].
  num_matches = tf.reduce_sum(match_indicator, 1)
  # TODO: Handle the case where some boxes in selected_boxes do not match to any
  # boxes in pool_boxes. For such boxes without any matches, we should return
  # the original boxes without voting.
  match_assert = tf.Assert(
      tf.reduce_all(tf.greater(num_matches, 0)),
      ['Each box in selected_boxes must match with at least one box '
       'in pool_boxes.'])

  # Pool scores as a column vector [M, 1] so matmul sums matched scores.
  scores = tf.expand_dims(pool_boxes.get_field('scores'), 1)
  # Non-negative scores are required for the score-weighted average below.
  scores_assert = tf.Assert(
      tf.reduce_all(tf.greater_equal(scores, 0)),
      ['Scores must be non negative.'])

  with tf.control_dependencies([scores_assert, match_assert]):
    # sum_scores[i] = total score of pool boxes matched to selected box i.
    sum_scores = tf.matmul(match_indicator, scores)
  # Mean score of the matched pool boxes.
  averaged_scores = tf.reshape(sum_scores, [-1]) / num_matches

  # Score-weighted average of the matched pool box corners; division by
  # sum_scores ([N, 1]) broadcasts over the 4 corner coordinates.
  box_locations = tf.matmul(match_indicator,
                            pool_boxes.get() * scores) / sum_scores
  averaged_boxes = box_list.BoxList(box_locations)
  # Preserve extra fields from the selected boxes, then overwrite scores.
  _copy_extra_fields(averaged_boxes, selected_boxes)
  averaged_boxes.add_field('scores', averaged_scores)
  return averaged_boxes
def pad_or_clip_box_list(boxlist, num_boxes, scope=None):
  """Pads or clips all fields of a BoxList.

  Args:
    boxlist: A BoxList with arbitrary of number of boxes.
    num_boxes: First num_boxes in boxlist are kept.
      The fields are zero-padded if num_boxes is bigger than the
      actual number of boxes.
    scope: name scope.

  Returns:
    BoxList with all fields padded or clipped.
  """
  with tf.name_scope(scope, 'PadOrClipBoxList'):
    resized = box_list.BoxList(
        shape_utils.pad_or_clip_tensor(boxlist.get(), num_boxes))
    # Every extra field is padded/clipped to the same leading size.
    for field_name in boxlist.get_extra_fields():
      resized_field = shape_utils.pad_or_clip_tensor(
          boxlist.get_field(field_name), num_boxes)
      resized.add_field(field_name, resized_field)
    return resized
object_detection/core/box_list_ops_test.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.core.box_list_ops."""
import
numpy
as
np
import
tensorflow
as
tf
from
tensorflow.python.framework
import
errors
from
object_detection.core
import
box_list
from
object_detection.core
import
box_list_ops
class
BoxListOpsTest
(
tf
.
test
.
TestCase
):
"""Tests for common bounding box operations."""
def
test_area
(
self
):
corners
=
tf
.
constant
([[
0.0
,
0.0
,
10.0
,
20.0
],
[
1.0
,
2.0
,
3.0
,
4.0
]])
exp_output
=
[
200.0
,
4.0
]
boxes
=
box_list
.
BoxList
(
corners
)
areas
=
box_list_ops
.
area
(
boxes
)
with
self
.
test_session
()
as
sess
:
areas_output
=
sess
.
run
(
areas
)
self
.
assertAllClose
(
areas_output
,
exp_output
)
def
test_height_width
(
self
):
corners
=
tf
.
constant
([[
0.0
,
0.0
,
10.0
,
20.0
],
[
1.0
,
2.0
,
3.0
,
4.0
]])
exp_output_heights
=
[
10.
,
2.
]
exp_output_widths
=
[
20.
,
2.
]
boxes
=
box_list
.
BoxList
(
corners
)
heights
,
widths
=
box_list_ops
.
height_width
(
boxes
)
with
self
.
test_session
()
as
sess
:
output_heights
,
output_widths
=
sess
.
run
([
heights
,
widths
])
self
.
assertAllClose
(
output_heights
,
exp_output_heights
)
self
.
assertAllClose
(
output_widths
,
exp_output_widths
)
def
test_scale
(
self
):
corners
=
tf
.
constant
([[
0
,
0
,
100
,
200
],
[
50
,
120
,
100
,
140
]],
dtype
=
tf
.
float32
)
boxes
=
box_list
.
BoxList
(
corners
)
boxes
.
add_field
(
'extra_data'
,
tf
.
constant
([[
1
],
[
2
]]))
y_scale
=
tf
.
constant
(
1.0
/
100
)
x_scale
=
tf
.
constant
(
1.0
/
200
)
scaled_boxes
=
box_list_ops
.
scale
(
boxes
,
y_scale
,
x_scale
)
exp_output
=
[[
0
,
0
,
1
,
1
],
[
0.5
,
0.6
,
1.0
,
0.7
]]
with
self
.
test_session
()
as
sess
:
scaled_corners_out
=
sess
.
run
(
scaled_boxes
.
get
())
self
.
assertAllClose
(
scaled_corners_out
,
exp_output
)
extra_data_out
=
sess
.
run
(
scaled_boxes
.
get_field
(
'extra_data'
))
self
.
assertAllEqual
(
extra_data_out
,
[[
1
],
[
2
]])
def
test_clip_to_window_filter_boxes_which_fall_outside_the_window
(
self
):
window
=
tf
.
constant
([
0
,
0
,
9
,
14
],
tf
.
float32
)
corners
=
tf
.
constant
([[
5.0
,
5.0
,
6.0
,
6.0
],
[
-
1.0
,
-
2.0
,
4.0
,
5.0
],
[
2.0
,
3.0
,
5.0
,
9.0
],
[
0.0
,
0.0
,
9.0
,
14.0
],
[
-
100.0
,
-
100.0
,
300.0
,
600.0
],
[
-
10.0
,
-
10.0
,
-
9.0
,
-
9.0
]])
boxes
=
box_list
.
BoxList
(
corners
)
boxes
.
add_field
(
'extra_data'
,
tf
.
constant
([[
1
],
[
2
],
[
3
],
[
4
],
[
5
],
[
6
]]))
exp_output
=
[[
5.0
,
5.0
,
6.0
,
6.0
],
[
0.0
,
0.0
,
4.0
,
5.0
],
[
2.0
,
3.0
,
5.0
,
9.0
],
[
0.0
,
0.0
,
9.0
,
14.0
],
[
0.0
,
0.0
,
9.0
,
14.0
]]
pruned
=
box_list_ops
.
clip_to_window
(
boxes
,
window
,
filter_nonoverlapping
=
True
)
with
self
.
test_session
()
as
sess
:
pruned_output
=
sess
.
run
(
pruned
.
get
())
self
.
assertAllClose
(
pruned_output
,
exp_output
)
extra_data_out
=
sess
.
run
(
pruned
.
get_field
(
'extra_data'
))
self
.
assertAllEqual
(
extra_data_out
,
[[
1
],
[
2
],
[
3
],
[
4
],
[
5
]])
def
test_clip_to_window_without_filtering_boxes_which_fall_outside_the_window
(
self
):
window
=
tf
.
constant
([
0
,
0
,
9
,
14
],
tf
.
float32
)
corners
=
tf
.
constant
([[
5.0
,
5.0
,
6.0
,
6.0
],
[
-
1.0
,
-
2.0
,
4.0
,
5.0
],
[
2.0
,
3.0
,
5.0
,
9.0
],
[
0.0
,
0.0
,
9.0
,
14.0
],
[
-
100.0
,
-
100.0
,
300.0
,
600.0
],
[
-
10.0
,
-
10.0
,
-
9.0
,
-
9.0
]])
boxes
=
box_list
.
BoxList
(
corners
)
boxes
.
add_field
(
'extra_data'
,
tf
.
constant
([[
1
],
[
2
],
[
3
],
[
4
],
[
5
],
[
6
]]))
exp_output
=
[[
5.0
,
5.0
,
6.0
,
6.0
],
[
0.0
,
0.0
,
4.0
,
5.0
],
[
2.0
,
3.0
,
5.0
,
9.0
],
[
0.0
,
0.0
,
9.0
,
14.0
],
[
0.0
,
0.0
,
9.0
,
14.0
],
[
0.0
,
0.0
,
0.0
,
0.0
]]
pruned
=
box_list_ops
.
clip_to_window
(
boxes
,
window
,
filter_nonoverlapping
=
False
)
with
self
.
test_session
()
as
sess
:
pruned_output
=
sess
.
run
(
pruned
.
get
())
self
.
assertAllClose
(
pruned_output
,
exp_output
)
extra_data_out
=
sess
.
run
(
pruned
.
get_field
(
'extra_data'
))
self
.
assertAllEqual
(
extra_data_out
,
[[
1
],
[
2
],
[
3
],
[
4
],
[
5
],
[
6
]])
def
test_prune_outside_window_filters_boxes_which_fall_outside_the_window
(
self
):
window
=
tf
.
constant
([
0
,
0
,
9
,
14
],
tf
.
float32
)
corners
=
tf
.
constant
([[
5.0
,
5.0
,
6.0
,
6.0
],
[
-
1.0
,
-
2.0
,
4.0
,
5.0
],
[
2.0
,
3.0
,
5.0
,
9.0
],
[
0.0
,
0.0
,
9.0
,
14.0
],
[
-
10.0
,
-
10.0
,
-
9.0
,
-
9.0
],
[
-
100.0
,
-
100.0
,
300.0
,
600.0
]])
boxes
=
box_list
.
BoxList
(
corners
)
boxes
.
add_field
(
'extra_data'
,
tf
.
constant
([[
1
],
[
2
],
[
3
],
[
4
],
[
5
],
[
6
]]))
exp_output
=
[[
5.0
,
5.0
,
6.0
,
6.0
],
[
2.0
,
3.0
,
5.0
,
9.0
],
[
0.0
,
0.0
,
9.0
,
14.0
]]
pruned
,
keep_indices
=
box_list_ops
.
prune_outside_window
(
boxes
,
window
)
with
self
.
test_session
()
as
sess
:
pruned_output
=
sess
.
run
(
pruned
.
get
())
self
.
assertAllClose
(
pruned_output
,
exp_output
)
keep_indices_out
=
sess
.
run
(
keep_indices
)
self
.
assertAllEqual
(
keep_indices_out
,
[
0
,
2
,
3
])
extra_data_out
=
sess
.
run
(
pruned
.
get_field
(
'extra_data'
))
self
.
assertAllEqual
(
extra_data_out
,
[[
1
],
[
3
],
[
4
]])
def
test_prune_completely_outside_window
(
self
):
window
=
tf
.
constant
([
0
,
0
,
9
,
14
],
tf
.
float32
)
corners
=
tf
.
constant
([[
5.0
,
5.0
,
6.0
,
6.0
],
[
-
1.0
,
-
2.0
,
4.0
,
5.0
],
[
2.0
,
3.0
,
5.0
,
9.0
],
[
0.0
,
0.0
,
9.0
,
14.0
],
[
-
10.0
,
-
10.0
,
-
9.0
,
-
9.0
],
[
-
100.0
,
-
100.0
,
300.0
,
600.0
]])
boxes
=
box_list
.
BoxList
(
corners
)
boxes
.
add_field
(
'extra_data'
,
tf
.
constant
([[
1
],
[
2
],
[
3
],
[
4
],
[
5
],
[
6
]]))
exp_output
=
[[
5.0
,
5.0
,
6.0
,
6.0
],
[
-
1.0
,
-
2.0
,
4.0
,
5.0
],
[
2.0
,
3.0
,
5.0
,
9.0
],
[
0.0
,
0.0
,
9.0
,
14.0
],
[
-
100.0
,
-
100.0
,
300.0
,
600.0
]]
pruned
,
keep_indices
=
box_list_ops
.
prune_completely_outside_window
(
boxes
,
window
)
with
self
.
test_session
()
as
sess
:
pruned_output
=
sess
.
run
(
pruned
.
get
())
self
.
assertAllClose
(
pruned_output
,
exp_output
)
keep_indices_out
=
sess
.
run
(
keep_indices
)
self
.
assertAllEqual
(
keep_indices_out
,
[
0
,
1
,
2
,
3
,
5
])
extra_data_out
=
sess
.
run
(
pruned
.
get_field
(
'extra_data'
))
self
.
assertAllEqual
(
extra_data_out
,
[[
1
],
[
2
],
[
3
],
[
4
],
[
6
]])
def
test_intersection
(
self
):
corners1
=
tf
.
constant
([[
4.0
,
3.0
,
7.0
,
5.0
],
[
5.0
,
6.0
,
10.0
,
7.0
]])
corners2
=
tf
.
constant
([[
3.0
,
4.0
,
6.0
,
8.0
],
[
14.0
,
14.0
,
15.0
,
15.0
],
[
0.0
,
0.0
,
20.0
,
20.0
]])
exp_output
=
[[
2.0
,
0.0
,
6.0
],
[
1.0
,
0.0
,
5.0
]]
boxes1
=
box_list
.
BoxList
(
corners1
)
boxes2
=
box_list
.
BoxList
(
corners2
)
intersect
=
box_list_ops
.
intersection
(
boxes1
,
boxes2
)
with
self
.
test_session
()
as
sess
:
intersect_output
=
sess
.
run
(
intersect
)
self
.
assertAllClose
(
intersect_output
,
exp_output
)
def
test_matched_intersection
(
self
):
corners1
=
tf
.
constant
([[
4.0
,
3.0
,
7.0
,
5.0
],
[
5.0
,
6.0
,
10.0
,
7.0
]])
corners2
=
tf
.
constant
([[
3.0
,
4.0
,
6.0
,
8.0
],
[
14.0
,
14.0
,
15.0
,
15.0
]])
exp_output
=
[
2.0
,
0.0
]
boxes1
=
box_list
.
BoxList
(
corners1
)
boxes2
=
box_list
.
BoxList
(
corners2
)
intersect
=
box_list_ops
.
matched_intersection
(
boxes1
,
boxes2
)
with
self
.
test_session
()
as
sess
:
intersect_output
=
sess
.
run
(
intersect
)
self
.
assertAllClose
(
intersect_output
,
exp_output
)
def
test_iou
(
self
):
corners1
=
tf
.
constant
([[
4.0
,
3.0
,
7.0
,
5.0
],
[
5.0
,
6.0
,
10.0
,
7.0
]])
corners2
=
tf
.
constant
([[
3.0
,
4.0
,
6.0
,
8.0
],
[
14.0
,
14.0
,
15.0
,
15.0
],
[
0.0
,
0.0
,
20.0
,
20.0
]])
exp_output
=
[[
2.0
/
16.0
,
0
,
6.0
/
400.0
],
[
1.0
/
16.0
,
0.0
,
5.0
/
400.0
]]
boxes1
=
box_list
.
BoxList
(
corners1
)
boxes2
=
box_list
.
BoxList
(
corners2
)
iou
=
box_list_ops
.
iou
(
boxes1
,
boxes2
)
with
self
.
test_session
()
as
sess
:
iou_output
=
sess
.
run
(
iou
)
self
.
assertAllClose
(
iou_output
,
exp_output
)
def
test_matched_iou
(
self
):
corners1
=
tf
.
constant
([[
4.0
,
3.0
,
7.0
,
5.0
],
[
5.0
,
6.0
,
10.0
,
7.0
]])
corners2
=
tf
.
constant
([[
3.0
,
4.0
,
6.0
,
8.0
],
[
14.0
,
14.0
,
15.0
,
15.0
]])
exp_output
=
[
2.0
/
16.0
,
0
]
boxes1
=
box_list
.
BoxList
(
corners1
)
boxes2
=
box_list
.
BoxList
(
corners2
)
iou
=
box_list_ops
.
matched_iou
(
boxes1
,
boxes2
)
with
self
.
test_session
()
as
sess
:
iou_output
=
sess
.
run
(
iou
)
self
.
assertAllClose
(
iou_output
,
exp_output
)
def
test_iouworks_on_empty_inputs
(
self
):
corners1
=
tf
.
constant
([[
4.0
,
3.0
,
7.0
,
5.0
],
[
5.0
,
6.0
,
10.0
,
7.0
]])
corners2
=
tf
.
constant
([[
3.0
,
4.0
,
6.0
,
8.0
],
[
14.0
,
14.0
,
15.0
,
15.0
],
[
0.0
,
0.0
,
20.0
,
20.0
]])
boxes1
=
box_list
.
BoxList
(
corners1
)
boxes2
=
box_list
.
BoxList
(
corners2
)
boxes_empty
=
box_list
.
BoxList
(
tf
.
zeros
((
0
,
4
)))
iou_empty_1
=
box_list_ops
.
iou
(
boxes1
,
boxes_empty
)
iou_empty_2
=
box_list_ops
.
iou
(
boxes_empty
,
boxes2
)
iou_empty_3
=
box_list_ops
.
iou
(
boxes_empty
,
boxes_empty
)
with
self
.
test_session
()
as
sess
:
iou_output_1
,
iou_output_2
,
iou_output_3
=
sess
.
run
(
[
iou_empty_1
,
iou_empty_2
,
iou_empty_3
])
self
.
assertAllEqual
(
iou_output_1
.
shape
,
(
2
,
0
))
self
.
assertAllEqual
(
iou_output_2
.
shape
,
(
0
,
3
))
self
.
assertAllEqual
(
iou_output_3
.
shape
,
(
0
,
0
))
def
test_ioa
(
self
):
corners1
=
tf
.
constant
([[
4.0
,
3.0
,
7.0
,
5.0
],
[
5.0
,
6.0
,
10.0
,
7.0
]])
corners2
=
tf
.
constant
([[
3.0
,
4.0
,
6.0
,
8.0
],
[
14.0
,
14.0
,
15.0
,
15.0
],
[
0.0
,
0.0
,
20.0
,
20.0
]])
exp_output_1
=
[[
2.0
/
12.0
,
0
,
6.0
/
400.0
],
[
1.0
/
12.0
,
0.0
,
5.0
/
400.0
]]
exp_output_2
=
[[
2.0
/
6.0
,
1.0
/
5.0
],
[
0
,
0
],
[
6.0
/
6.0
,
5.0
/
5.0
]]
boxes1
=
box_list
.
BoxList
(
corners1
)
boxes2
=
box_list
.
BoxList
(
corners2
)
ioa_1
=
box_list_ops
.
ioa
(
boxes1
,
boxes2
)
ioa_2
=
box_list_ops
.
ioa
(
boxes2
,
boxes1
)
with
self
.
test_session
()
as
sess
:
ioa_output_1
,
ioa_output_2
=
sess
.
run
([
ioa_1
,
ioa_2
])
self
.
assertAllClose
(
ioa_output_1
,
exp_output_1
)
self
.
assertAllClose
(
ioa_output_2
,
exp_output_2
)
def
test_prune_non_overlapping_boxes
(
self
):
corners1
=
tf
.
constant
([[
4.0
,
3.0
,
7.0
,
5.0
],
[
5.0
,
6.0
,
10.0
,
7.0
]])
corners2
=
tf
.
constant
([[
3.0
,
4.0
,
6.0
,
8.0
],
[
14.0
,
14.0
,
15.0
,
15.0
],
[
0.0
,
0.0
,
20.0
,
20.0
]])
boxes1
=
box_list
.
BoxList
(
corners1
)
boxes2
=
box_list
.
BoxList
(
corners2
)
minoverlap
=
0.5
exp_output_1
=
boxes1
exp_output_2
=
box_list
.
BoxList
(
tf
.
constant
(
0.0
,
shape
=
[
0
,
4
]))
output_1
,
keep_indices_1
=
box_list_ops
.
prune_non_overlapping_boxes
(
boxes1
,
boxes2
,
min_overlap
=
minoverlap
)
output_2
,
keep_indices_2
=
box_list_ops
.
prune_non_overlapping_boxes
(
boxes2
,
boxes1
,
min_overlap
=
minoverlap
)
with
self
.
test_session
()
as
sess
:
(
output_1_
,
keep_indices_1_
,
output_2_
,
keep_indices_2_
,
exp_output_1_
,
exp_output_2_
)
=
sess
.
run
(
[
output_1
.
get
(),
keep_indices_1
,
output_2
.
get
(),
keep_indices_2
,
exp_output_1
.
get
(),
exp_output_2
.
get
()])
self
.
assertAllClose
(
output_1_
,
exp_output_1_
)
self
.
assertAllClose
(
output_2_
,
exp_output_2_
)
self
.
assertAllEqual
(
keep_indices_1_
,
[
0
,
1
])
self
.
assertAllEqual
(
keep_indices_2_
,
[])
def
test_prune_small_boxes
(
self
):
boxes
=
tf
.
constant
([[
4.0
,
3.0
,
7.0
,
5.0
],
[
5.0
,
6.0
,
10.0
,
7.0
],
[
3.0
,
4.0
,
6.0
,
8.0
],
[
14.0
,
14.0
,
15.0
,
15.0
],
[
0.0
,
0.0
,
20.0
,
20.0
]])
exp_boxes
=
[[
3.0
,
4.0
,
6.0
,
8.0
],
[
0.0
,
0.0
,
20.0
,
20.0
]]
boxes
=
box_list
.
BoxList
(
boxes
)
pruned_boxes
=
box_list_ops
.
prune_small_boxes
(
boxes
,
3
)
with
self
.
test_session
()
as
sess
:
pruned_boxes
=
sess
.
run
(
pruned_boxes
.
get
())
self
.
assertAllEqual
(
pruned_boxes
,
exp_boxes
)
def
test_prune_small_boxes_prunes_boxes_with_negative_side
(
self
):
boxes
=
tf
.
constant
([[
4.0
,
3.0
,
7.0
,
5.0
],
[
5.0
,
6.0
,
10.0
,
7.0
],
[
3.0
,
4.0
,
6.0
,
8.0
],
[
14.0
,
14.0
,
15.0
,
15.0
],
[
0.0
,
0.0
,
20.0
,
20.0
],
[
2.0
,
3.0
,
1.5
,
7.0
],
# negative height
[
2.0
,
3.0
,
5.0
,
1.7
]])
# negative width
exp_boxes
=
[[
3.0
,
4.0
,
6.0
,
8.0
],
[
0.0
,
0.0
,
20.0
,
20.0
]]
boxes
=
box_list
.
BoxList
(
boxes
)
pruned_boxes
=
box_list_ops
.
prune_small_boxes
(
boxes
,
3
)
with
self
.
test_session
()
as
sess
:
pruned_boxes
=
sess
.
run
(
pruned_boxes
.
get
())
self
.
assertAllEqual
(
pruned_boxes
,
exp_boxes
)
def
test_change_coordinate_frame
(
self
):
corners
=
tf
.
constant
([[
0.25
,
0.5
,
0.75
,
0.75
],
[
0.5
,
0.0
,
1.0
,
1.0
]])
window
=
tf
.
constant
([
0.25
,
0.25
,
0.75
,
0.75
])
boxes
=
box_list
.
BoxList
(
corners
)
expected_corners
=
tf
.
constant
([[
0
,
0.5
,
1.0
,
1.0
],
[
0.5
,
-
0.5
,
1.5
,
1.5
]])
expected_boxes
=
box_list
.
BoxList
(
expected_corners
)
output
=
box_list_ops
.
change_coordinate_frame
(
boxes
,
window
)
with
self
.
test_session
()
as
sess
:
output_
,
expected_boxes_
=
sess
.
run
([
output
.
get
(),
expected_boxes
.
get
()])
self
.
assertAllClose
(
output_
,
expected_boxes_
)
def
test_ioaworks_on_empty_inputs
(
self
):
corners1
=
tf
.
constant
([[
4.0
,
3.0
,
7.0
,
5.0
],
[
5.0
,
6.0
,
10.0
,
7.0
]])
corners2
=
tf
.
constant
([[
3.0
,
4.0
,
6.0
,
8.0
],
[
14.0
,
14.0
,
15.0
,
15.0
],
[
0.0
,
0.0
,
20.0
,
20.0
]])
boxes1
=
box_list
.
BoxList
(
corners1
)
boxes2
=
box_list
.
BoxList
(
corners2
)
boxes_empty
=
box_list
.
BoxList
(
tf
.
zeros
((
0
,
4
)))
ioa_empty_1
=
box_list_ops
.
ioa
(
boxes1
,
boxes_empty
)
ioa_empty_2
=
box_list_ops
.
ioa
(
boxes_empty
,
boxes2
)
ioa_empty_3
=
box_list_ops
.
ioa
(
boxes_empty
,
boxes_empty
)
with
self
.
test_session
()
as
sess
:
ioa_output_1
,
ioa_output_2
,
ioa_output_3
=
sess
.
run
(
[
ioa_empty_1
,
ioa_empty_2
,
ioa_empty_3
])
self
.
assertAllEqual
(
ioa_output_1
.
shape
,
(
2
,
0
))
self
.
assertAllEqual
(
ioa_output_2
.
shape
,
(
0
,
3
))
self
.
assertAllEqual
(
ioa_output_3
.
shape
,
(
0
,
0
))
def
test_pairwise_distances
(
self
):
corners1
=
tf
.
constant
([[
0.0
,
0.0
,
0.0
,
0.0
],
[
1.0
,
1.0
,
0.0
,
2.0
]])
corners2
=
tf
.
constant
([[
3.0
,
4.0
,
1.0
,
0.0
],
[
-
4.0
,
0.0
,
0.0
,
3.0
],
[
0.0
,
0.0
,
0.0
,
0.0
]])
exp_output
=
[[
26
,
25
,
0
],
[
18
,
27
,
6
]]
boxes1
=
box_list
.
BoxList
(
corners1
)
boxes2
=
box_list
.
BoxList
(
corners2
)
dist_matrix
=
box_list_ops
.
sq_dist
(
boxes1
,
boxes2
)
with
self
.
test_session
()
as
sess
:
dist_output
=
sess
.
run
(
dist_matrix
)
self
.
assertAllClose
(
dist_output
,
exp_output
)
def
test_boolean_mask
(
self
):
corners
=
tf
.
constant
(
[
4
*
[
0.0
],
4
*
[
1.0
],
4
*
[
2.0
],
4
*
[
3.0
],
4
*
[
4.0
]])
indicator
=
tf
.
constant
([
True
,
False
,
True
,
False
,
True
],
tf
.
bool
)
expected_subset
=
[
4
*
[
0.0
],
4
*
[
2.0
],
4
*
[
4.0
]]
boxes
=
box_list
.
BoxList
(
corners
)
subset
=
box_list_ops
.
boolean_mask
(
boxes
,
indicator
)
with
self
.
test_session
()
as
sess
:
subset_output
=
sess
.
run
(
subset
.
get
())
self
.
assertAllClose
(
subset_output
,
expected_subset
)
def
test_boolean_mask_with_field
(
self
):
corners
=
tf
.
constant
(
[
4
*
[
0.0
],
4
*
[
1.0
],
4
*
[
2.0
],
4
*
[
3.0
],
4
*
[
4.0
]])
indicator
=
tf
.
constant
([
True
,
False
,
True
,
False
,
True
],
tf
.
bool
)
weights
=
tf
.
constant
([[.
1
],
[.
3
],
[.
5
],
[.
7
],
[.
9
]],
tf
.
float32
)
expected_subset
=
[
4
*
[
0.0
],
4
*
[
2.0
],
4
*
[
4.0
]]
expected_weights
=
[[.
1
],
[.
5
],
[.
9
]]
boxes
=
box_list
.
BoxList
(
corners
)
boxes
.
add_field
(
'weights'
,
weights
)
subset
=
box_list_ops
.
boolean_mask
(
boxes
,
indicator
,
[
'weights'
])
with
self
.
test_session
()
as
sess
:
subset_output
,
weights_output
=
sess
.
run
(
[
subset
.
get
(),
subset
.
get_field
(
'weights'
)])
self
.
assertAllClose
(
subset_output
,
expected_subset
)
self
.
assertAllClose
(
weights_output
,
expected_weights
)
def
test_gather
(
self
):
corners
=
tf
.
constant
(
[
4
*
[
0.0
],
4
*
[
1.0
],
4
*
[
2.0
],
4
*
[
3.0
],
4
*
[
4.0
]])
indices
=
tf
.
constant
([
0
,
2
,
4
],
tf
.
int32
)
expected_subset
=
[
4
*
[
0.0
],
4
*
[
2.0
],
4
*
[
4.0
]]
boxes
=
box_list
.
BoxList
(
corners
)
subset
=
box_list_ops
.
gather
(
boxes
,
indices
)
with
self
.
test_session
()
as
sess
:
subset_output
=
sess
.
run
(
subset
.
get
())
self
.
assertAllClose
(
subset_output
,
expected_subset
)
def
test_gather_with_field
(
self
):
corners
=
tf
.
constant
([
4
*
[
0.0
],
4
*
[
1.0
],
4
*
[
2.0
],
4
*
[
3.0
],
4
*
[
4.0
]])
indices
=
tf
.
constant
([
0
,
2
,
4
],
tf
.
int32
)
weights
=
tf
.
constant
([[.
1
],
[.
3
],
[.
5
],
[.
7
],
[.
9
]],
tf
.
float32
)
expected_subset
=
[
4
*
[
0.0
],
4
*
[
2.0
],
4
*
[
4.0
]]
expected_weights
=
[[.
1
],
[.
5
],
[.
9
]]
boxes
=
box_list
.
BoxList
(
corners
)
boxes
.
add_field
(
'weights'
,
weights
)
subset
=
box_list_ops
.
gather
(
boxes
,
indices
,
[
'weights'
])
with
self
.
test_session
()
as
sess
:
subset_output
,
weights_output
=
sess
.
run
(
[
subset
.
get
(),
subset
.
get_field
(
'weights'
)])
self
.
assertAllClose
(
subset_output
,
expected_subset
)
self
.
assertAllClose
(
weights_output
,
expected_weights
)
def
test_gather_with_invalid_field
(
self
):
corners
=
tf
.
constant
([
4
*
[
0.0
],
4
*
[
1.0
]])
indices
=
tf
.
constant
([
0
,
1
],
tf
.
int32
)
weights
=
tf
.
constant
([[.
1
],
[.
3
]],
tf
.
float32
)
boxes
=
box_list
.
BoxList
(
corners
)
boxes
.
add_field
(
'weights'
,
weights
)
with
self
.
assertRaises
(
ValueError
):
box_list_ops
.
gather
(
boxes
,
indices
,
[
'foo'
,
'bar'
])
def
test_gather_with_invalid_inputs
(
self
):
corners
=
tf
.
constant
(
[
4
*
[
0.0
],
4
*
[
1.0
],
4
*
[
2.0
],
4
*
[
3.0
],
4
*
[
4.0
]])
indices_float32
=
tf
.
constant
([
0
,
2
,
4
],
tf
.
float32
)
boxes
=
box_list
.
BoxList
(
corners
)
with
self
.
assertRaises
(
ValueError
):
_
=
box_list_ops
.
gather
(
boxes
,
indices_float32
)
indices_2d
=
tf
.
constant
([[
0
,
2
,
4
]],
tf
.
int32
)
boxes
=
box_list
.
BoxList
(
corners
)
with
self
.
assertRaises
(
ValueError
):
_
=
box_list_ops
.
gather
(
boxes
,
indices_2d
)
def
test_gather_with_dynamic_indexing
(
self
):
corners
=
tf
.
constant
([
4
*
[
0.0
],
4
*
[
1.0
],
4
*
[
2.0
],
4
*
[
3.0
],
4
*
[
4.0
]
])
weights
=
tf
.
constant
([.
5
,
.
3
,
.
7
,
.
1
,
.
9
],
tf
.
float32
)
indices
=
tf
.
reshape
(
tf
.
where
(
tf
.
greater
(
weights
,
0.4
)),
[
-
1
])
expected_subset
=
[
4
*
[
0.0
],
4
*
[
2.0
],
4
*
[
4.0
]]
expected_weights
=
[.
5
,
.
7
,
.
9
]
boxes
=
box_list
.
BoxList
(
corners
)
boxes
.
add_field
(
'weights'
,
weights
)
subset
=
box_list_ops
.
gather
(
boxes
,
indices
,
[
'weights'
])
with
self
.
test_session
()
as
sess
:
subset_output
,
weights_output
=
sess
.
run
([
subset
.
get
(),
subset
.
get_field
(
'weights'
)])
self
.
assertAllClose
(
subset_output
,
expected_subset
)
self
.
assertAllClose
(
weights_output
,
expected_weights
)
def
test_sort_by_field_ascending_order
(
self
):
exp_corners
=
[[
0
,
0
,
1
,
1
],
[
0
,
0.1
,
1
,
1.1
],
[
0
,
-
0.1
,
1
,
0.9
],
[
0
,
10
,
1
,
11
],
[
0
,
10.1
,
1
,
11.1
],
[
0
,
100
,
1
,
101
]]
exp_scores
=
[.
95
,
.
9
,
.
75
,
.
6
,
.
5
,
.
3
]
exp_weights
=
[.
2
,
.
45
,
.
6
,
.
75
,
.
8
,
.
92
]
shuffle
=
[
2
,
4
,
0
,
5
,
1
,
3
]
corners
=
tf
.
constant
([
exp_corners
[
i
]
for
i
in
shuffle
],
tf
.
float32
)
boxes
=
box_list
.
BoxList
(
corners
)
boxes
.
add_field
(
'scores'
,
tf
.
constant
(
[
exp_scores
[
i
]
for
i
in
shuffle
],
tf
.
float32
))
boxes
.
add_field
(
'weights'
,
tf
.
constant
(
[
exp_weights
[
i
]
for
i
in
shuffle
],
tf
.
float32
))
sort_by_weight
=
box_list_ops
.
sort_by_field
(
boxes
,
'weights'
,
order
=
box_list_ops
.
SortOrder
.
ascend
)
with
self
.
test_session
()
as
sess
:
corners_out
,
scores_out
,
weights_out
=
sess
.
run
([
sort_by_weight
.
get
(),
sort_by_weight
.
get_field
(
'scores'
),
sort_by_weight
.
get_field
(
'weights'
)])
self
.
assertAllClose
(
corners_out
,
exp_corners
)
self
.
assertAllClose
(
scores_out
,
exp_scores
)
self
.
assertAllClose
(
weights_out
,
exp_weights
)
def
test_sort_by_field_descending_order
(
self
):
exp_corners
=
[[
0
,
0
,
1
,
1
],
[
0
,
0.1
,
1
,
1.1
],
[
0
,
-
0.1
,
1
,
0.9
],
[
0
,
10
,
1
,
11
],
[
0
,
10.1
,
1
,
11.1
],
[
0
,
100
,
1
,
101
]]
exp_scores
=
[.
95
,
.
9
,
.
75
,
.
6
,
.
5
,
.
3
]
exp_weights
=
[.
2
,
.
45
,
.
6
,
.
75
,
.
8
,
.
92
]
shuffle
=
[
2
,
4
,
0
,
5
,
1
,
3
]
corners
=
tf
.
constant
([
exp_corners
[
i
]
for
i
in
shuffle
],
tf
.
float32
)
boxes
=
box_list
.
BoxList
(
corners
)
boxes
.
add_field
(
'scores'
,
tf
.
constant
(
[
exp_scores
[
i
]
for
i
in
shuffle
],
tf
.
float32
))
boxes
.
add_field
(
'weights'
,
tf
.
constant
(
[
exp_weights
[
i
]
for
i
in
shuffle
],
tf
.
float32
))
sort_by_score
=
box_list_ops
.
sort_by_field
(
boxes
,
'scores'
)
with
self
.
test_session
()
as
sess
:
corners_out
,
scores_out
,
weights_out
=
sess
.
run
([
sort_by_score
.
get
(
),
sort_by_score
.
get_field
(
'scores'
),
sort_by_score
.
get_field
(
'weights'
)])
self
.
assertAllClose
(
corners_out
,
exp_corners
)
self
.
assertAllClose
(
scores_out
,
exp_scores
)
self
.
assertAllClose
(
weights_out
,
exp_weights
)
def
test_sort_by_field_invalid_inputs
(
self
):
corners
=
tf
.
constant
([
4
*
[
0.0
],
4
*
[
0.5
],
4
*
[
1.0
],
4
*
[
2.0
],
4
*
[
3.0
],
4
*
[
4.0
]])
misc
=
tf
.
constant
([[.
95
,
.
9
],
[.
5
,
.
3
]],
tf
.
float32
)
weights
=
tf
.
constant
([.
1
,
.
2
],
tf
.
float32
)
boxes
=
box_list
.
BoxList
(
corners
)
boxes
.
add_field
(
'misc'
,
misc
)
boxes
.
add_field
(
'weights'
,
weights
)
with
self
.
test_session
()
as
sess
:
with
self
.
assertRaises
(
ValueError
):
box_list_ops
.
sort_by_field
(
boxes
,
'area'
)
with
self
.
assertRaises
(
ValueError
):
box_list_ops
.
sort_by_field
(
boxes
,
'misc'
)
with
self
.
assertRaisesWithPredicateMatch
(
errors
.
InvalidArgumentError
,
'Incorrect field size'
):
sess
.
run
(
box_list_ops
.
sort_by_field
(
boxes
,
'weights'
).
get
())
def
test_visualize_boxes_in_image
(
self
):
image
=
tf
.
zeros
((
6
,
4
,
3
))
corners
=
tf
.
constant
([[
0
,
0
,
5
,
3
],
[
0
,
0
,
3
,
2
]],
tf
.
float32
)
boxes
=
box_list
.
BoxList
(
corners
)
image_and_boxes
=
box_list_ops
.
visualize_boxes_in_image
(
image
,
boxes
)
image_and_boxes_bw
=
tf
.
to_float
(
tf
.
greater
(
tf
.
reduce_sum
(
image_and_boxes
,
2
),
0.0
))
exp_result
=
[[
1
,
1
,
1
,
0
],
[
1
,
1
,
1
,
0
],
[
1
,
1
,
1
,
0
],
[
1
,
0
,
1
,
0
],
[
1
,
1
,
1
,
0
],
[
0
,
0
,
0
,
0
]]
with
self
.
test_session
()
as
sess
:
output
=
sess
.
run
(
image_and_boxes_bw
)
self
.
assertAllEqual
(
output
.
astype
(
int
),
exp_result
)
def
test_filter_field_value_equals
(
self
):
corners
=
tf
.
constant
([[
0
,
0
,
1
,
1
],
[
0
,
0.1
,
1
,
1.1
],
[
0
,
-
0.1
,
1
,
0.9
],
[
0
,
10
,
1
,
11
],
[
0
,
10.1
,
1
,
11.1
],
[
0
,
100
,
1
,
101
]],
tf
.
float32
)
boxes
=
box_list
.
BoxList
(
corners
)
boxes
.
add_field
(
'classes'
,
tf
.
constant
([
1
,
2
,
1
,
2
,
2
,
1
]))
exp_output1
=
[[
0
,
0
,
1
,
1
],
[
0
,
-
0.1
,
1
,
0.9
],
[
0
,
100
,
1
,
101
]]
exp_output2
=
[[
0
,
0.1
,
1
,
1.1
],
[
0
,
10
,
1
,
11
],
[
0
,
10.1
,
1
,
11.1
]]
filtered_boxes1
=
box_list_ops
.
filter_field_value_equals
(
boxes
,
'classes'
,
1
)
filtered_boxes2
=
box_list_ops
.
filter_field_value_equals
(
boxes
,
'classes'
,
2
)
with
self
.
test_session
()
as
sess
:
filtered_output1
,
filtered_output2
=
sess
.
run
([
filtered_boxes1
.
get
(),
filtered_boxes2
.
get
()])
self
.
assertAllClose
(
filtered_output1
,
exp_output1
)
self
.
assertAllClose
(
filtered_output2
,
exp_output2
)
def
test_filter_greater_than
(
self
):
corners
=
tf
.
constant
([[
0
,
0
,
1
,
1
],
[
0
,
0.1
,
1
,
1.1
],
[
0
,
-
0.1
,
1
,
0.9
],
[
0
,
10
,
1
,
11
],
[
0
,
10.1
,
1
,
11.1
],
[
0
,
100
,
1
,
101
]],
tf
.
float32
)
boxes
=
box_list
.
BoxList
(
corners
)
boxes
.
add_field
(
'scores'
,
tf
.
constant
([.
1
,
.
75
,
.
9
,
.
5
,
.
5
,
.
8
]))
thresh
=
.
6
exp_output
=
[[
0
,
0.1
,
1
,
1.1
],
[
0
,
-
0.1
,
1
,
0.9
],
[
0
,
100
,
1
,
101
]]
filtered_boxes
=
box_list_ops
.
filter_greater_than
(
boxes
,
thresh
)
with
self
.
test_session
()
as
sess
:
filtered_output
=
sess
.
run
(
filtered_boxes
.
get
())
self
.
assertAllClose
(
filtered_output
,
exp_output
)
def
test_clip_box_list
(
self
):
boxlist
=
box_list
.
BoxList
(
tf
.
constant
([[
0.1
,
0.1
,
0.4
,
0.4
],
[
0.1
,
0.1
,
0.5
,
0.5
],
[
0.6
,
0.6
,
0.8
,
0.8
],
[
0.2
,
0.2
,
0.3
,
0.3
]],
tf
.
float32
))
boxlist
.
add_field
(
'classes'
,
tf
.
constant
([
0
,
0
,
1
,
1
]))
boxlist
.
add_field
(
'scores'
,
tf
.
constant
([
0.75
,
0.65
,
0.3
,
0.2
]))
num_boxes
=
2
clipped_boxlist
=
box_list_ops
.
pad_or_clip_box_list
(
boxlist
,
num_boxes
)
expected_boxes
=
[[
0.1
,
0.1
,
0.4
,
0.4
],
[
0.1
,
0.1
,
0.5
,
0.5
]]
expected_classes
=
[
0
,
0
]
expected_scores
=
[
0.75
,
0.65
]
with
self
.
test_session
()
as
sess
:
boxes_out
,
classes_out
,
scores_out
=
sess
.
run
(
[
clipped_boxlist
.
get
(),
clipped_boxlist
.
get_field
(
'classes'
),
clipped_boxlist
.
get_field
(
'scores'
)])
self
.
assertAllClose
(
expected_boxes
,
boxes_out
)
self
.
assertAllEqual
(
expected_classes
,
classes_out
)
self
.
assertAllClose
(
expected_scores
,
scores_out
)
def
test_pad_box_list
(
self
):
boxlist
=
box_list
.
BoxList
(
tf
.
constant
([[
0.1
,
0.1
,
0.4
,
0.4
],
[
0.1
,
0.1
,
0.5
,
0.5
]],
tf
.
float32
))
boxlist
.
add_field
(
'classes'
,
tf
.
constant
([
0
,
1
]))
boxlist
.
add_field
(
'scores'
,
tf
.
constant
([
0.75
,
0.2
]))
num_boxes
=
4
padded_boxlist
=
box_list_ops
.
pad_or_clip_box_list
(
boxlist
,
num_boxes
)
expected_boxes
=
[[
0.1
,
0.1
,
0.4
,
0.4
],
[
0.1
,
0.1
,
0.5
,
0.5
],
[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
]]
expected_classes
=
[
0
,
1
,
0
,
0
]
expected_scores
=
[
0.75
,
0.2
,
0
,
0
]
with
self
.
test_session
()
as
sess
:
boxes_out
,
classes_out
,
scores_out
=
sess
.
run
(
[
padded_boxlist
.
get
(),
padded_boxlist
.
get_field
(
'classes'
),
padded_boxlist
.
get_field
(
'scores'
)])
self
.
assertAllClose
(
expected_boxes
,
boxes_out
)
self
.
assertAllEqual
(
expected_classes
,
classes_out
)
self
.
assertAllClose
(
expected_scores
,
scores_out
)
class ConcatenateTest(tf.test.TestCase):
  """Tests for box_list_ops.concatenate."""

  def test_invalid_input_box_list_list(self):
    """concatenate rejects None, empty lists, and non-BoxList entries."""
    with self.assertRaises(ValueError):
      box_list_ops.concatenate(None)
    with self.assertRaises(ValueError):
      box_list_ops.concatenate([])
    with self.assertRaises(ValueError):
      corners = tf.constant([[0, 0, 0, 0]], tf.float32)
      boxlist = box_list.BoxList(corners)
      box_list_ops.concatenate([boxlist, 2])

  def test_concatenate_with_missing_fields(self):
    """All inputs must carry the same extra fields."""
    first = box_list.BoxList(
        tf.constant([[0, 0, 0, 0], [1, 2, 3, 4]], tf.float32))
    first.add_field('scores', tf.constant([1.0, 2.1]))
    second = box_list.BoxList(
        tf.constant([[0, 3, 1, 6], [2, 4, 3, 8]], tf.float32))
    with self.assertRaises(ValueError):
      box_list_ops.concatenate([first, second])

  def test_concatenate_with_incompatible_field_shapes(self):
    """Matching field names with different shapes are rejected."""
    first = box_list.BoxList(
        tf.constant([[0, 0, 0, 0], [1, 2, 3, 4]], tf.float32))
    first.add_field('scores', tf.constant([1.0, 2.1]))
    second = box_list.BoxList(
        tf.constant([[0, 3, 1, 6], [2, 4, 3, 8]], tf.float32))
    second.add_field('scores', tf.constant([[1.0, 1.0], [2.1, 3.2]]))
    with self.assertRaises(ValueError):
      box_list_ops.concatenate([first, second])

  def test_concatenate_is_correct(self):
    """Boxes and fields are stacked in input order."""
    first = box_list.BoxList(
        tf.constant([[0, 0, 0, 0], [1, 2, 3, 4]], tf.float32))
    first.add_field('scores', tf.constant([1.0, 2.1]))
    second = box_list.BoxList(
        tf.constant([[0, 3, 1, 6], [2, 4, 3, 8], [1, 0, 5, 10]], tf.float32))
    second.add_field('scores', tf.constant([1.0, 2.1, 5.6]))
    expected_corners = [[0, 0, 0, 0], [1, 2, 3, 4], [0, 3, 1, 6],
                        [2, 4, 3, 8], [1, 0, 5, 10]]
    expected_scores = [1.0, 2.1, 1.0, 2.1, 5.6]
    combined = box_list_ops.concatenate([first, second])
    with self.test_session() as session:
      corners_out, scores_out = session.run(
          [combined.get(), combined.get_field('scores')])
      self.assertAllClose(corners_out, expected_corners)
      self.assertAllClose(scores_out, expected_scores)
class NonMaxSuppressionTest(tf.test.TestCase):
  """Tests for box_list_ops.non_max_suppression and extra-field copying."""

  def _three_cluster_corners(self):
    """Six boxes forming three overlap clusters, shared by several tests."""
    return tf.constant([[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9],
                        [0, 10, 1, 11], [0, 10.1, 1, 11.1],
                        [0, 100, 1, 101]], tf.float32)

  def test_with_invalid_scores_field(self):
    """A scores field shorter than the box count fails at run time."""
    boxes = box_list.BoxList(self._three_cluster_corners())
    # Only five scores for six boxes.
    boxes.add_field('scores', tf.constant([.9, .75, .6, .95, .5]))
    nms = box_list_ops.non_max_suppression(boxes, .5, 3)
    with self.test_session() as session:
      with self.assertRaisesWithPredicateMatch(
          errors.InvalidArgumentError, 'scores has incompatible shape'):
        session.run(nms.get())

  def test_select_from_three_clusters(self):
    """NMS keeps the highest-scoring box of each cluster."""
    boxes = box_list.BoxList(self._three_cluster_corners())
    boxes.add_field('scores', tf.constant([.9, .75, .6, .95, .5, .3]))
    expected = [[0, 10, 1, 11], [0, 0, 1, 1], [0, 100, 1, 101]]
    nms = box_list_ops.non_max_suppression(boxes, .5, 3)
    with self.test_session() as session:
      self.assertAllClose(session.run(nms.get()), expected)

  def test_select_at_most_two_boxes_from_three_clusters(self):
    """max_output_size caps the number of kept boxes."""
    boxes = box_list.BoxList(self._three_cluster_corners())
    boxes.add_field('scores', tf.constant([.9, .75, .6, .95, .5, .3]))
    expected = [[0, 10, 1, 11], [0, 0, 1, 1]]
    nms = box_list_ops.non_max_suppression(boxes, .5, 2)
    with self.test_session() as session:
      self.assertAllClose(session.run(nms.get()), expected)

  def test_select_at_most_thirty_boxes_from_three_clusters(self):
    """A generous max_output_size still yields one box per cluster."""
    boxes = box_list.BoxList(self._three_cluster_corners())
    boxes.add_field('scores', tf.constant([.9, .75, .6, .95, .5, .3]))
    expected = [[0, 10, 1, 11], [0, 0, 1, 1], [0, 100, 1, 101]]
    nms = box_list_ops.non_max_suppression(boxes, .5, 30)
    with self.test_session() as session:
      self.assertAllClose(session.run(nms.get()), expected)

  def test_select_single_box(self):
    """A single input box is returned unchanged."""
    boxes = box_list.BoxList(tf.constant([[0, 0, 1, 1]], tf.float32))
    boxes.add_field('scores', tf.constant([.9]))
    nms = box_list_ops.non_max_suppression(boxes, .5, 3)
    with self.test_session() as session:
      self.assertAllClose(session.run(nms.get()), [[0, 0, 1, 1]])

  def test_select_from_ten_identical_boxes(self):
    """Identical boxes collapse to a single survivor."""
    boxes = box_list.BoxList(tf.constant(10 * [[0, 0, 1, 1]], tf.float32))
    boxes.add_field('scores', tf.constant(10 * [.9]))
    nms = box_list_ops.non_max_suppression(boxes, .5, 3)
    with self.test_session() as session:
      self.assertAllClose(session.run(nms.get()), [[0, 0, 1, 1]])

  def test_copy_extra_fields(self):
    """_copy_extra_fields transfers all extra fields between box lists."""
    source = box_list.BoxList(
        tf.constant([[0, 0, 1, 1], [0, 0.1, 1, 1.1]], tf.float32))
    tensor1 = np.array([[1], [4]])
    tensor2 = np.array([[1, 1], [2, 2]])
    source.add_field('tensor1', tf.constant(tensor1))
    source.add_field('tensor2', tf.constant(tensor2))
    target = box_list.BoxList(
        tf.constant([[0, 0, 10, 10], [1, 3, 5, 5]], tf.float32))
    target = box_list_ops._copy_extra_fields(target, source)
    with self.test_session() as session:
      self.assertAllClose(tensor1, session.run(target.get_field('tensor1')))
      self.assertAllClose(tensor2, session.run(target.get_field('tensor2')))
class CoordinatesConversionTest(tf.test.TestCase):
  """Tests for absolute <-> normalized coordinate conversions."""

  def test_to_normalized_coordinates(self):
    """Absolute corners are divided by the image height/width."""
    coordinates = tf.constant([[0, 0, 100, 100], [25, 25, 75, 75]],
                              tf.float32)
    img = tf.ones((128, 100, 100, 3))
    normalized = box_list_ops.to_normalized_coordinates(
        box_list.BoxList(coordinates), tf.shape(img)[1], tf.shape(img)[2])
    expected = [[0, 0, 1, 1], [0.25, 0.25, 0.75, 0.75]]
    with self.test_session() as session:
      self.assertAllClose(session.run(normalized.get()), expected)

  def test_to_normalized_coordinates_already_normalized(self):
    """Normalizing already-normalized input trips the range assertion."""
    coordinates = tf.constant([[0, 0, 1, 1], [0.25, 0.25, 0.75, 0.75]],
                              tf.float32)
    img = tf.ones((128, 100, 100, 3))
    normalized = box_list_ops.to_normalized_coordinates(
        box_list.BoxList(coordinates), tf.shape(img)[1], tf.shape(img)[2])
    with self.test_session() as session:
      with self.assertRaisesOpError('assertion failed'):
        session.run(normalized.get())

  def test_to_absolute_coordinates(self):
    """Normalized corners are scaled by the image height/width."""
    coordinates = tf.constant([[0, 0, 1, 1], [0.25, 0.25, 0.75, 0.75]],
                              tf.float32)
    img = tf.ones((128, 100, 100, 3))
    absolute = box_list_ops.to_absolute_coordinates(
        box_list.BoxList(coordinates), tf.shape(img)[1], tf.shape(img)[2])
    expected = [[0, 0, 100, 100], [25, 25, 75, 75]]
    with self.test_session() as session:
      self.assertAllClose(session.run(absolute.get()), expected)

  def test_to_absolute_coordinates_already_abolute(self):
    """Converting already-absolute input trips the range assertion.

    NOTE(review): 'abolute' in the method name is a typo for 'absolute';
    kept unchanged to preserve the public test name.
    """
    coordinates = tf.constant([[0, 0, 100, 100], [25, 25, 75, 75]],
                              tf.float32)
    img = tf.ones((128, 100, 100, 3))
    absolute = box_list_ops.to_absolute_coordinates(
        box_list.BoxList(coordinates), tf.shape(img)[1], tf.shape(img)[2])
    with self.test_session() as session:
      with self.assertRaisesOpError('assertion failed'):
        session.run(absolute.get())

  def test_convert_to_normalized_and_back(self):
    """Absolute -> normalized -> absolute round-trips exactly."""
    coordinates = np.random.uniform(size=(100, 4))
    coordinates = np.round(np.sort(coordinates) * 200)
    coordinates[:, 2:4] += 1
    # Guarantee one box spanning the full image range.
    coordinates[99, :] = [0, 0, 201, 201]
    img = tf.ones((128, 202, 202, 3))
    boxlist = box_list.BoxList(tf.constant(coordinates, tf.float32))
    boxlist = box_list_ops.to_normalized_coordinates(
        boxlist, tf.shape(img)[1], tf.shape(img)[2])
    boxlist = box_list_ops.to_absolute_coordinates(
        boxlist, tf.shape(img)[1], tf.shape(img)[2])
    with self.test_session() as session:
      round_tripped = session.run(boxlist.get())
      self.assertAllClose(round_tripped, coordinates)

  def test_convert_to_absolute_and_back(self):
    """Normalized -> absolute -> normalized round-trips exactly."""
    coordinates = np.random.uniform(size=(100, 4))
    coordinates = np.sort(coordinates)
    # Guarantee one box spanning the full normalized range.
    coordinates[99, :] = [0, 0, 1, 1]
    img = tf.ones((128, 202, 202, 3))
    boxlist = box_list.BoxList(tf.constant(coordinates, tf.float32))
    boxlist = box_list_ops.to_absolute_coordinates(
        boxlist, tf.shape(img)[1], tf.shape(img)[2])
    boxlist = box_list_ops.to_normalized_coordinates(
        boxlist, tf.shape(img)[1], tf.shape(img)[2])
    with self.test_session() as session:
      round_tripped = session.run(boxlist.get())
      self.assertAllClose(round_tripped, coordinates)
class BoxRefinementTest(tf.test.TestCase):
  """Tests for box voting and box refinement ops."""

  def test_box_voting(self):
    """box_voting averages matching pool boxes into each candidate."""
    candidates = box_list.BoxList(
        tf.constant([[0.1, 0.1, 0.4, 0.4], [0.6, 0.6, 0.8, 0.8]],
                    tf.float32))
    candidates.add_field('ExtraField', tf.constant([1, 2]))
    pool = box_list.BoxList(
        tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5],
                     [0.6, 0.6, 0.8, 0.8]], tf.float32))
    pool.add_field('scores', tf.constant([0.75, 0.25, 0.3]))
    averaged = box_list_ops.box_voting(candidates, pool)
    expected_boxes = [[0.1, 0.1, 0.425, 0.425], [0.6, 0.6, 0.8, 0.8]]
    expected_scores = [0.5, 0.3]
    with self.test_session() as session:
      boxes_out, scores_out, extra_out = session.run(
          [averaged.get(), averaged.get_field('scores'),
           averaged.get_field('ExtraField')])
      self.assertAllClose(expected_boxes, boxes_out)
      self.assertAllClose(expected_scores, scores_out)
      self.assertAllEqual(extra_out, [1, 2])

  def test_box_voting_fails_with_negative_scores(self):
    """Negative pool scores are rejected at run time."""
    candidates = box_list.BoxList(
        tf.constant([[0.1, 0.1, 0.4, 0.4]], tf.float32))
    pool = box_list.BoxList(tf.constant([[0.1, 0.1, 0.4, 0.4]], tf.float32))
    pool.add_field('scores', tf.constant([-0.2]))
    averaged = box_list_ops.box_voting(candidates, pool)
    with self.test_session() as session:
      with self.assertRaisesOpError('Scores must be non negative'):
        session.run([averaged.get()])

  def test_box_voting_fails_when_unmatched(self):
    """Every candidate must overlap at least one pool box."""
    candidates = box_list.BoxList(
        tf.constant([[0.1, 0.1, 0.4, 0.4]], tf.float32))
    pool = box_list.BoxList(tf.constant([[0.6, 0.6, 0.8, 0.8]], tf.float32))
    pool.add_field('scores', tf.constant([0.2]))
    averaged = box_list_ops.box_voting(candidates, pool)
    with self.test_session() as session:
      with self.assertRaisesOpError('Each box in selected_boxes must match '
                                    'with at least one box in pool_boxes.'):
        session.run([averaged.get()])

  def test_refine_boxes(self):
    """refine_boxes performs NMS followed by box voting."""
    pool = box_list.BoxList(
        tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5],
                     [0.6, 0.6, 0.8, 0.8]], tf.float32))
    pool.add_field('ExtraField', tf.constant([1, 2, 3]))
    pool.add_field('scores', tf.constant([0.75, 0.25, 0.3]))
    refined = box_list_ops.refine_boxes(pool, 0.5, 10)
    expected_boxes = [[0.1, 0.1, 0.425, 0.425], [0.6, 0.6, 0.8, 0.8]]
    expected_scores = [0.5, 0.3]
    with self.test_session() as session:
      boxes_out, scores_out, extra_out = session.run(
          [refined.get(), refined.get_field('scores'),
           refined.get_field('ExtraField')])
      self.assertAllClose(expected_boxes, boxes_out)
      self.assertAllClose(expected_scores, scores_out)
      self.assertAllEqual(extra_out, [1, 3])

  def test_refine_boxes_multi_class(self):
    """refine_boxes_multi_class refines each class independently."""
    pool = box_list.BoxList(
        tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5],
                     [0.6, 0.6, 0.8, 0.8], [0.2, 0.2, 0.3, 0.3]],
                    tf.float32))
    pool.add_field('classes', tf.constant([0, 0, 1, 1]))
    pool.add_field('scores', tf.constant([0.75, 0.25, 0.3, 0.2]))
    refined = box_list_ops.refine_boxes_multi_class(pool, 3, 0.5, 10)
    expected_boxes = [[0.1, 0.1, 0.425, 0.425], [0.6, 0.6, 0.8, 0.8],
                      [0.2, 0.2, 0.3, 0.3]]
    expected_scores = [0.5, 0.3, 0.2]
    with self.test_session() as session:
      boxes_out, scores_out, classes_out = session.run(
          [refined.get(), refined.get_field('scores'),
           refined.get_field('classes')])
      self.assertAllClose(expected_boxes, boxes_out)
      self.assertAllClose(expected_scores, scores_out)
      self.assertAllEqual(classes_out, [0, 1, 1])
if __name__ == '__main__':
  # Discover and run every tf.test.TestCase in this module.
  tf.test.main()
object_detection/core/box_list_test.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.core.box_list."""
import
tensorflow
as
tf
from
object_detection.core
import
box_list
class
BoxListTest
(
tf
.
test
.
TestCase
):
"""Tests for BoxList class."""
def
test_num_boxes
(
self
):
data
=
tf
.
constant
([[
0
,
0
,
1
,
1
],
[
1
,
1
,
2
,
3
],
[
3
,
4
,
5
,
5
]],
tf
.
float32
)
expected_num_boxes
=
3
boxes
=
box_list
.
BoxList
(
data
)
with
self
.
test_session
()
as
sess
:
num_boxes_output
=
sess
.
run
(
boxes
.
num_boxes
())
self
.
assertEquals
(
num_boxes_output
,
expected_num_boxes
)
def
test_get_correct_center_coordinates_and_sizes
(
self
):
boxes
=
[[
10.0
,
10.0
,
20.0
,
15.0
],
[
0.2
,
0.1
,
0.5
,
0.4
]]
boxes
=
box_list
.
BoxList
(
tf
.
constant
(
boxes
))
centers_sizes
=
boxes
.
get_center_coordinates_and_sizes
()
expected_centers_sizes
=
[[
15
,
0.35
],
[
12.5
,
0.25
],
[
10
,
0.3
],
[
5
,
0.3
]]
with
self
.
test_session
()
as
sess
:
centers_sizes_out
=
sess
.
run
(
centers_sizes
)
self
.
assertAllClose
(
centers_sizes_out
,
expected_centers_sizes
)
def
test_create_box_list_with_dynamic_shape
(
self
):
data
=
tf
.
constant
([[
0
,
0
,
1
,
1
],
[
1
,
1
,
2
,
3
],
[
3
,
4
,
5
,
5
]],
tf
.
float32
)
indices
=
tf
.
reshape
(
tf
.
where
(
tf
.
greater
([
1
,
0
,
1
],
0
)),
[
-
1
])
data
=
tf
.
gather
(
data
,
indices
)
assert
data
.
get_shape
().
as_list
()
==
[
None
,
4
]
expected_num_boxes
=
2
boxes
=
box_list
.
BoxList
(
data
)
with
self
.
test_session
()
as
sess
:
num_boxes_output
=
sess
.
run
(
boxes
.
num_boxes
())
self
.
assertEquals
(
num_boxes_output
,
expected_num_boxes
)
def
test_transpose_coordinates
(
self
):
boxes
=
[[
10.0
,
10.0
,
20.0
,
15.0
],
[
0.2
,
0.1
,
0.5
,
0.4
]]
boxes
=
box_list
.
BoxList
(
tf
.
constant
(
boxes
))
boxes
.
transpose_coordinates
()
expected_corners
=
[[
10.0
,
10.0
,
15.0
,
20.0
],
[
0.1
,
0.2
,
0.4
,
0.5
]]
with
self
.
test_session
()
as
sess
:
corners_out
=
sess
.
run
(
boxes
.
get
())
self
.
assertAllClose
(
corners_out
,
expected_corners
)
def test_box_list_invalid_inputs(self):
  """BoxList rejects rank != 2, last dimension != 4, and non-float dtypes."""
  rank_three = tf.constant([[[0, 0, 1, 1], [3, 4, 5, 5]]], tf.float32)
  wrong_width = tf.constant([[0, 0, 1], [1, 1, 2], [3, 4, 5]], tf.float32)
  wrong_dtype = tf.constant([[0, 0, 1], [1, 1, 2], [3, 4, 5]], tf.int32)
  for bad_input in (rank_three, wrong_width, wrong_dtype):
    with self.assertRaises(ValueError):
      _ = box_list.BoxList(bad_input)
def test_num_boxes_static(self):
  """num_boxes_static() returns a plain Python int when the shape is known."""
  box_corners = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
  boxes = box_list.BoxList(tf.constant(box_corners))
  # assertEqual: assertEquals is a deprecated alias.
  self.assertEqual(boxes.num_boxes_static(), 2)
  # assertIsInstance is the idiomatic replacement for comparing type objects.
  self.assertIsInstance(boxes.num_boxes_static(), int)
def test_num_boxes_static_for_uninferrable_shape(self):
  """num_boxes_static() is None when the leading dimension is unknown."""
  placeholder = tf.placeholder(tf.float32, shape=[None, 4])
  boxes = box_list.BoxList(placeholder)
  # assertIsNone replaces the deprecated assertEquals(..., None) form and
  # checks identity, which is the correct comparison for None.
  self.assertIsNone(boxes.num_boxes_static())
def test_as_tensor_dict(self):
  """as_tensor_dict() with no argument returns boxes plus every added field."""
  corners = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]]
  boxlist = box_list.BoxList(tf.constant(corners, tf.float32))
  boxlist.add_field('classes', tf.constant([0, 1]))
  boxlist.add_field('scores', tf.constant([0.75, 0.2]))
  tensor_dict = boxlist.as_tensor_dict()
  with self.test_session() as sess:
    output = sess.run(tensor_dict)
  # 'boxes' plus the two added fields.
  self.assertAllEqual(3, len(output))
  self.assertAllClose(corners, output['boxes'])
  self.assertAllEqual([0, 1], output['classes'])
  self.assertAllClose([0.75, 0.2], output['scores'])
def test_as_tensor_dict_with_features(self):
  """as_tensor_dict() with an explicit field list returns exactly those."""
  corners = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]]
  boxlist = box_list.BoxList(tf.constant(corners, tf.float32))
  boxlist.add_field('classes', tf.constant([0, 1]))
  boxlist.add_field('scores', tf.constant([0.75, 0.2]))
  tensor_dict = boxlist.as_tensor_dict(['boxes', 'classes', 'scores'])
  with self.test_session() as sess:
    output = sess.run(tensor_dict)
  self.assertAllEqual(3, len(output))
  self.assertAllClose(corners, output['boxes'])
  self.assertAllEqual([0, 1], output['classes'])
  self.assertAllClose([0.75, 0.2], output['scores'])
def test_as_tensor_dict_missing_field(self):
  """Requesting fields that were never added raises ValueError."""
  boxlist = box_list.BoxList(
      tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]], tf.float32))
  boxlist.add_field('classes', tf.constant([0, 1]))
  boxlist.add_field('scores', tf.constant([0.75, 0.2]))
  with self.assertRaises(ValueError):
    boxlist.as_tensor_dict(['foo', 'bar'])
if __name__ == '__main__':
  # Discover and run every tf.test.TestCase in this module.
  tf.test.main()
object_detection/core/box_predictor.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Box predictor for object detectors.
Box predictors are classes that take a high level
image feature map as input and produce two predictions,
(1) a tensor encoding box locations, and
(2) a tensor encoding classes for each box.
These components are passed directly to loss functions
in our detection models.
These modules are separated from the main model since the same
few box predictor architectures are shared across many models.
"""
from
abc
import
abstractmethod
import
tensorflow
as
tf
from
object_detection.utils
import
ops
from
object_detection.utils
import
static_shape
# Shorthand for the TF-Slim API used throughout this module.
slim = tf.contrib.slim

# Keys of the prediction dictionaries returned by BoxPredictor._predict.
BOX_ENCODINGS = 'box_encodings'
CLASS_PREDICTIONS_WITH_BACKGROUND = 'class_predictions_with_background'
MASK_PREDICTIONS = 'mask_predictions'
class BoxPredictor(object):
  """Abstract base class for box predictors.

  A box predictor consumes a high level image feature map and emits
  (1) a tensor of encoded box locations and (2) a tensor of class scores for
  each box. Subclasses implement `_predict`; callers invoke `predict`, which
  wraps `_predict` in a variable scope.
  """

  def __init__(self, is_training, num_classes):
    """Constructor.

    Args:
      is_training: Indicates whether the BoxPredictor is in training mode.
      num_classes: number of classes, *excluding* the background category.
        If groundtruth labels take values in {0, 1, ..., K-1} then
        num_classes is K (not K+1), even though assigned classification
        targets can range over {0, ..., K} once background is added.
    """
    self._is_training = is_training
    self._num_classes = num_classes

  @property
  def num_classes(self):
    """Number of foreground classes (background excluded)."""
    return self._num_classes

  def predict(self, image_features, num_predictions_per_location, scope,
              **params):
    """Computes encoded object locations and corresponding confidences.

    Only the presence of the two output tensors is guaranteed by this
    interface; their shapes are implementation specific.

    Args:
      image_features: A float tensor of shape [batch_size, height, width,
        channels] containing features for a batch of images.
      num_predictions_per_location: an integer representing the number of box
        predictions to be made per spatial location in the feature map.
      scope: Variable and Op scope name.
      **params: Additional keyword arguments for specific implementations of
        BoxPredictor.

    Returns:
      A dictionary containing at least the following tensors.
        box_encodings: A float tensor of shape
          [batch_size, num_anchors, q, code_size] representing the location of
          the objects, where q is 1 or the number of classes.
        class_predictions_with_background: A float tensor of shape
          [batch_size, num_anchors, num_classes + 1] representing the class
          predictions for the proposals.
    """
    with tf.variable_scope(scope):
      return self._predict(image_features, num_predictions_per_location,
                           **params)

  # TODO: num_predictions_per_location could be moved to constructor.
  # This is currently only used by ConvolutionalBoxPredictor.
  @abstractmethod
  def _predict(self, image_features, num_predictions_per_location, **params):
    """Implementations must override this method.

    Args:
      image_features: A float tensor of shape [batch_size, height, width,
        channels] containing features for a batch of images.
      num_predictions_per_location: an integer representing the number of box
        predictions to be made per spatial location in the feature map.
      **params: Additional keyword arguments for specific implementations of
        BoxPredictor.

    Returns:
      A dictionary containing at least the following tensors.
        box_encodings: A float tensor of shape
          [batch_size, num_anchors, q, code_size] representing the location of
          the objects, where q is 1 or the number of classes.
        class_predictions_with_background: A float tensor of shape
          [batch_size, num_anchors, num_classes + 1] representing the class
          predictions for the proposals.
    """
    pass
class RfcnBoxPredictor(BoxPredictor):
  """RFCN Box Predictor.

  Applies a position sensitive ROI pooling on position sensitive feature maps
  to predict classes and refined locations. See
  https://arxiv.org/abs/1605.06409 for details.

  This is used for the second stage of the RFCN meta architecture. Notice that
  locations are *not* shared across classes, thus for each anchor, a separate
  prediction is made for each class.
  """

  def __init__(self, is_training, num_classes, conv_hyperparams,
               num_spatial_bins, depth, crop_size, box_code_size):
    """Constructor.

    Args:
      is_training: Indicates whether the BoxPredictor is in training mode.
      num_classes: number of classes. Note that num_classes *does not*
        include the background category, so if groundtruth labels take values
        in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
        assigned classification targets can range from {0,... K}).
      conv_hyperparams: Slim arg_scope with hyperparameters for convolutional
        layers.
      num_spatial_bins: A list of two integers `[spatial_bins_y,
        spatial_bins_x]`.
      depth: Target depth to reduce the input feature maps to.
      crop_size: A list of two integers `[crop_height, crop_width]`.
      box_code_size: Size of encoding for each box.
    """
    super(RfcnBoxPredictor, self).__init__(is_training, num_classes)
    self._conv_hyperparams = conv_hyperparams
    self._num_spatial_bins = num_spatial_bins
    self._depth = depth
    self._crop_size = crop_size
    self._box_code_size = box_code_size

  # NOTE: the num_classes property is inherited from BoxPredictor; the
  # previous identical override here was redundant and has been removed.

  def _predict(self, image_features, num_predictions_per_location,
               proposal_boxes):
    """Computes encoded object locations and corresponding confidences.

    Args:
      image_features: A float tensor of shape [batch_size, height, width,
        channels] containing features for a batch of images.
      num_predictions_per_location: an integer representing the number of box
        predictions to be made per spatial location in the feature map.
        Currently, this must be set to 1, or an error will be raised.
      proposal_boxes: A float tensor of shape [batch_size, num_proposals,
        box_code_size].

    Returns:
      box_encodings: A float tensor of shape
        [batch_size * num_proposals, 1, num_classes, code_size] representing
        the location of the objects.
      class_predictions_with_background: A float tensor of shape
        [batch_size * num_proposals, 1, num_classes + 1] representing the
        class predictions for the proposals.

    Raises:
      ValueError: if num_predictions_per_location is not 1.
    """
    if num_predictions_per_location != 1:
      raise ValueError('Currently RfcnBoxPredictor only supports '
                       'predicting a single box per class per location.')

    batch_size = tf.shape(proposal_boxes)[0]
    num_boxes = tf.shape(proposal_boxes)[1]

    def get_box_indices(proposals):
      # Maps each flattened proposal to the index of its source image in the
      # batch, as required by the crop-and-resize `box_ind` argument.
      proposals_shape = proposals.get_shape().as_list()
      if any(dim is None for dim in proposals_shape):
        proposals_shape = tf.shape(proposals)
      ones_mat = tf.ones(proposals_shape[:2], dtype=tf.int32)
      multiplier = tf.expand_dims(
          tf.range(start=0, limit=proposals_shape[0]), 1)
      return tf.reshape(ones_mat * multiplier, [-1])

    net = image_features
    with slim.arg_scope(self._conv_hyperparams):
      net = slim.conv2d(net, self._depth, [1, 1], scope='reduce_depth')
      # Location predictions: one position-sensitive score map bank per
      # (class, box coordinate) pair.
      location_feature_map_depth = (self._num_spatial_bins[0] *
                                    self._num_spatial_bins[1] *
                                    self.num_classes *
                                    self._box_code_size)
      location_feature_map = slim.conv2d(net, location_feature_map_depth,
                                         [1, 1], activation_fn=None,
                                         scope='refined_locations')
      box_encodings = ops.position_sensitive_crop_regions(
          location_feature_map,
          boxes=tf.reshape(proposal_boxes, [-1, self._box_code_size]),
          box_ind=get_box_indices(proposal_boxes),
          crop_size=self._crop_size,
          num_spatial_bins=self._num_spatial_bins,
          global_pool=True)
      # Global pooling leaves singleton spatial dims; drop them before the
      # final reshape.
      box_encodings = tf.squeeze(box_encodings, squeeze_dims=[1, 2])
      box_encodings = tf.reshape(box_encodings,
                                 [batch_size * num_boxes, 1,
                                  self.num_classes, self._box_code_size])

      # Class predictions.
      total_classes = self.num_classes + 1  # Account for background class.
      class_feature_map_depth = (self._num_spatial_bins[0] *
                                 self._num_spatial_bins[1] *
                                 total_classes)
      class_feature_map = slim.conv2d(net, class_feature_map_depth, [1, 1],
                                      activation_fn=None,
                                      scope='class_predictions')
      class_predictions_with_background = ops.position_sensitive_crop_regions(
          class_feature_map,
          boxes=tf.reshape(proposal_boxes, [-1, self._box_code_size]),
          box_ind=get_box_indices(proposal_boxes),
          crop_size=self._crop_size,
          num_spatial_bins=self._num_spatial_bins,
          global_pool=True)
      class_predictions_with_background = tf.squeeze(
          class_predictions_with_background, squeeze_dims=[1, 2])
      class_predictions_with_background = tf.reshape(
          class_predictions_with_background,
          [batch_size * num_boxes, 1, total_classes])

    return {BOX_ENCODINGS: box_encodings,
            CLASS_PREDICTIONS_WITH_BACKGROUND:
            class_predictions_with_background}
class MaskRCNNBoxPredictor(BoxPredictor):
  """Mask R-CNN Box Predictor.

  See Mask R-CNN: He, K., Gkioxari, G., Dollar, P., & Girshick, R. (2017).
  Mask R-CNN. arXiv preprint arXiv:1703.06870.

  This is used for the second stage of the Mask R-CNN detector where proposals
  cropped from an image are arranged along the batch dimension of the input
  image_features tensor. Notice that locations are *not* shared across classes,
  thus for each anchor, a separate prediction is made for each class.

  In addition to predicting boxes and classes, optionally this class allows
  predicting masks and/or keypoints inside detection boxes.

  Currently this box predictor makes per-class predictions; that is, each
  anchor makes a separate box prediction for each class.
  """

  def __init__(self, is_training, num_classes, fc_hyperparams, use_dropout,
               dropout_keep_prob, box_code_size, conv_hyperparams=None,
               predict_instance_masks=False, mask_prediction_conv_depth=256,
               predict_keypoints=False):
    """Constructor.

    Args:
      is_training: Indicates whether the BoxPredictor is in training mode.
      num_classes: number of classes. Note that num_classes *does not*
        include the background category, so if groundtruth labels take values
        in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
        assigned classification targets can range from {0,... K}).
      fc_hyperparams: Slim arg_scope with hyperparameters for fully
        connected ops.
      use_dropout: Option to use dropout or not. Note that a single dropout
        op is applied here prior to both box and class predictions, which
        stands in contrast to the ConvolutionalBoxPredictor below.
      dropout_keep_prob: Keep probability for dropout.
        This is only used if use_dropout is True.
      box_code_size: Size of encoding for each box.
      conv_hyperparams: Slim arg_scope with hyperparameters for convolution
        ops.
      predict_instance_masks: Whether to predict object masks inside detection
        boxes.
      mask_prediction_conv_depth: The depth for the first conv2d_transpose op
        applied to the image_features in the mask prediction branch.
      predict_keypoints: Whether to predict keypoints inside detection boxes.

    Raises:
      ValueError: If predict_keypoints is true (unimplemented), or if
        instance masks are requested without conv_hyperparams.
    """
    super(MaskRCNNBoxPredictor, self).__init__(is_training, num_classes)
    self._fc_hyperparams = fc_hyperparams
    self._use_dropout = use_dropout
    self._box_code_size = box_code_size
    self._dropout_keep_prob = dropout_keep_prob
    self._conv_hyperparams = conv_hyperparams
    self._predict_instance_masks = predict_instance_masks
    self._mask_prediction_conv_depth = mask_prediction_conv_depth
    self._predict_keypoints = predict_keypoints
    if self._predict_keypoints:
      raise ValueError('Keypoint prediction is unimplemented.')
    if ((self._predict_instance_masks or self._predict_keypoints) and
        self._conv_hyperparams is None):
      raise ValueError('`conv_hyperparams` must be provided when predicting '
                       'masks.')

  # NOTE: the num_classes property is inherited from BoxPredictor; the
  # previous identical override here was redundant and has been removed.

  def _predict(self, image_features, num_predictions_per_location):
    """Computes encoded object locations and corresponding confidences.

    Flattens image_features and applies fully connected ops (with no
    non-linearity) to predict box encodings and class predictions. In this
    setting, anchors are not spatially arranged in any way and are assumed to
    have been folded into the batch dimension. Thus we output 1 for the
    anchors dimension.

    Args:
      image_features: A float tensor of shape [batch_size, height, width,
        channels] containing features for a batch of images.
      num_predictions_per_location: an integer representing the number of box
        predictions to be made per spatial location in the feature map.
        Currently, this must be set to 1, or an error will be raised.

    Returns:
      A dictionary containing the following tensors.
        box_encodings: A float tensor of shape
          [batch_size, 1, num_classes, code_size] representing the
          location of the objects.
        class_predictions_with_background: A float tensor of shape
          [batch_size, 1, num_classes + 1] representing the class
          predictions for the proposals.
      If predict_masks is True the dictionary also contains:
        instance_masks: A float tensor of shape
          [batch_size, 1, num_classes, image_height, image_width]
      If predict_keypoints is True the dictionary also contains:
        keypoints: [batch_size, 1, num_keypoints, 2]

    Raises:
      ValueError: if num_predictions_per_location is not 1.
    """
    if num_predictions_per_location != 1:
      raise ValueError('Currently FullyConnectedBoxPredictor only supports '
                       'predicting a single box per class per location.')
    # Collapse the spatial dimensions to a single feature vector per proposal.
    spatial_averaged_image_features = tf.reduce_mean(image_features, [1, 2],
                                                     keep_dims=True,
                                                     name='AvgPool')
    flattened_image_features = slim.flatten(spatial_averaged_image_features)
    if self._use_dropout:
      # One shared dropout op feeds both the box and class heads.
      flattened_image_features = slim.dropout(
          flattened_image_features,
          keep_prob=self._dropout_keep_prob,
          is_training=self._is_training)
    with slim.arg_scope(self._fc_hyperparams):
      box_encodings = slim.fully_connected(
          flattened_image_features,
          self._num_classes * self._box_code_size,
          activation_fn=None,
          scope='BoxEncodingPredictor')
      class_predictions_with_background = slim.fully_connected(
          flattened_image_features,
          self._num_classes + 1,
          activation_fn=None,
          scope='ClassPredictor')
    box_encodings = tf.reshape(
        box_encodings, [-1, 1, self._num_classes, self._box_code_size])
    class_predictions_with_background = tf.reshape(
        class_predictions_with_background, [-1, 1, self._num_classes + 1])

    predictions_dict = {
        BOX_ENCODINGS: box_encodings,
        CLASS_PREDICTIONS_WITH_BACKGROUND: class_predictions_with_background
    }

    if self._predict_instance_masks:
      with slim.arg_scope(self._conv_hyperparams):
        # 2x upsample, then a 1x1 conv producing one mask logit map per class.
        upsampled_features = slim.conv2d_transpose(
            image_features,
            num_outputs=self._mask_prediction_conv_depth,
            kernel_size=[2, 2],
            stride=2)
        mask_predictions = slim.conv2d(upsampled_features,
                                       num_outputs=self.num_classes,
                                       activation_fn=None,
                                       kernel_size=[1, 1])
        # [b, h, w, c] -> [b, 1, c, h, w] to match the documented output.
        instance_masks = tf.expand_dims(
            tf.transpose(mask_predictions, perm=[0, 3, 1, 2]),
            axis=1,
            name='MaskPredictor')
      predictions_dict[MASK_PREDICTIONS] = instance_masks
    return predictions_dict
class ConvolutionalBoxPredictor(BoxPredictor):
  """Convolutional Box Predictor.

  Optionally add an intermediate 1x1 convolutional layer after features and
  predict in parallel branches box_encodings and
  class_predictions_with_background.

  Currently this box predictor assumes that predictions are "shared" across
  classes --- that is each anchor makes box predictions which do not depend
  on class.
  """

  def __init__(self, is_training, num_classes, conv_hyperparams, min_depth,
               max_depth, num_layers_before_predictor, use_dropout,
               dropout_keep_prob, kernel_size, box_code_size,
               apply_sigmoid_to_scores=False):
    """Constructor.

    Args:
      is_training: Indicates whether the BoxPredictor is in training mode.
      num_classes: number of classes. Note that num_classes *does not*
        include the background category, so if groundtruth labels take values
        in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
        assigned classification targets can range from {0,... K}).
      conv_hyperparams: Slim arg_scope with hyperparameters for convolution
        ops.
      min_depth: Minimum feature depth prior to predicting box encodings
        and class predictions.
      max_depth: Maximum feature depth prior to predicting box encodings
        and class predictions. If max_depth is set to 0, no additional
        feature map will be inserted before location and class predictions.
      num_layers_before_predictor: Number of the additional conv layers before
        the predictor.
      use_dropout: Option to use dropout for class prediction or not.
      dropout_keep_prob: Keep probability for dropout.
        This is only used if use_dropout is True.
      kernel_size: Size of final convolution kernel. If the
        spatial resolution of the feature map is smaller than the kernel size,
        then the kernel size is automatically set to be
        min(feature_width, feature_height).
      box_code_size: Size of encoding for each box.
      apply_sigmoid_to_scores: if True, apply the sigmoid on the output
        class_predictions.

    Raises:
      ValueError: if min_depth > max_depth.
    """
    super(ConvolutionalBoxPredictor, self).__init__(is_training, num_classes)
    if min_depth > max_depth:
      raise ValueError('min_depth should be less than or equal to max_depth')
    self._conv_hyperparams = conv_hyperparams
    self._min_depth = min_depth
    self._max_depth = max_depth
    self._num_layers_before_predictor = num_layers_before_predictor
    self._use_dropout = use_dropout
    self._kernel_size = kernel_size
    self._box_code_size = box_code_size
    self._dropout_keep_prob = dropout_keep_prob
    self._apply_sigmoid_to_scores = apply_sigmoid_to_scores

  def _predict(self, image_features, num_predictions_per_location):
    """Computes encoded object locations and corresponding confidences.

    Args:
      image_features: A float tensor of shape [batch_size, height, width,
        channels] containing features for a batch of images.
      num_predictions_per_location: an integer representing the number of box
        predictions to be made per spatial location in the feature map.

    Returns:
      A dictionary containing the following tensors.
        box_encodings: A float tensor of shape [batch_size, num_anchors, 1,
          code_size] representing the location of the objects, where
          num_anchors = feat_height * feat_width * num_predictions_per_location
        class_predictions_with_background: A float tensor of shape
          [batch_size, num_anchors, num_classes + 1] representing the class
          predictions for the proposals.
    """
    # Clamp the intermediate depth into [min_depth, max_depth].
    features_depth = static_shape.get_depth(image_features.get_shape())
    depth = max(min(features_depth, self._max_depth), self._min_depth)

    # Add a slot for the background class.
    num_class_slots = self.num_classes + 1
    net = image_features
    with slim.arg_scope(self._conv_hyperparams), \
         slim.arg_scope([slim.dropout], is_training=self._is_training):
      # Add additional conv layers before the predictor.
      if depth > 0 and self._num_layers_before_predictor > 0:
        for i in range(self._num_layers_before_predictor):
          net = slim.conv2d(
              net, depth, [1, 1], scope='Conv2d_%d_1x1_%d' % (i, depth))
      # Prediction heads are plain linear convolutions: no activation or
      # normalization on the outputs.
      with slim.arg_scope([slim.conv2d], activation_fn=None,
                          normalizer_fn=None, normalizer_params=None):
        box_encodings = slim.conv2d(
            net, num_predictions_per_location * self._box_code_size,
            [self._kernel_size, self._kernel_size],
            scope='BoxEncodingPredictor')
        # NOTE: dropout is applied after the box head reads `net`, so only
        # the class-prediction branch sees the dropped-out features.
        if self._use_dropout:
          net = slim.dropout(net, keep_prob=self._dropout_keep_prob)
        class_predictions_with_background = slim.conv2d(
            net, num_predictions_per_location * num_class_slots,
            [self._kernel_size, self._kernel_size],
            scope='ClassPredictor')
        if self._apply_sigmoid_to_scores:
          class_predictions_with_background = tf.sigmoid(
              class_predictions_with_background)

    batch_size = static_shape.get_batch_size(image_features.get_shape())
    if batch_size is None:
      # Unknown batch size: compute the anchor count from the static spatial
      # dims and let the batch dimension be inferred via -1.
      features_height = static_shape.get_height(image_features.get_shape())
      features_width = static_shape.get_width(image_features.get_shape())
      flattened_predictions_size = (features_height * features_width *
                                    num_predictions_per_location)
      box_encodings = tf.reshape(
          box_encodings,
          [-1, flattened_predictions_size, 1, self._box_code_size])
      class_predictions_with_background = tf.reshape(
          class_predictions_with_background,
          [-1, flattened_predictions_size, num_class_slots])
    else:
      # Known batch size: let the anchor dimension be inferred instead.
      box_encodings = tf.reshape(
          box_encodings, [batch_size, -1, 1, self._box_code_size])
      class_predictions_with_background = tf.reshape(
          class_predictions_with_background,
          [batch_size, -1, num_class_slots])
    return {BOX_ENCODINGS: box_encodings,
            CLASS_PREDICTIONS_WITH_BACKGROUND:
            class_predictions_with_background}
object_detection/core/box_predictor_test.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.core.box_predictor."""
import
numpy
as
np
import
tensorflow
as
tf
from
google.protobuf
import
text_format
from
object_detection.builders
import
hyperparams_builder
from
object_detection.core
import
box_predictor
from
object_detection.protos
import
hyperparams_pb2
class MaskRCNNBoxPredictorTest(tf.test.TestCase):
  """Tests for box_predictor.MaskRCNNBoxPredictor."""

  def _build_arg_scope_with_hyperparams(self,
                                        op_type=hyperparams_pb2.Hyperparams.FC):
    """Builds a slim arg_scope from a minimal Hyperparams proto of op_type."""
    hyperparams = hyperparams_pb2.Hyperparams()
    hyperparams_text_proto = """
      activation: NONE
      regularizer {
        l2_regularizer {
        }
      }
      initializer {
        truncated_normal_initializer {
        }
      }
    """
    text_format.Merge(hyperparams_text_proto, hyperparams)
    hyperparams.op = op_type
    return hyperparams_builder.build(hyperparams, is_training=True)

  def test_get_boxes_with_five_classes(self):
    """Box/class outputs have per-class shapes for a 5-class predictor."""
    image_features = tf.random_uniform([2, 7, 7, 3], dtype=tf.float32)
    mask_box_predictor = box_predictor.MaskRCNNBoxPredictor(
        is_training=False,
        num_classes=5,
        fc_hyperparams=self._build_arg_scope_with_hyperparams(),
        use_dropout=False,
        dropout_keep_prob=0.5,
        box_code_size=4,
    )
    box_predictions = mask_box_predictor.predict(
        image_features, num_predictions_per_location=1, scope='BoxPredictor')
    box_encodings = box_predictions[box_predictor.BOX_ENCODINGS]
    class_predictions_with_background = box_predictions[
        box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND]
    init_op = tf.global_variables_initializer()
    with self.test_session() as sess:
      sess.run(init_op)
      (box_encodings_shape,
       class_predictions_with_background_shape) = sess.run(
           [tf.shape(box_encodings),
            tf.shape(class_predictions_with_background)])
      # [batch, 1 anchor, num_classes, code_size] and
      # [batch, 1 anchor, num_classes + 1 (background)].
      self.assertAllEqual(box_encodings_shape, [2, 1, 5, 4])
      self.assertAllEqual(class_predictions_with_background_shape, [2, 1, 6])

  def test_value_error_on_predict_instance_masks_with_no_conv_hyperparms(self):
    """Requesting masks without conv_hyperparams must raise ValueError."""
    with self.assertRaises(ValueError):
      box_predictor.MaskRCNNBoxPredictor(
          is_training=False,
          num_classes=5,
          fc_hyperparams=self._build_arg_scope_with_hyperparams(),
          use_dropout=False,
          dropout_keep_prob=0.5,
          box_code_size=4,
          predict_instance_masks=True)

  def test_get_instance_masks(self):
    """Mask branch emits [batch, 1, num_classes, 2*h, 2*w] logits."""
    image_features = tf.random_uniform([2, 7, 7, 3], dtype=tf.float32)
    mask_box_predictor = box_predictor.MaskRCNNBoxPredictor(
        is_training=False,
        num_classes=5,
        fc_hyperparams=self._build_arg_scope_with_hyperparams(),
        use_dropout=False,
        dropout_keep_prob=0.5,
        box_code_size=4,
        conv_hyperparams=self._build_arg_scope_with_hyperparams(
            op_type=hyperparams_pb2.Hyperparams.CONV),
        predict_instance_masks=True)
    box_predictions = mask_box_predictor.predict(
        image_features, num_predictions_per_location=1, scope='BoxPredictor')
    mask_predictions = box_predictions[box_predictor.MASK_PREDICTIONS]
    # 7x7 input upsampled 2x by the mask branch -> 14x14 masks.
    self.assertListEqual([2, 1, 5, 14, 14],
                         mask_predictions.get_shape().as_list())

  def test_do_not_return_instance_masks_and_keypoints_without_request(self):
    """By default only box encodings and class predictions are returned."""
    image_features = tf.random_uniform([2, 7, 7, 3], dtype=tf.float32)
    mask_box_predictor = box_predictor.MaskRCNNBoxPredictor(
        is_training=False,
        num_classes=5,
        fc_hyperparams=self._build_arg_scope_with_hyperparams(),
        use_dropout=False,
        dropout_keep_prob=0.5,
        box_code_size=4)
    box_predictions = mask_box_predictor.predict(
        image_features, num_predictions_per_location=1, scope='BoxPredictor')
    self.assertEqual(len(box_predictions), 2)
    self.assertTrue(box_predictor.BOX_ENCODINGS in box_predictions)
    self.assertTrue(box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND
                    in box_predictions)

  def test_value_error_on_predict_keypoints(self):
    """Keypoint prediction is unimplemented and must raise ValueError."""
    with self.assertRaises(ValueError):
      box_predictor.MaskRCNNBoxPredictor(
          is_training=False,
          num_classes=5,
          fc_hyperparams=self._build_arg_scope_with_hyperparams(),
          use_dropout=False,
          dropout_keep_prob=0.5,
          box_code_size=4,
          predict_keypoints=True)
class RfcnBoxPredictorTest(tf.test.TestCase):
  """Tests for box_predictor.RfcnBoxPredictor."""

  def _build_arg_scope_with_conv_hyperparams(self):
    """Builds a slim arg_scope from a minimal conv Hyperparams proto."""
    conv_hyperparams = hyperparams_pb2.Hyperparams()
    conv_hyperparams_text_proto = """
      regularizer {
        l2_regularizer {
        }
      }
      initializer {
        truncated_normal_initializer {
        }
      }
    """
    text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams)
    return hyperparams_builder.build(conv_hyperparams, is_training=True)

  def test_get_correct_box_encoding_and_class_prediction_shapes(self):
    """Outputs are [batch*proposals, 1, classes, code] and [.., classes+1]."""
    image_features = tf.random_uniform([4, 8, 8, 64], dtype=tf.float32)
    proposal_boxes = tf.random_normal([4, 2, 4], dtype=tf.float32)
    rfcn_box_predictor = box_predictor.RfcnBoxPredictor(
        is_training=False,
        num_classes=2,
        conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(),
        num_spatial_bins=[3, 3],
        depth=4,
        crop_size=[12, 12],
        box_code_size=4)
    predictions = rfcn_box_predictor.predict(
        image_features,
        num_predictions_per_location=1,
        scope='BoxPredictor',
        proposal_boxes=proposal_boxes)
    encodings = predictions[box_predictor.BOX_ENCODINGS]
    class_scores = predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND]
    with self.test_session() as sess:
      sess.run(tf.global_variables_initializer())
      encodings_shape, class_scores_shape = sess.run(
          [tf.shape(encodings), tf.shape(class_scores)])
      # 4 images * 2 proposals = 8 rows.
      self.assertAllEqual(encodings_shape, [8, 1, 2, 4])
      self.assertAllEqual(class_scores_shape, [8, 1, 3])
class
ConvolutionalBoxPredictorTest
(
tf
.
test
.
TestCase
):
def _build_arg_scope_with_conv_hyperparams(self):
  """Builds a slim arg_scope from a minimal RELU_6 conv Hyperparams proto."""
  proto = hyperparams_pb2.Hyperparams()
  text_format.Merge("""
      activation: RELU_6
      regularizer {
        l2_regularizer {
        }
      }
      initializer {
        truncated_normal_initializer {
        }
      }
    """, proto)
  return hyperparams_builder.build(proto, is_training=True)
def test_get_boxes_for_five_aspect_ratios_per_location(self):
  """5 anchors per cell on an 8x8 map yields 320 predictions per image."""
  features = tf.random_uniform([4, 8, 8, 64], dtype=tf.float32)
  predictor = box_predictor.ConvolutionalBoxPredictor(
      is_training=False,
      num_classes=0,
      conv_hyperparams=self._build_arg_scope_with_conv_hyperparams(),
      min_depth=0,
      max_depth=32,
      num_layers_before_predictor=1,
      use_dropout=True,
      dropout_keep_prob=0.8,
      kernel_size=1,
      box_code_size=4)
  predictions = predictor.predict(
      features, num_predictions_per_location=5, scope='BoxPredictor')
  encodings = predictions[box_predictor.BOX_ENCODINGS]
  objectness = predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND]
  with self.test_session() as sess:
    sess.run(tf.global_variables_initializer())
    encodings_shape, objectness_shape = sess.run(
        [tf.shape(encodings), tf.shape(objectness)])
    # 8 * 8 * 5 = 320 anchors; num_classes=0 leaves 1 background slot.
    self.assertAllEqual(encodings_shape, [4, 320, 1, 4])
    self.assertAllEqual(objectness_shape, [4, 320, 1])
def
test_get_boxes_for_one_aspect_ratio_per_location
(
self
):
image_features
=
tf
.
random_uniform
([
4
,
8
,
8
,
64
],
dtype
=
tf
.
float32
)
conv_box_predictor
=
box_predictor
.
ConvolutionalBoxPredictor
(
is_training
=
False
,
num_classes
=
0
,
conv_hyperparams
=
self
.
_build_arg_scope_with_conv_hyperparams
(),
min_depth
=
0
,
max_depth
=
32
,
num_layers_before_predictor
=
1
,
use_dropout
=
True
,
dropout_keep_prob
=
0.8
,
kernel_size
=
1
,
box_code_size
=
4
)
box_predictions
=
conv_box_predictor
.
predict
(
image_features
,
num_predictions_per_location
=
1
,
scope
=
'BoxPredictor'
)
box_encodings
=
box_predictions
[
box_predictor
.
BOX_ENCODINGS
]
objectness_predictions
=
box_predictions
[
box_predictor
.
CLASS_PREDICTIONS_WITH_BACKGROUND
]
init_op
=
tf
.
global_variables_initializer
()
with
self
.
test_session
()
as
sess
:
sess
.
run
(
init_op
)
(
box_encodings_shape
,
objectness_predictions_shape
)
=
sess
.
run
(
[
tf
.
shape
(
box_encodings
),
tf
.
shape
(
objectness_predictions
)])
self
.
assertAllEqual
(
box_encodings_shape
,
[
4
,
64
,
1
,
4
])
self
.
assertAllEqual
(
objectness_predictions_shape
,
[
4
,
64
,
1
])
def
test_get_multi_class_predictions_for_five_aspect_ratios_per_location
(
self
):
num_classes_without_background
=
6
image_features
=
tf
.
random_uniform
([
4
,
8
,
8
,
64
],
dtype
=
tf
.
float32
)
conv_box_predictor
=
box_predictor
.
ConvolutionalBoxPredictor
(
is_training
=
False
,
num_classes
=
num_classes_without_background
,
conv_hyperparams
=
self
.
_build_arg_scope_with_conv_hyperparams
(),
min_depth
=
0
,
max_depth
=
32
,
num_layers_before_predictor
=
1
,
use_dropout
=
True
,
dropout_keep_prob
=
0.8
,
kernel_size
=
1
,
box_code_size
=
4
)
box_predictions
=
conv_box_predictor
.
predict
(
image_features
,
num_predictions_per_location
=
5
,
scope
=
'BoxPredictor'
)
box_encodings
=
box_predictions
[
box_predictor
.
BOX_ENCODINGS
]
class_predictions_with_background
=
box_predictions
[
box_predictor
.
CLASS_PREDICTIONS_WITH_BACKGROUND
]
init_op
=
tf
.
global_variables_initializer
()
with
self
.
test_session
()
as
sess
:
sess
.
run
(
init_op
)
(
box_encodings_shape
,
class_predictions_with_background_shape
)
=
sess
.
run
([
tf
.
shape
(
box_encodings
),
tf
.
shape
(
class_predictions_with_background
)])
self
.
assertAllEqual
(
box_encodings_shape
,
[
4
,
320
,
1
,
4
])
self
.
assertAllEqual
(
class_predictions_with_background_shape
,
[
4
,
320
,
num_classes_without_background
+
1
])
def
test_get_boxes_for_five_aspect_ratios_per_location_fully_convolutional
(
self
):
image_features
=
tf
.
placeholder
(
dtype
=
tf
.
float32
,
shape
=
[
4
,
None
,
None
,
64
])
conv_box_predictor
=
box_predictor
.
ConvolutionalBoxPredictor
(
is_training
=
False
,
num_classes
=
0
,
conv_hyperparams
=
self
.
_build_arg_scope_with_conv_hyperparams
(),
min_depth
=
0
,
max_depth
=
32
,
num_layers_before_predictor
=
1
,
use_dropout
=
True
,
dropout_keep_prob
=
0.8
,
kernel_size
=
1
,
box_code_size
=
4
)
box_predictions
=
conv_box_predictor
.
predict
(
image_features
,
num_predictions_per_location
=
5
,
scope
=
'BoxPredictor'
)
box_encodings
=
box_predictions
[
box_predictor
.
BOX_ENCODINGS
]
objectness_predictions
=
box_predictions
[
box_predictor
.
CLASS_PREDICTIONS_WITH_BACKGROUND
]
init_op
=
tf
.
global_variables_initializer
()
resolution
=
32
expected_num_anchors
=
resolution
*
resolution
*
5
with
self
.
test_session
()
as
sess
:
sess
.
run
(
init_op
)
(
box_encodings_shape
,
objectness_predictions_shape
)
=
sess
.
run
(
[
tf
.
shape
(
box_encodings
),
tf
.
shape
(
objectness_predictions
)],
feed_dict
=
{
image_features
:
np
.
random
.
rand
(
4
,
resolution
,
resolution
,
64
)})
self
.
assertAllEqual
(
box_encodings_shape
,
[
4
,
expected_num_anchors
,
1
,
4
])
self
.
assertAllEqual
(
objectness_predictions_shape
,
[
4
,
expected_num_anchors
,
1
])
# Allow running this test module directly as a script.
if __name__ == '__main__':
  tf.test.main()
object_detection/core/data_decoder.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Interface for data decoders.
Data decoders decode the input data and return a dictionary of tensors keyed by
the entries in core.reader.Fields.
"""
from
abc
import
ABCMeta
from
abc
import
abstractmethod
class DataDecoder(object):
  """Interface for data decoders."""
  # Python 2 style abstract-base-class declaration; subclasses must implement
  # every @abstractmethod before they can be instantiated.
  __metaclass__ = ABCMeta

  @abstractmethod
  def decode(self, data):
    """Return a single image and associated labels.

    Args:
      data: a string tensor holding a serialized protocol buffer corresponding
        to data for a single image.

    Returns:
      tensor_dict: a dictionary containing tensors. Possible keys are defined in
        reader.Fields.
    """
    pass
object_detection/core/keypoint_ops.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Keypoint operations.
Keypoints are represented as tensors of shape [num_instances, num_keypoints, 2],
where the last dimension holds rank 2 tensors of the form [y, x] representing
the coordinates of the keypoint.
"""
import
numpy
as
np
import
tensorflow
as
tf
def scale(keypoints, y_scale, x_scale, scope=None):
  """Scales keypoint coordinates in x and y dimensions.

  Args:
    keypoints: a tensor of shape [num_instances, num_keypoints, 2]
    y_scale: (float) scalar tensor
    x_scale: (float) scalar tensor
    scope: name scope.

  Returns:
    new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
  """
  with tf.name_scope(scope, 'Scale'):
    # Broadcast a [1, 1, 2] multiplier of [y_scale, x_scale] over all
    # instances and keypoints.
    scale_factors = [[[tf.cast(y_scale, tf.float32),
                       tf.cast(x_scale, tf.float32)]]]
    return keypoints * scale_factors
def clip_to_window(keypoints, window, scope=None):
  """Clips keypoints to a window.

  This op clips any input keypoints to a window.

  Args:
    keypoints: a tensor of shape [num_instances, num_keypoints, 2]
    window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
      window to which the op should clip the keypoints.
    scope: name scope.

  Returns:
    new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
  """
  with tf.name_scope(scope, 'ClipToWindow'):
    y_coords, x_coords = tf.split(
        value=keypoints, num_or_size_splits=2, axis=2)
    win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
    # Clamp each coordinate independently into the window extent.
    clipped_y = tf.maximum(tf.minimum(y_coords, win_y_max), win_y_min)
    clipped_x = tf.maximum(tf.minimum(x_coords, win_x_max), win_x_min)
    return tf.concat([clipped_y, clipped_x], 2)
def prune_outside_window(keypoints, window, scope=None):
  """Prunes keypoints that fall outside a given window.

  This function replaces keypoints that fall outside the given window with nan.
  See also clip_to_window which clips any keypoints that fall outside the given
  window.

  Args:
    keypoints: a tensor of shape [num_instances, num_keypoints, 2]
    window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
      window outside of which the op should prune the keypoints.
    scope: name scope.

  Returns:
    new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
  """
  with tf.name_scope(scope, 'PruneOutsideWindow'):
    y, x = tf.split(value=keypoints, num_or_size_splits=2, axis=2)
    win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)

    # A keypoint survives only if both coordinates lie inside the window
    # (boundary included).
    inside_window = tf.logical_and(
        tf.logical_and(y >= win_y_min, y <= win_y_max),
        tf.logical_and(x >= win_x_min, x <= win_x_max))

    # Pruned keypoints are marked with NaN rather than dropped, so the
    # output keeps the input's shape.
    pruned_y = tf.where(inside_window, y, np.nan * tf.ones_like(y))
    pruned_x = tf.where(inside_window, x, np.nan * tf.ones_like(x))
    return tf.concat([pruned_y, pruned_x], 2)
def change_coordinate_frame(keypoints, window, scope=None):
  """Changes coordinate frame of the keypoints to be relative to window's frame.

  Given a window of the form [y_min, x_min, y_max, x_max], changes keypoint
  coordinates from keypoints of shape [num_instances, num_keypoints, 2]
  to be relative to this window.

  An example use case is data augmentation: where we are given groundtruth
  keypoints and would like to randomly crop the image to some window. In this
  case we need to change the coordinate frame of each groundtruth keypoint to be
  relative to this new window.

  Args:
    keypoints: a tensor of shape [num_instances, num_keypoints, 2]
    window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
      window we should change the coordinate frame to.
    scope: name scope.

  Returns:
    new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
  """
  with tf.name_scope(scope, 'ChangeCoordinateFrame'):
    win_height = window[2] - window[0]
    win_width = window[3] - window[1]
    # Translate to the window origin, then rescale by the window extent.
    shifted_keypoints = keypoints - [window[0], window[1]]
    return scale(shifted_keypoints, 1.0 / win_height, 1.0 / win_width)
def to_normalized_coordinates(keypoints, height, width,
                              check_range=True, scope=None):
  """Converts absolute keypoint coordinates to normalized coordinates in [0, 1].

  Usually one uses the dynamic shape of the image or conv-layer tensor:
    keypoints = keypoint_ops.to_normalized_coordinates(keypoints,
                                                       tf.shape(images)[1],
                                                       tf.shape(images)[2]),

  This function raises an assertion failed error at graph execution time when
  the maximum coordinate is smaller than 1.01 (which means that coordinates are
  already normalized). The value 1.01 is to deal with small rounding errors.

  Args:
    keypoints: A tensor of shape [num_instances, num_keypoints, 2].
    height: Maximum value for y coordinate of absolute keypoint coordinates.
    width: Maximum value for x coordinate of absolute keypoint coordinates.
    check_range: If True, checks if the coordinates are normalized.
    scope: name scope.

  Returns:
    tensor of shape [num_instances, num_keypoints, 2] with normalized
    coordinates in [0, 1].
  """
  with tf.name_scope(scope, 'ToNormalizedCoordinates'):
    height = tf.cast(height, tf.float32)
    width = tf.cast(width, tf.float32)

    if check_range:
      # Coordinates <= 1.01 suggest the input is already normalized; the
      # runtime assert makes a double normalization fail loudly.
      max_coordinate = tf.reduce_max(keypoints)
      range_assert = tf.Assert(
          tf.greater(max_coordinate, 1.01),
          ['max value is lower than 1.01: ', max_coordinate])
      # Thread the assert into the data path via a control dependency.
      with tf.control_dependencies([range_assert]):
        width = tf.identity(width)

    return scale(keypoints, 1.0 / height, 1.0 / width)
def to_absolute_coordinates(keypoints, height, width,
                            check_range=True, scope=None):
  """Converts normalized keypoint coordinates to absolute pixel coordinates.

  This function raises an assertion failed error when the maximum keypoint
  coordinate value is larger than 1.01 (in which case coordinates are already
  absolute).

  Args:
    keypoints: A tensor of shape [num_instances, num_keypoints, 2]
    height: Maximum value for y coordinate of absolute keypoint coordinates.
    width: Maximum value for x coordinate of absolute keypoint coordinates.
    check_range: If True, checks if the coordinates are normalized or not.
    scope: name scope.

  Returns:
    tensor of shape [num_instances, num_keypoints, 2] with absolute coordinates
    in terms of the image size.
  """
  with tf.name_scope(scope, 'ToAbsoluteCoordinates'):
    height = tf.cast(height, tf.float32)
    width = tf.cast(width, tf.float32)

    if check_range:
      # Coordinates above 1.01 suggest the input is already absolute; the
      # runtime assert makes a double conversion fail loudly.
      max_coordinate = tf.reduce_max(keypoints)
      range_assert = tf.Assert(
          tf.greater_equal(1.01, max_coordinate),
          ['maximum keypoint coordinate value is larger '
           'than 1.01: ', max_coordinate])
      # Thread the assert into the data path via a control dependency.
      with tf.control_dependencies([range_assert]):
        width = tf.identity(width)

    return scale(keypoints, height, width)
def flip_horizontal(keypoints, flip_point, flip_permutation, scope=None):
  """Flips the keypoints horizontally around the flip_point.

  This operation flips the x coordinate for each keypoint around the flip_point
  and also permutes the keypoints in a manner specified by flip_permutation.

  Args:
    keypoints: a tensor of shape [num_instances, num_keypoints, 2]
    flip_point: (float) scalar tensor representing the x coordinate to flip the
      keypoints around.
    flip_permutation: rank 1 int32 tensor containing the keypoint flip
      permutation. This specifies the mapping from original keypoint indices
      to the flipped keypoint indices. This is used primarily for keypoints
      that are not reflection invariant. E.g. Suppose there are 3 keypoints
      representing ['head', 'right_eye', 'left_eye'], then a logical choice for
      flip_permutation might be [0, 2, 1] since we want to swap the 'left_eye'
      and 'right_eye' after a horizontal flip.
    scope: name scope.

  Returns:
    new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
  """
  with tf.name_scope(scope, 'FlipHorizontal'):
    # Move the keypoint axis to the front so the permutation can be applied
    # with a single gather along axis 0.
    permuted = tf.gather(tf.transpose(keypoints, [1, 0, 2]), flip_permutation)
    v, u = tf.split(value=permuted, num_or_size_splits=2, axis=2)
    # Mirror the x coordinate around flip_point: u' = 2 * flip_point - u.
    mirrored_u = flip_point * 2.0 - u
    flipped = tf.concat([v, mirrored_u], 2)
    # Restore the original [num_instances, num_keypoints, 2] layout.
    return tf.transpose(flipped, [1, 0, 2])
object_detection/core/keypoint_ops_test.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.core.keypoint_ops."""
import
numpy
as
np
import
tensorflow
as
tf
from
object_detection.core
import
keypoint_ops
class KeypointOpsTest(tf.test.TestCase):
  """Tests for common keypoint operations."""

  def _assert_close_in_session(self, output, expected_keypoints):
    """Evaluates both tensors in a session and asserts they are close.

    Args:
      output: tensor produced by the op under test.
      expected_keypoints: tensor holding the expected result.
    """
    with self.test_session() as sess:
      output_, expected_keypoints_ = sess.run([output, expected_keypoints])
      self.assertAllClose(output_, expected_keypoints_)

  def test_scale(self):
    keypoints = tf.constant([
        [[0.0, 0.0], [100.0, 200.0]],
        [[50.0, 120.0], [100.0, 140.0]]
    ])
    y_scale = tf.constant(1.0 / 100)
    x_scale = tf.constant(1.0 / 200)

    expected_keypoints = tf.constant([
        [[0., 0.], [1.0, 1.0]],
        [[0.5, 0.6], [1.0, 0.7]]
    ])
    output = keypoint_ops.scale(keypoints, y_scale, x_scale)
    self._assert_close_in_session(output, expected_keypoints)

  def test_clip_to_window(self):
    keypoints = tf.constant([
        [[0.25, 0.5], [0.75, 0.75]],
        [[0.5, 0.0], [1.0, 1.0]]
    ])
    window = tf.constant([0.25, 0.25, 0.75, 0.75])

    expected_keypoints = tf.constant([
        [[0.25, 0.5], [0.75, 0.75]],
        [[0.5, 0.25], [0.75, 0.75]]
    ])
    output = keypoint_ops.clip_to_window(keypoints, window)
    self._assert_close_in_session(output, expected_keypoints)

  def test_prune_outside_window(self):
    keypoints = tf.constant([
        [[0.25, 0.5], [0.75, 0.75]],
        [[0.5, 0.0], [1.0, 1.0]]
    ])
    window = tf.constant([0.25, 0.25, 0.75, 0.75])

    # Keypoints outside the window are replaced with NaN, not removed.
    expected_keypoints = tf.constant([[[0.25, 0.5], [0.75, 0.75]],
                                      [[np.nan, np.nan], [np.nan, np.nan]]])
    output = keypoint_ops.prune_outside_window(keypoints, window)
    self._assert_close_in_session(output, expected_keypoints)

  def test_change_coordinate_frame(self):
    keypoints = tf.constant([
        [[0.25, 0.5], [0.75, 0.75]],
        [[0.5, 0.0], [1.0, 1.0]]
    ])
    window = tf.constant([0.25, 0.25, 0.75, 0.75])

    expected_keypoints = tf.constant([
        [[0, 0.5], [1.0, 1.0]],
        [[0.5, -0.5], [1.5, 1.5]]
    ])
    output = keypoint_ops.change_coordinate_frame(keypoints, window)
    self._assert_close_in_session(output, expected_keypoints)

  def test_to_normalized_coordinates(self):
    keypoints = tf.constant([
        [[10., 30.], [30., 45.]],
        [[20., 0.], [40., 60.]]
    ])
    output = keypoint_ops.to_normalized_coordinates(keypoints, 40, 60)
    expected_keypoints = tf.constant([
        [[0.25, 0.5], [0.75, 0.75]],
        [[0.5, 0.0], [1.0, 1.0]]
    ])
    self._assert_close_in_session(output, expected_keypoints)

  def test_to_normalized_coordinates_already_normalized(self):
    # Normalizing already-normalized keypoints must trip the range assert.
    keypoints = tf.constant([
        [[0.25, 0.5], [0.75, 0.75]],
        [[0.5, 0.0], [1.0, 1.0]]
    ])
    output = keypoint_ops.to_normalized_coordinates(keypoints, 40, 60)

    with self.test_session() as sess:
      with self.assertRaisesOpError('assertion failed'):
        sess.run(output)

  def test_to_absolute_coordinates(self):
    keypoints = tf.constant([
        [[0.25, 0.5], [0.75, 0.75]],
        [[0.5, 0.0], [1.0, 1.0]]
    ])
    output = keypoint_ops.to_absolute_coordinates(keypoints, 40, 60)
    expected_keypoints = tf.constant([
        [[10., 30.], [30., 45.]],
        [[20., 0.], [40., 60.]]
    ])
    self._assert_close_in_session(output, expected_keypoints)

  def test_to_absolute_coordinates_already_absolute(self):
    # Converting already-absolute keypoints must trip the range assert.
    keypoints = tf.constant([
        [[10., 30.], [30., 45.]],
        [[20., 0.], [40., 60.]]
    ])
    output = keypoint_ops.to_absolute_coordinates(keypoints, 40, 60)

    with self.test_session() as sess:
      with self.assertRaisesOpError('assertion failed'):
        sess.run(output)

  def test_flip_horizontal(self):
    keypoints = tf.constant([
        [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
        [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]]
    ])
    # Swap keypoints 1 and 2 after the flip; keypoint 0 stays in place.
    flip_permutation = [0, 2, 1]

    expected_keypoints = tf.constant([
        [[0.1, 0.9], [0.3, 0.7], [0.2, 0.8]],
        [[0.4, 0.6], [0.6, 0.4], [0.5, 0.5]],
    ])
    output = keypoint_ops.flip_horizontal(keypoints, 0.5, flip_permutation)
    self._assert_close_in_session(output, expected_keypoints)
# Allow running this test module directly as a script.
if __name__ == '__main__':
  tf.test.main()
object_detection/core/losses.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Classification and regression loss functions for object detection.
Localization losses:
* WeightedL2LocalizationLoss
* WeightedSmoothL1LocalizationLoss
* WeightedIOULocalizationLoss
Classification losses:
* WeightedSigmoidClassificationLoss
* WeightedSoftmaxClassificationLoss
* BootstrappedSigmoidClassificationLoss
"""
from
abc
import
ABCMeta
from
abc
import
abstractmethod
import
tensorflow
as
tf
from
object_detection.core
import
box_list
from
object_detection.core
import
box_list_ops
from
object_detection.utils
import
ops
# Module-level alias for TF-Slim.
# NOTE(review): slim is not referenced by the loss classes visible here —
# presumably used further down the file; verify before removing.
slim = tf.contrib.slim
class
Loss
(
object
):
"""Abstract base class for loss functions."""
__metaclass__
=
ABCMeta
def
__call__
(
self
,
prediction_tensor
,
target_tensor
,
ignore_nan_targets
=
False
,
scope
=
None
,
**
params
):
"""Call the loss function.
Args:
prediction_tensor: a tensor representing predicted quantities.
target_tensor: a tensor representing regression or classification targets.
ignore_nan_targets: whether to ignore nan targets in the loss computation.
E.g. can be used if the target tensor is missing groundtruth data that
shouldn't be factored into the loss.
scope: Op scope name. Defaults to 'Loss' if None.
**params: Additional keyword arguments for specific implementations of
the Loss.
Returns:
loss: a tensor representing the value of the loss function.
"""
with
tf
.
name_scope
(
scope
,
'Loss'
,
[
prediction_tensor
,
target_tensor
,
params
])
as
scope
:
if
ignore_nan_targets
:
target_tensor
=
tf
.
where
(
tf
.
is_nan
(
target_tensor
),
prediction_tensor
,
target_tensor
)
return
self
.
_compute_loss
(
prediction_tensor
,
target_tensor
,
**
params
)
@
abstractmethod
def
_compute_loss
(
self
,
prediction_tensor
,
target_tensor
,
**
params
):
"""Method to be overriden by implementations.
Args:
prediction_tensor: a tensor representing predicted quantities
target_tensor: a tensor representing regression or classification targets
**params: Additional keyword arguments for specific implementations of
the Loss.
Returns:
loss: a tensor representing the value of the loss function
"""
pass
class WeightedL2LocalizationLoss(Loss):
  """L2 localization loss function with anchorwise output support.

  Loss[b,a] = .5 * ||weights[b,a] * (prediction[b,a,:] - target[b,a,:])||^2
  """

  def __init__(self, anchorwise_output=False):
    """Constructor.

    Args:
      anchorwise_output: Outputs loss per anchor. (default False)
    """
    self._anchorwise_output = anchorwise_output

  def _compute_loss(self, prediction_tensor, target_tensor, weights):
    """Compute loss function.

    Args:
      prediction_tensor: A float tensor of shape [batch_size, num_anchors,
        code_size] representing the (encoded) predicted locations of objects.
      target_tensor: A float tensor of shape [batch_size, num_anchors,
        code_size] representing the regression targets
      weights: a float tensor of shape [batch_size, num_anchors]

    Returns:
      loss: a (scalar) tensor representing the value of the loss function
        or a float tensor of shape [batch_size, num_anchors]
    """
    # Broadcast per-anchor weights over the code dimension before squaring,
    # so the weight is applied inside the square (matches the formula above).
    per_entry_loss = 0.5 * tf.square(
        (prediction_tensor - target_tensor) * tf.expand_dims(weights, 2))
    if not self._anchorwise_output:
      return tf.reduce_sum(per_entry_loss)
    return tf.reduce_sum(per_entry_loss, 2)
class WeightedSmoothL1LocalizationLoss(Loss):
  """Smooth L1 localization loss function.

  The smooth L1_loss is defined elementwise as .5 x^2 if |x|<1 and |x|-.5
  otherwise, where x is the difference between predictions and target.

  See also Equation (3) in the Fast R-CNN paper by Ross Girshick (ICCV 2015)
  """

  def __init__(self, anchorwise_output=False):
    """Constructor.

    Args:
      anchorwise_output: Outputs loss per anchor. (default False)
    """
    self._anchorwise_output = anchorwise_output

  def _compute_loss(self, prediction_tensor, target_tensor, weights):
    """Compute loss function.

    Args:
      prediction_tensor: A float tensor of shape [batch_size, num_anchors,
        code_size] representing the (encoded) predicted locations of objects.
      target_tensor: A float tensor of shape [batch_size, num_anchors,
        code_size] representing the regression targets
      weights: a float tensor of shape [batch_size, num_anchors]

    Returns:
      loss: a (scalar) tensor representing the value of the loss function
    """
    abs_err = tf.abs(prediction_tensor - target_tensor)
    # Quadratic branch inside |x| < 1, linear branch outside.
    quadratic = 0.5 * tf.square(abs_err)
    linear = abs_err - 0.5
    per_entry_loss = tf.where(tf.less(abs_err, 1), quadratic, linear)
    # Sum over the code dimension, then weight each anchor.
    per_anchor_loss = weights * tf.reduce_sum(per_entry_loss, 2)
    if self._anchorwise_output:
      return per_anchor_loss
    return tf.reduce_sum(per_anchor_loss)
class WeightedIOULocalizationLoss(Loss):
  """IOU localization loss function.

  Sums the IOU for corresponding pairs of predicted/groundtruth boxes
  and for each pair assign a loss of 1 - IOU.  We then compute a weighted
  sum over all pairs which is returned as the total loss.
  """

  def _compute_loss(self, prediction_tensor, target_tensor, weights):
    """Compute loss function.

    Args:
      prediction_tensor: A float tensor of shape [batch_size, num_anchors, 4]
        representing the decoded predicted boxes
      target_tensor: A float tensor of shape [batch_size, num_anchors, 4]
        representing the decoded target boxes
      weights: a float tensor of shape [batch_size, num_anchors]

    Returns:
      loss: a (scalar) tensor representing the value of the loss function
    """
    # Flatten the batch so boxes can be matched pairwise across all anchors.
    flat_predictions = tf.reshape(prediction_tensor, [-1, 4])
    flat_targets = tf.reshape(target_tensor, [-1, 4])
    matched_ious = box_list_ops.matched_iou(
        box_list.BoxList(flat_predictions), box_list.BoxList(flat_targets))
    iou_losses = 1.0 - matched_ious
    flat_weights = tf.reshape(weights, [-1])
    return tf.reduce_sum(flat_weights * iou_losses)
class WeightedSigmoidClassificationLoss(Loss):
  """Sigmoid cross entropy classification loss function."""

  def __init__(self, anchorwise_output=False):
    """Constructor.

    Args:
      anchorwise_output: Outputs loss per anchor. (default False)
    """
    self._anchorwise_output = anchorwise_output

  def _compute_loss(self, prediction_tensor, target_tensor, weights,
                    class_indices=None):
    """Compute loss function.

    Args:
      prediction_tensor: A float tensor of shape [batch_size, num_anchors,
        num_classes] representing the predicted logits for each class
      target_tensor: A float tensor of shape [batch_size, num_anchors,
        num_classes] representing one-hot encoded classification targets
      weights: a float tensor of shape [batch_size, num_anchors]
      class_indices: (Optional) A 1-D integer tensor of class indices.
        If provided, computes loss only for the specified class indices.

    Returns:
      loss: a (scalar) tensor representing the value of the loss function
        or a float tensor of shape [batch_size, num_anchors]
    """
    # Broadcast the per-anchor weights over the class dimension.
    entry_weights = tf.expand_dims(weights, 2)
    if class_indices is not None:
      # Zero out every class not listed in class_indices via a dense
      # {0, 1} mask broadcast over batch and anchor dimensions.
      num_classes = tf.shape(prediction_tensor)[2]
      class_mask = tf.reshape(
          ops.indices_to_dense_vector(class_indices, num_classes), [1, 1, -1])
      entry_weights = entry_weights * class_mask
    weighted_losses = entry_weights * tf.nn.sigmoid_cross_entropy_with_logits(
        labels=target_tensor, logits=prediction_tensor)
    if not self._anchorwise_output:
      return tf.reduce_sum(weighted_losses)
    return tf.reduce_sum(weighted_losses, 2)
class WeightedSoftmaxClassificationLoss(Loss):
  """Softmax loss function."""

  def __init__(self, anchorwise_output=False):
    """Constructor.

    Args:
      anchorwise_output: Whether to output loss per anchor (default False)
    """
    self._anchorwise_output = anchorwise_output

  def _compute_loss(self, prediction_tensor, target_tensor, weights):
    """Compute loss function.

    Args:
      prediction_tensor: A float tensor of shape [batch_size, num_anchors,
        num_classes] representing the predicted logits for each class
      target_tensor: A float tensor of shape [batch_size, num_anchors,
        num_classes] representing one-hot encoded classification targets
      weights: a float tensor of shape [batch_size, num_anchors]

    Returns:
      loss: a (scalar) tensor representing the value of the loss function
    """
    num_classes = prediction_tensor.get_shape().as_list()[-1]
    # Collapse batch and anchor dimensions: softmax cross entropy expects
    # a [num_rows, num_classes] layout.
    flat_labels = tf.reshape(target_tensor, [-1, num_classes])
    flat_logits = tf.reshape(prediction_tensor, [-1, num_classes])
    flat_losses = tf.nn.softmax_cross_entropy_with_logits(
        labels=flat_labels, logits=flat_logits)
    if self._anchorwise_output:
      # Restore the [batch_size, num_anchors] layout before weighting.
      return weights * tf.reshape(flat_losses, tf.shape(weights))
    return tf.reduce_sum(flat_losses * tf.reshape(weights, [-1]))
class BootstrappedSigmoidClassificationLoss(Loss):
  """Bootstrapped sigmoid cross entropy classification loss function.

  This loss uses a convex combination of training labels and the current
  model's predictions as training targets in the classification loss. The
  idea is that as the model improves over time, its predictions can be
  trusted more and we can use these predictions to mitigate the damage of
  noisy/incorrect labels, because incorrect labels are likely to be
  eventually highly inconsistent with other stimuli predicted to have the
  same label by the model.

  In "soft" bootstrapping, we use all predicted class probabilities, whereas
  in "hard" bootstrapping, we use the single class favored by the model.

  See also Training Deep Neural Networks On Noisy Labels with Bootstrapping
  by Reed et al. (ICLR 2015).
  """

  def __init__(self, alpha, bootstrap_type='soft', anchorwise_output=False):
    """Constructor.

    Args:
      alpha: a float32 scalar tensor between 0 and 1 representing
        interpolation weight
      bootstrap_type: set to either 'hard' or 'soft' (default)
      anchorwise_output: Outputs loss per anchor. (default False)

    Raises:
      ValueError: if bootstrap_type is not either 'hard' or 'soft'
    """
    # Membership test instead of chained != comparisons; same semantics.
    if bootstrap_type not in ('hard', 'soft'):
      raise ValueError('Unrecognized bootstrap_type: must be one of '
                       '\'hard\' or \'soft.\'')
    self._alpha = alpha
    self._bootstrap_type = bootstrap_type
    self._anchorwise_output = anchorwise_output

  def _compute_loss(self, prediction_tensor, target_tensor, weights):
    """Compute loss function.

    Args:
      prediction_tensor: A float tensor of shape [batch_size, num_anchors,
        num_classes] representing the predicted logits for each class
      target_tensor: A float tensor of shape [batch_size, num_anchors,
        num_classes] representing one-hot encoded classification targets
      weights: a float tensor of shape [batch_size, num_anchors]

    Returns:
      loss: a (scalar) tensor representing the value of the loss function
        or a float tensor of shape [batch_size, num_anchors]
    """
    if self._bootstrap_type == 'soft':
      # Soft bootstrapping: blend labels with predicted probabilities.
      bootstrap_target_tensor = self._alpha * target_tensor + (
          1.0 - self._alpha) * tf.sigmoid(prediction_tensor)
    else:
      # Hard bootstrapping: blend labels with thresholded (0/1) predictions.
      bootstrap_target_tensor = self._alpha * target_tensor + (
          1.0 - self._alpha) * tf.cast(
              tf.sigmoid(prediction_tensor) > 0.5, tf.float32)
    per_entry_cross_ent = (tf.nn.sigmoid_cross_entropy_with_logits(
        labels=bootstrap_target_tensor, logits=prediction_tensor))
    if self._anchorwise_output:
      return tf.reduce_sum(per_entry_cross_ent * tf.expand_dims(weights, 2), 2)
    return tf.reduce_sum(per_entry_cross_ent * tf.expand_dims(weights, 2))
class HardExampleMiner(object):
  """Hard example mining for regions in a list of images.

  Implements hard example mining to select a subset of regions to be
  back-propagated. For each image, selects the regions with highest losses,
  subject to the condition that a newly selected region cannot have
  an IOU > iou_threshold with any of the previously selected regions.
  This can be achieved by re-using a greedy non-maximum suppression algorithm.
  A constraint on the number of negatives mined per positive region can also be
  enforced.

  Reference papers: "Training Region-based Object Detectors with Online
  Hard Example Mining" (CVPR 2016) by Srivastava et al., and
  "SSD: Single Shot MultiBox Detector" (ECCV 2016) by Liu et al.
  """

  def __init__(self,
               num_hard_examples=64,
               iou_threshold=0.7,
               loss_type='both',
               cls_loss_weight=0.05,
               loc_loss_weight=0.06,
               max_negatives_per_positive=None,
               min_negatives_per_image=0):
    """Constructor.

    The hard example mining implemented by this class can replicate the
    behavior in the two aforementioned papers (Srivastava et al., and Liu et
    al). To replicate the A2 paper (Srivastava et al), num_hard_examples is
    set to a fixed parameter (64 by default) and iou_threshold is set to .7
    for running non-max-suppression the predicted boxes prior to hard mining.
    In order to replicate the SSD paper (Liu et al), num_hard_examples should
    be set to None, max_negatives_per_positive should be 3 and iou_threshold
    should be 1.0 (in order to effectively turn off NMS).

    Args:
      num_hard_examples: maximum number of hard examples to be
        selected per image (prior to enforcing max negative to positive ratio
        constraint).  If set to None, all examples obtained after NMS are
        considered.
      iou_threshold: minimum intersection over union for an example
        to be discarded during NMS.
      loss_type: use only classification losses ('cls'), only localization
        losses ('loc') or both losses ('both', the constructor default).
        In the last case, cls_loss_weight and loc_loss_weight are used to
        compute weighted sum of the two losses.
      cls_loss_weight: weight for classification loss.
      loc_loss_weight: weight for location loss.
      max_negatives_per_positive: maximum number of negatives to retain for
        each positive anchor. By default, num_negatives_per_positive is None,
        which means that we do not enforce a prespecified negative:positive
        ratio.  Note also that num_negatives_per_positives can be a float
        (and will be converted to be a float even if it is passed in
        otherwise).
      min_negatives_per_image: minimum number of negative anchors to sample
        for a given image. Setting this to a positive number allows sampling
        negatives in an image without any positive anchors and thus not
        biased towards at least one detection per image.
    """
    self._num_hard_examples = num_hard_examples
    self._iou_threshold = iou_threshold
    self._loss_type = loss_type
    self._cls_loss_weight = cls_loss_weight
    self._loc_loss_weight = loc_loss_weight
    self._max_negatives_per_positive = max_negatives_per_positive
    self._min_negatives_per_image = min_negatives_per_image
    if self._max_negatives_per_positive is not None:
      # Normalize to float so ratio arithmetic below is consistent.
      self._max_negatives_per_positive = float(self._max_negatives_per_positive)
    # Populated by __call__ when ratio-based subsampling runs; read by
    # summarize().
    self._num_positives_list = None
    self._num_negatives_list = None

  def __call__(self,
               location_losses,
               cls_losses,
               decoded_boxlist_list,
               match_list=None):
    """Computes localization and classification losses after hard mining.

    Args:
      location_losses: a float tensor of shape [num_images, num_anchors]
        representing anchorwise localization losses.
      cls_losses: a float tensor of shape [num_images, num_anchors]
        representing anchorwise classification losses.
      decoded_boxlist_list: a list of decoded BoxList representing location
        predictions for each image.
      match_list: an optional list of matcher.Match objects encoding the
        match between anchors and groundtruth boxes for each image of the
        batch, with rows of the Match objects corresponding to groundtruth
        boxes and columns corresponding to anchors. Match objects in
        match_list are used to reference which anchors are positive, negative
        or ignored. If self._max_negatives_per_positive exists, these are
        then used to enforce a prespecified negative to positive ratio.

    Returns:
      mined_location_loss: a float scalar with sum of localization losses
        from selected hard examples.
      mined_cls_loss: a float scalar with sum of classification losses from
        selected hard examples.

    Raises:
      ValueError: if location_losses, cls_losses and decoded_boxlist_list do
        not have compatible shapes (i.e., they must correspond to the same
        number of images).
      ValueError: if match_list is specified but its length does not match
        len(decoded_boxlist_list).
    """
    mined_location_losses = []
    mined_cls_losses = []
    # Split the batched loss tensors into one per-image tensor each.
    location_losses = tf.unstack(location_losses)
    cls_losses = tf.unstack(cls_losses)
    num_images = len(decoded_boxlist_list)
    if not match_list:
      match_list = num_images * [None]
    if not len(location_losses) == len(decoded_boxlist_list) == len(cls_losses):
      raise ValueError('location_losses, cls_losses and decoded_boxlist_list '
                       'do not have compatible shapes.')
    if not isinstance(match_list, list):
      raise ValueError('match_list must be a list.')
    if len(match_list) != len(decoded_boxlist_list):
      raise ValueError('match_list must either be None or have '
                       'length=len(decoded_boxlist_list).')
    num_positives_list = []
    num_negatives_list = []
    for ind, detection_boxlist in enumerate(decoded_boxlist_list):
      box_locations = detection_boxlist.get()
      match = match_list[ind]
      # Ranking score for NMS-based mining: cls loss by default, loc loss
      # for 'loc', weighted sum for 'both'.
      image_losses = cls_losses[ind]
      if self._loss_type == 'loc':
        image_losses = location_losses[ind]
      elif self._loss_type == 'both':
        image_losses *= self._cls_loss_weight
        image_losses += location_losses[ind] * self._loc_loss_weight
      if self._num_hard_examples is not None:
        num_hard_examples = self._num_hard_examples
      else:
        num_hard_examples = detection_boxlist.num_boxes()
      # NMS with losses as scores: greedily keeps highest-loss boxes that
      # do not overlap already-selected boxes above the IOU threshold.
      selected_indices = tf.image.non_max_suppression(
          box_locations, image_losses, num_hard_examples, self._iou_threshold)
      if self._max_negatives_per_positive is not None and match:
        (selected_indices, num_positives,
         num_negatives) = self._subsample_selection_to_desired_neg_pos_ratio(
             selected_indices, match, self._max_negatives_per_positive,
             self._min_negatives_per_image)
        num_positives_list.append(num_positives)
        num_negatives_list.append(num_negatives)
      mined_location_losses.append(
          tf.reduce_sum(tf.gather(location_losses[ind], selected_indices)))
      mined_cls_losses.append(
          tf.reduce_sum(tf.gather(cls_losses[ind], selected_indices)))
    location_loss = tf.reduce_sum(tf.stack(mined_location_losses))
    cls_loss = tf.reduce_sum(tf.stack(mined_cls_losses))
    # NOTE(review): `match` here is the leftover loop variable, i.e. the
    # match for the LAST image only; if it is None the positive/negative
    # counts collected above are silently discarded. Looks like this should
    # test `any(match_list)` (or the subsampling condition) — confirm.
    if match and self._max_negatives_per_positive:
      self._num_positives_list = num_positives_list
      self._num_negatives_list = num_negatives_list
    return (location_loss, cls_loss)

  def summarize(self):
    """Summarize the number of positives and negatives after mining."""
    if self._num_positives_list and self._num_negatives_list:
      avg_num_positives = tf.reduce_mean(tf.to_float(self._num_positives_list))
      avg_num_negatives = tf.reduce_mean(tf.to_float(self._num_negatives_list))
      tf.summary.scalar('HardExampleMiner/NumPositives', avg_num_positives)
      tf.summary.scalar('HardExampleMiner/NumNegatives', avg_num_negatives)

  def _subsample_selection_to_desired_neg_pos_ratio(self,
                                                    indices,
                                                    match,
                                                    max_negatives_per_positive,
                                                    min_negatives_per_image=0):
    """Subsample a collection of selected indices to a desired neg:pos ratio.

    This function takes a subset of M indices (indexing into a large anchor
    collection of N anchors where M<N) which are labeled as positive/negative
    via a Match object (matched indices are positive, unmatched indices
    are negative).  It returns a subset of the provided indices retaining all
    positives as well as up to the first K negatives, where:
      K=floor(num_negative_per_positive * num_positives).

    For example, if indices=[2, 4, 5, 7, 9, 10] (indexing into 12 anchors),
    with positives=[2, 5] and negatives=[4, 7, 9, 10] and
    num_negatives_per_positive=1, then the returned subset of indices
    is [2, 4, 5, 7].

    Args:
      indices: An integer tensor of shape [M] representing a collection
        of selected anchor indices
      match: A matcher.Match object encoding the match between anchors and
        groundtruth boxes for a given image, with rows of the Match objects
        corresponding to groundtruth boxes and columns corresponding to
        anchors.
      max_negatives_per_positive: (float) maximum number of negatives for
        each positive anchor.
      min_negatives_per_image: minimum number of negative anchors for a given
        image. Allow sampling negatives in image without any positive
        anchors.

    Returns:
      selected_indices: An integer tensor of shape [M'] representing a
        collection of selected anchor indices with M' <= M.
      num_positives: An integer tensor representing the number of positive
        examples in selected set of indices.
      num_negatives: An integer tensor representing the number of negative
        examples in selected set of indices.
    """
    positives_indicator = tf.gather(match.matched_column_indicator(), indices)
    negatives_indicator = tf.gather(match.unmatched_column_indicator(),
                                    indices)
    num_positives = tf.reduce_sum(tf.to_int32(positives_indicator))
    # Negative budget: ratio * positives, but never below the per-image floor.
    max_negatives = tf.maximum(min_negatives_per_image,
                               tf.to_int32(max_negatives_per_positive *
                                           tf.to_float(num_positives)))
    # Keep the first `max_negatives` negatives in selection (NMS) order;
    # cumsum counts how many negatives precede-and-include each position.
    topk_negatives_indicator = tf.less_equal(
        tf.cumsum(tf.to_int32(negatives_indicator)), max_negatives)
    subsampled_selection_indices = tf.where(
        tf.logical_or(positives_indicator, topk_negatives_indicator))
    num_negatives = tf.size(subsampled_selection_indices) - num_positives
    return (tf.reshape(tf.gather(indices, subsampled_selection_indices), [-1]),
            num_positives, num_negatives)
object_detection/core/losses_test.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for google3.research.vale.object_detection.losses."""
import
math
import
numpy
as
np
import
tensorflow
as
tf
from
object_detection.core
import
box_list
from
object_detection.core
import
losses
from
object_detection.core
import
matcher
class WeightedL2LocalizationLossTest(tf.test.TestCase):
  """Tests for losses.WeightedL2LocalizationLoss."""

  def testReturnsCorrectLoss(self):
    # Predictions differ from targets by 1 everywhere; only the first 5 of
    # 10 anchors per image are weighted, so loss = 3 * 5 * 4 * 0.5 * 1^2.
    batch_size = 3
    num_anchors = 10
    code_size = 4
    prediction_tensor = tf.ones([batch_size, num_anchors, code_size])
    target_tensor = tf.zeros([batch_size, num_anchors, code_size])
    weights = tf.constant([[1, 1, 1, 1, 1, 0, 0, 0, 0, 0],
                           [1, 1, 1, 1, 1, 0, 0, 0, 0, 0],
                           [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]], tf.float32)
    loss_op = losses.WeightedL2LocalizationLoss()
    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
    expected_loss = (3 * 5 * 4) / 2.0
    with self.test_session() as sess:
      loss_output = sess.run(loss)
      self.assertAllClose(loss_output, expected_loss)

  def testReturnsCorrectAnchorwiseLoss(self):
    # With anchorwise_output=True, each anchor contributes 0.5 * 4 = 2.
    batch_size = 3
    num_anchors = 16
    code_size = 4
    prediction_tensor = tf.ones([batch_size, num_anchors, code_size])
    target_tensor = tf.zeros([batch_size, num_anchors, code_size])
    weights = tf.ones([batch_size, num_anchors])
    loss_op = losses.WeightedL2LocalizationLoss(anchorwise_output=True)
    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
    expected_loss = np.ones((batch_size, num_anchors)) * 2
    with self.test_session() as sess:
      loss_output = sess.run(loss)
      self.assertAllClose(loss_output, expected_loss)

  def testReturnsCorrectLossSum(self):
    # With unit weights the scalar loss equals tf.nn.l2_loss of the diff.
    batch_size = 3
    num_anchors = 16
    code_size = 4
    prediction_tensor = tf.ones([batch_size, num_anchors, code_size])
    target_tensor = tf.zeros([batch_size, num_anchors, code_size])
    weights = tf.ones([batch_size, num_anchors])
    loss_op = losses.WeightedL2LocalizationLoss(anchorwise_output=False)
    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
    expected_loss = tf.nn.l2_loss(prediction_tensor - target_tensor)
    with self.test_session() as sess:
      loss_output = sess.run(loss)
      expected_loss_output = sess.run(expected_loss)
      self.assertAllClose(loss_output, expected_loss_output)

  def testReturnsCorrectNanLoss(self):
    # Half of each target code is NaN; with ignore_nan_targets=True those
    # entries are masked out, so the loss matches the all-zero-target case
    # over the remaining half.
    batch_size = 3
    num_anchors = 10
    code_size = 4
    prediction_tensor = tf.ones([batch_size, num_anchors, code_size])
    target_tensor = tf.concat([
        tf.zeros([batch_size, num_anchors, code_size / 2]),
        tf.ones([batch_size, num_anchors, code_size / 2]) * np.nan
    ], axis=2)
    weights = tf.ones([batch_size, num_anchors])
    loss_op = losses.WeightedL2LocalizationLoss()
    loss = loss_op(prediction_tensor, target_tensor, weights=weights,
                   ignore_nan_targets=True)
    expected_loss = (3 * 5 * 4) / 2.0
    with self.test_session() as sess:
      loss_output = sess.run(loss)
      self.assertAllClose(loss_output, expected_loss)
class WeightedSmoothL1LocalizationLossTest(tf.test.TestCase):
  """Tests for losses.WeightedSmoothL1LocalizationLoss."""

  def testReturnsCorrectLoss(self):
    # Entries below 1 use the quadratic branch (.5 x^2); entries >= 1 use
    # the linear branch (|x| - .5); anchor sums are then weighted.
    batch_size = 2
    num_anchors = 3
    code_size = 4
    prediction_tensor = tf.constant([[[2.5, 0, .4, 0],
                                      [0, 0, 0, 0],
                                      [0, 2.5, 0, .4]],
                                     [[3.5, 0, 0, 0],
                                      [0, .4, 0, .9],
                                      [0, 0, 1.5, 0]]], tf.float32)
    target_tensor = tf.zeros([batch_size, num_anchors, code_size])
    weights = tf.constant([[2, 1, 1],
                           [0, 3, 0]], tf.float32)
    loss_op = losses.WeightedSmoothL1LocalizationLoss()
    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
    exp_loss = 7.695
    with self.test_session() as sess:
      loss_output = sess.run(loss)
      self.assertAllClose(loss_output, exp_loss)
class WeightedIOULocalizationLossTest(tf.test.TestCase):
  """Tests for losses.WeightedIOULocalizationLoss."""

  def testReturnsCorrectLoss(self):
    # First two boxes match exactly (IOU=1, loss 0); the third pair is
    # disjoint (IOU=0, loss 1) and carries weight 2 => total loss 2.
    prediction_tensor = tf.constant([[[1.5, 0, 2.4, 1],
                                      [0, 0, 1, 1],
                                      [0, 0, .5, .25]]])
    target_tensor = tf.constant([[[1.5, 0, 2.4, 1],
                                  [0, 0, 1, 1],
                                  [50, 50, 500.5, 100.25]]])
    weights = [[1.0, .5, 2.0]]
    loss_op = losses.WeightedIOULocalizationLoss()
    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
    exp_loss = 2.0
    with self.test_session() as sess:
      loss_output = sess.run(loss)
      self.assertAllClose(loss_output, exp_loss)
class WeightedSigmoidClassificationLossTest(tf.test.TestCase):
  """Tests for losses.WeightedSigmoidClassificationLoss.

  Logits of +/-100 saturate the sigmoid, so only logit-0 entries paired
  with a 0/1 target contribute -log(.5) each.
  """

  def testReturnsCorrectLoss(self):
    prediction_tensor = tf.constant([[[-100, 100, -100],
                                      [100, -100, -100],
                                      [100, 0, -100],
                                      [-100, -100, 100]],
                                     [[-100, 0, 100],
                                      [-100, 100, -100],
                                      [100, 100, 100],
                                      [0, 0, -1]]], tf.float32)
    target_tensor = tf.constant([[[0, 1, 0],
                                  [1, 0, 0],
                                  [1, 0, 0],
                                  [0, 0, 1]],
                                 [[0, 0, 1],
                                  [0, 1, 0],
                                  [1, 1, 1],
                                  [1, 0, 0]]], tf.float32)
    weights = tf.constant([[1, 1, 1, 1],
                           [1, 1, 1, 0]], tf.float32)
    loss_op = losses.WeightedSigmoidClassificationLoss()
    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
    # Two logit-0 entries survive weighting, each worth -log(.5).
    exp_loss = -2 * math.log(.5)
    with self.test_session() as sess:
      loss_output = sess.run(loss)
      self.assertAllClose(loss_output, exp_loss)

  def testReturnsCorrectAnchorWiseLoss(self):
    prediction_tensor = tf.constant([[[-100, 100, -100],
                                      [100, -100, -100],
                                      [100, 0, -100],
                                      [-100, -100, 100]],
                                     [[-100, 0, 100],
                                      [-100, 100, -100],
                                      [100, 100, 100],
                                      [0, 0, -1]]], tf.float32)
    target_tensor = tf.constant([[[0, 1, 0],
                                  [1, 0, 0],
                                  [1, 0, 0],
                                  [0, 0, 1]],
                                 [[0, 0, 1],
                                  [0, 1, 0],
                                  [1, 1, 1],
                                  [1, 0, 0]]], tf.float32)
    weights = tf.constant([[1, 1, 1, 1],
                           [1, 1, 1, 0]], tf.float32)
    # anchorwise_output=True: expect a [batch, anchors] loss matrix.
    loss_op = losses.WeightedSigmoidClassificationLoss(True)
    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
    exp_loss = np.matrix([[0, 0, -math.log(.5), 0],
                          [-math.log(.5), 0, 0, 0]])
    with self.test_session() as sess:
      loss_output = sess.run(loss)
      self.assertAllClose(loss_output, exp_loss)

  def testReturnsCorrectLossWithClassIndices(self):
    # Fourth class column would add extra loss; restricting class_indices
    # to [0, 1, 2] must mask it out entirely.
    prediction_tensor = tf.constant([[[-100, 100, -100, 100],
                                      [100, -100, -100, -100],
                                      [100, 0, -100, 100],
                                      [-100, -100, 100, -100]],
                                     [[-100, 0, 100, 100],
                                      [-100, 100, -100, 100],
                                      [100, 100, 100, 100],
                                      [0, 0, -1, 100]]], tf.float32)
    target_tensor = tf.constant([[[0, 1, 0, 0],
                                  [1, 0, 0, 1],
                                  [1, 0, 0, 0],
                                  [0, 0, 1, 1]],
                                 [[0, 0, 1, 0],
                                  [0, 1, 0, 0],
                                  [1, 1, 1, 0],
                                  [1, 0, 0, 0]]], tf.float32)
    weights = tf.constant([[1, 1, 1, 1],
                           [1, 1, 1, 0]], tf.float32)
    # Ignores the last class.
    class_indices = tf.constant([0, 1, 2], tf.int32)
    loss_op = losses.WeightedSigmoidClassificationLoss(True)
    loss = loss_op(prediction_tensor, target_tensor, weights=weights,
                   class_indices=class_indices)
    exp_loss = np.matrix([[0, 0, -math.log(.5), 0],
                          [-math.log(.5), 0, 0, 0]])
    with self.test_session() as sess:
      loss_output = sess.run(loss)
      self.assertAllClose(loss_output, exp_loss)
class WeightedSoftmaxClassificationLossTest(tf.test.TestCase):
  """Tests for losses.WeightedSoftmaxClassificationLoss.

  Logit gaps of 200 make the softmax effectively one-hot; only anchors with
  two tied top logits contribute -log(.5), scaled by their weight.
  """

  def testReturnsCorrectLoss(self):
    prediction_tensor = tf.constant([[[-100, 100, -100],
                                      [100, -100, -100],
                                      [0, 0, -100],
                                      [-100, -100, 100]],
                                     [[-100, 0, 0],
                                      [-100, 100, -100],
                                      [-100, 100, -100],
                                      [100, -100, -100]]], tf.float32)
    target_tensor = tf.constant([[[0, 1, 0],
                                  [1, 0, 0],
                                  [1, 0, 0],
                                  [0, 0, 1]],
                                 [[0, 0, 1],
                                  [0, 1, 0],
                                  [0, 1, 0],
                                  [1, 0, 0]]], tf.float32)
    weights = tf.constant([[1, 1, .5, 1],
                           [1, 1, 1, 0]], tf.float32)
    loss_op = losses.WeightedSoftmaxClassificationLoss()
    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
    # One tied anchor at weight .5 plus one at weight 1 => 1.5 * -log(.5).
    exp_loss = -1.5 * math.log(.5)
    with self.test_session() as sess:
      loss_output = sess.run(loss)
      self.assertAllClose(loss_output, exp_loss)

  def testReturnsCorrectAnchorWiseLoss(self):
    prediction_tensor = tf.constant([[[-100, 100, -100],
                                      [100, -100, -100],
                                      [0, 0, -100],
                                      [-100, -100, 100]],
                                     [[-100, 0, 0],
                                      [-100, 100, -100],
                                      [-100, 100, -100],
                                      [100, -100, -100]]], tf.float32)
    target_tensor = tf.constant([[[0, 1, 0],
                                  [1, 0, 0],
                                  [1, 0, 0],
                                  [0, 0, 1]],
                                 [[0, 0, 1],
                                  [0, 1, 0],
                                  [0, 1, 0],
                                  [1, 0, 0]]], tf.float32)
    weights = tf.constant([[1, 1, .5, 1],
                           [1, 1, 1, 0]], tf.float32)
    # anchorwise_output=True: expect per-anchor weighted losses.
    loss_op = losses.WeightedSoftmaxClassificationLoss(True)
    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
    exp_loss = np.matrix([[0, 0, -0.5 * math.log(.5), 0],
                          [-math.log(.5), 0, 0, 0]])
    with self.test_session() as sess:
      loss_output = sess.run(loss)
      self.assertAllClose(loss_output, exp_loss)
class BootstrappedSigmoidClassificationLossTest(tf.test.TestCase):
  """Tests for losses.BootstrappedSigmoidClassificationLoss."""

  def testReturnsCorrectLossSoftBootstrapping(self):
    # Saturated logits make predictions agree with the labels except for a
    # single logit-0 entry, which contributes -log(.5) under alpha=.5.
    prediction_tensor = tf.constant([[[-100, 100, 0],
                                      [100, -100, -100],
                                      [100, -100, -100],
                                      [-100, -100, 100]],
                                     [[-100, -100, 100],
                                      [-100, 100, -100],
                                      [100, 100, 100],
                                      [0, 0, -1]]], tf.float32)
    target_tensor = tf.constant([[[0, 1, 0],
                                  [1, 0, 0],
                                  [1, 0, 0],
                                  [0, 0, 1]],
                                 [[0, 0, 1],
                                  [0, 1, 0],
                                  [1, 1, 1],
                                  [1, 0, 0]]], tf.float32)
    weights = tf.constant([[1, 1, 1, 1],
                           [1, 1, 1, 0]], tf.float32)
    alpha = tf.constant(.5, tf.float32)
    loss_op = losses.BootstrappedSigmoidClassificationLoss(
        alpha, bootstrap_type='soft')
    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
    exp_loss = -math.log(.5)
    with self.test_session() as sess:
      loss_output = sess.run(loss)
      self.assertAllClose(loss_output, exp_loss)

  def testReturnsCorrectLossHardBootstrapping(self):
    # Same setup as the soft case; hard bootstrapping thresholds the
    # sigmoid at .5 and yields the same -log(.5) total here.
    prediction_tensor = tf.constant([[[-100, 100, 0],
                                      [100, -100, -100],
                                      [100, -100, -100],
                                      [-100, -100, 100]],
                                     [[-100, -100, 100],
                                      [-100, 100, -100],
                                      [100, 100, 100],
                                      [0, 0, -1]]], tf.float32)
    target_tensor = tf.constant([[[0, 1, 0],
                                  [1, 0, 0],
                                  [1, 0, 0],
                                  [0, 0, 1]],
                                 [[0, 0, 1],
                                  [0, 1, 0],
                                  [1, 1, 1],
                                  [1, 0, 0]]], tf.float32)
    weights = tf.constant([[1, 1, 1, 1],
                           [1, 1, 1, 0]], tf.float32)
    alpha = tf.constant(.5, tf.float32)
    loss_op = losses.BootstrappedSigmoidClassificationLoss(
        alpha, bootstrap_type='hard')
    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
    exp_loss = -math.log(.5)
    with self.test_session() as sess:
      loss_output = sess.run(loss)
      self.assertAllClose(loss_output, exp_loss)

  def testReturnsCorrectAnchorWiseLoss(self):
    # anchorwise_output=True: expect a [batch, anchors] loss matrix.
    prediction_tensor = tf.constant([[[-100, 100, -100],
                                      [100, -100, -100],
                                      [100, 0, -100],
                                      [-100, -100, 100]],
                                     [[-100, 0, 100],
                                      [-100, 100, -100],
                                      [100, 100, 100],
                                      [0, 0, -1]]], tf.float32)
    target_tensor = tf.constant([[[0, 1, 0],
                                  [1, 0, 0],
                                  [1, 0, 0],
                                  [0, 0, 1]],
                                 [[0, 0, 1],
                                  [0, 1, 0],
                                  [1, 1, 1],
                                  [1, 0, 0]]], tf.float32)
    weights = tf.constant([[1, 1, 1, 1],
                           [1, 1, 1, 0]], tf.float32)
    alpha = tf.constant(.5, tf.float32)
    loss_op = losses.BootstrappedSigmoidClassificationLoss(
        alpha, bootstrap_type='hard', anchorwise_output=True)
    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
    exp_loss = np.matrix([[0, 0, -math.log(.5), 0],
                          [-math.log(.5), 0, 0, 0]])
    with self.test_session() as sess:
      loss_output = sess.run(loss)
      self.assertAllClose(loss_output, exp_loss)
class HardExampleMinerTest(tf.test.TestCase):
  """Tests for losses.HardExampleMiner."""

  def testHardMiningWithSingleLossType(self):
    """Mining with loss_type='loc' ranks anchors by location loss only."""
    location_losses = tf.constant([[100, 90, 80, 0],
                                   [0, 1, 2, 3]], tf.float32)
    cls_losses = tf.constant([[0, 10, 50, 110],
                              [9, 6, 3, 0]], tf.float32)
    # All boxes identical, so IOU-based suppression plays no role here.
    box_corners = tf.constant([[0.1, 0.1, 0.9, 0.9],
                               [0.1, 0.1, 0.9, 0.9],
                               [0.1, 0.1, 0.9, 0.9],
                               [0.1, 0.1, 0.9, 0.9]], tf.float32)
    decoded_boxlist_list = []
    decoded_boxlist_list.append(box_list.BoxList(box_corners))
    decoded_boxlist_list.append(box_list.BoxList(box_corners))
    # Uses only location loss to select hard examples
    loss_op = losses.HardExampleMiner(num_hard_examples=1,
                                      iou_threshold=0.0,
                                      loss_type='loc',
                                      cls_loss_weight=1,
                                      loc_loss_weight=1)
    (loc_loss, cls_loss) = loss_op(location_losses, cls_losses,
                                   decoded_boxlist_list)
    # Top-1 location loss per image is 100 and 3; the corresponding
    # classification losses at those anchors are 0 and 0.
    exp_loc_loss = 100 + 3
    exp_cls_loss = 0 + 0
    with self.test_session() as sess:
      loc_loss_output = sess.run(loc_loss)
      self.assertAllClose(loc_loss_output, exp_loc_loss)
      cls_loss_output = sess.run(cls_loss)
      self.assertAllClose(cls_loss_output, exp_cls_loss)

  def testHardMiningWithBothLossType(self):
    """Mining with loss_type='both' ranks anchors by the combined loss."""
    location_losses = tf.constant([[100, 90, 80, 0],
                                   [0, 1, 2, 3]], tf.float32)
    cls_losses = tf.constant([[0, 10, 50, 110],
                              [9, 6, 3, 0]], tf.float32)
    box_corners = tf.constant([[0.1, 0.1, 0.9, 0.9],
                               [0.1, 0.1, 0.9, 0.9],
                               [0.1, 0.1, 0.9, 0.9],
                               [0.1, 0.1, 0.9, 0.9]], tf.float32)
    decoded_boxlist_list = []
    decoded_boxlist_list.append(box_list.BoxList(box_corners))
    decoded_boxlist_list.append(box_list.BoxList(box_corners))
    loss_op = losses.HardExampleMiner(num_hard_examples=1,
                                      iou_threshold=0.0,
                                      loss_type='both',
                                      cls_loss_weight=1,
                                      loc_loss_weight=1)
    (loc_loss, cls_loss) = loss_op(location_losses, cls_losses,
                                   decoded_boxlist_list)
    # Anchor 2 (80 + 50) wins in image 0; anchor 0 (0 + 9) wins in image 1.
    exp_loc_loss = 80 + 0
    exp_cls_loss = 50 + 9
    with self.test_session() as sess:
      loc_loss_output = sess.run(loc_loss)
      self.assertAllClose(loc_loss_output, exp_loc_loss)
      cls_loss_output = sess.run(cls_loss)
      self.assertAllClose(cls_loss_output, exp_cls_loss)

  def testHardMiningNMS(self):
    """Checks that overlapping boxes are suppressed during mining."""
    location_losses = tf.constant([[100, 90, 80, 0],
                                   [0, 1, 2, 3]], tf.float32)
    cls_losses = tf.constant([[0, 10, 50, 110],
                              [9, 6, 3, 0]], tf.float32)
    # Box 1 does not overlap the others; boxes 0, 2, 3 are identical so the
    # iou_threshold=0.5 suppression keeps only one of them per image.
    box_corners = tf.constant([[0.1, 0.1, 0.9, 0.9],
                               [0.9, 0.9, 0.99, 0.99],
                               [0.1, 0.1, 0.9, 0.9],
                               [0.1, 0.1, 0.9, 0.9]], tf.float32)
    decoded_boxlist_list = []
    decoded_boxlist_list.append(box_list.BoxList(box_corners))
    decoded_boxlist_list.append(box_list.BoxList(box_corners))
    loss_op = losses.HardExampleMiner(num_hard_examples=2,
                                      iou_threshold=0.5,
                                      loss_type='cls',
                                      cls_loss_weight=1,
                                      loc_loss_weight=1)
    (loc_loss, cls_loss) = loss_op(location_losses, cls_losses,
                                   decoded_boxlist_list)
    exp_loc_loss = 0 + 90 + 0 + 1
    exp_cls_loss = 110 + 10 + 9 + 6
    with self.test_session() as sess:
      loc_loss_output = sess.run(loc_loss)
      self.assertAllClose(loc_loss_output, exp_loc_loss)
      cls_loss_output = sess.run(cls_loss)
      self.assertAllClose(cls_loss_output, exp_cls_loss)

  def testEnforceNegativesPerPositiveRatio(self):
    """Checks capping of mined negatives via max_negatives_per_positive."""
    location_losses = tf.constant([[100, 90, 80, 0,
                                    1, 2, 3, 10,
                                    20, 100, 20, 3]], tf.float32)
    cls_losses = tf.constant([[0, 0, 100, 0,
                               90, 70, 0, 60,
                               0, 17, 13, 0]], tf.float32)
    box_corners = tf.constant([[0.0, 0.0, 0.2, 0.1],
                               [0.0, 0.0, 0.2, 0.1],
                               [0.0, 0.0, 0.2, 0.1],
                               [0.0, 0.0, 0.2, 0.1],
                               [0.0, 0.0, 0.5, 0.1],
                               [0.0, 0.0, 0.6, 0.1],
                               [0.0, 0.0, 0.2, 0.1],
                               [0.0, 0.0, 0.8, 0.1],
                               [0.0, 0.0, 0.2, 0.1],
                               [0.0, 0.0, 1.0, 0.1],
                               [0.0, 0.0, 1.1, 0.1],
                               [0.0, 0.0, 0.2, 0.1]], tf.float32)
    # match_results >= 0 marks positives (anchors 0, 2, 5, 11); -1 marks
    # negatives.  See matcher.Match for the encoding.
    match_results = tf.constant([2, -1, 0, -1, -1, 1, -1, -1, -1, -1, -1, 3])
    match_list = [matcher.Match(match_results)]
    decoded_boxlist_list = []
    decoded_boxlist_list.append(box_list.BoxList(box_corners))
    # Each ratio admits progressively more negatives alongside the positives.
    max_negatives_per_positive_list = [0.0, 0.5, 1.0, 1.5, 10]
    exp_loc_loss_list = [80 + 2,
                         80 + 1 + 2,
                         80 + 1 + 2 + 10,
                         80 + 1 + 2 + 10 + 100,
                         80 + 1 + 2 + 10 + 100 + 20]
    exp_cls_loss_list = [100 + 70,
                         100 + 90 + 70,
                         100 + 90 + 70 + 60,
                         100 + 90 + 70 + 60 + 17,
                         100 + 90 + 70 + 60 + 17 + 13]
    for max_negatives_per_positive, exp_loc_loss, exp_cls_loss in zip(
        max_negatives_per_positive_list, exp_loc_loss_list, exp_cls_loss_list):
      loss_op = losses.HardExampleMiner(
          num_hard_examples=None, iou_threshold=0.9999, loss_type='cls',
          cls_loss_weight=1, loc_loss_weight=1,
          max_negatives_per_positive=max_negatives_per_positive)
      (loc_loss, cls_loss) = loss_op(location_losses, cls_losses,
                                     decoded_boxlist_list, match_list)
      # Also exercises the summarize() code path.
      loss_op.summarize()
      with self.test_session() as sess:
        loc_loss_output = sess.run(loc_loss)
        self.assertAllClose(loc_loss_output, exp_loc_loss)
        cls_loss_output = sess.run(cls_loss)
        self.assertAllClose(cls_loss_output, exp_cls_loss)

  def testEnforceNegativesPerPositiveRatioWithMinNegativesPerImage(self):
    """Checks the min_negatives_per_image floor when there are no positives."""
    location_losses = tf.constant([[100, 90, 80, 0,
                                    1, 2, 3, 10,
                                    20, 100, 20, 3]], tf.float32)
    cls_losses = tf.constant([[0, 0, 100, 0,
                               90, 70, 0, 60,
                               0, 17, 13, 0]], tf.float32)
    box_corners = tf.constant([[0.0, 0.0, 0.2, 0.1],
                               [0.0, 0.0, 0.2, 0.1],
                               [0.0, 0.0, 0.2, 0.1],
                               [0.0, 0.0, 0.2, 0.1],
                               [0.0, 0.0, 0.5, 0.1],
                               [0.0, 0.0, 0.6, 0.1],
                               [0.0, 0.0, 0.2, 0.1],
                               [0.0, 0.0, 0.8, 0.1],
                               [0.0, 0.0, 0.2, 0.1],
                               [0.0, 0.0, 1.0, 0.1],
                               [0.0, 0.0, 1.1, 0.1],
                               [0.0, 0.0, 0.2, 0.1]], tf.float32)
    # Every anchor is a negative, so only min_negatives_per_image controls
    # how many examples are mined.
    match_results = tf.constant([-1] * 12)
    match_list = [matcher.Match(match_results)]
    decoded_boxlist_list = []
    decoded_boxlist_list.append(box_list.BoxList(box_corners))
    min_negatives_per_image_list = [0, 1, 2, 4, 5, 6]
    exp_loc_loss_list = [0,
                         80,
                         80 + 1,
                         80 + 1 + 2 + 10,
                         80 + 1 + 2 + 10 + 100,
                         80 + 1 + 2 + 10 + 100 + 20]
    exp_cls_loss_list = [0,
                         100,
                         100 + 90,
                         100 + 90 + 70 + 60,
                         100 + 90 + 70 + 60 + 17,
                         100 + 90 + 70 + 60 + 17 + 13]
    for min_negatives_per_image, exp_loc_loss, exp_cls_loss in zip(
        min_negatives_per_image_list, exp_loc_loss_list, exp_cls_loss_list):
      loss_op = losses.HardExampleMiner(
          num_hard_examples=None, iou_threshold=0.9999, loss_type='cls',
          cls_loss_weight=1, loc_loss_weight=1,
          max_negatives_per_positive=3,
          min_negatives_per_image=min_negatives_per_image)
      (loc_loss, cls_loss) = loss_op(location_losses, cls_losses,
                                     decoded_boxlist_list, match_list)
      with self.test_session() as sess:
        loc_loss_output = sess.run(loc_loss)
        self.assertAllClose(loc_loss_output, exp_loc_loss)
        cls_loss_output = sess.run(cls_loss)
        self.assertAllClose(cls_loss_output, exp_cls_loss)
# Run all test cases in this module when executed directly.
if __name__ == '__main__':
  tf.test.main()
object_detection/core/matcher.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Matcher interface and Match class.
This module defines the Matcher interface and the Match object. The job of the
matcher is to match row and column indices based on the similarity matrix and
other optional parameters. Each column is matched to at most one row. There
are three possibilities for the matching:
1) match: A column matches a row.
2) no_match: A column does not match any row.
3) ignore: A column that is neither 'match' nor no_match.
The ignore case is regularly encountered in object detection: when an anchor has
a relatively small overlap with a ground-truth box, one neither wants to
consider this box a positive example (match) nor a negative example (no match).
The Match class is used to store the match results and it provides simple apis
to query the results.
"""
from
abc
import
ABCMeta
from
abc
import
abstractmethod
import
tensorflow
as
tf
class Match(object):
  """Class to store results from the matcher.

  This class is used to store the results from the matcher. It provides
  convenient methods to query the matching results.
  """

  def __init__(self, match_results):
    """Constructs a Match object.

    Args:
      match_results: Integer tensor of shape [N] with (1) match_results[i]>=0,
        meaning that column i is matched with row match_results[i].
        (2) match_results[i]=-1, meaning that column i is not matched.
        (3) match_results[i]=-2, meaning that column i is ignored.

    Raises:
      ValueError: if match_results does not have rank 1 or is not an int32
        tensor.
    """
    if match_results.shape.ndims != 1:
      raise ValueError('match_results should have rank 1')
    if match_results.dtype != tf.int32:
      # Only int32 is accepted; the previous message incorrectly claimed
      # int64 was allowed as well.
      raise ValueError('match_results should be an int32 tensor')
    self._match_results = match_results

  @property
  def match_results(self):
    """The accessor for match results.

    Returns:
      the tensor which encodes the match results.
    """
    return self._match_results

  def matched_column_indices(self):
    """Returns column indices that match to some row.

    The indices returned by this op are always sorted in increasing order.

    Returns:
      column_indices: int32 tensor of shape [K] with column indices.
    """
    return self._reshape_and_cast(tf.where(tf.greater(self._match_results, -1)))

  def matched_column_indicator(self):
    """Returns column indices that are matched.

    Returns:
      column_indices: int32 tensor of shape [K] with column indices.
    """
    return tf.greater_equal(self._match_results, 0)

  def num_matched_columns(self):
    """Returns number (int32 scalar tensor) of matched columns."""
    return tf.size(self.matched_column_indices())

  def unmatched_column_indices(self):
    """Returns column indices that do not match any row.

    The indices returned by this op are always sorted in increasing order.

    Returns:
      column_indices: int32 tensor of shape [K] with column indices.
    """
    return self._reshape_and_cast(tf.where(tf.equal(self._match_results, -1)))

  def unmatched_column_indicator(self):
    """Returns column indices that are unmatched.

    Returns:
      column_indices: int32 tensor of shape [K] with column indices.
    """
    return tf.equal(self._match_results, -1)

  def num_unmatched_columns(self):
    """Returns number (int32 scalar tensor) of unmatched columns."""
    return tf.size(self.unmatched_column_indices())

  def ignored_column_indices(self):
    """Returns column indices that are ignored (neither Matched nor Unmatched).

    The indices returned by this op are always sorted in increasing order.

    Returns:
      column_indices: int32 tensor of shape [K] with column indices.
    """
    return self._reshape_and_cast(tf.where(self.ignored_column_indicator()))

  def ignored_column_indicator(self):
    """Returns boolean column indicator where True means the column is ignored.

    Returns:
      column_indicator: boolean vector which is True for all ignored column
      indices.
    """
    return tf.equal(self._match_results, -2)

  def num_ignored_columns(self):
    """Returns number (int32 scalar tensor) of ignored columns."""
    return tf.size(self.ignored_column_indices())

  def unmatched_or_ignored_column_indices(self):
    """Returns column indices that are unmatched or ignored.

    The indices returned by this op are always sorted in increasing order.

    Returns:
      column_indices: int32 tensor of shape [K] with column indices.
    """
    return self._reshape_and_cast(tf.where(tf.greater(0, self._match_results)))

  def matched_row_indices(self):
    """Returns row indices that match some column.

    The indices returned by this op are ordered so as to be in correspondence
    with the output of matched_column_indicator().  For example if
    self.matched_column_indicator() is [0,2], and self.matched_row_indices() is
    [7, 3], then we know that column 0 was matched to row 7 and column 2 was
    matched to row 3.

    Returns:
      row_indices: int32 tensor of shape [K] with row indices.
    """
    return self._reshape_and_cast(
        tf.gather(self._match_results, self.matched_column_indices()))

  def _reshape_and_cast(self, t):
    """Flattens `t` to rank 1 and casts it to int32."""
    return tf.cast(tf.reshape(t, [-1]), tf.int32)
class Matcher(object):
  """Abstract base class for matcher.
  """
  __metaclass__ = ABCMeta

  def match(self, similarity_matrix, scope=None, **params):
    """Computes matches among row and column indices and returns the result.

    Computes matches among the row and column indices based on the similarity
    matrix and optional arguments.

    Args:
      similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
        where higher value means more similar.
      scope: Op scope name. Defaults to 'Match' if None.
      **params: Additional keyword arguments for specific implementations of
        the Matcher.

    Returns:
      A Match object with the results of matching.
    """
    with tf.name_scope(scope, 'Match', [similarity_matrix, params]) as scope:
      # Delegate the actual matching to the subclass and wrap the raw
      # result tensor in a Match object for convenient querying.
      match_results = self._match(similarity_matrix, **params)
      return Match(match_results)

  @abstractmethod
  def _match(self, similarity_matrix, **params):
    """Method to be overriden by implementations.

    Args:
      similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
        where higher value means more similar.
      **params: Additional keyword arguments for specific implementations of
        the Matcher.

    Returns:
      match_results: Integer tensor of shape [M]: match_results[i]>=0 means
        that column i is matched to row match_results[i], match_results[i]=-1
        means that the column is not matched. match_results[i]=-2 means that
        the column is ignored (usually this happens when there is a very weak
        match which one neither wants as positive nor negative example).
    """
    pass
object_detection/core/matcher_test.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.core.matcher."""
import
numpy
as
np
import
tensorflow
as
tf
from
object_detection.core
import
matcher
class AnchorMatcherTest(tf.test.TestCase):
  """Tests for matcher.Match query methods.

  All tests use the same match_results encoding: values >= 0 are matched
  columns (the value is the matched row), -1 is unmatched, -2 is ignored.
  The deprecated assertEquals alias has been replaced by assertEqual
  throughout (identical behavior; assertEquals is a deprecated alias).
  """

  def test_get_correct_matched_columnIndices(self):
    match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
    match = matcher.Match(match_results)
    expected_column_indices = [0, 1, 3, 5]
    matched_column_indices = match.matched_column_indices()
    self.assertEqual(matched_column_indices.dtype, tf.int32)
    with self.test_session() as sess:
      matched_column_indices = sess.run(matched_column_indices)
      self.assertAllEqual(matched_column_indices, expected_column_indices)

  def test_get_correct_counts(self):
    match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
    match = matcher.Match(match_results)
    exp_num_matched_columns = 4
    exp_num_unmatched_columns = 2
    exp_num_ignored_columns = 1
    num_matched_columns = match.num_matched_columns()
    num_unmatched_columns = match.num_unmatched_columns()
    num_ignored_columns = match.num_ignored_columns()
    self.assertEqual(num_matched_columns.dtype, tf.int32)
    self.assertEqual(num_unmatched_columns.dtype, tf.int32)
    self.assertEqual(num_ignored_columns.dtype, tf.int32)
    with self.test_session() as sess:
      (num_matched_columns_out, num_unmatched_columns_out,
       num_ignored_columns_out) = sess.run(
           [num_matched_columns, num_unmatched_columns, num_ignored_columns])
      self.assertAllEqual(num_matched_columns_out, exp_num_matched_columns)
      self.assertAllEqual(num_unmatched_columns_out,
                          exp_num_unmatched_columns)
      self.assertAllEqual(num_ignored_columns_out, exp_num_ignored_columns)

  def testGetCorrectUnmatchedColumnIndices(self):
    match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
    match = matcher.Match(match_results)
    expected_column_indices = [2, 4]
    unmatched_column_indices = match.unmatched_column_indices()
    self.assertEqual(unmatched_column_indices.dtype, tf.int32)
    with self.test_session() as sess:
      unmatched_column_indices = sess.run(unmatched_column_indices)
      self.assertAllEqual(unmatched_column_indices, expected_column_indices)

  def testGetCorrectMatchedRowIndices(self):
    match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
    match = matcher.Match(match_results)
    expected_row_indices = [3, 1, 0, 5]
    matched_row_indices = match.matched_row_indices()
    self.assertEqual(matched_row_indices.dtype, tf.int32)
    with self.test_session() as sess:
      matched_row_inds = sess.run(matched_row_indices)
      self.assertAllEqual(matched_row_inds, expected_row_indices)

  def test_get_correct_ignored_column_indices(self):
    match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
    match = matcher.Match(match_results)
    expected_column_indices = [6]
    ignored_column_indices = match.ignored_column_indices()
    self.assertEqual(ignored_column_indices.dtype, tf.int32)
    with self.test_session() as sess:
      ignored_column_indices = sess.run(ignored_column_indices)
      self.assertAllEqual(ignored_column_indices, expected_column_indices)

  def test_get_correct_matched_column_indicator(self):
    match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
    match = matcher.Match(match_results)
    expected_column_indicator = [True, True, False, True, False, True, False]
    matched_column_indicator = match.matched_column_indicator()
    self.assertEqual(matched_column_indicator.dtype, tf.bool)
    with self.test_session() as sess:
      matched_column_indicator = sess.run(matched_column_indicator)
      self.assertAllEqual(matched_column_indicator, expected_column_indicator)

  def test_get_correct_unmatched_column_indicator(self):
    match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
    match = matcher.Match(match_results)
    expected_column_indicator = [False, False, True, False, True, False, False]
    unmatched_column_indicator = match.unmatched_column_indicator()
    self.assertEqual(unmatched_column_indicator.dtype, tf.bool)
    with self.test_session() as sess:
      unmatched_column_indicator = sess.run(unmatched_column_indicator)
      self.assertAllEqual(unmatched_column_indicator,
                          expected_column_indicator)

  def test_get_correct_ignored_column_indicator(self):
    match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
    match = matcher.Match(match_results)
    expected_column_indicator = [False, False, False, False, False, False,
                                 True]
    ignored_column_indicator = match.ignored_column_indicator()
    self.assertEqual(ignored_column_indicator.dtype, tf.bool)
    with self.test_session() as sess:
      ignored_column_indicator = sess.run(ignored_column_indicator)
      self.assertAllEqual(ignored_column_indicator, expected_column_indicator)

  def test_get_correct_unmatched_ignored_column_indices(self):
    match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
    match = matcher.Match(match_results)
    expected_column_indices = [2, 4, 6]
    unmatched_ignored_column_indices = (
        match.unmatched_or_ignored_column_indices())
    self.assertEqual(unmatched_ignored_column_indices.dtype, tf.int32)
    with self.test_session() as sess:
      unmatched_ignored_column_indices = sess.run(
          unmatched_ignored_column_indices)
      self.assertAllEqual(unmatched_ignored_column_indices,
                          expected_column_indices)

  def test_all_columns_accounted_for(self):
    # Note: deliberately setting to small number so not always
    # all possibilities appear (matched, unmatched, ignored)
    num_matches = 10
    match_results = tf.random_uniform(
        [num_matches], minval=-2, maxval=5, dtype=tf.int32)
    match = matcher.Match(match_results)
    matched_column_indices = match.matched_column_indices()
    unmatched_column_indices = match.unmatched_column_indices()
    ignored_column_indices = match.ignored_column_indices()
    with self.test_session() as sess:
      matched, unmatched, ignored = sess.run([
          matched_column_indices, unmatched_column_indices,
          ignored_column_indices
      ])
      # Every column must land in exactly one of the three partitions.
      all_indices = np.hstack((matched, unmatched, ignored))
      all_indices_sorted = np.sort(all_indices)
      self.assertAllEqual(all_indices_sorted,
                          np.arange(num_matches, dtype=np.int32))
# Run all test cases in this module when executed directly.
if __name__ == '__main__':
  tf.test.main()
object_detection/core/minibatch_sampler.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Base minibatch sampler module.
The job of the minibatch_sampler is to subsample a minibatch based on some
criterion.
The main function call is:
subsample(indicator, batch_size, **params).
Indicator is a 1d boolean tensor where True denotes which examples can be
sampled. It returns a boolean indicator where True denotes an example has been
sampled..
Subclasses should implement the Subsample function and can make use of the
@staticmethod SubsampleIndicator.
"""
from
abc
import
ABCMeta
from
abc
import
abstractmethod
import
tensorflow
as
tf
from
object_detection.utils
import
ops
class MinibatchSampler(object):
  """Abstract base class for subsampling minibatches."""
  __metaclass__ = ABCMeta

  def __init__(self):
    """Constructs a minibatch sampler."""
    pass

  @abstractmethod
  def subsample(self, indicator, batch_size, **params):
    """Returns subsample of entries in indicator.

    Args:
      indicator: boolean tensor of shape [N] whose True entries can be sampled.
      batch_size: desired batch size.
      **params: additional keyword arguments for specific implementations of
        the MinibatchSampler.

    Returns:
      sample_indicator: boolean tensor of shape [N] whose True entries have
        been sampled. If sum(indicator) >= batch_size, sum(is_sampled) =
        batch_size.
    """
    pass

  @staticmethod
  def subsample_indicator(indicator, num_samples):
    """Subsample indicator vector.

    Given a boolean indicator vector with M elements set to `True`, the
    function assigns all but `num_samples` of these previously `True` elements
    to `False`. If `num_samples` is greater than M, the original indicator
    vector is returned.

    Args:
      indicator: a 1-dimensional boolean tensor indicating which elements
        are allowed to be sampled and which are not.
      num_samples: int32 scalar tensor

    Returns:
      a boolean tensor with the same shape as input (indicator) tensor
    """
    # Gather the positions of all True entries, shuffle them, and flatten
    # to a rank-1 tensor of candidate indices.
    candidate_indices = tf.reshape(
        tf.random_shuffle(tf.where(indicator)), [-1])
    # Never request more samples than there are candidates.
    sample_count = tf.minimum(tf.size(candidate_indices), num_samples)
    chosen_indices = tf.slice(candidate_indices, [0],
                              tf.reshape(sample_count, [1]))
    # Scatter the chosen indices back into a dense 0/1 vector of the
    # original length, then convert to boolean.
    dense_selection = ops.indices_to_dense_vector(chosen_indices,
                                                  tf.shape(indicator)[0])
    return tf.equal(dense_selection, 1)
object_detection/core/minibatch_sampler_test.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for google3.research.vale.object_detection.minibatch_sampler."""
import
numpy
as
np
import
tensorflow
as
tf
from
object_detection.core
import
minibatch_sampler
class MinibatchSamplerTest(tf.test.TestCase):
  """Tests for MinibatchSampler.subsample_indicator.

  Bug fix: the original assertions used the two-argument form
  self.assertTrue(np.sum(samples_out), 3), where the second argument is the
  failure *message*, so only truthiness of the sum was ever checked.  These
  are now self.assertEqual calls that actually verify the sampled count.
  """

  def test_subsample_indicator_when_more_true_elements_than_num_samples(self):
    np_indicator = [True, False, True, False, True, True, False]
    indicator = tf.constant(np_indicator)
    samples = minibatch_sampler.MinibatchSampler.subsample_indicator(
        indicator, 3)
    with self.test_session() as sess:
      samples_out = sess.run(samples)
      # 4 candidates, 3 requested -> exactly 3 sampled.
      self.assertEqual(np.sum(samples_out), 3)
      # Sampled entries must be a subset of the allowed (True) entries.
      self.assertAllEqual(samples_out,
                          np.logical_and(samples_out, np_indicator))

  def test_subsample_when_more_true_elements_than_num_samples_no_shape(self):
    np_indicator = [True, False, True, False, True, True, False]
    # Placeholder with unknown static shape exercises the dynamic-shape path.
    indicator = tf.placeholder(tf.bool)
    feed_dict = {indicator: np_indicator}
    samples = minibatch_sampler.MinibatchSampler.subsample_indicator(
        indicator, 3)
    with self.test_session() as sess:
      samples_out = sess.run(samples, feed_dict=feed_dict)
      self.assertEqual(np.sum(samples_out), 3)
      self.assertAllEqual(samples_out,
                          np.logical_and(samples_out, np_indicator))

  def test_subsample_indicator_when_less_true_elements_than_num_samples(self):
    np_indicator = [True, False, True, False, True, True, False]
    indicator = tf.constant(np_indicator)
    samples = minibatch_sampler.MinibatchSampler.subsample_indicator(
        indicator, 5)
    with self.test_session() as sess:
      samples_out = sess.run(samples)
      # Only 4 candidates exist, so all 4 are sampled despite requesting 5.
      self.assertEqual(np.sum(samples_out), 4)
      self.assertAllEqual(samples_out,
                          np.logical_and(samples_out, np_indicator))

  def test_subsample_indicator_when_num_samples_is_zero(self):
    np_indicator = [True, False, True, False, True, True, False]
    indicator = tf.constant(np_indicator)
    samples_none = minibatch_sampler.MinibatchSampler.subsample_indicator(
        indicator, 0)
    with self.test_session() as sess:
      samples_none_out = sess.run(samples_none)
      # Requesting zero samples yields an all-False indicator.
      self.assertAllEqual(
          np.zeros_like(samples_none_out, dtype=bool),
          samples_none_out)

  def test_subsample_indicator_when_indicator_all_false(self):
    indicator_empty = tf.zeros([0], dtype=tf.bool)
    samples_empty = minibatch_sampler.MinibatchSampler.subsample_indicator(
        indicator_empty, 4)
    with self.test_session() as sess:
      samples_empty_out = sess.run(samples_empty)
      # An empty indicator produces an empty result regardless of num_samples.
      self.assertEqual(0, samples_empty_out.size)
# Run all test cases in this module when executed directly.
if __name__ == '__main__':
  tf.test.main()
object_detection/core/model.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Abstract detection model.
This file defines a generic base class for detection models. Programs that are
designed to work with arbitrary detection models should only depend on this
class. We intend for the functions in this class to follow tensor-in/tensor-out
design, thus all functions have tensors or lists/dictionaries holding tensors as
inputs and outputs.
Abstractly, detection models predict output tensors given input images
which can be passed to a loss function at training time or passed to a
postprocessing function at eval time. The computation graphs at a high level
consequently look as follows:
Training time:
inputs (images tensor) -> preprocess -> predict -> loss -> outputs (loss tensor)
Evaluation time:
inputs (images tensor) -> preprocess -> predict -> postprocess
-> outputs (boxes tensor, scores tensor, classes tensor, num_detections tensor)
DetectionModels must thus implement four functions (1) preprocess, (2) predict,
(3) postprocess and (4) loss. DetectionModels should make no assumptions about
the input size or aspect ratio --- they are responsible for doing any
resize/reshaping necessary (see docstring for the preprocess function).
Output classes are always integers in the range [0, num_classes). Any mapping
of these integers to semantic labels is to be handled outside of this class.
By default, DetectionModels produce bounding box detections; However, we support
a handful of auxiliary annotations associated with each bounding box, namely,
instance masks and keypoints.
"""
from
abc
import
ABCMeta
from
abc
import
abstractmethod
from
object_detection.core
import
standard_fields
as
fields
class DetectionModel(object):
  """Abstract base class for detection models."""
  # Python 2-style ABC declaration; kept as-is for compatibility with the
  # rest of the codebase (on Python 3 this attribute is inert).
  __metaclass__ = ABCMeta

  def __init__(self, num_classes):
    """Constructor.

    Args:
      num_classes: number of classes.  Note that num_classes *does not* include
        background categories that might be implicitly predicted in various
        implementations.
    """
    self._num_classes = num_classes
    # Maps field names (fields.BoxListFields.*) to lists of groundtruth
    # tensors, one entry per image; populated by provide_groundtruth().
    self._groundtruth_lists = {}

  @property
  def num_classes(self):
    """Number of (non-background) classes this model predicts."""
    return self._num_classes

  def groundtruth_lists(self, field):
    """Accesses a list of groundtruth tensors.

    Args:
      field: a string key, options are
        fields.BoxListFields.{boxes,classes,masks,keypoints}

    Returns:
      A list of tensors holding groundtruth information (see also
      provide_groundtruth function below), with one entry for each image in
      the batch.

    Raises:
      RuntimeError: if the field has not been provided via provide_groundtruth.
    """
    if field not in self._groundtruth_lists:
      # Interpolate the field name into the message.  Previously the field was
      # passed as a second positional exception argument, so the '%s' was
      # never formatted and the error read as a raw tuple.
      raise RuntimeError(
          'Groundtruth tensor %s has not been provided' % field)
    return self._groundtruth_lists[field]

  @abstractmethod
  def preprocess(self, inputs):
    """Input preprocessing.

    To be overridden by implementations.

    This function is responsible for any scaling/shifting of input values that
    is necessary prior to running the detector on an input image.  It is also
    responsible for any resizing that might be necessary as images are assumed
    to arrive in arbitrary sizes.  While this function could conceivably be
    part of the predict method (below), it is often convenient to keep these
    separate --- for example, we may want to preprocess on one device, place
    onto a queue, and let another device (e.g., the GPU) handle prediction.

    A few important notes about the preprocess function:
    + We assume that this operation does not have any trainable variables nor
      does it affect the groundtruth annotations in any way (thus data
      augmentation operations such as random cropping should be performed
      externally).
    + There is no assumption that the batchsize in this function is the same
      as the batch size in the predict function.  In fact, we recommend
      calling the preprocess function prior to calling any batching operations
      (which should happen outside of the model) and thus assuming that batch
      sizes are equal to 1 in the preprocess function.
    + There is also no explicit assumption that the output resolutions must be
      fixed across inputs --- this is to support "fully convolutional"
      settings in which input images can have different shapes/resolutions.

    Args:
      inputs: a [batch, height_in, width_in, channels] float32 tensor
        representing a batch of images with values between 0 and 255.0.

    Returns:
      preprocessed_inputs: a [batch, height_out, width_out, channels] float32
        tensor representing a batch of images.
    """
    pass

  @abstractmethod
  def predict(self, preprocessed_inputs):
    """Predicts prediction tensors from inputs tensor.

    Outputs of this function can be passed to loss or postprocess functions.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float32 tensor
        representing a batch of images.

    Returns:
      prediction_dict: a dictionary holding prediction tensors to be
        passed to the Loss or Postprocess functions.
    """
    pass

  @abstractmethod
  def postprocess(self, prediction_dict, **params):
    """Converts predicted output tensors to final detections.

    Outputs adhere to the following conventions:
    * Classes are integers in [0, num_classes); background classes are removed
      and the first non-background class is mapped to 0.
    * Boxes are to be interpreted as being in [y_min, x_min, y_max, x_max]
      format and normalized relative to the image window.
    * `num_detections` is provided for settings where detections are padded to
      a fixed number of boxes.
    * We do not specifically assume any kind of probabilistic interpretation
      of the scores --- the only important thing is their relative ordering.
      Thus implementations of the postprocess function are free to output
      logits, probabilities, calibrated probabilities, or anything else.

    Args:
      prediction_dict: a dictionary holding prediction tensors.
      **params: Additional keyword arguments for specific implementations of
        DetectionModel.

    Returns:
      detections: a dictionary containing the following fields
        detection_boxes: [batch, max_detections, 4]
        detection_scores: [batch, max_detections]
        detection_classes: [batch, max_detections]
        instance_masks: [batch, max_detections, image_height, image_width]
          (optional)
        keypoints: [batch, max_detections, num_keypoints, 2] (optional)
        num_detections: [batch]
    """
    pass

  @abstractmethod
  def loss(self, prediction_dict):
    """Computes scalar loss tensors with respect to provided groundtruth.

    Calling this function requires that groundtruth tensors have been
    provided via the provide_groundtruth function.

    Args:
      prediction_dict: a dictionary holding predicted tensors

    Returns:
      a dictionary mapping strings (loss names) to scalar tensors representing
      loss values.
    """
    pass

  def provide_groundtruth(self,
                          groundtruth_boxes_list,
                          groundtruth_classes_list,
                          groundtruth_masks_list=None,
                          groundtruth_keypoints_list=None):
    """Provides groundtruth tensors.

    Args:
      groundtruth_boxes_list: a list of 2-D tf.float32 tensors of shape
        [num_boxes, 4] containing coordinates of the groundtruth boxes.
        Groundtruth boxes are provided in [y_min, x_min, y_max, x_max]
        format and assumed to be normalized and clipped
        relative to the image window with y_min <= y_max and x_min <= x_max.
      groundtruth_classes_list: a list of 2-D tf.float32 one-hot (or k-hot)
        tensors of shape [num_boxes, num_classes] containing the class targets
        with the 0th index assumed to map to the first non-background class.
      groundtruth_masks_list: a list of 2-D tf.float32 tensors of
        shape [max_detections, height_in, width_in] containing instance
        masks with values in {0, 1}.  If None, no masks are provided.
        Mask resolution `height_in`x`width_in` must agree with the resolution
        of the input image tensor provided to the `preprocess` function.
      groundtruth_keypoints_list: a list of 2-D tf.float32 tensors of
        shape [batch, max_detections, num_keypoints, 2] containing keypoints.
        Keypoints are assumed to be provided in normalized coordinates and
        missing keypoints should be encoded as NaN.
    """
    self._groundtruth_lists[fields.BoxListFields.boxes] = (
        groundtruth_boxes_list)
    self._groundtruth_lists[fields.BoxListFields.classes] = (
        groundtruth_classes_list)
    # Optional annotations are only recorded when a non-empty list is given.
    if groundtruth_masks_list:
      self._groundtruth_lists[fields.BoxListFields.masks] = (
          groundtruth_masks_list)
    if groundtruth_keypoints_list:
      self._groundtruth_lists[fields.BoxListFields.keypoints] = (
          groundtruth_keypoints_list)

  @abstractmethod
  def restore_fn(self, checkpoint_path, from_detection_checkpoint=True):
    """Returns callable for loading a foreign checkpoint into tensorflow graph.

    Loads variables from a different tensorflow graph (typically feature
    extractor variables).  This enables the model to initialize based on
    weights from another task.  For example, the feature extractor variables
    from a classification model can be used to bootstrap training of an object
    detector.  When loading from an object detection model, the checkpoint
    model should have the same parameters as this detection model with
    exception of the num_classes parameter.

    Args:
      checkpoint_path: path to checkpoint to restore.
      from_detection_checkpoint: whether to restore from a full detection
        checkpoint (with compatible variable names) or to restore from a
        classification checkpoint for initialization prior to training.

    Returns:
      a callable which takes a tf.Session as input and loads a checkpoint when
      run.
    """
    pass
object_detection/core/post_processing.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Post-processing operations on detected boxes."""
import
tensorflow
as
tf
from
object_detection.core
import
box_list
from
object_detection.core
import
box_list_ops
from
object_detection.core
import
standard_fields
as
fields
def multiclass_non_max_suppression(boxes,
                                   scores,
                                   score_thresh,
                                   iou_thresh,
                                   max_size_per_class,
                                   max_total_size=0,
                                   clip_window=None,
                                   change_coordinate_frame=False,
                                   masks=None,
                                   additional_fields=None,
                                   scope=None):
  """Multi-class version of non maximum suppression.

  This op greedily selects a subset of detection bounding boxes, pruning
  away boxes that have high IOU (intersection over union) overlap (> thresh)
  with already selected boxes.  It operates independently for each class for
  which scores are provided (via the scores field of the input box_list),
  pruning boxes with score less than a provided threshold prior to
  applying NMS.

  Please note that this operation is performed on *all* classes, therefore any
  background classes should be removed prior to calling this function.

  Args:
    boxes: A [k, q, 4] float32 tensor containing k detections. `q` can be
      either number of classes or 1 depending on whether a separate box is
      predicted per class.
    scores: A [k, num_classes] float32 tensor containing the scores for each
      of the k detections.
    score_thresh: scalar threshold for score (low scoring boxes are removed).
    iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap
      with previously selected boxes are removed).
    max_size_per_class: maximum number of retained boxes per class.
    max_total_size: maximum number of boxes retained over all classes.  By
      default returns all boxes retained after capping boxes per class.
    clip_window: A float32 tensor of the form [y_min, x_min, y_max, x_max]
      representing the window to clip and normalize boxes to before performing
      non-max suppression.
    change_coordinate_frame: Whether to normalize coordinates after clipping
      relative to clip_window (this can only be set to True if a clip_window
      is provided).
    masks: (optional) a [k, q, mask_height, mask_width] float32 tensor
      containing box masks.  `q` can be either number of classes or 1
      depending on whether a separate mask is predicted per class.
    additional_fields: (optional) If not None, a dictionary that maps keys to
      tensors whose first dimensions are all of size `k`.  After non-maximum
      suppression, all tensors corresponding to the selected boxes will be
      added to the resulting BoxList.
    scope: name scope.

  Returns:
    a BoxList holding M boxes with a rank-1 scores field representing
    corresponding scores for each box with scores sorted in decreasing order
    and a rank-1 classes field representing a class label for each box.
    If masks, keypoints, keypoint_heatmaps is not None, the boxlist will
    contain masks, keypoints, keypoint_heatmaps corresponding to boxes.

  Raises:
    ValueError: if iou_thresh is not in [0, 1] or if input boxlist does not
      have a valid scores field.
  """
  # --- Static (graph-construction time) validation of the inputs. ---
  if not 0 <= iou_thresh <= 1.0:
    raise ValueError('iou_thresh must be between 0 and 1')
  if scores.shape.ndims != 2:
    raise ValueError('scores field must be of rank 2')
  # num_classes must be known statically since the per-class loop below is
  # unrolled in Python.
  if scores.shape[1].value is None:
    raise ValueError('scores must have statically defined second '
                     'dimension')
  if boxes.shape.ndims != 3:
    raise ValueError('boxes must be of rank 3.')
  if not (boxes.shape[1].value == scores.shape[1].value or
          boxes.shape[1].value == 1):
    raise ValueError('second dimension of boxes must be either 1 or equal '
                     'to the second dimension of scores')
  if boxes.shape[2].value != 4:
    raise ValueError('last dimension of boxes must be of size 4.')
  if change_coordinate_frame and clip_window is None:
    raise ValueError('if change_coordinate_frame is True, then a clip_window'
                     'must be specified.')

  with tf.name_scope(scope, 'MultiClassNonMaxSuppression'):
    num_boxes = tf.shape(boxes)[0]
    num_scores = tf.shape(scores)[0]
    num_classes = scores.get_shape()[1]

    # Runtime check that boxes and scores agree on k; attached below via
    # control_dependencies so it fires before any scores are sliced.
    length_assert = tf.Assert(
        tf.equal(num_boxes, num_scores),
        ['Incorrect scores field length: actual vs expected.',
         num_scores, num_boxes])

    selected_boxes_list = []
    per_class_boxes_list = tf.unstack(boxes, axis=1)
    if masks is not None:
      per_class_masks_list = tf.unstack(masks, axis=1)
    # When q == 1 (boxes shared across classes) every class reuses box
    # column 0; otherwise class c uses box column c.
    boxes_ids = (range(num_classes) if len(per_class_boxes_list) > 1
                 else [0] * num_classes)
    for class_idx, boxes_idx in zip(range(num_classes), boxes_ids):
      per_class_boxes = per_class_boxes_list[boxes_idx]
      boxlist_and_class_scores = box_list.BoxList(per_class_boxes)
      with tf.control_dependencies([length_assert]):
        # Column class_idx of scores, flattened to rank 1.
        class_scores = tf.reshape(
            tf.slice(scores, [0, class_idx],
                     tf.stack([num_scores, 1])), [-1])
      boxlist_and_class_scores.add_field(fields.BoxListFields.scores,
                                         class_scores)
      if masks is not None:
        per_class_masks = per_class_masks_list[boxes_idx]
        boxlist_and_class_scores.add_field(fields.BoxListFields.masks,
                                           per_class_masks)
      if additional_fields is not None:
        # Extra per-box tensors ride along so they are filtered/gathered in
        # lockstep with the boxes.
        for key, tensor in additional_fields.items():
          boxlist_and_class_scores.add_field(key, tensor)
      # Score thresholding happens before NMS.
      boxlist_filtered = box_list_ops.filter_greater_than(
          boxlist_and_class_scores, score_thresh)
      if clip_window is not None:
        boxlist_filtered = box_list_ops.clip_to_window(
            boxlist_filtered, clip_window)
        # Coordinate-frame change only makes sense relative to a clip window
        # (enforced by the ValueError above).
        if change_coordinate_frame:
          boxlist_filtered = box_list_ops.change_coordinate_frame(
              boxlist_filtered, clip_window)
      max_selection_size = tf.minimum(max_size_per_class,
                                      boxlist_filtered.num_boxes())
      selected_indices = tf.image.non_max_suppression(
          boxlist_filtered.get(),
          boxlist_filtered.get_field(fields.BoxListFields.scores),
          max_selection_size,
          iou_threshold=iou_thresh)
      nms_result = box_list_ops.gather(boxlist_filtered, selected_indices)
      # Label every survivor of this iteration with its class index
      # (broadcast scalar add onto a zeros tensor of matching length).
      nms_result.add_field(
          fields.BoxListFields.classes, (tf.zeros_like(
              nms_result.get_field(fields.BoxListFields.scores)) + class_idx))
      selected_boxes_list.append(nms_result)
    # Merge per-class survivors and order them by descending score.
    selected_boxes = box_list_ops.concatenate(selected_boxes_list)
    sorted_boxes = box_list_ops.sort_by_field(selected_boxes,
                                              fields.BoxListFields.scores)
    if max_total_size:
      # Keep only the top max_total_size boxes overall (list is already
      # sorted by score).
      max_total_size = tf.minimum(max_total_size,
                                  sorted_boxes.num_boxes())
      sorted_boxes = box_list_ops.gather(sorted_boxes,
                                         tf.range(max_total_size))
    return sorted_boxes
def batch_multiclass_non_max_suppression(boxes,
                                         scores,
                                         score_thresh,
                                         iou_thresh,
                                         max_size_per_class,
                                         max_total_size=0,
                                         clip_window=None,
                                         change_coordinate_frame=False,
                                         num_valid_boxes=None,
                                         masks=None,
                                         scope=None):
  """Multi-class version of non maximum suppression that operates on a batch.

  This op is similar to `multiclass_non_max_suppression` but operates on a
  batch of boxes and scores.  See documentation for
  `multiclass_non_max_suppression` for details.

  Args:
    boxes: A [batch_size, num_anchors, q, 4] float32 tensor containing
      detections.  If `q` is 1 then same boxes are used for all classes
      otherwise, if `q` is equal to number of classes, class-specific boxes
      are used.
    scores: A [batch_size, num_anchors, num_classes] float32 tensor containing
      the scores for each of the `num_anchors` detections.
    score_thresh: scalar threshold for score (low scoring boxes are removed).
    iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap
      with previously selected boxes are removed).
    max_size_per_class: maximum number of retained boxes per class.
    max_total_size: maximum number of boxes retained over all classes.  By
      default returns all boxes retained after capping boxes per class.
      NOTE(review): the per-image result below is padded/clipped to exactly
      max_total_size boxes, so this presumably must be set > 0 for the batch
      version to produce useful output --- confirm with callers.
    clip_window: A float32 tensor of the form [y_min, x_min, y_max, x_max]
      representing the window to clip boxes to before performing non-max
      suppression.
    change_coordinate_frame: Whether to normalize coordinates after clipping
      relative to clip_window (this can only be set to True if a clip_window
      is provided).
    num_valid_boxes: (optional) a Tensor of type `int32`.  A 1-D tensor of
      shape [batch_size] representing the number of valid boxes to be
      considered for each image in the batch.  This parameter allows for
      ignoring zero paddings.
    masks: (optional) a [batch_size, num_anchors, q, mask_height, mask_width]
      float32 tensor containing box masks.  `q` can be either number of
      classes or 1 depending on whether a separate mask is predicted per
      class.
    scope: tf scope name.

  Returns:
    A dictionary containing the following entries:
    'detection_boxes': A [batch_size, max_detections, 4] float32 tensor
      containing the non-max suppressed boxes.
    'detection_scores': A [bath_size, max_detections] float32 tensor
      containing the scores for the boxes.
    'detection_classes': A [batch_size, max_detections] float32 tensor
      containing the class for boxes.
    'num_detections': A [batchsize] float32 tensor indicating the number of
      valid detections per batch item.  Only the top num_detections[i]
      entries in nms_boxes[i], nms_scores[i] and nms_class[i] are valid.  The
      rest of the entries are zero paddings.
    'detection_masks': (optional) a
      [batch_size, max_detections, mask_height, mask_width] float32 tensor
      containing masks for each selected box.

  Raises:
    ValueError: if iou_thresh is not in [0, 1] or if input boxlist does not
      have a valid scores field.
  """
  # Static shape checks: q must be 1 (shared boxes) or num_classes.
  q = boxes.shape[2].value
  num_classes = scores.shape[2].value
  if q != 1 and q != num_classes:
    raise ValueError('third dimension of boxes must be either 1 or equal '
                     'to the third dimension of scores')

  with tf.name_scope(scope, 'BatchMultiClassNonMaxSuppression'):
    # The batch is processed one image at a time; the per-image loop below is
    # unrolled in Python, so batch_size must be statically known.
    per_image_boxes_list = tf.unstack(boxes)
    per_image_scores_list = tf.unstack(scores)
    # Default the optional per-image inputs to None placeholders so that a
    # single zip below covers all combinations.
    num_valid_boxes_list = len(per_image_boxes_list) * [None]
    per_image_masks_list = len(per_image_boxes_list) * [None]
    if num_valid_boxes is not None:
      num_valid_boxes_list = tf.unstack(num_valid_boxes)
    if masks is not None:
      per_image_masks_list = tf.unstack(masks)

    detection_boxes_list = []
    detection_scores_list = []
    detection_classes_list = []
    num_detections_list = []
    detection_masks_list = []
    # NOTE: the loop variable num_valid_boxes deliberately shadows the
    # function argument --- after the first iteration it holds the per-image
    # scalar, which is what the `is not None` check below relies on.
    for (per_image_boxes, per_image_scores, per_image_masks,
         num_valid_boxes) in zip(per_image_boxes_list, per_image_scores_list,
                                 per_image_masks_list, num_valid_boxes_list):
      if num_valid_boxes is not None:
        # Drop the zero-padding rows: keep only the first num_valid_boxes
        # anchors of each per-image tensor.
        per_image_boxes = tf.reshape(
            tf.slice(per_image_boxes, 3 * [0],
                     tf.stack([num_valid_boxes, -1, -1])), [-1, q, 4])
        per_image_scores = tf.reshape(
            tf.slice(per_image_scores, [0, 0],
                     tf.stack([num_valid_boxes, -1])), [-1, num_classes])
        if masks is not None:
          per_image_masks = tf.reshape(
              tf.slice(per_image_masks, 4 * [0],
                       tf.stack([num_valid_boxes, -1, -1, -1])),
              [-1, q, masks.shape[3].value, masks.shape[4].value])
      nmsed_boxlist = multiclass_non_max_suppression(
          per_image_boxes,
          per_image_scores,
          score_thresh,
          iou_thresh,
          max_size_per_class,
          max_total_size,
          masks=per_image_masks,
          clip_window=clip_window,
          change_coordinate_frame=change_coordinate_frame)
      # Record the true detection count before padding to a fixed size.
      num_detections_list.append(tf.to_float(nmsed_boxlist.num_boxes()))
      padded_boxlist = box_list_ops.pad_or_clip_box_list(nmsed_boxlist,
                                                         max_total_size)
      detection_boxes_list.append(padded_boxlist.get())
      detection_scores_list.append(
          padded_boxlist.get_field(fields.BoxListFields.scores))
      detection_classes_list.append(
          padded_boxlist.get_field(fields.BoxListFields.classes))
      if masks is not None:
        detection_masks_list.append(
            padded_boxlist.get_field(fields.BoxListFields.masks))

    # Re-stack the per-image results into batched tensors.
    nms_dict = {
        'detection_boxes': tf.stack(detection_boxes_list),
        'detection_scores': tf.stack(detection_scores_list),
        'detection_classes': tf.stack(detection_classes_list),
        'num_detections': tf.stack(num_detections_list)
    }
    if masks is not None:
      nms_dict['detection_masks'] = tf.stack(detection_masks_list)
    return nms_dict
object_detection/core/post_processing_test.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for tensorflow_models.object_detection.core.post_processing."""
import
numpy
as
np
import
tensorflow
as
tf
from
object_detection.core
import
post_processing
from
object_detection.core
import
standard_fields
as
fields
class
MulticlassNonMaxSuppressionTest
(
tf
.
test
.
TestCase
):
def test_with_invalid_scores_size(self):
  """The runtime length assert must fire when k(boxes) != k(scores)."""
  # Six boxes but only five score rows.
  boxes = tf.constant([[[0, 0, 1, 1]],
                       [[0, 0.1, 1, 1.1]],
                       [[0, -0.1, 1, 0.9]],
                       [[0, 10, 1, 11]],
                       [[0, 10.1, 1, 11.1]],
                       [[0, 100, 1, 101]]], tf.float32)
  scores = tf.constant([[.9], [.75], [.6], [.95], [.5]])
  iou_thresh = .5
  score_thresh = 0.6
  max_output_size = 3
  nms = post_processing.multiclass_non_max_suppression(
      boxes, scores, score_thresh, iou_thresh, max_output_size)
  with self.test_session() as sess:
    # The mismatch is dynamic (first dimension), so it is only detected when
    # the graph is actually executed.
    with self.assertRaisesWithPredicateMatch(
        tf.errors.InvalidArgumentError, 'Incorrect scores field length'):
      sess.run(nms.get())
def test_multiclass_nms_select_with_shared_boxes(self):
  """NMS with q=1 (one box shared by both classes) keeps the expected boxes."""
  boxes = tf.constant([[[0, 0, 1, 1]],
                       [[0, 0.1, 1, 1.1]],
                       [[0, -0.1, 1, 0.9]],
                       [[0, 10, 1, 11]],
                       [[0, 10.1, 1, 11.1]],
                       [[0, 100, 1, 101]],
                       [[0, 1000, 1, 1002]],
                       [[0, 1000, 1, 1002.1]]], tf.float32)
  scores = tf.constant([[.9, 0.01], [.75, 0.05],
                        [.6, 0.01], [.95, 0],
                        [.5, 0.01], [.3, 0.01],
                        [.01, .85], [.01, .5]])
  score_thresh = 0.1
  iou_thresh = .5
  max_output_size = 4
  # Survivors across both classes, sorted by descending score.
  exp_nms_corners = [[0, 10, 1, 11],
                     [0, 0, 1, 1],
                     [0, 1000, 1, 1002],
                     [0, 100, 1, 101]]
  exp_nms_scores = [.95, .9, .85, .3]
  exp_nms_classes = [0, 0, 1, 0]
  nms = post_processing.multiclass_non_max_suppression(
      boxes, scores, score_thresh, iou_thresh, max_output_size)
  with self.test_session() as sess:
    nms_corners_output, nms_scores_output, nms_classes_output = sess.run(
        [nms.get(), nms.get_field(fields.BoxListFields.scores),
         nms.get_field(fields.BoxListFields.classes)])
    self.assertAllClose(nms_corners_output, exp_nms_corners)
    self.assertAllClose(nms_scores_output, exp_nms_scores)
    self.assertAllClose(nms_classes_output, exp_nms_classes)
def test_multiclass_nms_select_with_shared_boxes_given_keypoints(self):
  """Keypoints passed via additional_fields are gathered with the boxes."""
  boxes = tf.constant([[[0, 0, 1, 1]],
                       [[0, 0.1, 1, 1.1]],
                       [[0, -0.1, 1, 0.9]],
                       [[0, 10, 1, 11]],
                       [[0, 10.1, 1, 11.1]],
                       [[0, 100, 1, 101]],
                       [[0, 1000, 1, 1002]],
                       [[0, 1000, 1, 1002.1]]], tf.float32)
  scores = tf.constant([[.9, 0.01], [.75, 0.05],
                        [.6, 0.01], [.95, 0],
                        [.5, 0.01], [.3, 0.01],
                        [.01, .85], [.01, .5]])
  num_keypoints = 6
  # Keypoints for box i all carry the value i, so the selected rows are
  # directly identifiable after NMS.
  keypoints = tf.tile(
      tf.reshape(tf.range(8), [8, 1, 1]),
      [1, num_keypoints, 2])
  score_thresh = 0.1
  iou_thresh = .5
  max_output_size = 4
  exp_nms_corners = [[0, 10, 1, 11],
                     [0, 0, 1, 1],
                     [0, 1000, 1, 1002],
                     [0, 100, 1, 101]]
  exp_nms_scores = [.95, .9, .85, .3]
  exp_nms_classes = [0, 0, 1, 0]
  # Boxes 3, 0, 6, 5 survive (in score order), hence keypoint values
  # [3, 0, 6, 5].
  exp_nms_keypoints_tensor = tf.tile(
      tf.reshape(tf.constant([3, 0, 6, 5], dtype=tf.float32), [4, 1, 1]),
      [1, num_keypoints, 2])
  nms = post_processing.multiclass_non_max_suppression(
      boxes,
      scores,
      score_thresh,
      iou_thresh,
      max_output_size,
      additional_fields={fields.BoxListFields.keypoints: keypoints})
  with self.test_session() as sess:
    (nms_corners_output,
     nms_scores_output,
     nms_classes_output,
     nms_keypoints,
     exp_nms_keypoints) = sess.run([
         nms.get(),
         nms.get_field(fields.BoxListFields.scores),
         nms.get_field(fields.BoxListFields.classes),
         nms.get_field(fields.BoxListFields.keypoints),
         exp_nms_keypoints_tensor
     ])
    self.assertAllClose(nms_corners_output, exp_nms_corners)
    self.assertAllClose(nms_scores_output, exp_nms_scores)
    self.assertAllClose(nms_classes_output, exp_nms_classes)
    self.assertAllEqual(nms_keypoints, exp_nms_keypoints)
def test_multiclass_nms_with_shared_boxes_given_keypoint_heatmaps(self):
  """Rank-4 additional fields (keypoint heatmaps) survive NMS gathering."""
  boxes = tf.constant([[[0, 0, 1, 1]],
                       [[0, 0.1, 1, 1.1]],
                       [[0, -0.1, 1, 0.9]],
                       [[0, 10, 1, 11]],
                       [[0, 10.1, 1, 11.1]],
                       [[0, 100, 1, 101]],
                       [[0, 1000, 1, 1002]],
                       [[0, 1000, 1, 1002.1]]], tf.float32)
  scores = tf.constant([[.9, 0.01], [.75, 0.05],
                        [.6, 0.01], [.95, 0],
                        [.5, 0.01], [.3, 0.01],
                        [.01, .85], [.01, .5]])
  num_boxes = tf.shape(boxes)[0]
  heatmap_height = 5
  heatmap_width = 5
  num_keypoints = 17
  # All-ones heatmaps: only the output shape (4 selected rows) is checked.
  keypoint_heatmaps = tf.ones(
      [num_boxes, heatmap_height, heatmap_width, num_keypoints],
      dtype=tf.float32)
  score_thresh = 0.1
  iou_thresh = .5
  max_output_size = 4
  exp_nms_corners = [[0, 10, 1, 11],
                     [0, 0, 1, 1],
                     [0, 1000, 1, 1002],
                     [0, 100, 1, 101]]
  exp_nms_scores = [.95, .9, .85, .3]
  exp_nms_classes = [0, 0, 1, 0]
  exp_nms_keypoint_heatmaps = np.ones(
      (4, heatmap_height, heatmap_width, num_keypoints), dtype=np.float32)
  nms = post_processing.multiclass_non_max_suppression(
      boxes,
      scores,
      score_thresh,
      iou_thresh,
      max_output_size,
      additional_fields={
          fields.BoxListFields.keypoint_heatmaps: keypoint_heatmaps})
  with self.test_session() as sess:
    (nms_corners_output,
     nms_scores_output,
     nms_classes_output,
     nms_keypoint_heatmaps) = sess.run(
         [nms.get(),
          nms.get_field(fields.BoxListFields.scores),
          nms.get_field(fields.BoxListFields.classes),
          nms.get_field(fields.BoxListFields.keypoint_heatmaps)])
    self.assertAllClose(nms_corners_output, exp_nms_corners)
    self.assertAllClose(nms_scores_output, exp_nms_scores)
    self.assertAllClose(nms_classes_output, exp_nms_classes)
    self.assertAllEqual(nms_keypoint_heatmaps, exp_nms_keypoint_heatmaps)
def test_multiclass_nms_with_additional_fields(self):
  """A user-keyed additional field (coarse boxes) is gathered with the boxes."""
  boxes = tf.constant([[[0, 0, 1, 1]],
                       [[0, 0.1, 1, 1.1]],
                       [[0, -0.1, 1, 0.9]],
                       [[0, 10, 1, 11]],
                       [[0, 10.1, 1, 11.1]],
                       [[0, 100, 1, 101]],
                       [[0, 1000, 1, 1002]],
                       [[0, 1000, 1, 1002.1]]], tf.float32)
  scores = tf.constant([[.9, 0.01], [.75, 0.05],
                        [.6, 0.01], [.95, 0],
                        [.5, 0.01], [.3, 0.01],
                        [.01, .85], [.01, .5]])
  coarse_boxes_key = 'coarse_boxes'
  # One coarse box per input box, offset by 0.1 so each row is unique.
  coarse_boxes = tf.constant([[0.1, 0.1, 1.1, 1.1],
                              [0.1, 0.2, 1.1, 1.2],
                              [0.1, -0.2, 1.1, 1.0],
                              [0.1, 10.1, 1.1, 11.1],
                              [0.1, 10.2, 1.1, 11.2],
                              [0.1, 100.1, 1.1, 101.1],
                              [0.1, 1000.1, 1.1, 1002.1],
                              [0.1, 1000.1, 1.1, 1002.2]], tf.float32)
  score_thresh = 0.1
  iou_thresh = .5
  max_output_size = 4
  exp_nms_corners = np.array([[0, 10, 1, 11],
                              [0, 0, 1, 1],
                              [0, 1000, 1, 1002],
                              [0, 100, 1, 101]], dtype=np.float32)
  # Coarse rows corresponding to surviving boxes 3, 0, 6, 5.
  exp_nms_coarse_corners = np.array([[0.1, 10.1, 1.1, 11.1],
                                     [0.1, 0.1, 1.1, 1.1],
                                     [0.1, 1000.1, 1.1, 1002.1],
                                     [0.1, 100.1, 1.1, 101.1]],
                                    dtype=np.float32)
  exp_nms_scores = [.95, .9, .85, .3]
  exp_nms_classes = [0, 0, 1, 0]
  nms = post_processing.multiclass_non_max_suppression(
      boxes,
      scores,
      score_thresh,
      iou_thresh,
      max_output_size,
      additional_fields={coarse_boxes_key: coarse_boxes})
  with self.test_session() as sess:
    (nms_corners_output,
     nms_scores_output,
     nms_classes_output,
     nms_coarse_corners) = sess.run(
         [nms.get(),
          nms.get_field(fields.BoxListFields.scores),
          nms.get_field(fields.BoxListFields.classes),
          nms.get_field(coarse_boxes_key)])
    self.assertAllClose(nms_corners_output, exp_nms_corners)
    self.assertAllClose(nms_scores_output, exp_nms_scores)
    self.assertAllClose(nms_classes_output, exp_nms_classes)
    self.assertAllEqual(nms_coarse_corners, exp_nms_coarse_corners)
def test_multiclass_nms_select_with_shared_boxes_given_masks(self):
  """Per-class instance masks are selected alongside the surviving boxes."""
  boxes = tf.constant([[[0, 0, 1, 1]],
                       [[0, 0.1, 1, 1.1]],
                       [[0, -0.1, 1, 0.9]],
                       [[0, 10, 1, 11]],
                       [[0, 10.1, 1, 11.1]],
                       [[0, 100, 1, 101]],
                       [[0, 1000, 1, 1002]],
                       [[0, 1000, 1, 1002.1]]], tf.float32)
  scores = tf.constant([[.9, 0.01], [.75, 0.05],
                        [.6, 0.01], [.95, 0],
                        [.5, 0.01], [.3, 0.01],
                        [.01, .85], [.01, .5]])
  num_classes = 2
  mask_height = 3
  mask_width = 3
  # Mask for box i is filled with the value i (for both classes).
  masks = tf.tile(
      tf.reshape(tf.range(8), [8, 1, 1, 1]),
      [1, num_classes, mask_height, mask_width])
  score_thresh = 0.1
  iou_thresh = .5
  max_output_size = 4
  exp_nms_corners = [[0, 10, 1, 11],
                     [0, 0, 1, 1],
                     [0, 1000, 1, 1002],
                     [0, 100, 1, 101]]
  exp_nms_scores = [.95, .9, .85, .3]
  exp_nms_classes = [0, 0, 1, 0]
  # Boxes 3, 0, 6, 5 survive, hence mask fill values [3, 0, 6, 5].
  exp_nms_masks_tensor = tf.tile(
      tf.reshape(tf.constant([3, 0, 6, 5], dtype=tf.float32), [4, 1, 1]),
      [1, mask_height, mask_width])
  nms = post_processing.multiclass_non_max_suppression(
      boxes, scores, score_thresh, iou_thresh, max_output_size, masks=masks)
  with self.test_session() as sess:
    (nms_corners_output,
     nms_scores_output,
     nms_classes_output,
     nms_masks,
     exp_nms_masks) = sess.run([
         nms.get(),
         nms.get_field(fields.BoxListFields.scores),
         nms.get_field(fields.BoxListFields.classes),
         nms.get_field(fields.BoxListFields.masks),
         exp_nms_masks_tensor
     ])
    self.assertAllClose(nms_corners_output, exp_nms_corners)
    self.assertAllClose(nms_scores_output, exp_nms_scores)
    self.assertAllClose(nms_classes_output, exp_nms_classes)
    self.assertAllEqual(nms_masks, exp_nms_masks)
def test_multiclass_nms_select_with_clip_window(self):
  """Boxes are clipped to the window before NMS; one clipped box remains."""
  boxes = tf.constant([[[0, 0, 10, 10]],
                       [[1, 1, 11, 11]]], tf.float32)
  scores = tf.constant([[.9], [.75]])
  clip_window = tf.constant([5, 4, 8, 7], tf.float32)
  score_thresh = 0.0
  iou_thresh = 0.5
  max_output_size = 100
  # After clipping, both boxes collapse to the window itself; NMS then keeps
  # only the higher-scoring one, still in absolute window coordinates.
  exp_nms_corners = [[5, 4, 8, 7]]
  exp_nms_scores = [.9]
  exp_nms_classes = [0]
  nms = post_processing.multiclass_non_max_suppression(
      boxes, scores, score_thresh, iou_thresh, max_output_size,
      clip_window=clip_window)
  with self.test_session() as sess:
    nms_corners_output, nms_scores_output, nms_classes_output = sess.run(
        [nms.get(), nms.get_field(fields.BoxListFields.scores),
         nms.get_field(fields.BoxListFields.classes)])
    self.assertAllClose(nms_corners_output, exp_nms_corners)
    self.assertAllClose(nms_scores_output, exp_nms_scores)
    self.assertAllClose(nms_classes_output, exp_nms_classes)
def test_multiclass_nms_select_with_clip_window_change_coordinate_frame(self):
  """With change_coordinate_frame, output is normalized to the clip window."""
  boxes = tf.constant([[[0, 0, 10, 10]],
                       [[1, 1, 11, 11]]], tf.float32)
  scores = tf.constant([[.9], [.75]])
  clip_window = tf.constant([5, 4, 8, 7], tf.float32)
  score_thresh = 0.0
  iou_thresh = 0.5
  max_output_size = 100
  # Same scenario as the plain clip-window test, but the surviving box is
  # expressed relative to the clip window, i.e. the full window [0, 0, 1, 1].
  exp_nms_corners = [[0, 0, 1, 1]]
  exp_nms_scores = [.9]
  exp_nms_classes = [0]
  nms = post_processing.multiclass_non_max_suppression(
      boxes, scores, score_thresh, iou_thresh, max_output_size,
      clip_window=clip_window, change_coordinate_frame=True)
  with self.test_session() as sess:
    nms_corners_output, nms_scores_output, nms_classes_output = sess.run(
        [nms.get(), nms.get_field(fields.BoxListFields.scores),
         nms.get_field(fields.BoxListFields.classes)])
    self.assertAllClose(nms_corners_output, exp_nms_corners)
    self.assertAllClose(nms_scores_output, exp_nms_scores)
    self.assertAllClose(nms_classes_output, exp_nms_classes)
def test_multiclass_nms_select_with_per_class_cap(self):
  """max_size_per_class=2 limits class 0 to its two highest-scoring boxes."""
  boxes = tf.constant([[[0, 0, 1, 1]],
                       [[0, 0.1, 1, 1.1]],
                       [[0, -0.1, 1, 0.9]],
                       [[0, 10, 1, 11]],
                       [[0, 10.1, 1, 11.1]],
                       [[0, 100, 1, 101]],
                       [[0, 1000, 1, 1002]],
                       [[0, 1000, 1, 1002.1]]], tf.float32)
  scores = tf.constant([[.9, 0.01], [.75, 0.05],
                        [.6, 0.01], [.95, 0],
                        [.5, 0.01], [.3, 0.01],
                        [.01, .85], [.01, .5]])
  score_thresh = 0.1
  iou_thresh = .5
  max_size_per_class = 2
  # Class 0 contributes two boxes, class 1 one; box [0, 100, 1, 101] is cut
  # by the per-class cap.
  exp_nms_corners = [[0, 10, 1, 11],
                     [0, 0, 1, 1],
                     [0, 1000, 1, 1002]]
  exp_nms_scores = [.95, .9, .85]
  exp_nms_classes = [0, 0, 1]
  nms = post_processing.multiclass_non_max_suppression(
      boxes, scores, score_thresh, iou_thresh, max_size_per_class)
  with self.test_session() as sess:
    nms_corners_output, nms_scores_output, nms_classes_output = sess.run(
        [nms.get(), nms.get_field(fields.BoxListFields.scores),
         nms.get_field(fields.BoxListFields.classes)])
    self.assertAllClose(nms_corners_output, exp_nms_corners)
    self.assertAllClose(nms_scores_output, exp_nms_scores)
    self.assertAllClose(nms_classes_output, exp_nms_classes)
def
test_multiclass_nms_select_with_total_cap
(
self
):
boxes
=
tf
.
constant
([[[
0
,
0
,
1
,
1
]],
[[
0
,
0.1
,
1
,
1.1
]],
[[
0
,
-
0.1
,
1
,
0.9
]],
[[
0
,
10
,
1
,
11
]],
[[
0
,
10.1
,
1
,
11.1
]],
[[
0
,
100
,
1
,
101
]],
[[
0
,
1000
,
1
,
1002
]],
[[
0
,
1000
,
1
,
1002.1
]]],
tf
.
float32
)
scores
=
tf
.
constant
([[.
9
,
0.01
],
[.
75
,
0.05
],
[.
6
,
0.01
],
[.
95
,
0
],
[.
5
,
0.01
],
[.
3
,
0.01
],
[.
01
,
.
85
],
[.
01
,
.
5
]])
score_thresh
=
0.1
iou_thresh
=
.
5
max_size_per_class
=
4
max_total_size
=
2
exp_nms_corners
=
[[
0
,
10
,
1
,
11
],
[
0
,
0
,
1
,
1
]]
exp_nms_scores
=
[.
95
,
.
9
]
exp_nms_classes
=
[
0
,
0
]
nms
=
post_processing
.
multiclass_non_max_suppression
(
boxes
,
scores
,
score_thresh
,
iou_thresh
,
max_size_per_class
,
max_total_size
)
with
self
.
test_session
()
as
sess
:
nms_corners_output
,
nms_scores_output
,
nms_classes_output
=
sess
.
run
(
[
nms
.
get
(),
nms
.
get_field
(
fields
.
BoxListFields
.
scores
),
nms
.
get_field
(
fields
.
BoxListFields
.
classes
)])
self
.
assertAllClose
(
nms_corners_output
,
exp_nms_corners
)
self
.
assertAllClose
(
nms_scores_output
,
exp_nms_scores
)
self
.
assertAllClose
(
nms_classes_output
,
exp_nms_classes
)
def
test_multiclass_nms_threshold_then_select_with_shared_boxes
(
self
):
boxes
=
tf
.
constant
([[[
0
,
0
,
1
,
1
]],
[[
0
,
0.1
,
1
,
1.1
]],
[[
0
,
-
0.1
,
1
,
0.9
]],
[[
0
,
10
,
1
,
11
]],
[[
0
,
10.1
,
1
,
11.1
]],
[[
0
,
100
,
1
,
101
]],
[[
0
,
1000
,
1
,
1002
]],
[[
0
,
1000
,
1
,
1002.1
]]],
tf
.
float32
)
scores
=
tf
.
constant
([[.
9
],
[.
75
],
[.
6
],
[.
95
],
[.
5
],
[.
3
],
[.
01
],
[.
01
]])
score_thresh
=
0.1
iou_thresh
=
.
5
max_output_size
=
3
exp_nms
=
[[
0
,
10
,
1
,
11
],
[
0
,
0
,
1
,
1
],
[
0
,
100
,
1
,
101
]]
nms
=
post_processing
.
multiclass_non_max_suppression
(
boxes
,
scores
,
score_thresh
,
iou_thresh
,
max_output_size
)
with
self
.
test_session
()
as
sess
:
nms_output
=
sess
.
run
(
nms
.
get
())
self
.
assertAllClose
(
nms_output
,
exp_nms
)
  def test_multiclass_nms_select_with_separate_boxes(self):
    # Each anchor supplies a distinct box per class (shape [8, 2, 4]), so a
    # class-1 selection uses that anchor's second box column.
    boxes = tf.constant([[[0, 0, 1, 1], [0, 0, 4, 5]],
                         [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
                         [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
                         [[0, 10, 1, 11], [0, 10, 1, 11]],
                         [[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
                         [[0, 100, 1, 101], [0, 100, 1, 101]],
                         [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
                         [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]],
                        tf.float32)
    # Column 0 scores class 0; column 1 scores class 1.
    scores = tf.constant([[.9, 0.01], [.75, 0.05],
                          [.6, 0.01], [.95, 0],
                          [.5, 0.01], [.3, 0.01],
                          [.01, .85], [.01, .5]])
    score_thresh = 0.1
    iou_thresh = .5
    max_output_size = 4

    # Survivors in descending score order; the .85 detection belongs to
    # class 1 and therefore reports its anchor's class-1 box [0, 999, 2, 1004].
    exp_nms_corners = [[0, 10, 1, 11],
                       [0, 0, 1, 1],
                       [0, 999, 2, 1004],
                       [0, 100, 1, 101]]
    exp_nms_scores = [.95, .9, .85, .3]
    exp_nms_classes = [0, 0, 1, 0]

    nms = post_processing.multiclass_non_max_suppression(
        boxes, scores, score_thresh, iou_thresh, max_output_size)
    with self.test_session() as sess:
      nms_corners_output, nms_scores_output, nms_classes_output = sess.run(
          [nms.get(), nms.get_field(fields.BoxListFields.scores),
           nms.get_field(fields.BoxListFields.classes)])
      self.assertAllClose(nms_corners_output, exp_nms_corners)
      self.assertAllClose(nms_scores_output, exp_nms_scores)
      self.assertAllClose(nms_classes_output, exp_nms_classes)
def
test_batch_multiclass_nms_with_batch_size_1
(
self
):
boxes
=
tf
.
constant
([[[[
0
,
0
,
1
,
1
],
[
0
,
0
,
4
,
5
]],
[[
0
,
0.1
,
1
,
1.1
],
[
0
,
0.1
,
2
,
1.1
]],
[[
0
,
-
0.1
,
1
,
0.9
],
[
0
,
-
0.1
,
1
,
0.9
]],
[[
0
,
10
,
1
,
11
],
[
0
,
10
,
1
,
11
]],
[[
0
,
10.1
,
1
,
11.1
],
[
0
,
10.1
,
1
,
11.1
]],
[[
0
,
100
,
1
,
101
],
[
0
,
100
,
1
,
101
]],
[[
0
,
1000
,
1
,
1002
],
[
0
,
999
,
2
,
1004
]],
[[
0
,
1000
,
1
,
1002.1
],
[
0
,
999
,
2
,
1002.7
]]]],
tf
.
float32
)
scores
=
tf
.
constant
([[[.
9
,
0.01
],
[.
75
,
0.05
],
[.
6
,
0.01
],
[.
95
,
0
],
[.
5
,
0.01
],
[.
3
,
0.01
],
[.
01
,
.
85
],
[.
01
,
.
5
]]])
score_thresh
=
0.1
iou_thresh
=
.
5
max_output_size
=
4
exp_nms_corners
=
[[[
0
,
10
,
1
,
11
],
[
0
,
0
,
1
,
1
],
[
0
,
999
,
2
,
1004
],
[
0
,
100
,
1
,
101
]]]
exp_nms_scores
=
[[.
95
,
.
9
,
.
85
,
.
3
]]
exp_nms_classes
=
[[
0
,
0
,
1
,
0
]]
nms_dict
=
post_processing
.
batch_multiclass_non_max_suppression
(
boxes
,
scores
,
score_thresh
,
iou_thresh
,
max_size_per_class
=
max_output_size
,
max_total_size
=
max_output_size
)
with
self
.
test_session
()
as
sess
:
nms_output
=
sess
.
run
(
nms_dict
)
self
.
assertAllClose
(
nms_output
[
'detection_boxes'
],
exp_nms_corners
)
self
.
assertAllClose
(
nms_output
[
'detection_scores'
],
exp_nms_scores
)
self
.
assertAllClose
(
nms_output
[
'detection_classes'
],
exp_nms_classes
)
self
.
assertEqual
(
nms_output
[
'num_detections'
],
[
4
])
  def test_batch_multiclass_nms_with_batch_size_2(self):
    # Batch of two images with four anchors each and per-class boxes
    # (shape [2, 4, 2, 4]).
    boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
                          [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
                          [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
                          [[0, 10, 1, 11], [0, 10, 1, 11]]],
                         [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
                          [[0, 100, 1, 101], [0, 100, 1, 101]],
                          [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
                          [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
                        tf.float32)
    scores = tf.constant([[[.9, 0.01], [.75, 0.05],
                           [.6, 0.01], [.95, 0]],
                          [[.5, 0.01], [.3, 0.01],
                           [.01, .85], [.01, .5]]])
    score_thresh = 0.1
    iou_thresh = .5
    max_output_size = 4

    # Per-image outputs are zero-padded out to max_total_size (4): image 0
    # keeps 2 detections and image 1 keeps 3, matching num_detections below.
    exp_nms_corners = [[[0, 10, 1, 11],
                        [0, 0, 1, 1],
                        [0, 0, 0, 0],
                        [0, 0, 0, 0]],
                       [[0, 999, 2, 1004],
                        [0, 10.1, 1, 11.1],
                        [0, 100, 1, 101],
                        [0, 0, 0, 0]]]
    exp_nms_scores = [[.95, .9, 0, 0],
                      [.85, .5, .3, 0]]
    exp_nms_classes = [[0, 0, 0, 0],
                       [1, 0, 0, 0]]

    nms_dict = post_processing.batch_multiclass_non_max_suppression(
        boxes, scores, score_thresh, iou_thresh,
        max_size_per_class=max_output_size, max_total_size=max_output_size)
    with self.test_session() as sess:
      nms_output = sess.run(nms_dict)
      self.assertAllClose(nms_output['detection_boxes'], exp_nms_corners)
      self.assertAllClose(nms_output['detection_scores'], exp_nms_scores)
      self.assertAllClose(nms_output['detection_classes'], exp_nms_classes)
      self.assertAllClose(nms_output['num_detections'], [2, 3])
  def test_batch_multiclass_nms_with_masks(self):
    # Same two-image setup as the batch_size_2 test, but each (anchor, class)
    # pair additionally carries a 2x2 instance mask that must be gathered
    # alongside the selected boxes.
    boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
                          [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
                          [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
                          [[0, 10, 1, 11], [0, 10, 1, 11]]],
                         [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
                          [[0, 100, 1, 101], [0, 100, 1, 101]],
                          [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
                          [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
                        tf.float32)
    scores = tf.constant([[[.9, 0.01], [.75, 0.05],
                           [.6, 0.01], [.95, 0]],
                          [[.5, 0.01], [.3, 0.01],
                           [.01, .85], [.01, .5]]])
    # Masks have shape [2, 4, 2, 2, 2]; values are distinct so each expected
    # output mask identifies exactly which (anchor, class) it came from.
    masks = tf.constant([[[[[0, 1], [2, 3]],
                           [[1, 2], [3, 4]]],
                          [[[2, 3], [4, 5]],
                           [[3, 4], [5, 6]]],
                          [[[4, 5], [6, 7]],
                           [[5, 6], [7, 8]]],
                          [[[6, 7], [8, 9]],
                           [[7, 8], [9, 10]]]],
                         [[[[8, 9], [10, 11]],
                           [[9, 10], [11, 12]]],
                          [[[10, 11], [12, 13]],
                           [[11, 12], [13, 14]]],
                          [[[12, 13], [14, 15]],
                           [[13, 14], [15, 16]]],
                          [[[14, 15], [16, 17]],
                           [[15, 16], [17, 18]]]]],
                        tf.float32)
    score_thresh = 0.1
    iou_thresh = .5
    max_output_size = 4

    exp_nms_corners = [[[0, 10, 1, 11],
                        [0, 0, 1, 1],
                        [0, 0, 0, 0],
                        [0, 0, 0, 0]],
                       [[0, 999, 2, 1004],
                        [0, 10.1, 1, 11.1],
                        [0, 100, 1, 101],
                        [0, 0, 0, 0]]]
    exp_nms_scores = [[.95, .9, 0, 0],
                      [.85, .5, .3, 0]]
    exp_nms_classes = [[0, 0, 0, 0],
                       [1, 0, 0, 0]]
    # Masks gathered for the selected detections; padded slots are all-zero,
    # mirroring the zero-padded boxes/scores above.
    exp_nms_masks = [[[[6, 7], [8, 9]],
                      [[0, 1], [2, 3]],
                      [[0, 0], [0, 0]],
                      [[0, 0], [0, 0]]],
                     [[[13, 14], [15, 16]],
                      [[8, 9], [10, 11]],
                      [[10, 11], [12, 13]],
                      [[0, 0], [0, 0]]]]

    nms_dict = post_processing.batch_multiclass_non_max_suppression(
        boxes, scores, score_thresh, iou_thresh,
        max_size_per_class=max_output_size, max_total_size=max_output_size,
        masks=masks)
    with self.test_session() as sess:
      nms_output = sess.run(nms_dict)
      self.assertAllClose(nms_output['detection_boxes'], exp_nms_corners)
      self.assertAllClose(nms_output['detection_scores'], exp_nms_scores)
      self.assertAllClose(nms_output['detection_classes'], exp_nms_classes)
      self.assertAllClose(nms_output['num_detections'], [2, 3])
      self.assertAllClose(nms_output['detection_masks'], exp_nms_masks)
  def test_batch_multiclass_nms_with_masks_and_num_valid_boxes(self):
    # Same boxes/scores/masks as the with_masks test, but num_valid_boxes
    # marks only the first anchor of each image as valid; the expectations
    # below contain only detections derived from that first anchor.
    boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
                          [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
                          [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
                          [[0, 10, 1, 11], [0, 10, 1, 11]]],
                         [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
                          [[0, 100, 1, 101], [0, 100, 1, 101]],
                          [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
                          [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
                        tf.float32)
    scores = tf.constant([[[.9, 0.01], [.75, 0.05],
                           [.6, 0.01], [.95, 0]],
                          [[.5, 0.01], [.3, 0.01],
                           [.01, .85], [.01, .5]]])
    masks = tf.constant([[[[[0, 1], [2, 3]],
                           [[1, 2], [3, 4]]],
                          [[[2, 3], [4, 5]],
                           [[3, 4], [5, 6]]],
                          [[[4, 5], [6, 7]],
                           [[5, 6], [7, 8]]],
                          [[[6, 7], [8, 9]],
                           [[7, 8], [9, 10]]]],
                         [[[[8, 9], [10, 11]],
                           [[9, 10], [11, 12]]],
                          [[[10, 11], [12, 13]],
                           [[11, 12], [13, 14]]],
                          [[[12, 13], [14, 15]],
                           [[13, 14], [15, 16]]],
                          [[[14, 15], [16, 17]],
                           [[15, 16], [17, 18]]]]],
                        tf.float32)
    # Only anchor 0 of each image is treated as valid input.
    num_valid_boxes = tf.constant([1, 1], tf.int32)
    score_thresh = 0.1
    iou_thresh = .5
    max_output_size = 4

    # One detection per image (its first anchor's class-0 box); remaining
    # slots are zero-padded out to max_total_size.
    exp_nms_corners = [[[0, 0, 1, 1],
                        [0, 0, 0, 0],
                        [0, 0, 0, 0],
                        [0, 0, 0, 0]],
                       [[0, 10.1, 1, 11.1],
                        [0, 0, 0, 0],
                        [0, 0, 0, 0],
                        [0, 0, 0, 0]]]
    exp_nms_scores = [[.9, 0, 0, 0],
                      [.5, 0, 0, 0]]
    exp_nms_classes = [[0, 0, 0, 0],
                       [0, 0, 0, 0]]
    exp_nms_masks = [[[[0, 1], [2, 3]],
                      [[0, 0], [0, 0]],
                      [[0, 0], [0, 0]],
                      [[0, 0], [0, 0]]],
                     [[[8, 9], [10, 11]],
                      [[0, 0], [0, 0]],
                      [[0, 0], [0, 0]],
                      [[0, 0], [0, 0]]]]

    nms_dict = post_processing.batch_multiclass_non_max_suppression(
        boxes, scores, score_thresh, iou_thresh,
        max_size_per_class=max_output_size, max_total_size=max_output_size,
        num_valid_boxes=num_valid_boxes, masks=masks)
    with self.test_session() as sess:
      nms_output = sess.run(nms_dict)
      self.assertAllClose(nms_output['detection_boxes'], exp_nms_corners)
      self.assertAllClose(nms_output['detection_scores'], exp_nms_scores)
      self.assertAllClose(nms_output['detection_classes'], exp_nms_classes)
      self.assertAllClose(nms_output['num_detections'], [1, 1])
      self.assertAllClose(nms_output['detection_masks'], exp_nms_masks)
if __name__ == '__main__':
  # Discover and run all test cases in this module.
  tf.test.main()
object_detection/core/prefetcher.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides functions to prefetch tensors to feed into models."""
import
tensorflow
as
tf
def prefetch(tensor_dict, capacity):
  """Builds a queue that asynchronously prefetches `tensor_dict`.

  A PaddingFIFOQueue is created together with a QueueRunner that keeps it
  filled, so consumers can dequeue preprocessed tensor_dicts without
  waiting on the producers. Dequeueing from the returned queue yields a
  dictionary with the same keys as `tensor_dict`. For input pipelines that
  also need batching, refer to core/batcher.py.

  Args:
    tensor_dict: a dictionary mapping names to tensors to prefetch.
    capacity: the maximum number of elements the queue may hold.

  Returns:
    A FIFO prefetcher queue; call `.dequeue()` on it to obtain a
    tensor_dict.
  """
  # Collect the queue signature (names, dtypes, static shapes) in a single
  # pass over the dictionary.
  queue_names = []
  queue_dtypes = []
  queue_shapes = []
  for key, tensor in tensor_dict.items():
    queue_names.append(key)
    queue_dtypes.append(tensor.dtype)
    queue_shapes.append(tensor.get_shape())
  queue = tf.PaddingFIFOQueue(capacity, dtypes=queue_dtypes,
                              shapes=queue_shapes, names=queue_names,
                              name='prefetch_queue')
  enqueue_op = queue.enqueue(tensor_dict)
  runner = tf.train.queue_runner.QueueRunner(queue, [enqueue_op])
  tf.train.queue_runner.add_queue_runner(runner)
  # Export queue occupancy so the fill level is visible in TensorBoard.
  fill_fraction = tf.to_float(queue.size()) * (1. / capacity)
  tf.summary.scalar(
      'queue/%s/fraction_of_%d_full' % (queue.name, capacity), fill_fraction)
  return queue
object_detection/core/prefetcher_test.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.core.prefetcher."""
import
tensorflow
as
tf
from
object_detection.core
import
prefetcher
slim
=
tf
.
contrib
.
slim
class PrefetcherTest(tf.test.TestCase):
  """Tests for prefetcher.prefetch with fully and partially known shapes."""

  def test_prefetch_tensors_with_fully_defined_shapes(self):
    """Prefetched tensors keep their fully defined static shapes."""
    with self.test_session() as sess:
      batch_size = 10
      image_size = 32
      num_batches = 5
      # Counter variable bounds the pipeline: after num_batches increments
      # the enqueue side raises OutOfRangeError, which the queue propagates.
      examples = tf.Variable(tf.constant(0, dtype=tf.int64))
      counter = examples.count_up_to(num_batches)
      image = tf.random_normal([batch_size, image_size,
                                image_size, 3],
                               dtype=tf.float32,
                               name='images')
      label = tf.random_uniform([batch_size, 1], 0, 10,
                                dtype=tf.int32, name='labels')

      prefetch_queue = prefetcher.prefetch(tensor_dict={'counter': counter,
                                                        'image': image,
                                                        'label': label},
                                           capacity=100)
      tensor_dict = prefetch_queue.dequeue()

      self.assertAllEqual(tensor_dict['image'].get_shape().as_list(),
                          [batch_size, image_size, image_size, 3])
      self.assertAllEqual(tensor_dict['label'].get_shape().as_list(),
                          [batch_size, 1])

      # tf.initialize_all_variables is deprecated; use the modern alias.
      tf.global_variables_initializer().run()
      with slim.queues.QueueRunners(sess):
        for _ in range(num_batches):
          results = sess.run(tensor_dict)
          self.assertEqual(results['image'].shape,
                           (batch_size, image_size, image_size, 3))
          self.assertEqual(results['label'].shape, (batch_size, 1))
        # The counter is exhausted: the next dequeue must fail.
        with self.assertRaises(tf.errors.OutOfRangeError):
          sess.run(tensor_dict)

  def test_prefetch_tensors_with_partially_defined_shapes(self):
    """Prefetched tensors keep partially defined (None) static dims."""
    with self.test_session() as sess:
      batch_size = 10
      image_size = 32
      num_batches = 5
      examples = tf.Variable(tf.constant(0, dtype=tf.int64))
      counter = examples.count_up_to(num_batches)
      # Wrapping the sizes in tf.Variable makes the static shape unknown in
      # those dimensions; set_shape then pins only the batch and channel dims.
      image = tf.random_normal([batch_size,
                                tf.Variable(image_size),
                                tf.Variable(image_size), 3],
                               dtype=tf.float32,
                               name='image')
      image.set_shape([batch_size, None, None, 3])
      label = tf.random_uniform([batch_size, tf.Variable(1)], 0,
                                10, dtype=tf.int32, name='label')
      label.set_shape([batch_size, None])

      prefetch_queue = prefetcher.prefetch(tensor_dict={'counter': counter,
                                                        'image': image,
                                                        'label': label},
                                           capacity=100)
      tensor_dict = prefetch_queue.dequeue()

      self.assertAllEqual(tensor_dict['image'].get_shape().as_list(),
                          [batch_size, None, None, 3])
      self.assertAllEqual(tensor_dict['label'].get_shape().as_list(),
                          [batch_size, None])

      # tf.initialize_all_variables is deprecated; use the modern alias.
      tf.global_variables_initializer().run()
      with slim.queues.QueueRunners(sess):
        for _ in range(num_batches):
          results = sess.run(tensor_dict)
          # Runtime shapes are concrete even though static dims were None.
          self.assertEqual(results['image'].shape,
                           (batch_size, image_size, image_size, 3))
          self.assertEqual(results['label'].shape, (batch_size, 1))
        with self.assertRaises(tf.errors.OutOfRangeError):
          sess.run(tensor_dict)
if __name__ == '__main__':
  # Discover and run all test cases in this module.
  tf.test.main()
Prev
1
…
4
5
6
7
8
9
10
11
12
…
16
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment