Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
f282f6ef
Commit
f282f6ef
authored
Jul 05, 2017
by
Alexander Gorban
Browse files
Merge branch 'master' of github.com:tensorflow/models
parents
58a5da7b
a2970b03
Changes
302
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
4705 additions
and
0 deletions
+4705
-0
object_detection/utils/metrics.py
object_detection/utils/metrics.py
+145
-0
object_detection/utils/metrics_test.py
object_detection/utils/metrics_test.py
+79
-0
object_detection/utils/np_box_list.py
object_detection/utils/np_box_list.py
+134
-0
object_detection/utils/np_box_list_ops.py
object_detection/utils/np_box_list_ops.py
+555
-0
object_detection/utils/np_box_list_ops_test.py
object_detection/utils/np_box_list_ops_test.py
+414
-0
object_detection/utils/np_box_list_test.py
object_detection/utils/np_box_list_test.py
+135
-0
object_detection/utils/np_box_ops.py
object_detection/utils/np_box_ops.py
+97
-0
object_detection/utils/np_box_ops_test.py
object_detection/utils/np_box_ops_test.py
+68
-0
object_detection/utils/object_detection_evaluation.py
object_detection/utils/object_detection_evaluation.py
+233
-0
object_detection/utils/object_detection_evaluation_test.py
object_detection/utils/object_detection_evaluation_test.py
+125
-0
object_detection/utils/ops.py
object_detection/utils/ops.py
+651
-0
object_detection/utils/ops_test.py
object_detection/utils/ops_test.py
+1033
-0
object_detection/utils/per_image_evaluation.py
object_detection/utils/per_image_evaluation.py
+260
-0
object_detection/utils/per_image_evaluation_test.py
object_detection/utils/per_image_evaluation_test.py
+212
-0
object_detection/utils/shape_utils.py
object_detection/utils/shape_utils.py
+113
-0
object_detection/utils/shape_utils_test.py
object_detection/utils/shape_utils_test.py
+120
-0
object_detection/utils/static_shape.py
object_detection/utils/static_shape.py
+71
-0
object_detection/utils/static_shape_test.py
object_detection/utils/static_shape_test.py
+50
-0
object_detection/utils/test_utils.py
object_detection/utils/test_utils.py
+137
-0
object_detection/utils/test_utils_test.py
object_detection/utils/test_utils_test.py
+73
-0
No files found.
Too many changes to show.
To preserve performance only
302 of 302+
files are displayed.
Plain diff
Email patch
object_detection/utils/metrics.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Functions for computing metrics like precision, recall, CorLoc and etc."""
from
__future__
import
division
import
numpy
as
np
from
six
import
moves
def compute_precision_recall(scores, labels, num_gt):
  """Compute precision and recall.

  Args:
    scores: A float numpy array representing detection score
    labels: A boolean numpy array representing true/false positive labels
    num_gt: Number of ground truth instances

  Raises:
    ValueError: if the input is not of the correct format

  Returns:
    precision: Fraction of positive instances over detected ones. This value is
      None if no ground truth labels are present.
    recall: Fraction of detected positive instance over all positive instances.
      This value is None if no ground truth labels are present.
  """
  # NOTE: the `np.bool` alias was removed in NumPy 1.24; the builtin `bool`
  # is the canonical dtype spelling and matches np.array(..., dtype=bool).
  if not isinstance(labels, np.ndarray) or labels.dtype != bool or len(
      labels.shape) != 1:
    raise ValueError("labels must be single dimension bool numpy array")

  if not isinstance(scores, np.ndarray) or len(scores.shape) != 1:
    raise ValueError("scores must be single dimension numpy array")

  if num_gt < np.sum(labels):
    raise ValueError("Number of true positives must be smaller than num_gt.")

  if len(scores) != len(labels):
    raise ValueError("scores and labels must be of the same size.")

  if num_gt == 0:
    return None, None

  # Walk detections from most to least confident; the cumulative true/false
  # positive counts then trace out the precision/recall curve.
  sorted_indices = np.argsort(scores)
  sorted_indices = sorted_indices[::-1]
  labels = labels.astype(int)
  true_positive_labels = labels[sorted_indices]
  false_positive_labels = 1 - true_positive_labels
  cum_true_positives = np.cumsum(true_positive_labels)
  cum_false_positives = np.cumsum(false_positive_labels)
  precision = cum_true_positives.astype(float) / (
      cum_true_positives + cum_false_positives)
  recall = cum_true_positives.astype(float) / num_gt
  return precision, recall
def compute_average_precision(precision, recall):
  """Compute Average Precision according to the definition in VOCdevkit.

  Precision is modified to ensure that it does not decrease as recall
  decrease.

  Args:
    precision: A float [N, 1] numpy array of precisions
    recall: A float [N, 1] numpy array of recalls

  Raises:
    ValueError: if the input is not of the correct format

  Returns:
    average_precison: The area under the precision recall curve. NaN if
      precision and recall are None.
  """
  if precision is None:
    if recall is not None:
      raise ValueError("If precision is None, recall must also be None")
    # np.NAN was removed in NumPy 2.0; np.nan is the canonical spelling.
    return np.nan

  if not isinstance(precision, np.ndarray) or not isinstance(
      recall, np.ndarray):
    raise ValueError("precision and recall must be numpy array")
  # np.float (removed in NumPy 1.24) was an alias of the builtin float,
  # whose numpy dtype is float64 — the check is behaviorally identical.
  if precision.dtype != np.float64 or recall.dtype != np.float64:
    raise ValueError("input must be float numpy array.")
  if len(precision) != len(recall):
    raise ValueError("precision and recall must be of the same size.")
  if not precision.size:
    return 0.0
  if np.amin(precision) < 0 or np.amax(precision) > 1:
    raise ValueError("Precision must be in the range of [0, 1].")
  if np.amin(recall) < 0 or np.amax(recall) > 1:
    raise ValueError("recall must be in the range of [0, 1].")
  if not all(recall[i] <= recall[i + 1] for i in range(len(recall) - 1)):
    raise ValueError("recall must be a non-decreasing array")

  # Pad the curve so it spans the full [0, 1] recall range.
  recall = np.concatenate([[0], recall, [1]])
  precision = np.concatenate([[0], precision, [0]])

  # Preprocess precision to be a non-decreasing array (right-to-left running
  # maximum, per the VOC evaluation protocol).
  for i in range(len(precision) - 2, -1, -1):
    precision[i] = np.maximum(precision[i], precision[i + 1])

  # Integrate precision over the recall steps where recall actually changes.
  indices = np.where(recall[1:] != recall[:-1])[0] + 1
  average_precision = np.sum(
      (recall[indices] - recall[indices - 1]) * precision[indices])
  return average_precision
def compute_cor_loc(num_gt_imgs_per_class,
                    num_images_correctly_detected_per_class):
  """Compute CorLoc according to the definition in the following paper.

  https://www.robots.ox.ac.uk/~vgg/rg/papers/deselaers-eccv10.pdf

  Returns nans if there are no ground truth images for a class.

  Args:
    num_gt_imgs_per_class: 1D array, representing number of images containing
      at least one object instance of a particular class
    num_images_correctly_detected_per_class: 1D array, representing number of
      images that are correctly detected at least one object instance of a
      particular class

  Returns:
    corloc_per_class: A float numpy array represents the corloc score of each
      class
  """
  # np.where evaluates the division branch eagerly, so classes with zero
  # ground truth images would trigger spurious divide-by-zero/invalid
  # RuntimeWarnings even though those entries are replaced by NaN. Suppress
  # the warnings; the returned values are unchanged.
  with np.errstate(divide="ignore", invalid="ignore"):
    return np.where(
        num_gt_imgs_per_class == 0, np.nan,
        num_images_correctly_detected_per_class / num_gt_imgs_per_class)
object_detection/utils/metrics_test.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.metrics."""
import
numpy
as
np
import
tensorflow
as
tf
from
object_detection.utils
import
metrics
class MetricsTest(tf.test.TestCase):
  """Unit tests for the functions in object_detection.utils.metrics."""

  def test_compute_cor_loc(self):
    num_gt_imgs_per_class = np.array([100, 1, 5, 1, 1], dtype=int)
    num_images_correctly_detected_per_class = np.array(
        [10, 0, 1, 0, 0], dtype=int)
    corloc = metrics.compute_cor_loc(
        num_gt_imgs_per_class, num_images_correctly_detected_per_class)
    expected_corloc = np.array([0.1, 0, 0.2, 0, 0], dtype=float)
    self.assertTrue(np.allclose(corloc, expected_corloc))

  def test_compute_cor_loc_nans(self):
    # Classes with zero ground truth images must yield NaN CorLoc.
    num_gt_imgs_per_class = np.array([100, 0, 0, 1, 1], dtype=int)
    num_images_correctly_detected_per_class = np.array(
        [10, 0, 1, 0, 0], dtype=int)
    corloc = metrics.compute_cor_loc(
        num_gt_imgs_per_class, num_images_correctly_detected_per_class)
    expected_corloc = np.array([0.1, np.nan, np.nan, 0, 0], dtype=float)
    self.assertAllClose(corloc, expected_corloc)

  def test_compute_precision_recall(self):
    num_gt = 10
    scores = np.array([0.4, 0.3, 0.6, 0.2, 0.7, 0.1], dtype=float)
    labels = np.array([0, 1, 1, 0, 0, 1], dtype=bool)
    # True-positive count accumulated along detections sorted by
    # descending score.
    accumulated_tp_count = np.array([0, 1, 1, 2, 2, 3], dtype=float)
    expected_precision = accumulated_tp_count / np.array([1, 2, 3, 4, 5, 6])
    expected_recall = accumulated_tp_count / num_gt
    precision, recall = metrics.compute_precision_recall(
        scores, labels, num_gt)
    self.assertAllClose(precision, expected_precision)
    self.assertAllClose(recall, expected_recall)

  def test_compute_average_precision(self):
    precision = np.array([0.8, 0.76, 0.9, 0.65, 0.7, 0.5, 0.55, 0],
                         dtype=float)
    recall = np.array([0.3, 0.3, 0.4, 0.4, 0.45, 0.45, 0.5, 0.5],
                      dtype=float)
    # Precision after the right-to-left running-maximum preprocessing.
    processed_precision = np.array([0.9, 0.9, 0.9, 0.7, 0.7, 0.55, 0.55, 0],
                                   dtype=float)
    recall_interval = np.array([0.3, 0, 0.1, 0, 0.05, 0, 0.05, 0],
                               dtype=float)
    expected_mean_ap = np.sum(recall_interval * processed_precision)
    mean_ap = metrics.compute_average_precision(precision, recall)
    self.assertAlmostEqual(expected_mean_ap, mean_ap)

  def test_compute_precision_recall_and_ap_no_groundtruth(self):
    num_gt = 0
    scores = np.array([0.4, 0.3, 0.6, 0.2, 0.7, 0.1], dtype=float)
    labels = np.array([0, 0, 0, 0, 0, 0], dtype=bool)
    expected_precision = None
    expected_recall = None
    precision, recall = metrics.compute_precision_recall(
        scores, labels, num_gt)
    # assertEquals is a deprecated alias removed in Python 3.12; use
    # assertEqual.
    self.assertEqual(precision, expected_precision)
    self.assertEqual(recall, expected_recall)
    ap = metrics.compute_average_precision(precision, recall)
    self.assertTrue(np.isnan(ap))
# Run the test suite when this module is executed directly.
if __name__ == '__main__':
  tf.test.main()
object_detection/utils/np_box_list.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Numpy BoxList classes and functions."""
import
numpy
as
np
from
six
import
moves
class BoxList(object):
  """Box collection.

  BoxList represents a list of bounding boxes as numpy array, where each
  bounding box is represented as a row of 4 numbers,
  [y_min, x_min, y_max, x_max]. It is assumed that all bounding boxes within a
  given list correspond to a single image.

  Optionally, users can add additional related fields (such as
  objectness/classification scores).
  """

  def __init__(self, data):
    """Constructs box collection.

    Args:
      data: a numpy array of shape [N, 4] representing box coordinates

    Raises:
      ValueError: if bbox data is not a numpy array
      ValueError: if invalid dimensions for bbox data
    """
    if not isinstance(data, np.ndarray):
      raise ValueError('data must be a numpy array.')
    if len(data.shape) != 2 or data.shape[1] != 4:
      raise ValueError('Invalid dimensions for box data.')
    if data.dtype != np.float32 and data.dtype != np.float64:
      raise ValueError('Invalid data type for box data: float is required.')
    if not self._is_valid_boxes(data):
      raise ValueError('Invalid box data. data must be a numpy array of '
                       'N*[y_min, x_min, y_max, x_max]')
    self.data = {'boxes': data}

  def num_boxes(self):
    """Return number of boxes held in collections."""
    return self.data['boxes'].shape[0]

  def get_extra_fields(self):
    """Return all non-box fields."""
    return [k for k in self.data.keys() if k != 'boxes']

  def has_field(self, field):
    """Return True if the named field is present in the collection."""
    return field in self.data

  def add_field(self, field, field_data):
    """Add data to a specified field.

    Args:
      field: a string parameter used to speficy a related field to be accessed.
      field_data: a numpy array of [N, ...] representing the data associated
          with the field.

    Raises:
      ValueError: if the field is already exist or the dimension of the field
          data does not matches the number of boxes.
    """
    if self.has_field(field):
      # BUGFIX: the original message lacked a separating space and rendered
      # as e.g. "Field scoresalready exists".
      raise ValueError('Field ' + field + ' already exists')
    if len(field_data.shape) < 1 or field_data.shape[0] != self.num_boxes():
      raise ValueError('Invalid dimensions for field data')
    self.data[field] = field_data

  def get(self):
    """Convenience function for accesssing box coordinates.

    Returns:
      a numpy array of shape [N, 4] representing box corners
    """
    return self.get_field('boxes')

  def get_field(self, field):
    """Accesses data associated with the specified field in the box collection.

    Args:
      field: a string parameter used to speficy a related field to be accessed.

    Returns:
      a numpy 1-d array representing data of an associated field

    Raises:
      ValueError: if invalid field
    """
    if not self.has_field(field):
      raise ValueError('field {} does not exist'.format(field))
    return self.data[field]

  def get_coordinates(self):
    """Get corner coordinates of boxes.

    Returns:
      a list of 4 1-d numpy arrays [y_min, x_min, y_max, x_max]
    """
    box_coordinates = self.get()
    y_min = box_coordinates[:, 0]
    x_min = box_coordinates[:, 1]
    y_max = box_coordinates[:, 2]
    x_max = box_coordinates[:, 3]
    return [y_min, x_min, y_max, x_max]

  def _is_valid_boxes(self, data):
    """Check whether data fullfills the format of N*[ymin, xmin, ymax, xmin].

    Args:
      data: a numpy array of shape [N, 4] representing box coordinates

    Returns:
      a boolean indicating whether all ymax of boxes are equal or greater than
          ymin, and all xmax of boxes are equal or greater than xmin.
    """
    if data.shape[0] > 0:
      # Builtin range replaces six.moves.range; identical on Python 3.
      for i in range(data.shape[0]):
        if data[i, 0] > data[i, 2] or data[i, 1] > data[i, 3]:
          return False
    return True
object_detection/utils/np_box_list_ops.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Bounding Box List operations for Numpy BoxLists.
Example box operations that are supported:
* Areas: compute bounding box areas
* IOU: pairwise intersection-over-union scores
"""
import
numpy
as
np
from
object_detection.utils
import
np_box_list
from
object_detection.utils
import
np_box_ops
class SortOrder(object):
  """Enum class for sort order.

  Attributes:
    ascend: ascend order.
    descend: descend order.
  """
  # Integer tags compared against in sort_by_field; values are arbitrary
  # but distinct.
  ASCEND = 1
  DESCEND = 2
def area(boxlist):
  """Computes area of boxes.

  Args:
    boxlist: BoxList holding N boxes

  Returns:
    a numpy array with shape [N*1] representing box areas
  """
  y_min, x_min, y_max, x_max = boxlist.get_coordinates()
  heights = y_max - y_min
  widths = x_max - x_min
  return heights * widths
def intersection(boxlist1, boxlist2):
  """Compute pairwise intersection areas between boxes.

  Args:
    boxlist1: BoxList holding N boxes
    boxlist2: BoxList holding M boxes

  Returns:
    a numpy array with shape [N*M] representing pairwise intersection area
  """
  # Thin wrapper: unwrap the coordinate arrays and delegate to the
  # raw-ndarray implementation.
  coords1 = boxlist1.get()
  coords2 = boxlist2.get()
  return np_box_ops.intersection(coords1, coords2)
def iou(boxlist1, boxlist2):
  """Computes pairwise intersection-over-union between box collections.

  Args:
    boxlist1: BoxList holding N boxes
    boxlist2: BoxList holding M boxes

  Returns:
    a numpy array with shape [N, M] representing pairwise iou scores.
  """
  # Thin wrapper around the raw-ndarray implementation.
  coords1 = boxlist1.get()
  coords2 = boxlist2.get()
  return np_box_ops.iou(coords1, coords2)
def ioa(boxlist1, boxlist2):
  """Computes pairwise intersection-over-area between box collections.

  Intersection-over-area (ioa) between two boxes box1 and box2 is defined as
  their intersection area over box2's area. Note that ioa is not symmetric,
  that is, IOA(box1, box2) != IOA(box2, box1).

  Args:
    boxlist1: BoxList holding N boxes
    boxlist2: BoxList holding M boxes

  Returns:
    a numpy array with shape [N, M] representing pairwise ioa scores.
  """
  # Thin wrapper around the raw-ndarray implementation.
  coords1 = boxlist1.get()
  coords2 = boxlist2.get()
  return np_box_ops.ioa(coords1, coords2)
def gather(boxlist, indices, fields=None):
  """Gather boxes from BoxList according to indices and return new BoxList.

  By default, Gather returns boxes corresponding to the input index list, as
  well as all additional fields stored in the boxlist (indexing into the
  first dimension).  However one can optionally only gather from a
  subset of fields.

  Args:
    boxlist: BoxList holding N boxes
    indices: a 1-d numpy array of type int_
    fields: (optional) list of fields to also gather from.  If None (default),
        all fields are gathered from.  Pass an empty fields list to only gather
        the box coordinates.

  Returns:
    subboxlist: a BoxList corresponding to the subset of the input BoxList
        specified by indices

  Raises:
    ValueError: if specified field is not contained in boxlist or if the
        indices are not of type int_
  """
  if indices.size:
    # Bounds check only when there is at least one index to gather.
    out_of_range = (np.amax(indices) >= boxlist.num_boxes() or
                    np.amin(indices) < 0)
    if out_of_range:
      raise ValueError('indices are out of valid range.')
  subset = np_box_list.BoxList(boxlist.get()[indices, :])
  if fields is None:
    fields = boxlist.get_extra_fields()
  for field in fields:
    subset.add_field(field, boxlist.get_field(field)[indices, ...])
  return subset
def sort_by_field(boxlist, field, order=SortOrder.DESCEND):
  """Sort boxes and associated fields according to a scalar field.

  A common use case is reordering the boxes according to descending scores.

  Args:
    boxlist: BoxList holding N boxes.
    field: A BoxList field for sorting and reordering the BoxList.
    order: (Optional) 'descend' or 'ascend'. Default is descend.

  Returns:
    sorted_boxlist: A sorted BoxList with the field in the specified order.

  Raises:
    ValueError: if specified field does not exist or is not of single
        dimension.
    ValueError: if the order is not either descend or ascend.
  """
  if not boxlist.has_field(field):
    raise ValueError('Field ' + field + ' does not exist')
  if len(boxlist.get_field(field).shape) != 1:
    # BUGFIX: the original message lacked a separating space and rendered
    # as e.g. "Field scoresshould be single dimension.".
    raise ValueError('Field ' + field + ' should be single dimension.')
  if order != SortOrder.DESCEND and order != SortOrder.ASCEND:
    raise ValueError('Invalid sort order')

  field_to_sort = boxlist.get_field(field)
  sorted_indices = np.argsort(field_to_sort)
  if order == SortOrder.DESCEND:
    # np.argsort is ascending; reverse for descending order.
    sorted_indices = sorted_indices[::-1]
  return gather(boxlist, sorted_indices)
def non_max_suppression(boxlist,
                        max_output_size=10000,
                        iou_threshold=1.0,
                        score_threshold=-10.0):
  """Non maximum suppression.

  This op greedily selects a subset of detection bounding boxes, pruning
  away boxes that have high IOU (intersection over union) overlap (> thresh)
  with already selected boxes. In each iteration, the detected bounding box
  with highest score in the available pool is selected.

  Args:
    boxlist: BoxList holding N boxes.  Must contain a 'scores' field
      representing detection scores. All scores belong to the same class.
    max_output_size: maximum number of retained boxes
    iou_threshold: intersection over union threshold.
    score_threshold: minimum score threshold. Remove the boxes with scores
                     less than this value. Default value is set to -10. A very
                     low threshold to pass pretty much all the boxes, unless
                     the user sets a different score threshold.

  Returns:
    a BoxList holding M boxes where M <= max_output_size

  Raises:
    ValueError: if 'scores' field does not exist
    ValueError: if threshold is not in [0, 1]
    ValueError: if max_output_size < 0
  """
  if not boxlist.has_field('scores'):
    raise ValueError('Field scores does not exist')
  if iou_threshold < 0. or iou_threshold > 1.0:
    raise ValueError('IOU threshold must be in [0, 1]')
  if max_output_size < 0:
    raise ValueError('max_output_size must be bigger than 0.')

  boxlist = filter_scores_greater_than(boxlist, score_threshold)
  if boxlist.num_boxes() == 0:
    return boxlist

  boxlist = sort_by_field(boxlist, 'scores')

  # Prevent further computation if NMS is disabled.
  if iou_threshold == 1.0:
    if boxlist.num_boxes() > max_output_size:
      selected_indices = np.arange(max_output_size)
      return gather(boxlist, selected_indices)
    else:
      return boxlist

  boxes = boxlist.get()
  num_boxes = boxlist.num_boxes()
  # is_index_valid is True only for all remaining valid boxes,
  is_index_valid = np.full(num_boxes, 1, dtype=bool)
  selected_indices = []
  num_output = 0
  # BUGFIX: the original used `xrange`, which does not exist on Python 3 and
  # is not imported from six in this module (NameError at runtime).
  for i in range(num_boxes):
    if num_output < max_output_size:
      if is_index_valid[i]:
        num_output += 1
        selected_indices.append(i)
        is_index_valid[i] = False
        valid_indices = np.where(is_index_valid)[0]
        if valid_indices.size == 0:
          break
        # Suppress all remaining boxes that overlap the newly selected box
        # more than iou_threshold.
        intersect_over_union = np_box_ops.iou(
            np.expand_dims(boxes[i, :], axis=0), boxes[valid_indices, :])
        intersect_over_union = np.squeeze(intersect_over_union, axis=0)
        is_index_valid[valid_indices] = np.logical_and(
            is_index_valid[valid_indices],
            intersect_over_union <= iou_threshold)
  return gather(boxlist, np.array(selected_indices))
def multi_class_non_max_suppression(boxlist, score_thresh, iou_thresh,
                                    max_output_size):
  """Multi-class version of non maximum suppression.

  This op greedily selects a subset of detection bounding boxes, pruning
  away boxes that have high IOU (intersection over union) overlap (> thresh)
  with already selected boxes.  It operates independently for each class for
  which scores are provided (via the scores field of the input box_list),
  pruning boxes with score less than a provided threshold prior to
  applying NMS.

  Args:
    boxlist: BoxList holding N boxes.  Must contain a 'scores' field
      representing detection scores.  This scores field is a tensor that can
      be 1 dimensional (in the case of a single class) or 2-dimensional, which
      which case we assume that it takes the shape [num_boxes, num_classes].
      We further assume that this rank is known statically and that
      scores.shape[1] is also known (i.e., the number of classes is fixed
      and known at graph construction time).
    score_thresh: scalar threshold for score (low scoring boxes are removed).
    iou_thresh: scalar threshold for IOU (boxes that that high IOU overlap
      with previously selected boxes are removed).
    max_output_size: maximum number of retained boxes per class.

  Returns:
    a BoxList holding M boxes with a rank-1 scores field representing
      corresponding scores for each box with scores sorted in decreasing order
      and a rank-1 classes field representing a class label for each box.
  Raises:
    ValueError: if iou_thresh is not in [0, 1] or if input boxlist does not
      have a valid scores field.
  """
  if not 0 <= iou_thresh <= 1.0:
    raise ValueError('thresh must be between 0 and 1')
  if not isinstance(boxlist, np_box_list.BoxList):
    raise ValueError('boxlist must be a BoxList')
  if not boxlist.has_field('scores'):
    raise ValueError('input boxlist must have \'scores\' field')

  scores = boxlist.get_field('scores')
  score_rank = len(scores.shape)
  if score_rank == 1:
    # Single-class case: promote to a [num_boxes, 1] column.
    scores = np.reshape(scores, [-1, 1])
  elif score_rank == 2:
    if scores.shape[1] is None:
      raise ValueError('scores field must have statically defined second '
                       'dimension')
  else:
    raise ValueError('scores field must be of rank 1 or 2')

  num_boxes = boxlist.num_boxes()
  num_scores = scores.shape[0]
  num_classes = scores.shape[1]
  if num_boxes != num_scores:
    raise ValueError('Incorrect scores field length: actual vs expected.')

  per_class_results = []
  for class_idx in range(num_classes):
    # Run single-class NMS on this class's score column.
    candidates = np_box_list.BoxList(boxlist.get())
    candidates.add_field(
        'scores', np.reshape(scores[0:num_scores, class_idx], [-1]))
    above_thresh = filter_scores_greater_than(candidates, score_thresh)
    nms_result = non_max_suppression(above_thresh,
                                     max_output_size=max_output_size,
                                     iou_threshold=iou_thresh,
                                     score_threshold=score_thresh)
    # Tag every surviving box with its class label.
    class_labels = np.zeros_like(nms_result.get_field('scores')) + class_idx
    nms_result.add_field('classes', class_labels)
    per_class_results.append(nms_result)

  merged = concatenate(per_class_results)
  return sort_by_field(merged, 'scores')
def scale(boxlist, y_scale, x_scale):
  """Scale box coordinates in x and y dimensions.

  Args:
    boxlist: BoxList holding N boxes
    y_scale: float
    x_scale: float

  Returns:
    boxlist: BoxList holding N boxes
  """
  y_min, x_min, y_max, x_max = np.array_split(boxlist.get(), 4, axis=1)
  # Scale each coordinate column by its axis factor and re-assemble.
  scaled_coords = np.hstack([y_scale * y_min,
                             x_scale * x_min,
                             y_scale * y_max,
                             x_scale * x_max])
  scaled_boxlist = np_box_list.BoxList(scaled_coords)
  # Carry every extra field over unchanged.
  for field in boxlist.get_extra_fields():
    scaled_boxlist.add_field(field, boxlist.get_field(field))
  return scaled_boxlist
def clip_to_window(boxlist, window):
  """Clip bounding boxes to a window.

  This op clips input bounding boxes (represented by bounding box
  corners) to a window, optionally filtering out boxes that do not
  overlap at all with the window.

  Args:
    boxlist: BoxList holding M_in boxes
    window: a numpy array of shape [4] representing the
            [y_min, x_min, y_max, x_max] window to which the op
            should clip boxes.

  Returns:
    a BoxList holding M_out boxes where M_out <= M_in
  """
  y_min, x_min, y_max, x_max = np.array_split(boxlist.get(), 4, axis=1)
  win_y_min = window[0]
  win_x_min = window[1]
  win_y_max = window[2]
  win_x_max = window[3]

  def _clamp(values, low, high):
    # Clamp a coordinate column into [low, high] using fmin/fmax, keeping
    # the original's NaN-propagation semantics.
    return np.fmax(np.fmin(values, high), low)

  clipped = np_box_list.BoxList(
      np.hstack([_clamp(y_min, win_y_min, win_y_max),
                 _clamp(x_min, win_x_min, win_x_max),
                 _clamp(y_max, win_y_min, win_y_max),
                 _clamp(x_max, win_x_min, win_x_max)]))
  clipped = _copy_extra_fields(clipped, boxlist)
  # Drop boxes that collapsed to zero area after clipping.
  areas = area(clipped)
  nonzero_area_indices = np.reshape(
      np.nonzero(np.greater(areas, 0.0)), [-1]).astype(np.int32)
  return gather(clipped, nonzero_area_indices)
def prune_non_overlapping_boxes(boxlist1, boxlist2, minoverlap=0.0):
  """Prunes the boxes in boxlist1 that overlap less than thresh with boxlist2.

  For each box in boxlist1, we want its IOA to be more than minoverlap with
  at least one of the boxes in boxlist2. If it does not, we remove it.

  Args:
    boxlist1: BoxList holding N boxes.
    boxlist2: BoxList holding M boxes.
    minoverlap: Minimum required overlap between boxes, to count them as
                overlapping.

  Returns:
    A pruned boxlist with size [N', 4].
  """
  # [M, N] matrix: IOA of each boxlist1 box with every boxlist2 box.
  overlap = ioa(boxlist2, boxlist1)
  # Best overlap per boxlist1 box — [N] vector.
  best_overlap = np.amax(overlap, axis=0)
  keep_mask = np.greater_equal(best_overlap, np.array(minoverlap))
  keep_indices = np.nonzero(keep_mask)[0]
  return gather(boxlist1, keep_indices)
def prune_outside_window(boxlist, window):
  """Prunes bounding boxes that fall outside a given window.

  This function prunes bounding boxes that even partially fall outside the
  given window. See also ClipToWindow which only prunes bounding boxes that
  fall completely outside the window, and clips any bounding boxes that
  partially overflow.

  Args:
    boxlist: a BoxList holding M_in boxes.
    window: a numpy array of size 4, representing [ymin, xmin, ymax, xmax]
            of the window.

  Returns:
    pruned_corners: a tensor with shape [M_out, 4] where M_out <= M_in.
    valid_indices: a tensor with shape [M_out] indexing the valid bounding
      boxes in the input tensor.
  """
  y_min, x_min, y_max, x_max = np.array_split(boxlist.get(), 4, axis=1)
  win_y_min = window[0]
  win_x_min = window[1]
  win_y_max = window[2]
  win_x_max = window[3]
  # One violation column per window edge; a box survives only if none of
  # its edges crosses outside the window.
  violations = np.hstack([np.less(y_min, win_y_min),
                          np.less(x_min, win_x_min),
                          np.greater(y_max, win_y_max),
                          np.greater(x_max, win_x_max)])
  valid_indices = np.reshape(
      np.where(np.logical_not(np.max(violations, axis=1))), [-1])
  return gather(boxlist, valid_indices), valid_indices
def concatenate(boxlists, fields=None):
  """Concatenate list of BoxLists.

  This op concatenates a list of input BoxLists into a larger BoxList.  It
  also handles concatenation of BoxList fields as long as the field tensor
  shapes are equal except for the first dimension.

  Args:
    boxlists: list of BoxList objects
    fields: optional list of fields to also concatenate.  By default, all
      fields from the first BoxList in the list are included in the
      concatenation.

  Returns:
    a BoxList with number of boxes equal to
      sum([boxlist.num_boxes() for boxlist in BoxList])
  Raises:
    ValueError: if boxlists is invalid (i.e., is not a list, is empty, or
      contains non BoxList objects), or if requested fields are not contained
      in all boxlists
  """
  if not isinstance(boxlists, list):
    raise ValueError('boxlists should be a list')
  if not boxlists:
    raise ValueError('boxlists should have nonzero length')
  for boxlist in boxlists:
    if not isinstance(boxlist, np_box_list.BoxList):
      raise ValueError('all elements of boxlists should be BoxList objects')

  merged = np_box_list.BoxList(
      np.vstack([boxlist.get() for boxlist in boxlists]))
  if fields is None:
    fields = boxlists[0].get_extra_fields()
  for field in fields:
    # All boxlists must carry the field with identical trailing dimensions.
    first_field_shape = boxlists[0].get_field(field).shape[1:]
    for boxlist in boxlists:
      if not boxlist.has_field(field):
        raise ValueError('boxlist must contain all requested fields')
      field_shape = boxlist.get_field(field).shape[1:]
      if field_shape != first_field_shape:
        raise ValueError('field %s must have same shape for all boxlists '
                         'except for the 0th dimension.' % field)
    merged.add_field(
        field,
        np.concatenate([boxlist.get_field(field) for boxlist in boxlists],
                       axis=0))
  return merged
def filter_scores_greater_than(boxlist, thresh):
  """Filter to keep only boxes with score exceeding a given threshold.

  This op keeps the collection of boxes whose corresponding scores are
  greater than the input threshold.

  Args:
    boxlist: BoxList holding N boxes. Must contain a 'scores' field
      representing detection scores.
    thresh: scalar threshold

  Returns:
    a BoxList holding M boxes where M <= N

  Raises:
    ValueError: if boxlist not a BoxList object or if it does not
      have a scores field
  """
  if not isinstance(boxlist, np_box_list.BoxList):
    raise ValueError('boxlist must be a BoxList')
  if not boxlist.has_field('scores'):
    raise ValueError('input boxlist must have \'scores\' field')
  scores = boxlist.get_field('scores')
  if len(scores.shape) > 2:
    raise ValueError('Scores should have rank 1 or 2')
  if len(scores.shape) == 2 and scores.shape[1] != 1:
    raise ValueError('Scores should have rank 1 or have shape '
                     'consistent with [None, 1]')
  # np.flatnonzero gives flat indices of passing scores. The previous
  # np.reshape(np.where(...), [-1]) flattened the (rows, cols) tuple returned
  # by np.where for rank-2 [N, 1] scores, appending the all-zero column
  # indices and thereby gathering box 0 spuriously; flatnonzero is correct
  # for both rank-1 and [N, 1] inputs.
  high_score_indices = np.flatnonzero(
      np.greater(scores, thresh)).astype(np.int32)
  return gather(boxlist, high_score_indices)
def change_coordinate_frame(boxlist, window):
  """Change coordinate frame of the boxlist to be relative to window's frame.

  Given a window of the form [ymin, xmin, ymax, xmax], changes bounding box
  coordinates from boxlist to be relative to this window (e.g., the min
  corner maps to (0,0) and the max corner maps to (1,1)).

  An example use case is data augmentation: where we are given groundtruth
  boxes (boxlist) and would like to randomly crop the image to some window
  (window). In this case we need to change the coordinate frame of each
  groundtruth box to be relative to this new window.

  Args:
    boxlist: A BoxList object holding N boxes.
    window: a size 4 1-D numpy array.

  Returns:
    Returns a BoxList object with N boxes.
  """
  win_height = window[2] - window[0]
  win_width = window[3] - window[1]
  # Translate so the window's min corner becomes the origin, then rescale
  # so the window spans [0, 1] in both dimensions.
  corner_offset = [window[0], window[1], window[0], window[1]]
  shifted = np_box_list.BoxList(boxlist.get() - corner_offset)
  boxlist_new = scale(shifted, 1.0 / win_height, 1.0 / win_width)
  _copy_extra_fields(boxlist_new, boxlist)
  return boxlist_new
def _copy_extra_fields(boxlist_to_copy_to, boxlist_to_copy_from):
  """Copies the extra fields of boxlist_to_copy_from to boxlist_to_copy_to.

  Args:
    boxlist_to_copy_to: BoxList to which extra fields are copied.
    boxlist_to_copy_from: BoxList from which fields are copied.

  Returns:
    boxlist_to_copy_to with extra fields.
  """
  source = boxlist_to_copy_from
  for name in source.get_extra_fields():
    boxlist_to_copy_to.add_field(name, source.get_field(name))
  return boxlist_to_copy_to
def _update_valid_indices_by_removing_high_iou_boxes(
    selected_indices, is_index_valid, intersect_over_union, threshold):
  """Invalidates boxes whose IOU with any selected box exceeds threshold.

  Args:
    selected_indices: indices (into the columns of intersect_over_union) of
      boxes already selected.
    is_index_valid: boolean numpy array marking currently-valid boxes.
    intersect_over_union: [N, N] pairwise IOU matrix.
    threshold: scalar IOU threshold.

  Returns:
    boolean numpy array; True only where the box was valid and its maximum
    IOU with the selected boxes does not exceed threshold.
  """
  overlap_with_selected = intersect_over_union[:, selected_indices].max(axis=1)
  return np.logical_and(is_index_valid, overlap_with_selected <= threshold)
object_detection/utils/np_box_list_ops_test.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.utils.np_box_list_ops."""
import
numpy
as
np
import
tensorflow
as
tf
from
object_detection.utils
import
np_box_list
from
object_detection.utils
import
np_box_list_ops
class AreaRelatedTest(tf.test.TestCase):
  """Tests for area/intersection/iou/ioa and geometric BoxList ops."""

  def setUp(self):
    # Fixed fixtures in [y_min, x_min, y_max, x_max] order.
    boxes1 = np.array([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]],
                      dtype=float)
    boxes2 = np.array([[3.0, 4.0, 6.0, 8.0],
                       [14.0, 14.0, 15.0, 15.0],
                       [0.0, 0.0, 20.0, 20.0]],
                      dtype=float)
    self.boxlist1 = np_box_list.BoxList(boxes1)
    self.boxlist2 = np_box_list.BoxList(boxes2)

  def test_area(self):
    areas = np_box_list_ops.area(self.boxlist1)
    expected_areas = np.array([6.0, 5.0], dtype=float)
    self.assertAllClose(expected_areas, areas)

  def test_intersection(self):
    intersection = np_box_list_ops.intersection(self.boxlist1, self.boxlist2)
    expected_intersection = np.array([[2.0, 0.0, 6.0], [1.0, 0.0, 5.0]],
                                     dtype=float)
    self.assertAllClose(intersection, expected_intersection)

  def test_iou(self):
    iou = np_box_list_ops.iou(self.boxlist1, self.boxlist2)
    expected_iou = np.array([[2.0 / 16.0, 0.0, 6.0 / 400.0],
                             [1.0 / 16.0, 0.0, 5.0 / 400.0]],
                            dtype=float)
    self.assertAllClose(iou, expected_iou)

  def test_ioa(self):
    # ioa is asymmetric: intersection is normalized by the SECOND argument.
    boxlist1 = np_box_list.BoxList(
        np.array([[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75]],
                 dtype=np.float32))
    boxlist2 = np_box_list.BoxList(
        np.array([[0.5, 0.25, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]],
                 dtype=np.float32))
    ioa21 = np_box_list_ops.ioa(boxlist2, boxlist1)
    expected_ioa21 = np.array([[0.5, 0.0], [1.0, 1.0]], dtype=np.float32)
    self.assertAllClose(ioa21, expected_ioa21)

  def test_scale(self):
    boxlist = np_box_list.BoxList(
        np.array([[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75]],
                 dtype=np.float32))
    # scale(boxlist, y_scale=2.0, x_scale=3.0).
    boxlist_scaled = np_box_list_ops.scale(boxlist, 2.0, 3.0)
    expected_boxlist_scaled = np_box_list.BoxList(
        np.array([[0.5, 0.75, 1.5, 2.25], [0.0, 0.0, 1.0, 2.25]],
                 dtype=np.float32))
    self.assertAllClose(expected_boxlist_scaled.get(), boxlist_scaled.get())

  def test_clip_to_window(self):
    # Third box extends past the [0, 0, 1, 1] window and gets clipped.
    boxlist = np_box_list.BoxList(
        np.array([[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75],
                  [-0.2, -0.3, 0.7, 1.5]],
                 dtype=np.float32))
    boxlist_clipped = np_box_list_ops.clip_to_window(boxlist,
                                                     [0.0, 0.0, 1.0, 1.0])
    expected_boxlist_clipped = np_box_list.BoxList(
        np.array([[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75],
                  [0.0, 0.0, 0.7, 1.0]],
                 dtype=np.float32))
    self.assertAllClose(expected_boxlist_clipped.get(), boxlist_clipped.get())

  def test_prune_outside_window(self):
    # Boxes not fully inside the window are removed (not clipped).
    boxlist = np_box_list.BoxList(
        np.array([[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75],
                  [-0.2, -0.3, 0.7, 1.5]],
                 dtype=np.float32))
    boxlist_pruned, _ = np_box_list_ops.prune_outside_window(
        boxlist, [0.0, 0.0, 1.0, 1.0])
    expected_boxlist_pruned = np_box_list.BoxList(
        np.array([[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75]],
                 dtype=np.float32))
    self.assertAllClose(expected_boxlist_pruned.get(), boxlist_pruned.get())

  def test_concatenate(self):
    boxlist1 = np_box_list.BoxList(
        np.array([[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75]],
                 dtype=np.float32))
    boxlist2 = np_box_list.BoxList(
        np.array([[0.5, 0.25, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]],
                 dtype=np.float32))
    boxlists = [boxlist1, boxlist2]
    boxlist_concatenated = np_box_list_ops.concatenate(boxlists)
    boxlist_concatenated_expected = np_box_list.BoxList(
        np.array([[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75],
                  [0.5, 0.25, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]],
                 dtype=np.float32))
    self.assertAllClose(boxlist_concatenated_expected.get(),
                        boxlist_concatenated.get())

  def test_change_coordinate_frame(self):
    boxlist = np_box_list.BoxList(
        np.array([[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75]],
                 dtype=np.float32))
    boxlist_coord = np_box_list_ops.change_coordinate_frame(
        boxlist, np.array([0, 0, 0.5, 0.5], dtype=np.float32))
    expected_boxlist_coord = np_box_list.BoxList(
        np.array([[0.5, 0.5, 1.5, 1.5], [0, 0, 1.0, 1.5]], dtype=np.float32))
    self.assertAllClose(boxlist_coord.get(), expected_boxlist_coord.get())

  def test_filter_scores_greater_than(self):
    boxlist = np_box_list.BoxList(
        np.array([[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75]],
                 dtype=np.float32))
    boxlist.add_field('scores', np.array([0.8, 0.2], np.float32))
    boxlist_greater = np_box_list_ops.filter_scores_greater_than(boxlist, 0.5)
    expected_boxlist_greater = np_box_list.BoxList(
        np.array([[0.25, 0.25, 0.75, 0.75]], dtype=np.float32))
    self.assertAllClose(boxlist_greater.get(), expected_boxlist_greater.get())
class GatherOpsTest(tf.test.TestCase):
  """Tests for np_box_list_ops.gather with and without explicit fields."""

  def setUp(self):
    boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
                      [0.0, 0.0, 20.0, 20.0]],
                     dtype=float)
    self.boxlist = np_box_list.BoxList(boxes)
    self.boxlist.add_field('scores', np.array([0.5, 0.7, 0.9], dtype=float))
    self.boxlist.add_field('labels',
                           np.array([[0, 0, 0, 1, 0], [0, 1, 0, 0, 0],
                                     [0, 0, 0, 0, 1]],
                                    dtype=int))

  def test_gather_with_out_of_range_indices(self):
    # Index 3 exceeds the 3-box list.
    indices = np.array([3, 1], dtype=int)
    boxlist = self.boxlist
    with self.assertRaises(ValueError):
      np_box_list_ops.gather(boxlist, indices)

  def test_gather_with_invalid_multidimensional_indices(self):
    # Indices must be rank 1.
    indices = np.array([[0, 1], [1, 2]], dtype=int)
    boxlist = self.boxlist
    with self.assertRaises(ValueError):
      np_box_list_ops.gather(boxlist, indices)

  def test_gather_without_fields_specified(self):
    # With fields=None, all extra fields are gathered along with the boxes.
    indices = np.array([2, 0, 1], dtype=int)
    boxlist = self.boxlist
    subboxlist = np_box_list_ops.gather(boxlist, indices)

    expected_scores = np.array([0.9, 0.5, 0.7], dtype=float)
    self.assertAllClose(expected_scores, subboxlist.get_field('scores'))

    expected_boxes = np.array([[0.0, 0.0, 20.0, 20.0], [3.0, 4.0, 6.0, 8.0],
                               [14.0, 14.0, 15.0, 15.0]],
                              dtype=float)
    self.assertAllClose(expected_boxes, subboxlist.get())

    expected_labels = np.array([[0, 0, 0, 0, 1], [0, 0, 0, 1, 0],
                                [0, 1, 0, 0, 0]],
                               dtype=int)
    self.assertAllClose(expected_labels, subboxlist.get_field('labels'))

  def test_gather_with_invalid_field_specified(self):
    indices = np.array([2, 0, 1], dtype=int)
    boxlist = self.boxlist

    # A bare string field (not a list) is rejected — presumably fields must
    # be a list; see the gather implementation.
    with self.assertRaises(ValueError):
      np_box_list_ops.gather(boxlist, indices, 'labels')

    # A field name not present on the boxlist is rejected.
    with self.assertRaises(ValueError):
      np_box_list_ops.gather(boxlist, indices, ['objectness'])

  def test_gather_with_fields_specified(self):
    # Only the requested field survives; 'scores' is dropped.
    indices = np.array([2, 0, 1], dtype=int)
    boxlist = self.boxlist
    subboxlist = np_box_list_ops.gather(boxlist, indices, ['labels'])

    self.assertFalse(subboxlist.has_field('scores'))

    expected_boxes = np.array([[0.0, 0.0, 20.0, 20.0], [3.0, 4.0, 6.0, 8.0],
                               [14.0, 14.0, 15.0, 15.0]],
                              dtype=float)
    self.assertAllClose(expected_boxes, subboxlist.get())

    expected_labels = np.array([[0, 0, 0, 0, 1], [0, 0, 0, 1, 0],
                                [0, 1, 0, 0, 0]],
                               dtype=int)
    self.assertAllClose(expected_labels, subboxlist.get_field('labels'))
class SortByFieldTest(tf.test.TestCase):
  """Tests for np_box_list_ops.sort_by_field ordering and validation."""

  def setUp(self):
    boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
                      [0.0, 0.0, 20.0, 20.0]],
                     dtype=float)
    self.boxlist = np_box_list.BoxList(boxes)
    self.boxlist.add_field('scores', np.array([0.5, 0.9, 0.4], dtype=float))
    self.boxlist.add_field('labels',
                           np.array([[0, 0, 0, 1, 0], [0, 1, 0, 0, 0],
                                     [0, 0, 0, 0, 1]],
                                    dtype=int))

  def test_with_invalid_field(self):
    # Missing field rejected.
    with self.assertRaises(ValueError):
      np_box_list_ops.sort_by_field(self.boxlist, 'objectness')
    # Multi-dimensional field rejected — sorting needs a scalar per box.
    with self.assertRaises(ValueError):
      np_box_list_ops.sort_by_field(self.boxlist, 'labels')

  def test_with_invalid_sorting_order(self):
    # Order must be a SortOrder enum value, not a string.
    with self.assertRaises(ValueError):
      np_box_list_ops.sort_by_field(self.boxlist, 'scores', 'Descending')

  def test_with_descending_sorting(self):
    # Descending is the default order.
    sorted_boxlist = np_box_list_ops.sort_by_field(self.boxlist, 'scores')

    expected_boxes = np.array([[14.0, 14.0, 15.0, 15.0], [3.0, 4.0, 6.0, 8.0],
                               [0.0, 0.0, 20.0, 20.0]],
                              dtype=float)
    self.assertAllClose(expected_boxes, sorted_boxlist.get())

    expected_scores = np.array([0.9, 0.5, 0.4], dtype=float)
    self.assertAllClose(expected_scores, sorted_boxlist.get_field('scores'))

  def test_with_ascending_sorting(self):
    sorted_boxlist = np_box_list_ops.sort_by_field(
        self.boxlist, 'scores', np_box_list_ops.SortOrder.ASCEND)

    expected_boxes = np.array([[0.0, 0.0, 20.0, 20.0],
                               [3.0, 4.0, 6.0, 8.0],
                               [14.0, 14.0, 15.0, 15.0],],
                              dtype=float)
    self.assertAllClose(expected_boxes, sorted_boxlist.get())

    expected_scores = np.array([0.4, 0.5, 0.9], dtype=float)
    self.assertAllClose(expected_scores, sorted_boxlist.get_field('scores'))
class NonMaximumSuppressionTest(tf.test.TestCase):
  """Tests for non_max_suppression and multi_class_non_max_suppression."""

  def setUp(self):
    # Three clusters: boxes 0-2 overlap around x in [0, 1], boxes 3-4 around
    # x in [10, 11], and box 5 stands alone at x in [100, 101].
    self._boxes = np.array([[0, 0, 1, 1],
                            [0, 0.1, 1, 1.1],
                            [0, -0.1, 1, 0.9],
                            [0, 10, 1, 11],
                            [0, 10.1, 1, 11.1],
                            [0, 100, 1, 101]],
                           dtype=float)
    self._boxlist = np_box_list.BoxList(self._boxes)

  def test_with_no_scores_field(self):
    boxlist = np_box_list.BoxList(self._boxes)
    max_output_size = 3
    iou_threshold = 0.5
    with self.assertRaises(ValueError):
      np_box_list_ops.non_max_suppression(
          boxlist, max_output_size, iou_threshold)

  def test_nms_disabled_max_output_size_equals_three(self):
    boxlist = np_box_list.BoxList(self._boxes)
    boxlist.add_field('scores',
                      np.array([.9, .75, .6, .95, .2, .3], dtype=float))
    max_output_size = 3
    iou_threshold = 1.  # No NMS
    # With IOU threshold 1 nothing is suppressed; the top 3 by score remain.
    expected_boxes = np.array([[0, 10, 1, 11], [0, 0, 1, 1], [0, 0.1, 1, 1.1]],
                              dtype=float)
    nms_boxlist = np_box_list_ops.non_max_suppression(
        boxlist, max_output_size, iou_threshold)
    self.assertAllClose(nms_boxlist.get(), expected_boxes)

  def test_select_from_three_clusters(self):
    boxlist = np_box_list.BoxList(self._boxes)
    boxlist.add_field('scores',
                      np.array([.9, .75, .6, .95, .2, .3], dtype=float))
    max_output_size = 3
    iou_threshold = 0.5
    # One survivor per cluster: the highest-scoring box of each.
    expected_boxes = np.array([[0, 10, 1, 11], [0, 0, 1, 1], [0, 100, 1, 101]],
                              dtype=float)
    nms_boxlist = np_box_list_ops.non_max_suppression(
        boxlist, max_output_size, iou_threshold)
    self.assertAllClose(nms_boxlist.get(), expected_boxes)

  def test_select_at_most_two_from_three_clusters(self):
    boxlist = np_box_list.BoxList(self._boxes)
    boxlist.add_field('scores',
                      np.array([.9, .75, .6, .95, .5, .3], dtype=float))
    max_output_size = 2
    iou_threshold = 0.5
    expected_boxes = np.array([[0, 10, 1, 11], [0, 0, 1, 1]], dtype=float)
    nms_boxlist = np_box_list_ops.non_max_suppression(
        boxlist, max_output_size, iou_threshold)
    self.assertAllClose(nms_boxlist.get(), expected_boxes)

  def test_select_at_most_thirty_from_three_clusters(self):
    # max_output_size larger than the number of survivors is a no-op cap.
    boxlist = np_box_list.BoxList(self._boxes)
    boxlist.add_field('scores',
                      np.array([.9, .75, .6, .95, .5, .3], dtype=float))
    max_output_size = 30
    iou_threshold = 0.5
    expected_boxes = np.array([[0, 10, 1, 11], [0, 0, 1, 1], [0, 100, 1, 101]],
                              dtype=float)
    nms_boxlist = np_box_list_ops.non_max_suppression(
        boxlist, max_output_size, iou_threshold)
    self.assertAllClose(nms_boxlist.get(), expected_boxes)

  def test_select_from_ten_indentical_boxes(self):
    # NOTE(review): "indentical" is a typo for "identical" in this method
    # name; kept as-is since renaming would alter the test id.
    boxes = np.array(10 * [[0, 0, 1, 1]], dtype=float)
    boxlist = np_box_list.BoxList(boxes)
    boxlist.add_field('scores', np.array(10 * [0.8]))
    iou_threshold = .5
    max_output_size = 3
    # All duplicates collapse to a single box.
    expected_boxes = np.array([[0, 0, 1, 1]], dtype=float)
    nms_boxlist = np_box_list_ops.non_max_suppression(
        boxlist, max_output_size, iou_threshold)
    self.assertAllClose(nms_boxlist.get(), expected_boxes)

  def test_different_iou_threshold(self):
    # Two pairs of boxes whose within-pair IOUs fall between the thresholds
    # exercised below (0.4, 0.5, 0.8), so each threshold keeps a different
    # subset.
    boxes = np.array([[0, 0, 20, 100], [0, 0, 20, 80],
                      [200, 200, 210, 300], [200, 200, 210, 250]],
                     dtype=float)
    boxlist = np_box_list.BoxList(boxes)
    boxlist.add_field('scores', np.array([0.9, 0.8, 0.7, 0.6]))
    max_output_size = 4

    iou_threshold = .4
    expected_boxes = np.array([[0, 0, 20, 100],
                               [200, 200, 210, 300],],
                              dtype=float)
    nms_boxlist = np_box_list_ops.non_max_suppression(
        boxlist, max_output_size, iou_threshold)
    self.assertAllClose(nms_boxlist.get(), expected_boxes)

    iou_threshold = .5
    expected_boxes = np.array([[0, 0, 20, 100], [200, 200, 210, 300],
                               [200, 200, 210, 250]],
                              dtype=float)
    nms_boxlist = np_box_list_ops.non_max_suppression(
        boxlist, max_output_size, iou_threshold)
    self.assertAllClose(nms_boxlist.get(), expected_boxes)

    iou_threshold = .8
    expected_boxes = np.array([[0, 0, 20, 100], [0, 0, 20, 80],
                               [200, 200, 210, 300], [200, 200, 210, 250]],
                              dtype=float)
    nms_boxlist = np_box_list_ops.non_max_suppression(
        boxlist, max_output_size, iou_threshold)
    self.assertAllClose(nms_boxlist.get(), expected_boxes)

  def test_multiclass_nms(self):
    boxlist = np_box_list.BoxList(
        np.array([[0.2, 0.4, 0.8, 0.8], [0.4, 0.2, 0.8, 0.8],
                  [0.6, 0.0, 1.0, 1.0]],
                 dtype=np.float32))
    # One score column per class; NMS runs independently per class, which is
    # why more than max_output_size boxes can come back in total.
    scores = np.array([[-0.2, 0.1, 0.5, -0.4, 0.3],
                       [0.7, -0.7, 0.6, 0.2, -0.9],
                       [0.4, 0.34, -0.9, 0.2, 0.31]],
                      dtype=np.float32)
    boxlist.add_field('scores', scores)
    boxlist_clean = np_box_list_ops.multi_class_non_max_suppression(
        boxlist, score_thresh=0.25, iou_thresh=0.1, max_output_size=3)

    scores_clean = boxlist_clean.get_field('scores')
    classes_clean = boxlist_clean.get_field('classes')
    boxes = boxlist_clean.get()
    expected_scores = np.array([0.7, 0.6, 0.34, 0.31])
    expected_classes = np.array([0, 2, 1, 4])
    expected_boxes = np.array([[0.4, 0.2, 0.8, 0.8],
                               [0.4, 0.2, 0.8, 0.8],
                               [0.6, 0.0, 1.0, 1.0],
                               [0.6, 0.0, 1.0, 1.0]],
                              dtype=np.float32)
    self.assertAllClose(scores_clean, expected_scores)
    self.assertAllClose(classes_clean, expected_classes)
    self.assertAllClose(boxes, expected_boxes)
if __name__ == '__main__':
  # Run all test cases when this module is executed directly.
  tf.test.main()
object_detection/utils/np_box_list_test.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.utils.np_box_list_test."""
import
numpy
as
np
import
tensorflow
as
tf
from
object_detection.utils
import
np_box_list
class BoxListTest(tf.test.TestCase):
  """Tests for BoxList construction and basic field access."""

  def test_invalid_box_data(self):
    # Plain list (not an ndarray) is rejected.
    with self.assertRaises(ValueError):
      np_box_list.BoxList([0, 0, 1, 1])

    # Integer dtype is rejected.
    with self.assertRaises(ValueError):
      np_box_list.BoxList(np.array([[0, 0, 1, 1]], dtype=int))

    # Rank-1 data (not [N, 4]) is rejected.
    with self.assertRaises(ValueError):
      np_box_list.BoxList(np.array([0, 1, 1, 3, 4], dtype=float))

    # Second row has y_min > y_max — presumably the degenerate-box case
    # being exercised here.
    with self.assertRaises(ValueError):
      np_box_list.BoxList(np.array([[0, 1, 1, 3], [3, 1, 1, 5]],
                                   dtype=float))

  def test_has_field_with_existed_field(self):
    boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
                      [0.0, 0.0, 20.0, 20.0]],
                     dtype=float)
    boxlist = np_box_list.BoxList(boxes)
    # The coordinates themselves live in the implicit 'boxes' field.
    self.assertTrue(boxlist.has_field('boxes'))

  def test_has_field_with_nonexisted_field(self):
    boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
                      [0.0, 0.0, 20.0, 20.0]],
                     dtype=float)
    boxlist = np_box_list.BoxList(boxes)
    self.assertFalse(boxlist.has_field('scores'))

  def test_get_field_with_existed_field(self):
    boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
                      [0.0, 0.0, 20.0, 20.0]],
                     dtype=float)
    boxlist = np_box_list.BoxList(boxes)
    self.assertTrue(np.allclose(boxlist.get_field('boxes'), boxes))

  def test_get_field_with_nonexited_field(self):
    boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
                      [0.0, 0.0, 20.0, 20.0]],
                     dtype=float)
    boxlist = np_box_list.BoxList(boxes)
    with self.assertRaises(ValueError):
      boxlist.get_field('scores')
class AddExtraFieldTest(tf.test.TestCase):
  """Tests for BoxList.add_field, field accessors and num_boxes."""

  def setUp(self):
    boxes = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
                      [0.0, 0.0, 20.0, 20.0]],
                     dtype=float)
    self.boxlist = np_box_list.BoxList(boxes)

  def test_add_already_existed_field(self):
    # 'boxes' is the implicit field holding the coordinates; re-adding it
    # must fail.
    with self.assertRaises(ValueError):
      self.boxlist.add_field('boxes',
                             np.array([[0, 0, 0, 1, 0]], dtype=float))

  def test_add_invalid_field_data(self):
    # Field data must have the same leading dimension as the number of boxes.
    with self.assertRaises(ValueError):
      self.boxlist.add_field('scores', np.array([0.5, 0.7], dtype=float))
    with self.assertRaises(ValueError):
      self.boxlist.add_field('scores',
                             np.array([0.5, 0.7, 0.9, 0.1], dtype=float))

  def test_add_single_dimensional_field_data(self):
    boxlist = self.boxlist
    scores = np.array([0.5, 0.7, 0.9], dtype=float)
    boxlist.add_field('scores', scores)
    self.assertTrue(np.allclose(scores, self.boxlist.get_field('scores')))

  def test_add_multi_dimensional_field_data(self):
    boxlist = self.boxlist
    labels = np.array([[0, 0, 0, 1, 0], [0, 1, 0, 0, 0], [0, 0, 0, 0, 1]],
                      dtype=int)
    boxlist.add_field('labels', labels)
    self.assertTrue(np.allclose(labels, self.boxlist.get_field('labels')))

  def test_get_extra_fields(self):
    boxlist = self.boxlist
    self.assertSameElements(boxlist.get_extra_fields(), [])

    scores = np.array([0.5, 0.7, 0.9], dtype=float)
    boxlist.add_field('scores', scores)
    self.assertSameElements(boxlist.get_extra_fields(), ['scores'])

    labels = np.array([[0, 0, 0, 1, 0], [0, 1, 0, 0, 0], [0, 0, 0, 0, 1]],
                      dtype=int)
    boxlist.add_field('labels', labels)
    self.assertSameElements(boxlist.get_extra_fields(), ['scores', 'labels'])

  def test_get_coordinates(self):
    y_min, x_min, y_max, x_max = self.boxlist.get_coordinates()

    expected_y_min = np.array([3.0, 14.0, 0.0], dtype=float)
    expected_x_min = np.array([4.0, 14.0, 0.0], dtype=float)
    expected_y_max = np.array([6.0, 15.0, 20.0], dtype=float)
    expected_x_max = np.array([8.0, 15.0, 20.0], dtype=float)

    self.assertTrue(np.allclose(y_min, expected_y_min))
    self.assertTrue(np.allclose(x_min, expected_x_min))
    self.assertTrue(np.allclose(y_max, expected_y_max))
    self.assertTrue(np.allclose(x_max, expected_x_max))

  def test_num_boxes(self):
    boxes = np.array([[0., 0., 100., 100.], [10., 30., 50., 70.]], dtype=float)
    boxlist = np_box_list.BoxList(boxes)
    expected_num_boxes = 2
    # assertEqual: assertEquals is a deprecated alias (removed in
    # Python 3.12's unittest).
    self.assertEqual(boxlist.num_boxes(), expected_num_boxes)
if __name__ == '__main__':
  # Run all test cases when this module is executed directly.
  tf.test.main()
object_detection/utils/np_box_ops.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Operations for [N, 4] numpy arrays representing bounding boxes.
Example box operations that are supported:
* Areas: compute bounding box areas
* IOU: pairwise intersection-over-union scores
"""
import
numpy
as
np
def area(boxes):
  """Computes area of boxes.

  Args:
    boxes: Numpy array with shape [N, 4] holding N boxes in
      [y_min, x_min, y_max, x_max] order.

  Returns:
    a numpy array with shape [N] representing box areas
  """
  heights = boxes[:, 2] - boxes[:, 0]
  widths = boxes[:, 3] - boxes[:, 1]
  return heights * widths
def intersection(boxes1, boxes2):
  """Compute pairwise intersection areas between boxes.

  Args:
    boxes1: a numpy array with shape [N, 4] holding N boxes
    boxes2: a numpy array with shape [M, 4] holding M boxes

  Returns:
    a numpy array with shape [N, M] representing pairwise intersection area
  """
  y_min1, x_min1, y_max1, x_max1 = np.split(boxes1, 4, axis=1)
  y_min2, x_min2, y_max2, x_max2 = np.split(boxes2, 4, axis=1)

  # Broadcasting [N, 1] coordinates against transposed [1, M] coordinates
  # yields all-pairs overlap extents; negative extents mean no overlap and
  # are clamped to zero.
  overlap_heights = np.maximum(
      0.0,
      np.minimum(y_max1, y_max2.T) - np.maximum(y_min1, y_min2.T))
  overlap_widths = np.maximum(
      0.0,
      np.minimum(x_max1, x_max2.T) - np.maximum(x_min1, x_min2.T))
  return overlap_heights * overlap_widths
def iou(boxes1, boxes2):
  """Computes pairwise intersection-over-union between box collections.

  Args:
    boxes1: a numpy array with shape [N, 4] holding N boxes.
    boxes2: a numpy array with shape [M, 4] holding M boxes.

  Returns:
    a numpy array with shape [N, M] representing pairwise iou scores.
  """
  intersect = intersection(boxes1, boxes2)
  # union(a, b) = area(a) + area(b) - intersection(a, b), broadcast to [N, M].
  areas1 = np.expand_dims(area(boxes1), axis=1)
  areas2 = np.expand_dims(area(boxes2), axis=0)
  return intersect / (areas1 + areas2 - intersect)
def ioa(boxes1, boxes2):
  """Computes pairwise intersection-over-area between box collections.

  Intersection-over-area (ioa) between two boxes box1 and box2 is defined as
  their intersection area over box2's area. Note that ioa is not symmetric,
  that is, IOA(box1, box2) != IOA(box2, box1).

  Args:
    boxes1: a numpy array with shape [N, 4] holding N boxes.
    boxes2: a numpy array with shape [M, 4] holding M boxes.

  Returns:
    a numpy array with shape [N, M] representing pairwise ioa scores.
  """
  intersect = intersection(boxes1, boxes2)
  # Normalize each column by the area of the corresponding boxes2 entry.
  return intersect / np.expand_dims(area(boxes2), axis=0)
object_detection/utils/np_box_ops_test.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.np_box_ops."""
import
numpy
as
np
import
tensorflow
as
tf
from
object_detection.utils
import
np_box_ops
class BoxOpsTests(tf.test.TestCase):
  """Tests for the pure-numpy box ops: area, intersection, iou, ioa."""

  def setUp(self):
    boxes1 = np.array([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]],
                      dtype=float)
    boxes2 = np.array([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
                       [0.0, 0.0, 20.0, 20.0]],
                      dtype=float)
    self.boxes1 = boxes1
    self.boxes2 = boxes2

  def testArea(self):
    areas = np_box_ops.area(self.boxes1)
    expected_areas = np.array([6.0, 5.0], dtype=float)
    self.assertAllClose(expected_areas, areas)

  def testIntersection(self):
    intersection = np_box_ops.intersection(self.boxes1, self.boxes2)
    expected_intersection = np.array([[2.0, 0.0, 6.0], [1.0, 0.0, 5.0]],
                                     dtype=float)
    self.assertAllClose(intersection, expected_intersection)

  def testIOU(self):
    iou = np_box_ops.iou(self.boxes1, self.boxes2)
    expected_iou = np.array([[2.0 / 16.0, 0.0, 6.0 / 400.0],
                             [1.0 / 16.0, 0.0, 5.0 / 400.0]],
                            dtype=float)
    self.assertAllClose(iou, expected_iou)

  def testIOA(self):
    # ioa normalizes by the area of the SECOND argument's boxes.
    boxes1 = np.array([[0.25, 0.25, 0.75, 0.75], [0.0, 0.0, 0.5, 0.75]],
                      dtype=np.float32)
    boxes2 = np.array([[0.5, 0.25, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]],
                      dtype=np.float32)
    ioa21 = np_box_ops.ioa(boxes2, boxes1)
    expected_ioa21 = np.array([[0.5, 0.0], [1.0, 1.0]],
                              dtype=np.float32)
    self.assertAllClose(ioa21, expected_ioa21)
if __name__ == '__main__':
  # Run all test cases when this module is executed directly.
  tf.test.main()
object_detection/utils/object_detection_evaluation.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""object_detection_evaluation module.
ObjectDetectionEvaluation is a class which manages ground truth information of a
object detection dataset, and computes frequently used detection metrics such as
Precision, Recall, CorLoc of the provided detection results.
It supports the following operations:
1) Add ground truth information of images sequentially.
2) Add detection result of images sequentially.
3) Evaluate detection metrics on already inserted detection results.
4) Write evaluation result into a pickle file for future processing or
visualization.
Note: This module operates on numpy boxes and box lists.
"""
import
logging
import
numpy
as
np
from
object_detection.utils
import
metrics
from
object_detection.utils
import
per_image_evaluation
class
ObjectDetectionEvaluation
(
object
):
"""Evaluate Object Detection Result."""
def __init__(self,
             num_groundtruth_classes,
             matching_iou_threshold=0.5,
             nms_iou_threshold=1.0,
             nms_max_output_boxes=10000):
  """Initializes evaluation state.

  Args:
    num_groundtruth_classes: number of ground truth classes; sizes all
      per-class accumulators.
    matching_iou_threshold: forwarded to
      per_image_evaluation.PerImageEvaluation.
    nms_iou_threshold: forwarded to PerImageEvaluation.
    nms_max_output_boxes: forwarded to PerImageEvaluation.
  """
  self.per_image_eval = per_image_evaluation.PerImageEvaluation(
      num_groundtruth_classes, matching_iou_threshold, nms_iou_threshold,
      nms_max_output_boxes)
  self.num_class = num_groundtruth_classes

  # Ground truth state: per-image dicts keyed by image key, plus per-class
  # instance/image counts.
  self.groundtruth_boxes = {}
  self.groundtruth_class_labels = {}
  self.groundtruth_is_difficult_list = {}
  self.num_gt_instances_per_class = np.zeros(self.num_class, dtype=int)
  self.num_gt_imgs_per_class = np.zeros(self.num_class, dtype=int)

  # Detection state and per-class metric accumulators.
  self.detection_keys = set()
  self.scores_per_class = [[] for _ in range(self.num_class)]
  self.tp_fp_labels_per_class = [[] for _ in range(self.num_class)]
  self.num_images_correctly_detected_per_class = np.zeros(self.num_class)
  # Filled with NaN — presumably marking "not yet evaluated"; note that
  # clear_detections() resets this array with zeros instead (TODO confirm
  # whether that asymmetry is intentional).
  self.average_precision_per_class = np.empty(self.num_class, dtype=float)
  self.average_precision_per_class.fill(np.nan)
  self.precisions_per_class = []
  self.recalls_per_class = []
  self.corloc_per_class = np.ones(self.num_class, dtype=float)
def
clear_detections
(
self
):
self
.
detection_keys
=
{}
self
.
scores_per_class
=
[[]
for
_
in
range
(
self
.
num_class
)]
self
.
tp_fp_labels_per_class
=
[[]
for
_
in
range
(
self
.
num_class
)]
self
.
num_images_correctly_detected_per_class
=
np
.
zeros
(
self
.
num_class
)
self
.
average_precision_per_class
=
np
.
zeros
(
self
.
num_class
,
dtype
=
float
)
self
.
precisions_per_class
=
[]
self
.
recalls_per_class
=
[]
self
.
corloc_per_class
=
np
.
ones
(
self
.
num_class
,
dtype
=
float
)
def
add_single_ground_truth_image_info
(
self
,
image_key
,
groundtruth_boxes
,
groundtruth_class_labels
,
groundtruth_is_difficult_list
=
None
):
"""Add ground truth info of a single image into the evaluation database.
Args:
image_key: sha256 key of image content
groundtruth_boxes: A numpy array of shape [M, 4] representing object box
coordinates[y_min, x_min, y_max, x_max]
groundtruth_class_labels: A 1-d numpy array of length M representing class
labels
groundtruth_is_difficult_list: A length M numpy boolean array denoting
whether a ground truth box is a difficult instance or not. To support
the case that no boxes are difficult, it is by default set as None.
"""
if
image_key
in
self
.
groundtruth_boxes
:
logging
.
warn
(
'image %s has already been added to the ground truth database.'
,
image_key
)
return
self
.
groundtruth_boxes
[
image_key
]
=
groundtruth_boxes
self
.
groundtruth_class_labels
[
image_key
]
=
groundtruth_class_labels
if
groundtruth_is_difficult_list
is
None
:
num_boxes
=
groundtruth_boxes
.
shape
[
0
]
groundtruth_is_difficult_list
=
np
.
zeros
(
num_boxes
,
dtype
=
bool
)
self
.
groundtruth_is_difficult_list
[
image_key
]
=
groundtruth_is_difficult_list
.
astype
(
dtype
=
bool
)
self
.
_update_ground_truth_statistics
(
groundtruth_class_labels
,
groundtruth_is_difficult_list
)
def
add_single_detected_image_info
(
self
,
image_key
,
detected_boxes
,
detected_scores
,
detected_class_labels
):
"""Add detected result of a single image into the evaluation database.
Args:
image_key: sha256 key of image content
detected_boxes: A numpy array of shape [N, 4] representing detected box
coordinates[y_min, x_min, y_max, x_max]
detected_scores: A 1-d numpy array of length N representing classification
score
detected_class_labels: A 1-d numpy array of length N representing class
labels
Raises:
ValueError: if detected_boxes, detected_scores and detected_class_labels
do not have the same length.
"""
if
(
len
(
detected_boxes
)
!=
len
(
detected_scores
)
or
len
(
detected_boxes
)
!=
len
(
detected_class_labels
)):
raise
ValueError
(
'detected_boxes, detected_scores and '
'detected_class_labels should all have same lengths. Got'
'[%d, %d, %d]'
%
len
(
detected_boxes
),
len
(
detected_scores
),
len
(
detected_class_labels
))
if
image_key
in
self
.
detection_keys
:
logging
.
warn
(
'image %s has already been added to the detection result database'
,
image_key
)
return
self
.
detection_keys
.
add
(
image_key
)
if
image_key
in
self
.
groundtruth_boxes
:
groundtruth_boxes
=
self
.
groundtruth_boxes
[
image_key
]
groundtruth_class_labels
=
self
.
groundtruth_class_labels
[
image_key
]
groundtruth_is_difficult_list
=
self
.
groundtruth_is_difficult_list
[
image_key
]
else
:
groundtruth_boxes
=
np
.
empty
(
shape
=
[
0
,
4
],
dtype
=
float
)
groundtruth_class_labels
=
np
.
array
([],
dtype
=
int
)
groundtruth_is_difficult_list
=
np
.
array
([],
dtype
=
bool
)
scores
,
tp_fp_labels
,
is_class_correctly_detected_in_image
=
(
self
.
per_image_eval
.
compute_object_detection_metrics
(
detected_boxes
,
detected_scores
,
detected_class_labels
,
groundtruth_boxes
,
groundtruth_class_labels
,
groundtruth_is_difficult_list
))
for
i
in
range
(
self
.
num_class
):
self
.
scores_per_class
[
i
].
append
(
scores
[
i
])
self
.
tp_fp_labels_per_class
[
i
].
append
(
tp_fp_labels
[
i
])
(
self
.
num_images_correctly_detected_per_class
)
+=
is_class_correctly_detected_in_image
def
_update_ground_truth_statistics
(
self
,
groundtruth_class_labels
,
groundtruth_is_difficult_list
):
"""Update grouth truth statitistics.
1. Difficult boxes are ignored when counting the number of ground truth
instances as done in Pascal VOC devkit.
2. Difficult boxes are treated as normal boxes when computing CorLoc related
statitistics.
Args:
groundtruth_class_labels: An integer numpy array of length M,
representing M class labels of object instances in ground truth
groundtruth_is_difficult_list: A boolean numpy array of length M denoting
whether a ground truth box is a difficult instance or not
"""
for
class_index
in
range
(
self
.
num_class
):
num_gt_instances
=
np
.
sum
(
groundtruth_class_labels
[
~
groundtruth_is_difficult_list
]
==
class_index
)
self
.
num_gt_instances_per_class
[
class_index
]
+=
num_gt_instances
if
np
.
any
(
groundtruth_class_labels
==
class_index
):
self
.
num_gt_imgs_per_class
[
class_index
]
+=
1
def
evaluate
(
self
):
"""Compute evaluation result.
Returns:
average_precision_per_class: float numpy array of average precision for
each class.
mean_ap: mean average precision of all classes, float scalar
precisions_per_class: List of precisions, each precision is a float numpy
array
recalls_per_class: List of recalls, each recall is a float numpy array
corloc_per_class: numpy float array
mean_corloc: Mean CorLoc score for each class, float scalar
"""
if
(
self
.
num_gt_instances_per_class
==
0
).
any
():
logging
.
warn
(
'The following classes have no ground truth examples: %s'
,
np
.
squeeze
(
np
.
argwhere
(
self
.
num_gt_instances_per_class
==
0
)))
for
class_index
in
range
(
self
.
num_class
):
if
self
.
num_gt_instances_per_class
[
class_index
]
==
0
:
continue
scores
=
np
.
concatenate
(
self
.
scores_per_class
[
class_index
])
tp_fp_labels
=
np
.
concatenate
(
self
.
tp_fp_labels_per_class
[
class_index
])
precision
,
recall
=
metrics
.
compute_precision_recall
(
scores
,
tp_fp_labels
,
self
.
num_gt_instances_per_class
[
class_index
])
self
.
precisions_per_class
.
append
(
precision
)
self
.
recalls_per_class
.
append
(
recall
)
average_precision
=
metrics
.
compute_average_precision
(
precision
,
recall
)
self
.
average_precision_per_class
[
class_index
]
=
average_precision
self
.
corloc_per_class
=
metrics
.
compute_cor_loc
(
self
.
num_gt_imgs_per_class
,
self
.
num_images_correctly_detected_per_class
)
mean_ap
=
np
.
nanmean
(
self
.
average_precision_per_class
)
mean_corloc
=
np
.
nanmean
(
self
.
corloc_per_class
)
return
(
self
.
average_precision_per_class
,
mean_ap
,
self
.
precisions_per_class
,
self
.
recalls_per_class
,
self
.
corloc_per_class
,
mean_corloc
)
def
get_eval_result
(
self
):
return
EvalResult
(
self
.
average_precision_per_class
,
self
.
precisions_per_class
,
self
.
recalls_per_class
,
self
.
corloc_per_class
)
class EvalResult(object):
  """Plain value container bundling per-class detection metrics."""

  def __init__(self, average_precisions, precisions, recalls, all_corloc):
    """Stores one evaluation run's results.

    Args:
      average_precisions: per-class average precision values.
      precisions: list of per-class precision arrays.
      recalls: list of per-class recall arrays.
      all_corloc: per-class CorLoc scores.
    """
    self.average_precisions = average_precisions
    self.precisions = precisions
    self.recalls = recalls
    self.all_corloc = all_corloc
object_detection/utils/object_detection_evaluation_test.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.utils.object_detection_evaluation."""
import
numpy
as
np
import
tensorflow
as
tf
from
object_detection.utils
import
object_detection_evaluation
class ObjectDetectionEvaluationTest(tf.test.TestCase):
  """Unit tests for ObjectDetectionEvaluation.

  setUp builds a 3-class evaluator, registers ground truth for three images
  (img2 has one box marked difficult) and detections for img2 only; each test
  then inspects the evaluator's internal state or the final metrics.
  """

  def setUp(self):
    # Evaluator under test: 3 ground truth classes, default thresholds.
    num_groundtruth_classes = 3
    self.od_eval = object_detection_evaluation.ObjectDetectionEvaluation(
        num_groundtruth_classes)

    # img1: three boxes with class labels [0, 2, 0], none difficult.
    image_key1 = "img1"
    groundtruth_boxes1 = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]],
                                  dtype=float)
    groundtruth_class_labels1 = np.array([0, 2, 0], dtype=int)
    self.od_eval.add_single_ground_truth_image_info(
        image_key1, groundtruth_boxes1, groundtruth_class_labels1)
    # img2: three boxes [0, 0, 2]; the second (class 0) is difficult, so it
    # is excluded from the per-class instance counts.
    image_key2 = "img2"
    groundtruth_boxes2 = np.array([[10, 10, 11, 11], [500, 500, 510, 510],
                                   [10, 10, 12, 12]], dtype=float)
    groundtruth_class_labels2 = np.array([0, 0, 2], dtype=int)
    groundtruth_is_difficult_list2 = np.array([False, True, False], dtype=bool)
    self.od_eval.add_single_ground_truth_image_info(
        image_key2, groundtruth_boxes2, groundtruth_class_labels2,
        groundtruth_is_difficult_list2)
    # img3: a single class-1 box.
    image_key3 = "img3"
    groundtruth_boxes3 = np.array([[0, 0, 1, 1]], dtype=float)
    groundtruth_class_labels3 = np.array([1], dtype=int)
    self.od_eval.add_single_ground_truth_image_info(
        image_key3, groundtruth_boxes3, groundtruth_class_labels3)

    # Detections for img2: the first box exactly matches a difficult ground
    # truth box; the other two match nothing.
    image_key = "img2"
    detected_boxes = np.array(
        [[10, 10, 11, 11], [100, 100, 120, 120], [100, 100, 220, 220]],
        dtype=float)
    detected_class_labels = np.array([0, 0, 2], dtype=int)
    detected_scores = np.array([0.7, 0.8, 0.9], dtype=float)
    self.od_eval.add_single_detected_image_info(
        image_key, detected_boxes, detected_scores, detected_class_labels)

  def test_add_single_ground_truth_image_info(self):
    # Instance counts exclude the difficult class-0 box of img2:
    # class 0 -> 2 (img1) + 1 (img2), class 1 -> 1 (img3), class 2 -> 2.
    expected_num_gt_instances_per_class = np.array([3, 1, 2], dtype=int)
    # Image counts include difficult boxes (CorLoc convention).
    expected_num_gt_imgs_per_class = np.array([2, 1, 2], dtype=int)
    self.assertTrue(np.array_equal(expected_num_gt_instances_per_class,
                                   self.od_eval.num_gt_instances_per_class))
    self.assertTrue(np.array_equal(expected_num_gt_imgs_per_class,
                                   self.od_eval.num_gt_imgs_per_class))
    # Ground truth boxes and difficult flags are stored verbatim per image.
    groundtruth_boxes2 = np.array([[10, 10, 11, 11], [500, 500, 510, 510],
                                   [10, 10, 12, 12]], dtype=float)
    self.assertTrue(np.allclose(self.od_eval.groundtruth_boxes["img2"],
                                groundtruth_boxes2))
    groundtruth_is_difficult_list2 = np.array([False, True, False], dtype=bool)
    self.assertTrue(np.allclose(
        self.od_eval.groundtruth_is_difficult_list["img2"],
        groundtruth_is_difficult_list2))
    groundtruth_class_labels1 = np.array([0, 2, 0], dtype=int)
    self.assertTrue(np.array_equal(self.od_eval.groundtruth_class_labels[
        "img1"], groundtruth_class_labels1))

  def test_add_single_detected_image_info(self):
    # Scores are expected sorted descending within each class; all detections
    # are false positives (tp_fp label 0) except none here are true positives
    # since the only matching ground truth box is difficult.
    expected_scores_per_class = [[np.array([0.8, 0.7], dtype=float)], [],
                                 [np.array([0.9], dtype=float)]]
    expected_tp_fp_labels_per_class = [[np.array([0, 1], dtype=bool)], [],
                                       [np.array([0], dtype=bool)]]
    expected_num_images_correctly_detected_per_class = np.array([0, 0, 0],
                                                                dtype=int)
    for i in range(self.od_eval.num_class):
      for j in range(len(expected_scores_per_class[i])):
        self.assertTrue(np.allclose(expected_scores_per_class[i][j],
                                    self.od_eval.scores_per_class[i][j]))
        self.assertTrue(np.array_equal(expected_tp_fp_labels_per_class[i][
            j], self.od_eval.tp_fp_labels_per_class[i][j]))
    self.assertTrue(np.array_equal(
        expected_num_images_correctly_detected_per_class,
        self.od_eval.num_images_correctly_detected_per_class))

  def test_evaluate(self):
    (average_precision_per_class, mean_ap, precisions_per_class,
     recalls_per_class, corloc_per_class,
     mean_corloc) = self.od_eval.evaluate()
    # Class 1 has no detections, so its precision/recall arrays are empty.
    expected_precisions_per_class = [np.array([0, 0.5], dtype=float),
                                     np.array([], dtype=float),
                                     np.array([0], dtype=float)]
    expected_recalls_per_class = [
        np.array([0, 1. / 3.], dtype=float), np.array([], dtype=float),
        np.array([0], dtype=float)
    ]
    expected_average_precision_per_class = np.array([1. / 6., 0, 0],
                                                    dtype=float)
    # NOTE(review): np.divide(0, 0) yields NaN, and np.allclose does not
    # treat NaN as equal by default — confirm this expectation actually
    # matches what metrics.compute_cor_loc returns for class 1.
    expected_corloc_per_class = np.array([0, np.divide(0, 0), 0], dtype=float)
    expected_mean_ap = 1. / 18
    expected_mean_corloc = 0.0
    for i in range(self.od_eval.num_class):
      self.assertTrue(np.allclose(expected_precisions_per_class[i],
                                  precisions_per_class[i]))
      self.assertTrue(np.allclose(expected_recalls_per_class[i],
                                  recalls_per_class[i]))
    self.assertTrue(np.allclose(expected_average_precision_per_class,
                                average_precision_per_class))
    self.assertTrue(np.allclose(expected_corloc_per_class, corloc_per_class))
    self.assertAlmostEqual(expected_mean_ap, mean_ap)
    self.assertAlmostEqual(expected_mean_corloc, mean_corloc)
# Standard TF test entry point: runs all tf.test.TestCase tests in this
# module when the file is executed directly.
if __name__ == "__main__":
  tf.test.main()
object_detection/utils/ops.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A module for helper tensorflow ops."""
import
math
import
six
import
tensorflow
as
tf
from
object_detection.core
import
box_list
from
object_detection.core
import
box_list_ops
from
object_detection.core
import
standard_fields
as
fields
from
object_detection.utils
import
static_shape
def expanded_shape(orig_shape, start_dim, num_dims):
  """Inserts multiple ones into a shape vector.

  Inserts an all-1 vector of length num_dims at position start_dim into a
  shape. Can be combined with tf.reshape to generalize tf.expand_dims.

  Args:
    orig_shape: the shape into which the all-1 vector is added (int32 vector)
    start_dim: insertion position (int scalar)
    num_dims: length of the inserted all-1 vector (int scalar)

  Returns:
    An int32 vector of length tf.size(orig_shape) + num_dims.
  """
  with tf.name_scope('ExpandedShape'):
    # Promote start_dim to rank 1 so it can act as a slice begin/size.
    insert_at = tf.expand_dims(start_dim, 0)
    head = tf.slice(orig_shape, [0], insert_at)
    ones_block = tf.ones(tf.reshape(num_dims, [1]), dtype=tf.int32)
    tail = tf.slice(orig_shape, insert_at, [-1])
    return tf.concat([head, ones_block, tail], 0)
def normalized_to_image_coordinates(normalized_boxes, image_shape,
                                    parallel_iterations=32):
  """Converts a batch of boxes from normal to image coordinates.

  Args:
    normalized_boxes: a float32 tensor of shape [None, num_boxes, 4] in
      normalized coordinates.
    image_shape: a float32 tensor of shape [4] containing the image shape.
    parallel_iterations: parallelism for the map_fn op.

  Returns:
    absolute_boxes: a float32 tensor of shape [None, num_boxes, 4] containing
      the boxes in image coordinates.
  """
  def _to_absolute_coordinates(normalized_boxes):
    # image_shape[1] / image_shape[2] are the height / width used to scale
    # the boxes; check_range=False skips the [0, 1] range assertion.
    return box_list_ops.to_absolute_coordinates(
        box_list.BoxList(normalized_boxes),
        image_shape[1], image_shape[2], check_range=False).get()

  # Map the per-image conversion over the batch dimension.
  absolute_boxes = tf.map_fn(
      _to_absolute_coordinates,
      elems=(normalized_boxes),
      dtype=tf.float32,
      parallel_iterations=parallel_iterations,
      back_prop=True)
  return absolute_boxes
def meshgrid(x, y):
  """Tiles the contents of x and y into a pair of grids.

  Multidimensional analog of numpy.meshgrid, giving the same behavior if x
  and y are vectors. Generally, this will give:

  xgrid(i1, ..., i_m, j_1, ..., j_n) = x(j_1, ..., j_n)
  ygrid(i1, ..., i_m, j_1, ..., j_n) = y(i_1, ..., i_m)

  Keep in mind that the order of the arguments and outputs is reverse relative
  to the order of the indices they go into, done for compatibility with numpy.
  The output tensors have the same shapes.  Specifically:

  xgrid.get_shape() = y.get_shape().concatenate(x.get_shape())
  ygrid.get_shape() = y.get_shape().concatenate(x.get_shape())

  Args:
    x: A tensor of arbitrary shape and rank. xgrid will contain these values
       varying in its last dimensions.
    y: A tensor of arbitrary shape and rank. ygrid will contain these values
       varying in its first dimensions.

  Returns:
    A tuple of tensors (xgrid, ygrid).
  """
  with tf.name_scope('Meshgrid'):
    x = tf.convert_to_tensor(x)
    y = tf.convert_to_tensor(y)
    # Prepend rank(y) ones to x's shape and append rank(x) ones to y's shape
    # so that tiling can broadcast each against the other.
    x_exp_shape = expanded_shape(tf.shape(x), 0, tf.rank(y))
    y_exp_shape = expanded_shape(tf.shape(y), tf.rank(y), tf.rank(x))

    xgrid = tf.tile(tf.reshape(x, x_exp_shape), y_exp_shape)
    ygrid = tf.tile(tf.reshape(y, y_exp_shape), x_exp_shape)
    # Restore the static shape lost through the dynamic reshape/tile above.
    new_shape = y.get_shape().concatenate(x.get_shape())
    xgrid.set_shape(new_shape)
    ygrid.set_shape(new_shape)

    return xgrid, ygrid
def pad_to_multiple(tensor, multiple):
  """Returns the tensor zero padded to the specified multiple.

  Appends 0s to the end of the first and second dimension (height and width)
  of the tensor until both dimensions are a multiple of the input argument
  'multiple'. E.g. given an input tensor of shape [1, 3, 5, 1] and an input
  multiple of 4, PadToMultiple will append 0s so that the resulting tensor
  will be of shape [1, 4, 8, 1].

  Args:
    tensor: rank 4 float32 tensor, where
      tensor -> [batch_size, height, width, channels].
    multiple: the multiple to pad to.

  Returns:
    padded_tensor: the tensor zero padded to the specified multiple.
  """
  # Prefer static (Python int) dimensions when available; fall back to
  # dynamic tf.shape() values when a dimension is unknown at graph-build time.
  tensor_shape = tensor.get_shape()
  batch_size = static_shape.get_batch_size(tensor_shape)
  tensor_height = static_shape.get_height(tensor_shape)
  tensor_width = static_shape.get_width(tensor_shape)
  tensor_depth = static_shape.get_depth(tensor_shape)

  if batch_size is None:
    batch_size = tf.shape(tensor)[0]

  if tensor_height is None:
    # Dynamic height: compute the padded height with tensor ops.
    tensor_height = tf.shape(tensor)[1]
    padded_tensor_height = tf.to_int32(
        tf.ceil(tf.to_float(tensor_height) / tf.to_float(multiple))) * multiple
  else:
    # Static height: compute the padded height in Python.
    padded_tensor_height = int(
        math.ceil(float(tensor_height) / multiple) * multiple)

  if tensor_width is None:
    tensor_width = tf.shape(tensor)[2]
    padded_tensor_width = tf.to_int32(
        tf.ceil(tf.to_float(tensor_width) / tf.to_float(multiple))) * multiple
  else:
    padded_tensor_width = int(
        math.ceil(float(tensor_width) / multiple) * multiple)

  if tensor_depth is None:
    tensor_depth = tf.shape(tensor)[3]

  # Use tf.concat instead of tf.pad to preserve static shape
  height_pad = tf.zeros(
      [batch_size, padded_tensor_height - tensor_height, tensor_width,
       tensor_depth])
  padded_tensor = tf.concat([tensor, height_pad], 1)
  width_pad = tf.zeros(
      [batch_size, padded_tensor_height, padded_tensor_width - tensor_width,
       tensor_depth])
  padded_tensor = tf.concat([padded_tensor, width_pad], 2)

  return padded_tensor
def padded_one_hot_encoding(indices, depth, left_pad):
  """Returns a zero padded one-hot tensor.

  Converts a sparse representation of indices (e.g., [4]) to a zero padded
  one-hot representation (e.g., [0, 0, 0, 0, 1] with depth = 4 and
  left_pad = 1). If `indices` is empty, the result is a tensor of shape
  (0, depth + left_pad). If depth = 0, this function returns `None`.

  Args:
    indices: an integer tensor of shape [num_indices].
    depth: depth for the one-hot tensor (integer).
    left_pad: number of zeros to left pad the one-hot tensor with (integer).

  Returns:
    padded_onehot: a tensor with shape (num_indices, depth + left_pad), or
      `None` if the depth is zero.

  Raises:
    ValueError: if `indices` does not have rank 1 or if `left_pad` or `depth`
      are either negative or non-integers.
  """
  # `long` only exists on Python 2; the conditional keeps Python 3 from
  # ever evaluating it.
  integer_types = (int, long) if six.PY2 else int
  if depth < 0 or not isinstance(depth, integer_types):
    raise ValueError('`depth` must be a non-negative integer.')
  if left_pad < 0 or not isinstance(left_pad, integer_types):
    raise ValueError('`left_pad` must be a non-negative integer.')
  if depth == 0:
    return None
  if len(indices.get_shape().as_list()) != 1:
    raise ValueError('`indices` must have rank 1')

  def one_hot_and_pad():
    # Build the one-hot matrix, then prepend left_pad zero columns.
    one_hot = tf.cast(
        tf.one_hot(tf.cast(indices, tf.int64), depth,
                   on_value=1, off_value=0), tf.float32)
    return tf.pad(one_hot, [[0, 0], [left_pad, 0]], mode='CONSTANT')

  # An empty `indices` tensor would make tf.one_hot fail, so branch on size.
  result = tf.cond(tf.greater(tf.size(indices), 0), one_hot_and_pad,
                   lambda: tf.zeros((depth + left_pad, 0)))
  return tf.reshape(result, [-1, depth + left_pad])
def dense_to_sparse_boxes(dense_locations, dense_num_boxes, num_classes):
  """Converts bounding boxes from dense to sparse form.

  Args:
    dense_locations: a [max_num_boxes, 4] tensor in which only the first k
      rows are valid bounding box location coordinates, where k is the sum of
      elements in dense_num_boxes.
    dense_num_boxes: a [max_num_classes] tensor indicating the counts of
       various bounding box classes e.g. [1, 0, 0, 2] means that the first
       bounding box is of class 0 and the second and third bounding boxes are
       of class 3. The sum of elements in this tensor is the number of valid
       bounding boxes.
    num_classes: number of classes

  Returns:
    box_locations: a [num_boxes, 4] tensor containing only valid bounding
       boxes (i.e. the first num_boxes rows of dense_locations)
    box_classes: a [num_boxes] tensor containing the classes of each bounding
       box (e.g. dense_num_boxes = [1, 0, 0, 2] => box_classes = [0, 3, 3]
  """
  # Total number of valid boxes = sum of per-class counts.
  num_valid_boxes = tf.reduce_sum(dense_num_boxes)
  # Keep only the first num_valid_boxes rows of the dense box tensor.
  box_locations = tf.slice(dense_locations,
                           tf.constant([0, 0]), tf.stack([num_valid_boxes, 4]))
  # Repeat each class id i dense_num_boxes[i] times, then concatenate.
  tiled_classes = [tf.tile([i], tf.expand_dims(dense_num_boxes[i], 0))
                   for i in range(num_classes)]
  box_classes = tf.concat(tiled_classes, 0)
  # tf.slice loses the static column count; restore it.
  box_locations.set_shape([None, 4])
  return box_locations, box_classes
def indices_to_dense_vector(indices,
                            size,
                            indices_value=1.,
                            default_value=0,
                            dtype=tf.float32):
  """Creates dense vector with indices set to specific value and rest to zeros.

  This function exists because it is unclear if it is safe to use
    tf.sparse_to_dense(indices, [size], 1, validate_indices=False)
  with indices which are not ordered.
  This function accepts a dynamic size (e.g. tf.shape(tensor)[0]).

  Args:
    indices: 1d Tensor with integer indices which are to be set to
        indices_values.
    size: scalar with size (integer) of output Tensor.
    indices_value: values of elements specified by indices in the output
        vector.
    default_value: values of other elements in the output vector.
    dtype: data type.

  Returns:
    dense 1D Tensor of shape [size] with indices set to indices_values and
        the rest set to default_value.
  """
  size = tf.to_int32(size)
  # Values to scatter at the given indices.
  foreground = tf.ones_like(indices, dtype=dtype) * indices_value
  # Baseline vector filled with the default value.
  background = tf.ones([size], dtype=dtype) * default_value
  # dynamic_stitch lets the indexed values overwrite the baseline entries.
  return tf.dynamic_stitch([tf.range(size), tf.to_int32(indices)],
                           [background, foreground])
def retain_groundtruth(tensor_dict, valid_indices):
  """Retains groundtruth by valid indices.

  Args:
    tensor_dict: a dictionary of following groundtruth tensors -
      fields.InputDataFields.groundtruth_boxes
      fields.InputDataFields.groundtruth_classes
      fields.InputDataFields.groundtruth_is_crowd
      fields.InputDataFields.groundtruth_area
      fields.InputDataFields.groundtruth_label_types
      fields.InputDataFields.groundtruth_difficult
    valid_indices: a tensor with valid indices for the box-level groundtruth.

  Returns:
    a dictionary of tensors containing only the groundtruth for valid_indices.

  Raises:
    ValueError: If the shape of valid_indices is invalid.
    ValueError: field fields.InputDataFields.groundtruth_boxes is
      not present in tensor_dict.
  """
  # Accept either a rank-1 index vector or a [N, 1] column (e.g. tf.where
  # output); anything else is rejected.
  input_shape = valid_indices.get_shape().as_list()
  if not (len(input_shape) == 1 or
          (len(input_shape) == 2 and input_shape[1] == 1)):
    raise ValueError('The shape of valid_indices is invalid.')
  valid_indices = tf.reshape(valid_indices, [-1])
  valid_dict = {}
  if fields.InputDataFields.groundtruth_boxes in tensor_dict:
    # Prevents reshape failure when num_boxes is 0.
    num_boxes = tf.maximum(tf.shape(
        tensor_dict[fields.InputDataFields.groundtruth_boxes])[0], 1)
    for key in tensor_dict:
      # Box-aligned fields that can be gathered directly.
      if key in [fields.InputDataFields.groundtruth_boxes,
                 fields.InputDataFields.groundtruth_classes]:
        valid_dict[key] = tf.gather(tensor_dict[key], valid_indices)
      # Input decoder returns empty tensor when these fields are not provided.
      # Needs to reshape into [num_boxes, -1] for tf.gather() to work.
      elif key in [fields.InputDataFields.groundtruth_is_crowd,
                   fields.InputDataFields.groundtruth_area,
                   fields.InputDataFields.groundtruth_difficult,
                   fields.InputDataFields.groundtruth_label_types]:
        valid_dict[key] = tf.reshape(
            tf.gather(tf.reshape(tensor_dict[key], [num_boxes, -1]),
                      valid_indices), [-1])
      # Fields that are not associated with boxes.
      else:
        valid_dict[key] = tensor_dict[key]
  else:
    raise ValueError('%s not present in input tensor dict.' % (
        fields.InputDataFields.groundtruth_boxes))
  return valid_dict
def retain_groundtruth_with_positive_classes(tensor_dict):
  """Retains only groundtruth with positive class ids.

  Args:
    tensor_dict: a dictionary of following groundtruth tensors -
      fields.InputDataFields.groundtruth_boxes
      fields.InputDataFields.groundtruth_classes
      fields.InputDataFields.groundtruth_is_crowd
      fields.InputDataFields.groundtruth_area
      fields.InputDataFields.groundtruth_label_types
      fields.InputDataFields.groundtruth_difficult

  Returns:
    a dictionary of tensors containing only the groundtruth with positive
    classes.

  Raises:
    ValueError: If groundtruth_classes tensor is not in tensor_dict.
  """
  class_field = fields.InputDataFields.groundtruth_classes
  if class_field not in tensor_dict:
    raise ValueError('`groundtruth classes` not in tensor_dict.')
  # Indices of boxes whose class label is strictly positive.
  keep_indices = tf.where(tf.greater(tensor_dict[class_field], 0))
  return retain_groundtruth(tensor_dict, keep_indices)
def filter_groundtruth_with_nan_box_coordinates(tensor_dict):
  """Filters out groundtruth with no bounding boxes.

  Args:
    tensor_dict: a dictionary of following groundtruth tensors -
      fields.InputDataFields.groundtruth_boxes
      fields.InputDataFields.groundtruth_classes
      fields.InputDataFields.groundtruth_is_crowd
      fields.InputDataFields.groundtruth_area
      fields.InputDataFields.groundtruth_label_types

  Returns:
    a dictionary of tensors containing only the groundtruth that have
    bounding boxes.
  """
  boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
  # A box row is invalid when at least one of its coordinates is NaN.
  row_has_nan = tf.greater(
      tf.reduce_sum(tf.to_int32(tf.is_nan(boxes)), reduction_indices=[1]), 0)
  valid_indices = tf.where(tf.logical_not(row_has_nan))
  return retain_groundtruth(tensor_dict, valid_indices)
def normalize_to_target(inputs,
                        target_norm_value,
                        dim,
                        epsilon=1e-7,
                        trainable=True,
                        scope='NormalizeToTarget',
                        summarize=True):
  """L2 normalizes the inputs across the specified dimension to a target norm.

  This op implements the L2 Normalization layer introduced in
  Liu, Wei, et al. "SSD: Single Shot MultiBox Detector."
  and Liu, Wei, Andrew Rabinovich, and Alexander C. Berg.
  "Parsenet: Looking wider to see better." and is useful for bringing
  activations from multiple layers in a convnet to a standard scale.

  Note that the rank of `inputs` must be known and the dimension to which
  normalization is to be applied should be statically defined.

  TODO: Add option to scale by L2 norm of the entire input.

  Args:
    inputs: A `Tensor` of arbitrary size.
    target_norm_value: A float value that specifies an initial target norm or
      a list of floats (whose length must be equal to the depth along the
      dimension to be normalized) specifying a per-dimension multiplier
      after normalization.
    dim: The dimension along which the input is normalized.
    epsilon: A small value to add to the inputs to avoid dividing by zero.
    trainable: Whether the norm is trainable or not
    scope: Optional scope for variable_scope.
    summarize: Whether or not to add a tensorflow summary for the op.

  Returns:
    The input tensor normalized to the specified target norm.

  Raises:
    ValueError: If dim is smaller than the number of dimensions in 'inputs'.
    ValueError: If target_norm_value is not a float or a list of floats with
      length equal to the depth along the dimension to be normalized.
  """
  with tf.variable_scope(scope, 'NormalizeToTarget', [inputs]):
    if not inputs.get_shape():
      raise ValueError('The input rank must be known.')
    input_shape = inputs.get_shape().as_list()
    input_rank = len(input_shape)
    if dim < 0 or dim >= input_rank:
      raise ValueError(
          'dim must be non-negative but smaller than the input rank.')
    if not input_shape[dim]:
      raise ValueError('input shape should be statically defined along '
                       'the specified dimension.')
    depth = input_shape[dim]
    # NOTE(review): due to and/or precedence this reads as
    # float_ok or ((list_ok and len_ok) and all_floats); confirm the grouping
    # matches the intended validation before relying on edge cases here.
    if not (isinstance(target_norm_value, float) or
            (isinstance(target_norm_value, list) and
             len(target_norm_value) == depth) and
            all([isinstance(val, float) for val in target_norm_value])):
      raise ValueError('target_norm_value must be a float or a list of floats '
                       'with length equal to the depth along the dimension to '
                       'be normalized.')
    if isinstance(target_norm_value, float):
      # A scalar target is broadcast to one entry per channel.
      initial_norm = depth * [target_norm_value]
    else:
      initial_norm = target_norm_value
    # Learnable (if trainable=True) per-channel target norm multiplier.
    target_norm = tf.contrib.framework.model_variable(
        name='weights', dtype=tf.float32,
        initializer=tf.constant(initial_norm, dtype=tf.float32),
        trainable=trainable)
    if summarize:
      mean = tf.reduce_mean(target_norm)
      # tf.Print logs the running mean of the target norm at graph execution.
      mean = tf.Print(mean, ['NormalizeToTarget:', mean])
      tf.summary.scalar(tf.get_variable_scope().name, mean)
    # L2 norm along `dim` (kept as a broadcastable axis); epsilon guards the
    # subsequent division.
    lengths = epsilon + tf.sqrt(tf.reduce_sum(tf.square(inputs), dim, True))
    # Reshape the per-channel norms so they broadcast along `dim` only.
    mult_shape = input_rank * [1]
    mult_shape[dim] = depth
    return tf.reshape(target_norm, mult_shape) * tf.truediv(inputs, lengths)
def
position_sensitive_crop_regions
(
image
,
boxes
,
box_ind
,
crop_size
,
num_spatial_bins
,
global_pool
,
extrapolation_value
=
None
):
"""Position-sensitive crop and pool rectangular regions from a feature grid.
The output crops are split into `spatial_bins_y` vertical bins
and `spatial_bins_x` horizontal bins. For each intersection of a vertical
and a horizontal bin the output values are gathered by performing
`tf.image.crop_and_resize` (bilinear resampling) on a a separate subset of
channels of the image. This reduces `depth` by a factor of
`(spatial_bins_y * spatial_bins_x)`.
When global_pool is True, this function implements a differentiable version
of position-sensitive RoI pooling used in
[R-FCN detection system](https://arxiv.org/abs/1605.06409).
When global_pool is False, this function implements a differentiable version
of position-sensitive assembling operation used in
[instance FCN](https://arxiv.org/abs/1603.08678).
Args:
image: A `Tensor`. Must be one of the following types: `uint8`, `int8`,
`int16`, `int32`, `int64`, `half`, `float32`, `float64`.
A 4-D tensor of shape `[batch, image_height, image_width, depth]`.
Both `image_height` and `image_width` need to be positive.
boxes: A `Tensor` of type `float32`.
A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor
specifies the coordinates of a box in the `box_ind[i]` image and is
specified in normalized coordinates `[y1, x1, y2, x2]`. A normalized
coordinate value of `y` is mapped to the image coordinate at
`y * (image_height - 1)`, so as the `[0, 1]` interval of normalized image
height is mapped to `[0, image_height - 1] in image height coordinates.
We do allow y1 > y2, in which case the sampled crop is an up-down flipped
version of the original image. The width dimension is treated similarly.
Normalized coordinates outside the `[0, 1]` range are allowed, in which
case we use `extrapolation_value` to extrapolate the input image values.
box_ind: A `Tensor` of type `int32`.
A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`.
The value of `box_ind[i]` specifies the image that the `i`-th box refers
to.
crop_size: A list of two integers `[crop_height, crop_width]`. All
cropped image patches are resized to this size. The aspect ratio of the
image content is not preserved. Both `crop_height` and `crop_width` need
to be positive.
num_spatial_bins: A list of two integers `[spatial_bins_y, spatial_bins_x]`.
Represents the number of position-sensitive bins in y and x directions.
Both values should be >= 1. `crop_height` should be divisible by
`spatial_bins_y`, and similarly for width.
The number of image channels should be divisible by
(spatial_bins_y * spatial_bins_x).
Suggested value from R-FCN paper: [3, 3].
global_pool: A boolean variable.
If True, we perform average global pooling on the features assembled from
the position-sensitive score maps.
If False, we keep the position-pooled features without global pooling
over the spatial coordinates.
Note that using global_pool=True is equivalent to but more efficient than
running the function with global_pool=False and then performing global
average pooling.
extrapolation_value: An optional `float`. Defaults to `0`.
Value used for extrapolation, when applicable.
Returns:
position_sensitive_features: A 4-D tensor of shape
`[num_boxes, K, K, crop_channels]`,
where `crop_channels = depth / (spatial_bins_y * spatial_bins_x)`,
where K = 1 when global_pool is True (Average-pooled cropped regions),
and K = crop_size when global_pool is False.
Raises:
ValueError: Raised in four situations:
`num_spatial_bins` is not >= 1;
`num_spatial_bins` does not divide `crop_size`;
`(spatial_bins_y*spatial_bins_x)` does not divide `depth`;
`bin_crop_size` is not square when global_pool=False due to the
constraint in function space_to_depth.
"""
total_bins
=
1
bin_crop_size
=
[]
for
(
num_bins
,
crop_dim
)
in
zip
(
num_spatial_bins
,
crop_size
):
if
num_bins
<
1
:
raise
ValueError
(
'num_spatial_bins should be >= 1'
)
if
crop_dim
%
num_bins
!=
0
:
raise
ValueError
(
'crop_size should be divisible by num_spatial_bins'
)
total_bins
*=
num_bins
bin_crop_size
.
append
(
crop_dim
//
num_bins
)
if
not
global_pool
and
bin_crop_size
[
0
]
!=
bin_crop_size
[
1
]:
raise
ValueError
(
'Only support square bin crop size for now.'
)
ymin
,
xmin
,
ymax
,
xmax
=
tf
.
unstack
(
boxes
,
axis
=
1
)
spatial_bins_y
,
spatial_bins_x
=
num_spatial_bins
# Split each box into spatial_bins_y * spatial_bins_x bins.
position_sensitive_boxes
=
[]
for
bin_y
in
range
(
spatial_bins_y
):
step_y
=
(
ymax
-
ymin
)
/
spatial_bins_y
for
bin_x
in
range
(
spatial_bins_x
):
step_x
=
(
xmax
-
xmin
)
/
spatial_bins_x
box_coordinates
=
[
ymin
+
bin_y
*
step_y
,
xmin
+
bin_x
*
step_x
,
ymin
+
(
bin_y
+
1
)
*
step_y
,
xmin
+
(
bin_x
+
1
)
*
step_x
,
]
position_sensitive_boxes
.
append
(
tf
.
stack
(
box_coordinates
,
axis
=
1
))
image_splits
=
tf
.
split
(
value
=
image
,
num_or_size_splits
=
total_bins
,
axis
=
3
)
image_crops
=
[]
for
(
split
,
box
)
in
zip
(
image_splits
,
position_sensitive_boxes
):
crop
=
tf
.
image
.
crop_and_resize
(
split
,
box
,
box_ind
,
bin_crop_size
,
extrapolation_value
=
extrapolation_value
)
image_crops
.
append
(
crop
)
if
global_pool
:
# Average over all bins.
position_sensitive_features
=
tf
.
add_n
(
image_crops
)
/
len
(
image_crops
)
# Then average over spatial positions within the bins.
position_sensitive_features
=
tf
.
reduce_mean
(
position_sensitive_features
,
[
1
,
2
],
keep_dims
=
True
)
else
:
# Reorder height/width to depth channel.
block_size
=
bin_crop_size
[
0
]
if
block_size
>=
2
:
image_crops
=
[
tf
.
space_to_depth
(
crop
,
block_size
=
block_size
)
for
crop
in
image_crops
]
# Pack image_crops so that first dimension is for position-senstive boxes.
position_sensitive_features
=
tf
.
stack
(
image_crops
,
axis
=
0
)
# Unroll the position-sensitive boxes to spatial positions.
position_sensitive_features
=
tf
.
squeeze
(
tf
.
batch_to_space_nd
(
position_sensitive_features
,
block_shape
=
[
1
]
+
num_spatial_bins
,
crops
=
tf
.
zeros
((
3
,
2
),
dtype
=
tf
.
int32
)),
squeeze_dims
=
[
0
])
# Reorder back the depth channel.
if
block_size
>=
2
:
position_sensitive_features
=
tf
.
depth_to_space
(
position_sensitive_features
,
block_size
=
block_size
)
return
position_sensitive_features
def reframe_box_masks_to_image_masks(box_masks, boxes, image_height,
                                     image_width):
  """Transforms the box masks back to full image masks.

  Embeds masks in bounding boxes of larger masks whose shapes correspond to
  image shape.

  Args:
    box_masks: A tf.float32 tensor of size [num_masks, mask_height, mask_width].
    boxes: A tf.float32 tensor of size [num_masks, 4] containing the box
      corners. Row i contains [ymin, xmin, ymax, xmax] of the box
      corresponding to mask i. Note that the box corners are in
      normalized coordinates.
    image_height: Image height. The output mask will have the same height as
      the image height.
    image_width: Image width. The output mask will have the same width as the
      image width.

  Returns:
    A tf.float32 tensor of size [num_masks, image_height, image_width].
  """
  # TODO: Make this a public function.
  def transform_boxes_relative_to_boxes(boxes, reference_boxes):
    # Re-expresses `boxes` in the coordinate frame of `reference_boxes`,
    # where each reference box spans the unit square.
    # NOTE(review): divides by (max_corner - min_corner); a zero-area
    # reference box would yield inf/nan — callers presumably pass
    # non-degenerate boxes. TODO confirm.
    boxes = tf.reshape(boxes, [-1, 2, 2])
    min_corner = tf.expand_dims(reference_boxes[:, 0:2], 1)
    max_corner = tf.expand_dims(reference_boxes[:, 2:4], 1)
    transformed_boxes = (boxes - min_corner) / (max_corner - min_corner)
    return tf.reshape(transformed_boxes, [-1, 4])

  # Add a trailing channel dimension so crop_and_resize sees a 4-D image
  # batch of shape [num_masks, mask_height, mask_width, 1].
  box_masks = tf.expand_dims(box_masks, axis=3)
  num_boxes = tf.shape(box_masks)[0]
  # One [0, 0, 1, 1] (whole-image) box per mask.
  unit_boxes = tf.concat(
      [tf.zeros([num_boxes, 2]), tf.ones([num_boxes, 2])], axis=1)
  # The whole-image extent expressed in each mask's own box frame;
  # cropping mask i with reverse_boxes[i] pastes it back at its image
  # location (crop_and_resize extrapolates 0.0 outside the mask).
  reverse_boxes = transform_boxes_relative_to_boxes(unit_boxes, boxes)
  image_masks = tf.image.crop_and_resize(image=box_masks,
                                         boxes=reverse_boxes,
                                         box_ind=tf.range(num_boxes),
                                         crop_size=[image_height, image_width],
                                         extrapolation_value=0.0)
  # Drop the channel dimension added above.
  return tf.squeeze(image_masks, axis=3)
object_detection/utils/ops_test.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.utils.ops."""
import
numpy
as
np
import
tensorflow
as
tf
from
object_detection.core
import
standard_fields
as
fields
from
object_detection.utils
import
ops
class NormalizedToImageCoordinatesTest(tf.test.TestCase):
  """Tests for ops.normalized_to_image_coordinates."""

  def test_normalized_to_image_coordinates(self):
    # Two normalized [ymin, xmin, ymax, xmax] boxes scaled onto a 4x4
    # image (batch of 1, 3 channels).
    boxes_placeholder = tf.placeholder(tf.float32, shape=(None, 1, 4))
    image_shape = tf.convert_to_tensor([1, 4, 4, 3], dtype=tf.int32)
    absolute_boxes = ops.normalized_to_image_coordinates(
        boxes_placeholder, image_shape, parallel_iterations=2)

    boxes_np = np.array([[[0.0, 0.0, 1.0, 1.0]],
                         [[0.5, 0.5, 1.0, 1.0]]])
    expected = np.array([[[0, 0, 4, 4]],
                         [[2, 2, 4, 4]]])
    with self.test_session() as sess:
      absolute_boxes_out = sess.run(
          absolute_boxes, feed_dict={boxes_placeholder: boxes_np})

    self.assertAllEqual(absolute_boxes_out, expected)
class MeshgridTest(tf.test.TestCase):
  """Tests for ops.meshgrid."""

  def test_meshgrid_numpy_comparison(self):
    """For plain 1-D vectors the op must match np.meshgrid exactly."""
    vec_x = np.arange(4)
    vec_y = np.arange(6)
    expected_x, expected_y = np.meshgrid(vec_x, vec_y)
    tf_x, tf_y = ops.meshgrid(vec_x, vec_y)
    with self.test_session() as sess:
      actual_x, actual_y = sess.run([tf_x, tf_y])
      self.assertAllEqual(actual_x, expected_x)
      self.assertAllEqual(actual_y, expected_y)

  def test_meshgrid_multidimensional(self):
    """Multidimensional inputs produce grids of shape y.shape + x.shape."""
    np.random.seed(18)
    arr_x = np.random.rand(4, 1, 2).astype(np.float32)
    arr_y = np.random.rand(2, 3).astype(np.float32)

    tf_x, tf_y = ops.meshgrid(arr_x, arr_y)

    # Static shapes must be fully inferred at graph construction time.
    full_shape = list(arr_y.shape) + list(arr_x.shape)
    self.assertEqual(tf_x.get_shape().as_list(), full_shape)
    self.assertEqual(tf_y.get_shape().as_list(), full_shape)
    with self.test_session() as sess:
      actual_x, actual_y = sess.run([tf_x, tf_y])

    # Runtime shapes agree with the static ones.
    self.assertEqual(actual_x.shape, tuple(full_shape))
    self.assertEqual(actual_y.shape, tuple(full_shape))

    # Spot-check elements: the x-grid repeats x across y's axes and vice
    # versa. These are float equality tests, but the meshgrid op should
    # not introduce rounding.
    spot_checks = [((3, 0, 0), (1, 2)),
                   ((2, 0, 1), (0, 0)),
                   ((0, 0, 0), (1, 1))]
    for x_index, y_index in spot_checks:
      self.assertEqual(actual_x[y_index + x_index], arr_x[x_index])
      self.assertEqual(actual_y[y_index + x_index], arr_y[y_index])
class OpsTestPadToMultiple(tf.test.TestCase):
  """Tests for ops.pad_to_multiple.

  The three original tests repeated identical graph-building boilerplate;
  it is factored into _padded_shape (behavior of each test is unchanged).
  """

  def _padded_shape(self, multiple):
    """Pads a fixed 1x2x2x1 zero tensor to `multiple`; returns the shape.

    Args:
      multiple: Integer multiple passed through to ops.pad_to_multiple.

    Returns:
      The concrete (tuple) shape of the padded tensor after evaluation.
    """
    tensor = tf.constant([[[[0.], [0.]], [[0.], [0.]]]])
    padded_tensor = ops.pad_to_multiple(tensor, multiple)
    with self.test_session() as sess:
      return sess.run(padded_tensor).shape

  def test_zero_padding(self):
    # multiple=1 never requires padding.
    self.assertEqual((1, 2, 2, 1), self._padded_shape(1))

  def test_no_padding(self):
    # Spatial dims (2, 2) are already a multiple of 2 — shape unchanged.
    self.assertEqual((1, 2, 2, 1), self._padded_shape(2))

  def test_padding(self):
    # Spatial dims (2, 2) are padded up to the next multiple of 4.
    self.assertEqual((1, 4, 4, 1), self._padded_shape(4))
class OpsTestPaddedOneHotEncoding(tf.test.TestCase):
  """Tests for ops.padded_one_hot_encoding."""

  def test_correct_one_hot_tensor_with_no_pad(self):
    # left_pad=0 is a plain one-hot encoding of width `depth`.
    indices = tf.constant([1, 2, 3, 5])
    one_hot_tensor = ops.padded_one_hot_encoding(indices, depth=6, left_pad=0)
    expected_tensor = np.array([[0, 1, 0, 0, 0, 0],
                                [0, 0, 1, 0, 0, 0],
                                [0, 0, 0, 1, 0, 0],
                                [0, 0, 0, 0, 0, 1]], np.float32)
    with self.test_session() as sess:
      out_one_hot_tensor = sess.run(one_hot_tensor)
      self.assertAllClose(out_one_hot_tensor, expected_tensor, rtol=1e-10,
                          atol=1e-10)

  def test_correct_one_hot_tensor_with_pad_one(self):
    # One zero column is prepended, shifting every hot index right by 1.
    indices = tf.constant([1, 2, 3, 5])
    one_hot_tensor = ops.padded_one_hot_encoding(indices, depth=6, left_pad=1)
    expected_tensor = np.array([[0, 0, 1, 0, 0, 0, 0],
                                [0, 0, 0, 1, 0, 0, 0],
                                [0, 0, 0, 0, 1, 0, 0],
                                [0, 0, 0, 0, 0, 0, 1]], np.float32)
    with self.test_session() as sess:
      out_one_hot_tensor = sess.run(one_hot_tensor)
      self.assertAllClose(out_one_hot_tensor, expected_tensor, rtol=1e-10,
                          atol=1e-10)

  def test_correct_one_hot_tensor_with_pad_three(self):
    # Three zero columns are prepended (total width depth + 3 = 9).
    indices = tf.constant([1, 2, 3, 5])
    one_hot_tensor = ops.padded_one_hot_encoding(indices, depth=6, left_pad=3)
    expected_tensor = np.array([[0, 0, 0, 0, 1, 0, 0, 0, 0],
                                [0, 0, 0, 0, 0, 1, 0, 0, 0],
                                [0, 0, 0, 0, 0, 0, 1, 0, 0],
                                [0, 0, 0, 0, 0, 0, 0, 0, 1]], np.float32)
    with self.test_session() as sess:
      out_one_hot_tensor = sess.run(one_hot_tensor)
      self.assertAllClose(out_one_hot_tensor, expected_tensor, rtol=1e-10,
                          atol=1e-10)

  def test_correct_padded_one_hot_tensor_with_empty_indices(self):
    # Empty indices produce an empty [0, depth + pad] tensor, keeping the
    # padded width in the shape.
    depth = 6
    pad = 2
    indices = tf.constant([])
    one_hot_tensor = ops.padded_one_hot_encoding(
        indices, depth=depth, left_pad=pad)
    expected_tensor = np.zeros((0, depth + pad))
    with self.test_session() as sess:
      out_one_hot_tensor = sess.run(one_hot_tensor)
      self.assertAllClose(out_one_hot_tensor, expected_tensor, rtol=1e-10,
                          atol=1e-10)

  def test_return_none_on_zero_depth(self):
    # depth=0 is a documented degenerate case: the op returns None rather
    # than a tensor.
    indices = tf.constant([1, 2, 3, 4, 5])
    one_hot_tensor = ops.padded_one_hot_encoding(indices, depth=0, left_pad=2)
    self.assertEqual(one_hot_tensor, None)

  def test_raise_value_error_on_rank_two_input(self):
    # Only rank-1 index vectors are accepted.
    indices = tf.constant(1.0, shape=(2, 3))
    with self.assertRaises(ValueError):
      ops.padded_one_hot_encoding(indices, depth=6, left_pad=2)

  def test_raise_value_error_on_negative_pad(self):
    # left_pad must be a non-negative integer.
    indices = tf.constant(1.0, shape=(2, 3))
    with self.assertRaises(ValueError):
      ops.padded_one_hot_encoding(indices, depth=6, left_pad=-1)

  def test_raise_value_error_on_float_pad(self):
    # left_pad must be integral, not float.
    indices = tf.constant(1.0, shape=(2, 3))
    with self.assertRaises(ValueError):
      ops.padded_one_hot_encoding(indices, depth=6, left_pad=0.1)

  def test_raise_value_error_on_float_depth(self):
    # depth must be integral, not float.
    indices = tf.constant(1.0, shape=(2, 3))
    with self.assertRaises(ValueError):
      ops.padded_one_hot_encoding(indices, depth=0.1, left_pad=2)
class OpsDenseToSparseBoxesTest(tf.test.TestCase):
  """Tests for ops.dense_to_sparse_boxes."""

  def test_return_all_boxes_when_all_input_boxes_are_valid(self):
    # Three valid boxes over four classes with per-class counts
    # [1, 0, 0, 2]: one box in class 0 and two in class 3.
    num_classes = 4
    num_valid_boxes = 3
    code_size = 4
    loc_input = tf.placeholder(tf.float32,
                               shape=(num_valid_boxes, code_size))
    counts_input = tf.placeholder(tf.int32, shape=(num_classes))
    box_locations, box_classes = ops.dense_to_sparse_boxes(
        loc_input, counts_input, num_classes)

    random_locations = np.random.uniform(size=[num_valid_boxes, code_size])
    feed_dict = {
        loc_input: random_locations,
        counts_input: np.array([1, 0, 0, 2], dtype=np.int32),
    }
    expected_classes = np.array([0, 3, 3])
    with self.test_session() as sess:
      locations_out, classes_out = sess.run(
          [box_locations, box_classes], feed_dict=feed_dict)
      # All input rows are valid, so locations pass through unchanged.
      self.assertAllClose(locations_out, random_locations,
                          rtol=1e-6, atol=1e-6)
      self.assertAllEqual(classes_out, expected_classes)

  def test_return_only_valid_boxes_when_input_contains_invalid_boxes(self):
    # Ten candidate rows but only three are valid per the class counts
    # [1, 0, 0, 2]; the trailing seven rows must be discarded.
    num_classes = 4
    num_valid_boxes = 3
    num_boxes = 10
    code_size = 4
    loc_input = tf.placeholder(tf.float32, shape=(num_boxes, code_size))
    counts_input = tf.placeholder(tf.int32, shape=(num_classes))
    box_locations, box_classes = ops.dense_to_sparse_boxes(
        loc_input, counts_input, num_classes)

    random_locations = np.random.uniform(size=[num_boxes, code_size])
    feed_dict = {
        loc_input: random_locations,
        counts_input: np.array([1, 0, 0, 2], dtype=np.int32),
    }
    expected_locations = random_locations[:num_valid_boxes]
    expected_classes = np.array([0, 3, 3])
    with self.test_session() as sess:
      locations_out, classes_out = sess.run(
          [box_locations, box_classes], feed_dict=feed_dict)
      self.assertAllClose(locations_out, expected_locations,
                          rtol=1e-6, atol=1e-6)
      self.assertAllEqual(classes_out, expected_classes)
class OpsTestIndicesToDenseVector(tf.test.TestCase):
  """Tests for ops.indices_to_dense_vector."""

  def test_indices_to_dense_vector(self):
    # Default behavior: selected positions become 1., everything else 0.,
    # with float32 output.
    size = 10000
    num_indices = np.random.randint(size)
    rand_indices = np.random.permutation(np.arange(size))[0:num_indices]

    expected_output = np.zeros(size, dtype=np.float32)
    expected_output[rand_indices] = 1.

    tf_rand_indices = tf.constant(rand_indices)
    indicator = ops.indices_to_dense_vector(tf_rand_indices, size)

    with self.test_session() as sess:
      output = sess.run(indicator)
      self.assertAllEqual(output, expected_output)
      self.assertEqual(output.dtype, expected_output.dtype)

  def test_indices_to_dense_vector_size_at_inference(self):
    # `size` is only known at run time, supplied as tf.shape of a fed
    # placeholder rather than a Python integer.
    size = 5000
    num_indices = 250
    all_indices = np.arange(size)
    rand_indices = np.random.permutation(all_indices)[0:num_indices]

    expected_output = np.zeros(size, dtype=np.float32)
    expected_output[rand_indices] = 1.

    tf_all_indices = tf.placeholder(tf.int32)
    tf_rand_indices = tf.constant(rand_indices)
    indicator = ops.indices_to_dense_vector(tf_rand_indices,
                                            tf.shape(tf_all_indices)[0])
    feed_dict = {tf_all_indices: all_indices}

    with self.test_session() as sess:
      output = sess.run(indicator, feed_dict=feed_dict)
      self.assertAllEqual(output, expected_output)
      self.assertEqual(output.dtype, expected_output.dtype)

  def test_indices_to_dense_vector_int(self):
    # Integer output via the dtype argument (indices_value passed
    # positionally as 1).
    size = 500
    num_indices = 25
    rand_indices = np.random.permutation(np.arange(size))[0:num_indices]

    expected_output = np.zeros(size, dtype=np.int64)
    expected_output[rand_indices] = 1

    tf_rand_indices = tf.constant(rand_indices)
    indicator = ops.indices_to_dense_vector(
        tf_rand_indices, size, 1, dtype=tf.int64)

    with self.test_session() as sess:
      output = sess.run(indicator)
      self.assertAllEqual(output, expected_output)
      self.assertEqual(output.dtype, expected_output.dtype)

  def test_indices_to_dense_vector_custom_values(self):
    # Custom fill values for selected (indices_value) and unselected
    # (default_value) positions.
    size = 100
    num_indices = 10
    rand_indices = np.random.permutation(np.arange(size))[0:num_indices]
    indices_value = np.random.rand(1)
    default_value = np.random.rand(1)

    expected_output = np.float32(np.ones(size) * default_value)
    expected_output[rand_indices] = indices_value

    tf_rand_indices = tf.constant(rand_indices)
    indicator = ops.indices_to_dense_vector(
        tf_rand_indices,
        size,
        indices_value=indices_value,
        default_value=default_value)

    with self.test_session() as sess:
      output = sess.run(indicator)
      self.assertAllClose(output, expected_output)
      self.assertEqual(output.dtype, expected_output.dtype)

  def test_indices_to_dense_vector_all_indices_as_input(self):
    # Every position selected: the result is all ones.
    size = 500
    num_indices = 500
    rand_indices = np.random.permutation(np.arange(size))[0:num_indices]

    expected_output = np.ones(size, dtype=np.float32)

    tf_rand_indices = tf.constant(rand_indices)
    indicator = ops.indices_to_dense_vector(tf_rand_indices, size)

    with self.test_session() as sess:
      output = sess.run(indicator)
      self.assertAllEqual(output, expected_output)
      self.assertEqual(output.dtype, expected_output.dtype)

  def test_indices_to_dense_vector_empty_indices_as_input(self):
    # No indices selected: the result is all zeros.
    size = 500
    rand_indices = []

    expected_output = np.zeros(size, dtype=np.float32)

    tf_rand_indices = tf.constant(rand_indices)
    indicator = ops.indices_to_dense_vector(tf_rand_indices, size)

    with self.test_session() as sess:
      output = sess.run(indicator)
      self.assertAllEqual(output, expected_output)
      self.assertEqual(output.dtype, expected_output.dtype)
class GroundtruthFilterTest(tf.test.TestCase):
  """Tests for ops.retain_groundtruth.

  NOTE: the original tests built feed arrays with the deprecated
  ``np.float`` / ``np.bool`` / ``np.string_`` aliases, which were removed
  in NumPy 1.24 / 2.0. They are replaced with the exact equivalents the
  aliases pointed at (``float``, ``bool``, ``np.bytes_``), so the fed
  arrays are byte-identical to before.
  """

  def test_filter_groundtruth(self):
    # Retaining index [0] must keep only the first entry of every field;
    # the image itself passes through untouched.
    input_image = tf.placeholder(tf.float32, shape=(None, None, 3))
    input_boxes = tf.placeholder(tf.float32, shape=(None, 4))
    input_classes = tf.placeholder(tf.int32, shape=(None,))
    input_is_crowd = tf.placeholder(tf.bool, shape=(None,))
    input_area = tf.placeholder(tf.float32, shape=(None,))
    input_difficult = tf.placeholder(tf.float32, shape=(None,))
    input_label_types = tf.placeholder(tf.string, shape=(None,))
    valid_indices = tf.placeholder(tf.int32, shape=(None,))
    input_tensors = {
        fields.InputDataFields.image: input_image,
        fields.InputDataFields.groundtruth_boxes: input_boxes,
        fields.InputDataFields.groundtruth_classes: input_classes,
        fields.InputDataFields.groundtruth_is_crowd: input_is_crowd,
        fields.InputDataFields.groundtruth_area: input_area,
        fields.InputDataFields.groundtruth_difficult: input_difficult,
        fields.InputDataFields.groundtruth_label_types: input_label_types
    }
    output_tensors = ops.retain_groundtruth(input_tensors, valid_indices)
    image_tensor = np.random.rand(224, 224, 3)
    feed_dict = {
        input_image: image_tensor,
        input_boxes:
            np.array([[0.2, 0.4, 0.1, 0.8], [0.2, 0.4, 1.0, 0.8]],
                     dtype=float),
        input_classes: np.array([1, 2], dtype=np.int32),
        input_is_crowd: np.array([False, True], dtype=bool),
        input_area: np.array([32, 48], dtype=np.float32),
        input_difficult: np.array([True, False], dtype=bool),
        input_label_types:
            np.array(['APPROPRIATE', 'INCORRECT'], dtype=np.bytes_),
        valid_indices: np.array([0], dtype=np.int32)
    }
    expected_tensors = {
        fields.InputDataFields.image: image_tensor,
        fields.InputDataFields.groundtruth_boxes: [[0.2, 0.4, 0.1, 0.8]],
        fields.InputDataFields.groundtruth_classes: [1],
        fields.InputDataFields.groundtruth_is_crowd: [False],
        fields.InputDataFields.groundtruth_area: [32],
        fields.InputDataFields.groundtruth_difficult: [True],
        fields.InputDataFields.groundtruth_label_types: ['APPROPRIATE']
    }
    with self.test_session() as sess:
      output_tensors = sess.run(output_tensors, feed_dict=feed_dict)
      # Float-valued fields use approximate comparison.
      for key in [fields.InputDataFields.image,
                  fields.InputDataFields.groundtruth_boxes,
                  fields.InputDataFields.groundtruth_area]:
        self.assertAllClose(expected_tensors[key], output_tensors[key])
      # Integer / bool / string fields must match exactly.
      for key in [fields.InputDataFields.groundtruth_classes,
                  fields.InputDataFields.groundtruth_is_crowd,
                  fields.InputDataFields.groundtruth_label_types]:
        self.assertAllEqual(expected_tensors[key], output_tensors[key])

  def test_filter_with_missing_fields(self):
    # Only a subset of groundtruth fields is supplied; filtering must
    # operate on whatever fields are present.
    input_boxes = tf.placeholder(tf.float32, shape=(None, 4))
    input_classes = tf.placeholder(tf.int32, shape=(None,))
    input_tensors = {
        fields.InputDataFields.groundtruth_boxes: input_boxes,
        fields.InputDataFields.groundtruth_classes: input_classes
    }
    valid_indices = tf.placeholder(tf.int32, shape=(None,))
    feed_dict = {
        input_boxes:
            np.array([[0.2, 0.4, 0.1, 0.8], [0.2, 0.4, 1.0, 0.8]],
                     dtype=float),
        input_classes: np.array([1, 2], dtype=np.int32),
        valid_indices: np.array([0], dtype=np.int32)
    }
    expected_tensors = {
        fields.InputDataFields.groundtruth_boxes: [[0.2, 0.4, 0.1, 0.8]],
        fields.InputDataFields.groundtruth_classes: [1]
    }
    output_tensors = ops.retain_groundtruth(input_tensors, valid_indices)
    with self.test_session() as sess:
      output_tensors = sess.run(output_tensors, feed_dict=feed_dict)
      for key in [fields.InputDataFields.groundtruth_boxes]:
        self.assertAllClose(expected_tensors[key], output_tensors[key])
      for key in [fields.InputDataFields.groundtruth_classes]:
        self.assertAllEqual(expected_tensors[key], output_tensors[key])

  def test_filter_with_empty_fields(self):
    # Area/difficult are fed as empty vectors; they must stay empty after
    # filtering while the populated fields are filtered normally.
    input_boxes = tf.placeholder(tf.float32, shape=(None, 4))
    input_classes = tf.placeholder(tf.int32, shape=(None,))
    input_is_crowd = tf.placeholder(tf.bool, shape=(None,))
    input_area = tf.placeholder(tf.float32, shape=(None,))
    input_difficult = tf.placeholder(tf.float32, shape=(None,))
    valid_indices = tf.placeholder(tf.int32, shape=(None,))
    input_tensors = {
        fields.InputDataFields.groundtruth_boxes: input_boxes,
        fields.InputDataFields.groundtruth_classes: input_classes,
        fields.InputDataFields.groundtruth_is_crowd: input_is_crowd,
        fields.InputDataFields.groundtruth_area: input_area,
        fields.InputDataFields.groundtruth_difficult: input_difficult
    }
    output_tensors = ops.retain_groundtruth(input_tensors, valid_indices)
    feed_dict = {
        input_boxes:
            np.array([[0.2, 0.4, 0.1, 0.8], [0.2, 0.4, 1.0, 0.8]],
                     dtype=float),
        input_classes: np.array([1, 2], dtype=np.int32),
        input_is_crowd: np.array([False, True], dtype=bool),
        input_area: np.array([], dtype=np.float32),
        input_difficult: np.array([], dtype=np.float32),
        valid_indices: np.array([0], dtype=np.int32)
    }
    expected_tensors = {
        fields.InputDataFields.groundtruth_boxes: [[0.2, 0.4, 0.1, 0.8]],
        fields.InputDataFields.groundtruth_classes: [1],
        fields.InputDataFields.groundtruth_is_crowd: [False],
        fields.InputDataFields.groundtruth_area: [],
        fields.InputDataFields.groundtruth_difficult: []
    }
    with self.test_session() as sess:
      output_tensors = sess.run(output_tensors, feed_dict=feed_dict)
      for key in [fields.InputDataFields.groundtruth_boxes,
                  fields.InputDataFields.groundtruth_area]:
        self.assertAllClose(expected_tensors[key], output_tensors[key])
      for key in [fields.InputDataFields.groundtruth_classes,
                  fields.InputDataFields.groundtruth_is_crowd]:
        self.assertAllEqual(expected_tensors[key], output_tensors[key])

  def test_filter_with_empty_groundtruth_boxes(self):
    # With no groundtruth at all, every filtered field keeps length 0 and
    # boxes keep their trailing coordinate dimension of 4.
    input_boxes = tf.placeholder(tf.float32, shape=(None, 4))
    input_classes = tf.placeholder(tf.int32, shape=(None,))
    input_is_crowd = tf.placeholder(tf.bool, shape=(None,))
    input_area = tf.placeholder(tf.float32, shape=(None,))
    input_difficult = tf.placeholder(tf.float32, shape=(None,))
    valid_indices = tf.placeholder(tf.int32, shape=(None,))
    input_tensors = {
        fields.InputDataFields.groundtruth_boxes: input_boxes,
        fields.InputDataFields.groundtruth_classes: input_classes,
        fields.InputDataFields.groundtruth_is_crowd: input_is_crowd,
        fields.InputDataFields.groundtruth_area: input_area,
        fields.InputDataFields.groundtruth_difficult: input_difficult
    }
    output_tensors = ops.retain_groundtruth(input_tensors, valid_indices)
    feed_dict = {
        input_boxes: np.array([], dtype=float).reshape(0, 4),
        input_classes: np.array([], dtype=np.int32),
        input_is_crowd: np.array([], dtype=bool),
        input_area: np.array([], dtype=np.float32),
        input_difficult: np.array([], dtype=np.float32),
        valid_indices: np.array([], dtype=np.int32)
    }
    with self.test_session() as sess:
      output_tensors = sess.run(output_tensors, feed_dict=feed_dict)
      for key in input_tensors:
        if key == fields.InputDataFields.groundtruth_boxes:
          self.assertAllEqual([0, 4], output_tensors[key].shape)
        else:
          self.assertAllEqual([0], output_tensors[key].shape)
class RetainGroundTruthWithPositiveClasses(tf.test.TestCase):
  """Tests for ops.retain_groundtruth_with_positive_classes.

  NOTE: deprecated ``np.float`` / ``np.bool`` / ``np.string_`` aliases
  (removed in NumPy 1.24 / 2.0) are replaced with the exact equivalents
  they pointed at (``float``, ``bool``, ``np.bytes_``); the fed arrays
  are unchanged. An unused ``valid_indices`` placeholder that was fed but
  never consumed by the op under test has also been removed.
  """

  def test_filter_groundtruth_with_positive_classes(self):
    # The second entry has class 0 (non-positive) and must be dropped
    # from every groundtruth field; the image passes through untouched.
    input_image = tf.placeholder(tf.float32, shape=(None, None, 3))
    input_boxes = tf.placeholder(tf.float32, shape=(None, 4))
    input_classes = tf.placeholder(tf.int32, shape=(None,))
    input_is_crowd = tf.placeholder(tf.bool, shape=(None,))
    input_area = tf.placeholder(tf.float32, shape=(None,))
    input_difficult = tf.placeholder(tf.float32, shape=(None,))
    input_label_types = tf.placeholder(tf.string, shape=(None,))
    input_tensors = {
        fields.InputDataFields.image: input_image,
        fields.InputDataFields.groundtruth_boxes: input_boxes,
        fields.InputDataFields.groundtruth_classes: input_classes,
        fields.InputDataFields.groundtruth_is_crowd: input_is_crowd,
        fields.InputDataFields.groundtruth_area: input_area,
        fields.InputDataFields.groundtruth_difficult: input_difficult,
        fields.InputDataFields.groundtruth_label_types: input_label_types
    }
    output_tensors = ops.retain_groundtruth_with_positive_classes(
        input_tensors)
    image_tensor = np.random.rand(224, 224, 3)
    feed_dict = {
        input_image: image_tensor,
        input_boxes:
            np.array([[0.2, 0.4, 0.1, 0.8], [0.2, 0.4, 1.0, 0.8]],
                     dtype=float),
        input_classes: np.array([1, 0], dtype=np.int32),
        input_is_crowd: np.array([False, True], dtype=bool),
        input_area: np.array([32, 48], dtype=np.float32),
        input_difficult: np.array([True, False], dtype=bool),
        input_label_types:
            np.array(['APPROPRIATE', 'INCORRECT'], dtype=np.bytes_)
    }
    expected_tensors = {
        fields.InputDataFields.image: image_tensor,
        fields.InputDataFields.groundtruth_boxes: [[0.2, 0.4, 0.1, 0.8]],
        fields.InputDataFields.groundtruth_classes: [1],
        fields.InputDataFields.groundtruth_is_crowd: [False],
        fields.InputDataFields.groundtruth_area: [32],
        fields.InputDataFields.groundtruth_difficult: [True],
        fields.InputDataFields.groundtruth_label_types: ['APPROPRIATE']
    }
    with self.test_session() as sess:
      output_tensors = sess.run(output_tensors, feed_dict=feed_dict)
      # Float-valued fields use approximate comparison.
      for key in [fields.InputDataFields.image,
                  fields.InputDataFields.groundtruth_boxes,
                  fields.InputDataFields.groundtruth_area]:
        self.assertAllClose(expected_tensors[key], output_tensors[key])
      # Integer / bool / string fields must match exactly.
      for key in [fields.InputDataFields.groundtruth_classes,
                  fields.InputDataFields.groundtruth_is_crowd,
                  fields.InputDataFields.groundtruth_label_types]:
        self.assertAllEqual(expected_tensors[key], output_tensors[key])
class GroundtruthFilterWithNanBoxTest(tf.test.TestCase):
  """Tests for ops.filter_groundtruth_with_nan_box_coordinates."""

  def test_filter_groundtruth_with_nan_box_coordinates(self):
    # The first entry has all-NaN box coordinates and must be dropped
    # from every field; the second entry is kept intact.
    nan_box = [np.nan, np.nan, np.nan, np.nan]
    good_box = [0.2, 0.4, 0.1, 0.8]
    input_tensors = {
        fields.InputDataFields.groundtruth_boxes: [nan_box, good_box],
        fields.InputDataFields.groundtruth_classes: [1, 2],
        fields.InputDataFields.groundtruth_is_crowd: [False, True],
        fields.InputDataFields.groundtruth_area: [100.0, 238.7]
    }
    expected_tensors = {
        fields.InputDataFields.groundtruth_boxes: [good_box],
        fields.InputDataFields.groundtruth_classes: [2],
        fields.InputDataFields.groundtruth_is_crowd: [True],
        fields.InputDataFields.groundtruth_area: [238.7]
    }
    filtered = ops.filter_groundtruth_with_nan_box_coordinates(input_tensors)
    with self.test_session() as sess:
      filtered = sess.run(filtered)
      for float_key in (fields.InputDataFields.groundtruth_boxes,
                        fields.InputDataFields.groundtruth_area):
        self.assertAllClose(expected_tensors[float_key], filtered[float_key])
      for exact_key in (fields.InputDataFields.groundtruth_classes,
                        fields.InputDataFields.groundtruth_is_crowd):
        self.assertAllEqual(expected_tensors[exact_key], filtered[exact_key])
class OpsTestNormalizeToTarget(tf.test.TestCase):
  """Tests for ops.normalize_to_target."""

  def test_create_normalize_to_target(self):
    # Checks the op and variable names created under the
    # 'NormalizeToTarget' scope.
    inputs = tf.random_uniform([5, 10, 12, 3])
    target_norm_value = 4.0
    dim = 3
    with self.test_session():
      output = ops.normalize_to_target(inputs, target_norm_value, dim)
      self.assertEqual(output.op.name, 'NormalizeToTarget/mul')
      var_name = tf.contrib.framework.get_variables()[0].name
      self.assertEqual(var_name, 'NormalizeToTarget/weights:0')

  def test_invalid_dim(self):
    # dim must lie within the input rank (here rank 4, so 10 is invalid).
    inputs = tf.random_uniform([5, 10, 12, 3])
    target_norm_value = 4.0
    dim = 10
    with self.assertRaisesRegexp(
        ValueError,
        'dim must be non-negative but smaller than the input rank.'):
      ops.normalize_to_target(inputs, target_norm_value, dim)

  def test_invalid_target_norm_values(self):
    # A list target must have one value per channel of `dim` (3 here), so
    # a two-element list is rejected.
    inputs = tf.random_uniform([5, 10, 12, 3])
    target_norm_value = [4.0, 4.0]
    dim = 3
    with self.assertRaisesRegexp(
        ValueError, 'target_norm_value must be a float or a list of floats'):
      ops.normalize_to_target(inputs, target_norm_value, dim)

  def test_correct_output_shape(self):
    # Normalization is element-wise scaling, so the shape is preserved.
    inputs = tf.random_uniform([5, 10, 12, 3])
    target_norm_value = 4.0
    dim = 3
    with self.test_session():
      output = ops.normalize_to_target(inputs, target_norm_value, dim)
      self.assertEqual(output.get_shape().as_list(),
                       inputs.get_shape().as_list())

  def test_correct_initial_output_values(self):
    # With freshly initialized weights, each vector along `dim` is scaled
    # to the target norm: L2 norms along dim 3 are 5, 25, 13 and 1, so
    # every vector is multiplied by 10/norm.
    inputs = tf.constant([[[[3, 4], [7, 24]],
                           [[5, -12], [-1, 0]]]], tf.float32)
    target_norm_value = 10.0
    dim = 3
    expected_output = [[[[30 / 5.0, 40 / 5.0],
                         [70 / 25.0, 240 / 25.0]],
                        [[50 / 13.0, -120 / 13.0],
                         [-10, 0]]]]
    with self.test_session() as sess:
      normalized_inputs = ops.normalize_to_target(inputs, target_norm_value,
                                                  dim)
      sess.run(tf.global_variables_initializer())
      output = normalized_inputs.eval()
      self.assertAllClose(output, expected_output)

  def test_multiple_target_norm_values(self):
    # Per-channel targets: channel 0 scaled to norm 10, channel 1 to 20.
    inputs = tf.constant([[[[3, 4], [7, 24]],
                           [[5, -12], [-1, 0]]]], tf.float32)
    target_norm_value = [10.0, 20.0]
    dim = 3
    expected_output = [[[[30 / 5.0, 80 / 5.0],
                         [70 / 25.0, 480 / 25.0]],
                        [[50 / 13.0, -240 / 13.0],
                         [-10, 0]]]]
    with self.test_session() as sess:
      normalized_inputs = ops.normalize_to_target(inputs, target_norm_value,
                                                  dim)
      sess.run(tf.global_variables_initializer())
      output = normalized_inputs.eval()
      self.assertAllClose(output, expected_output)
class OpsTestPositionSensitiveCropRegions(tf.test.TestCase):
  """Tests for ops.position_sensitive_crop_regions.

  Position-sensitive crop-and-pool (as used in R-FCN) splits the channel
  dimension into num_spatial_bins[0] * num_spatial_bins[1] groups and crops
  each spatial bin of a box from its own channel group.

  Fix over the original: the original built constant images with
  `range(1, n) * k`, which only works on Python 2 where `range` returns a
  list. All such expressions are wrapped in `list(...)` so the tests also run
  on Python 3; behavior on Python 2 is unchanged.
  """

  def test_position_sensitive(self):
    num_spatial_bins = [3, 2]
    image_shape = [1, 3, 2, 6]

    # First channel is 1's, second channel is 2's, etc.
    # list() is required under Python 3, where range() is not a list and
    # cannot be repeated with `*`.
    image = tf.constant(list(range(1, 3 * 2 + 1)) * 6, dtype=tf.float32,
                        shape=image_shape)
    boxes = tf.random_uniform((2, 4))
    box_ind = tf.constant([0, 0], dtype=tf.int32)

    # The result for both boxes should be [[1, 2], [3, 4], [5, 6]]
    # before averaging.
    expected_output = np.array([3.5, 3.5]).reshape([2, 1, 1, 1])

    for crop_size_mult in range(1, 3):
      crop_size = [3 * crop_size_mult, 2 * crop_size_mult]
      ps_crop_and_pool = ops.position_sensitive_crop_regions(
          image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=True)

      with self.test_session() as sess:
        output = sess.run(ps_crop_and_pool)

      self.assertAllClose(output, expected_output)

  def test_position_sensitive_with_equal_channels(self):
    num_spatial_bins = [2, 2]
    image_shape = [1, 3, 3, 4]
    crop_size = [2, 2]

    # list() keeps the constant construction Python-3 compatible.
    image = tf.constant(list(range(1, 3 * 3 + 1)), dtype=tf.float32,
                        shape=[1, 3, 3, 1])
    tiled_image = tf.tile(image, [1, 1, 1, image_shape[3]])
    boxes = tf.random_uniform((3, 4))
    box_ind = tf.constant([0, 0, 0], dtype=tf.int32)

    # All channels are equal so position-sensitive crop and resize should
    # work as the usual crop and resize for just one channel.
    crop = tf.image.crop_and_resize(image, boxes, box_ind, crop_size)
    crop_and_pool = tf.reduce_mean(crop, [1, 2], keep_dims=True)

    ps_crop_and_pool = ops.position_sensitive_crop_regions(
        tiled_image, boxes, box_ind, crop_size, num_spatial_bins,
        global_pool=True)

    with self.test_session() as sess:
      expected_output, output = sess.run((crop_and_pool, ps_crop_and_pool))
      self.assertAllClose(output, expected_output)

  def test_position_sensitive_with_single_bin(self):
    num_spatial_bins = [1, 1]
    image_shape = [2, 3, 3, 4]
    crop_size = [2, 2]

    image = tf.random_uniform(image_shape)
    boxes = tf.random_uniform((6, 4))
    box_ind = tf.constant([0, 0, 0, 1, 1, 1], dtype=tf.int32)

    # When a single bin is used, position-sensitive crop and pool should be
    # the same as non-position sensitive crop and pool.
    crop = tf.image.crop_and_resize(image, boxes, box_ind, crop_size)
    crop_and_pool = tf.reduce_mean(crop, [1, 2], keep_dims=True)

    ps_crop_and_pool = ops.position_sensitive_crop_regions(
        image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=True)

    with self.test_session() as sess:
      expected_output, output = sess.run((crop_and_pool, ps_crop_and_pool))
      self.assertAllClose(output, expected_output)

  def test_raise_value_error_on_num_bins_less_than_one(self):
    num_spatial_bins = [1, -1]
    image_shape = [1, 1, 1, 2]
    crop_size = [2, 2]

    image = tf.constant(1, dtype=tf.float32, shape=image_shape)
    boxes = tf.constant([[0, 0, 1, 1]], dtype=tf.float32)
    box_ind = tf.constant([0], dtype=tf.int32)

    with self.assertRaisesRegexp(ValueError, 'num_spatial_bins should be >= 1'):
      ops.position_sensitive_crop_regions(
          image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=True)

  def test_raise_value_error_on_non_divisible_crop_size(self):
    num_spatial_bins = [2, 3]
    image_shape = [1, 1, 1, 6]
    crop_size = [3, 2]

    image = tf.constant(1, dtype=tf.float32, shape=image_shape)
    boxes = tf.constant([[0, 0, 1, 1]], dtype=tf.float32)
    box_ind = tf.constant([0], dtype=tf.int32)

    with self.assertRaisesRegexp(
        ValueError, 'crop_size should be divisible by num_spatial_bins'):
      ops.position_sensitive_crop_regions(
          image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=True)

  def test_raise_value_error_on_non_divisible_num_channels(self):
    num_spatial_bins = [2, 2]
    image_shape = [1, 1, 1, 5]
    crop_size = [2, 2]

    image = tf.constant(1, dtype=tf.float32, shape=image_shape)
    boxes = tf.constant([[0, 0, 1, 1]], dtype=tf.float32)
    box_ind = tf.constant([0], dtype=tf.int32)

    # 5 channels cannot be split evenly into 2 * 2 = 4 bin groups.
    with self.assertRaisesRegexp(
        ValueError, 'Dimension size must be evenly divisible by 4 but is 5'):
      ops.position_sensitive_crop_regions(
          image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=True)

  def test_position_sensitive_with_global_pool_false(self):
    num_spatial_bins = [3, 2]
    image_shape = [1, 3, 2, 6]
    num_boxes = 2

    # First channel is 1's, second channel is 2's, etc.
    # list() keeps the repetition Python-3 compatible.
    image = tf.constant(list(range(1, 3 * 2 + 1)) * 6, dtype=tf.float32,
                        shape=image_shape)
    boxes = tf.random_uniform((num_boxes, 4))
    box_ind = tf.constant([0, 0], dtype=tf.int32)

    expected_output = []

    # Expected output, when crop_size = [3, 2].
    expected_output.append(np.expand_dims(
        np.tile(np.array([[1, 2],
                          [3, 4],
                          [5, 6]]), (num_boxes, 1, 1)),
        axis=-1))

    # Expected output, when crop_size = [6, 4].
    expected_output.append(np.expand_dims(
        np.tile(
            np.array([[1, 1, 2, 2],
                      [1, 1, 2, 2],
                      [3, 3, 4, 4],
                      [3, 3, 4, 4],
                      [5, 5, 6, 6],
                      [5, 5, 6, 6]]), (num_boxes, 1, 1)),
        axis=-1))

    for crop_size_mult in range(1, 3):
      crop_size = [3 * crop_size_mult, 2 * crop_size_mult]
      ps_crop = ops.position_sensitive_crop_regions(
          image, boxes, box_ind, crop_size, num_spatial_bins,
          global_pool=False)
      with self.test_session() as sess:
        output = sess.run(ps_crop)

      self.assertAllEqual(output, expected_output[crop_size_mult - 1])

  def test_position_sensitive_with_global_pool_false_and_known_boxes(self):
    num_spatial_bins = [2, 2]
    image_shape = [2, 2, 2, 4]
    crop_size = [2, 2]

    # list() keeps the repetition Python-3 compatible.
    image = tf.constant(list(range(1, 2 * 2 * 4 + 1)) * 2, dtype=tf.float32,
                        shape=image_shape)

    # First box contains whole image, and second box contains only first row.
    boxes = tf.constant(np.array([[0., 0., 1., 1.],
                                  [0., 0., 0.5, 1.]]), dtype=tf.float32)
    box_ind = tf.constant([0, 1], dtype=tf.int32)

    expected_output = []

    # Expected output, when the box containing whole image.
    expected_output.append(
        np.reshape(np.array([[4, 7],
                             [10, 13]]),
                   (1, 2, 2, 1))
    )

    # Expected output, when the box containing only first row.
    expected_output.append(
        np.reshape(np.array([[3, 6],
                             [7, 10]]),
                   (1, 2, 2, 1))
    )
    expected_output = np.concatenate(expected_output, axis=0)

    ps_crop = ops.position_sensitive_crop_regions(
        image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=False)

    with self.test_session() as sess:
      output = sess.run(ps_crop)
      self.assertAllEqual(output, expected_output)

  def test_position_sensitive_with_global_pool_false_and_single_bin(self):
    num_spatial_bins = [1, 1]
    image_shape = [2, 3, 3, 4]
    crop_size = [1, 1]

    image = tf.random_uniform(image_shape)
    boxes = tf.random_uniform((6, 4))
    box_ind = tf.constant([0, 0, 0, 1, 1, 1], dtype=tf.int32)

    # Since single_bin is used and crop_size = [1, 1] (i.e., no crop resize),
    # the outputs are the same whatever the global_pool value is.
    ps_crop_and_pool = ops.position_sensitive_crop_regions(
        image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=True)
    ps_crop = ops.position_sensitive_crop_regions(
        image, boxes, box_ind, crop_size, num_spatial_bins, global_pool=False)

    with self.test_session() as sess:
      pooled_output, unpooled_output = sess.run((ps_crop_and_pool, ps_crop))
      self.assertAllClose(pooled_output, unpooled_output)

  def test_position_sensitive_with_global_pool_false_and_do_global_pool(self):
    num_spatial_bins = [3, 2]
    image_shape = [1, 3, 2, 6]
    num_boxes = 2

    # First channel is 1's, second channel is 2's, etc.
    # list() keeps the repetition Python-3 compatible.
    image = tf.constant(list(range(1, 3 * 2 + 1)) * 6, dtype=tf.float32,
                        shape=image_shape)
    boxes = tf.random_uniform((num_boxes, 4))
    box_ind = tf.constant([0, 0], dtype=tf.int32)

    expected_output = []

    # Expected output, when crop_size = [3, 2].
    expected_output.append(np.mean(
        np.expand_dims(
            np.tile(np.array([[1, 2],
                              [3, 4],
                              [5, 6]]), (num_boxes, 1, 1)),
            axis=-1),
        axis=(1, 2), keepdims=True))

    # Expected output, when crop_size = [6, 4].
    expected_output.append(np.mean(
        np.expand_dims(
            np.tile(
                np.array([[1, 1, 2, 2],
                          [1, 1, 2, 2],
                          [3, 3, 4, 4],
                          [3, 3, 4, 4],
                          [5, 5, 6, 6],
                          [5, 5, 6, 6]]), (num_boxes, 1, 1)),
            axis=-1),
        axis=(1, 2), keepdims=True))

    for crop_size_mult in range(1, 3):
      crop_size = [3 * crop_size_mult, 2 * crop_size_mult]

      # Perform global_pooling after running the function with
      # global_pool=False.
      ps_crop = ops.position_sensitive_crop_regions(
          image, boxes, box_ind, crop_size, num_spatial_bins,
          global_pool=False)
      ps_crop_and_pool = tf.reduce_mean(
          ps_crop, reduction_indices=(1, 2), keep_dims=True)

      with self.test_session() as sess:
        output = sess.run(ps_crop_and_pool)

      self.assertAllEqual(output, expected_output[crop_size_mult - 1])

  def test_raise_value_error_on_non_square_block_size(self):
    num_spatial_bins = [3, 2]
    image_shape = [1, 3, 2, 6]
    crop_size = [6, 2]

    image = tf.constant(1, dtype=tf.float32, shape=image_shape)
    boxes = tf.constant([[0, 0, 1, 1]], dtype=tf.float32)
    box_ind = tf.constant([0], dtype=tf.int32)

    with self.assertRaisesRegexp(
        ValueError, 'Only support square bin crop size for now.'):
      ops.position_sensitive_crop_regions(
          image, boxes, box_ind, crop_size, num_spatial_bins,
          global_pool=False)
class ReframeBoxMasksToImageMasksTest(tf.test.TestCase):
  """Tests for ops.reframe_box_masks_to_image_masks.

  The op projects a mask expressed in box coordinates back into full-image
  coordinates for a given normalized box.
  """

  def testZeroImageOnEmptyMask(self):
    # An all-zero box mask must map to an all-zero image mask regardless of
    # the box extent.
    box_masks = tf.constant([[[0, 0],
                              [0, 0]]], dtype=tf.float32)
    boxes = tf.constant([[0.0, 0.0, 1.0, 1.0]], dtype=tf.float32)
    image_masks = ops.reframe_box_masks_to_image_masks(box_masks, boxes,
                                                       image_height=4,
                                                       image_width=4)
    np_expected_image_masks = np.array([[[0, 0, 0, 0],
                                         [0, 0, 0, 0],
                                         [0, 0, 0, 0],
                                         [0, 0, 0, 0]]], dtype=np.float32)
    with self.test_session() as sess:
      np_image_masks = sess.run(image_masks)
      self.assertAllClose(np_image_masks, np_expected_image_masks)

  def testMaskIsCenteredInImageWhenBoxIsCentered(self):
    # A full box mask over the centered box [0.25, 0.25, 0.75, 0.75] should
    # light up exactly the central 2x2 patch of a 4x4 image.
    box_masks = tf.constant([[[1, 1],
                              [1, 1]]], dtype=tf.float32)
    boxes = tf.constant([[0.25, 0.25, 0.75, 0.75]], dtype=tf.float32)
    image_masks = ops.reframe_box_masks_to_image_masks(box_masks, boxes,
                                                       image_height=4,
                                                       image_width=4)
    np_expected_image_masks = np.array([[[0, 0, 0, 0],
                                         [0, 1, 1, 0],
                                         [0, 1, 1, 0],
                                         [0, 0, 0, 0]]], dtype=np.float32)
    with self.test_session() as sess:
      np_image_masks = sess.run(image_masks)
      self.assertAllClose(np_image_masks, np_expected_image_masks)

  def testMaskOffCenterRemainsOffCenterInImage(self):
    # A diagonal mask in an off-center box stays off-center; the fractional
    # expected values come from bilinear resampling at the box boundary.
    box_masks = tf.constant([[[1, 0],
                              [0, 1]]], dtype=tf.float32)
    boxes = tf.constant([[0.25, 0.5, 0.75, 1.0]], dtype=tf.float32)
    image_masks = ops.reframe_box_masks_to_image_masks(box_masks, boxes,
                                                       image_height=4,
                                                       image_width=4)
    np_expected_image_masks = np.array(
        [[[0, 0, 0, 0],
          [0, 0, 0.6111111, 0.16666669],
          [0, 0, 0.3888889, 0.83333337],
          [0, 0, 0, 0]]], dtype=np.float32)
    with self.test_session() as sess:
      np_image_masks = sess.run(image_masks)
      self.assertAllClose(np_image_masks, np_expected_image_masks)
# Standard TensorFlow test entry point: discovers and runs the TestCase
# classes defined above.
if __name__ == '__main__':
  tf.test.main()
object_detection/utils/per_image_evaluation.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Evaluate Object Detection result on a single image.
Annotate each detected result as true positives or false positive according to
a predefined IOU ratio. Non Maximum Supression is used by default. Multi class
detection is supported by default.
"""
import
numpy
as
np
from
object_detection.utils
import
np_box_list
from
object_detection.utils
import
np_box_list_ops
class PerImageEvaluation(object):
  """Evaluate detection result of a single image.

  Computes per-class true/false-positive labels (after non-maximum
  suppression) and a per-class CorLoc indicator for one image's detections
  against its ground truth.
  """

  def __init__(self,
               num_groundtruth_classes,
               matching_iou_threshold=0.5,
               nms_iou_threshold=0.3,
               nms_max_output_boxes=50):
    """Initialized PerImageEvaluation by evaluation parameters.

    Args:
      num_groundtruth_classes: Number of ground truth object classes
      matching_iou_threshold: A ratio of area intersection to union, which is
        the threshold to consider whether a detection is true positive or not
      nms_iou_threshold: IOU threshold used in Non Maximum Suppression.
      nms_max_output_boxes: Number of maximum output boxes in NMS.
    """
    self.matching_iou_threshold = matching_iou_threshold
    self.nms_iou_threshold = nms_iou_threshold
    self.nms_max_output_boxes = nms_max_output_boxes
    self.num_groundtruth_classes = num_groundtruth_classes

  def compute_object_detection_metrics(self, detected_boxes, detected_scores,
                                       detected_class_labels,
                                       groundtruth_boxes,
                                       groundtruth_class_labels,
                                       groundtruth_is_difficult_lists):
    """Compute Object Detection related metrics from a single image.

    Args:
      detected_boxes: A float numpy array of shape [N, 4], representing N
        regions of detected object regions. Each row is of the format
        [y_min, x_min, y_max, x_max]
      detected_scores: A float numpy array of shape [N, 1], representing
        the confidence scores of the detected N object instances.
      detected_class_labels: An integer numpy array of shape [N, 1],
        representing the class labels of the detected N object instances.
      groundtruth_boxes: A float numpy array of shape [M, 4], representing M
        regions of object instances in ground truth
      groundtruth_class_labels: An integer numpy array of shape [M, 1],
        representing M class labels of object instances in ground truth
      groundtruth_is_difficult_lists: A boolean numpy array of length M
        denoting whether a ground truth box is a difficult instance or not

    Returns:
      scores: A list of C float numpy arrays. Each numpy array is of
        shape [K, 1], representing K scores detected with object class
        label c
      tp_fp_labels: A list of C boolean numpy arrays. Each numpy array
        is of shape [K, 1], representing K True/False positive label of
        object instances detected with class label c
      is_class_correctly_detected_in_image: a numpy integer array of
        shape [C, 1], indicating whether the corresponding class has at least
        one instance being correctly detected in the image
    """
    # Degenerate boxes (non-positive height or width) are dropped up front so
    # neither the tp/fp nor the CorLoc computation sees them.
    detected_boxes, detected_scores, detected_class_labels = (
        self._remove_invalid_boxes(detected_boxes, detected_scores,
                                   detected_class_labels))
    scores, tp_fp_labels = self._compute_tp_fp(
        detected_boxes, detected_scores, detected_class_labels,
        groundtruth_boxes, groundtruth_class_labels,
        groundtruth_is_difficult_lists)
    is_class_correctly_detected_in_image = self._compute_cor_loc(
        detected_boxes, detected_scores, detected_class_labels,
        groundtruth_boxes, groundtruth_class_labels)
    return scores, tp_fp_labels, is_class_correctly_detected_in_image

  def _compute_cor_loc(self, detected_boxes, detected_scores,
                       detected_class_labels, groundtruth_boxes,
                       groundtruth_class_labels):
    """Compute CorLoc score for object detection result.

    Args:
      detected_boxes: A float numpy array of shape [N, 4], representing N
        regions of detected object regions. Each row is of the format
        [y_min, x_min, y_max, x_max]
      detected_scores: A float numpy array of shape [N, 1], representing
        the confidence scores of the detected N object instances.
      detected_class_labels: An integer numpy array of shape [N, 1],
        representing the class labels of the detected N object instances.
      groundtruth_boxes: A float numpy array of shape [M, 4], representing M
        regions of object instances in ground truth
      groundtruth_class_labels: An integer numpy array of shape [M, 1],
        representing M class labels of object instances in ground truth

    Returns:
      is_class_correctly_detected_in_image: a numpy integer array of
        shape [C, 1], indicating whether the corresponding class has at least
        one instance being correctly detected in the image
    """
    is_class_correctly_detected_in_image = np.zeros(
        self.num_groundtruth_classes, dtype=int)
    # Evaluate each class independently on its own detections/ground truth.
    for i in range(self.num_groundtruth_classes):
      gt_boxes_at_ith_class = groundtruth_boxes[
          groundtruth_class_labels == i, :]
      detected_boxes_at_ith_class = detected_boxes[
          detected_class_labels == i, :]
      detected_scores_at_ith_class = detected_scores[detected_class_labels == i]
      is_class_correctly_detected_in_image[i] = (
          self._compute_is_aclass_correctly_detected_in_image(
              detected_boxes_at_ith_class, detected_scores_at_ith_class,
              gt_boxes_at_ith_class))
    return is_class_correctly_detected_in_image

  def _compute_is_aclass_correctly_detected_in_image(
      self, detected_boxes, detected_scores, groundtruth_boxes):
    """Compute CorLoc score for a single class.

    Args:
      detected_boxes: A numpy array of shape [N, 4] representing detected box
        coordinates
      detected_scores: A 1-d numpy array of length N representing
        classification score
      groundtruth_boxes: A numpy array of shape [M, 4] representing ground
        truth box coordinates

    Returns:
      is_class_correctly_detected_in_image: An integer 1 or 0 denoting whether
        a class is correctly detected in the image or not
    """
    if detected_boxes.size > 0:
      if groundtruth_boxes.size > 0:
        # CorLoc only considers the single highest-scoring detection: it
        # counts 1 iff that detection overlaps any ground truth box enough.
        max_score_id = np.argmax(detected_scores)
        detected_boxlist = np_box_list.BoxList(
            np.expand_dims(detected_boxes[max_score_id, :], axis=0))
        gt_boxlist = np_box_list.BoxList(groundtruth_boxes)
        iou = np_box_list_ops.iou(detected_boxlist, gt_boxlist)
        if np.max(iou) >= self.matching_iou_threshold:
          return 1
    return 0

  def _compute_tp_fp(self, detected_boxes, detected_scores,
                     detected_class_labels, groundtruth_boxes,
                     groundtruth_class_labels, groundtruth_is_difficult_lists):
    """Labels true/false positives of detections of an image across all classes.

    Args:
      detected_boxes: A float numpy array of shape [N, 4], representing N
        regions of detected object regions. Each row is of the format
        [y_min, x_min, y_max, x_max]
      detected_scores: A float numpy array of shape [N, 1], representing
        the confidence scores of the detected N object instances.
      detected_class_labels: An integer numpy array of shape [N, 1],
        representing the class labels of the detected N object instances.
      groundtruth_boxes: A float numpy array of shape [M, 4], representing M
        regions of object instances in ground truth
      groundtruth_class_labels: An integer numpy array of shape [M, 1],
        representing M class labels of object instances in ground truth
      groundtruth_is_difficult_lists: A boolean numpy array of length M
        denoting whether a ground truth box is a difficult instance or not

    Returns:
      result_scores: A list of float numpy arrays. Each numpy array is of
        shape [K, 1], representing K scores detected with object class
        label c
      result_tp_fp_labels: A list of boolean numpy array. Each numpy array is
        of shape [K, 1], representing K True/False positive label of object
        instances detected with class label c
    """
    result_scores = []
    result_tp_fp_labels = []
    # Label each class independently; lists are indexed by class id.
    for i in range(self.num_groundtruth_classes):
      gt_boxes_at_ith_class = groundtruth_boxes[
          (groundtruth_class_labels == i), :]
      groundtruth_is_difficult_list_at_ith_class = (
          groundtruth_is_difficult_lists[groundtruth_class_labels == i])
      detected_boxes_at_ith_class = detected_boxes[
          (detected_class_labels == i), :]
      detected_scores_at_ith_class = detected_scores[detected_class_labels == i]
      scores, tp_fp_labels = self._compute_tp_fp_for_single_class(
          detected_boxes_at_ith_class, detected_scores_at_ith_class,
          gt_boxes_at_ith_class, groundtruth_is_difficult_list_at_ith_class)
      result_scores.append(scores)
      result_tp_fp_labels.append(tp_fp_labels)
    return result_scores, result_tp_fp_labels

  def _remove_invalid_boxes(self, detected_boxes, detected_scores,
                            detected_class_labels):
    # Keep only boxes with strictly positive height (y_min < y_max) and
    # width (x_min < x_max); scores and labels are filtered in lockstep.
    valid_indices = np.logical_and(
        detected_boxes[:, 0] < detected_boxes[:, 2],
        detected_boxes[:, 1] < detected_boxes[:, 3])
    return (detected_boxes[valid_indices, :], detected_scores[valid_indices],
            detected_class_labels[valid_indices])

  def _compute_tp_fp_for_single_class(self, detected_boxes, detected_scores,
                                      groundtruth_boxes,
                                      groundtruth_is_difficult_list):
    """Labels boxes detected with the same class from the same image as tp/fp.

    Args:
      detected_boxes: A numpy array of shape [N, 4] representing detected box
        coordinates
      detected_scores: A 1-d numpy array of length N representing
        classification score
      groundtruth_boxes: A numpy array of shape [M, 4] representing ground
        truth box coordinates
      groundtruth_is_difficult_list: A boolean numpy array of length M
        denoting whether a ground truth box is a difficult instance or not

    Returns:
      scores: A numpy array representing the detection scores
      tp_fp_labels: a boolean numpy array indicating whether a detection is a
        true positive.
    """
    if detected_boxes.size == 0:
      return np.array([], dtype=float), np.array([], dtype=bool)
    detected_boxlist = np_box_list.BoxList(detected_boxes)
    detected_boxlist.add_field('scores', detected_scores)
    detected_boxlist = np_box_list_ops.non_max_suppression(
        detected_boxlist, self.nms_max_output_boxes, self.nms_iou_threshold)

    scores = detected_boxlist.get_field('scores')

    # No ground truth for this class: every surviving detection is a false
    # positive.
    if groundtruth_boxes.size == 0:
      return scores, np.zeros(detected_boxlist.num_boxes(), dtype=bool)

    gt_boxlist = np_box_list.BoxList(groundtruth_boxes)
    iou = np_box_list_ops.iou(detected_boxlist, gt_boxlist)

    max_overlap_gt_ids = np.argmax(iou, axis=1)
    is_gt_box_detected = np.zeros(gt_boxlist.num_boxes(), dtype=bool)
    tp_fp_labels = np.zeros(detected_boxlist.num_boxes(), dtype=bool)
    is_matched_to_difficult_box = np.zeros(
        detected_boxlist.num_boxes(), dtype=bool)
    # Greedy matching in iteration order; this assumes NMS returns boxes
    # sorted by descending score so higher-scoring detections claim a ground
    # truth box first — TODO confirm ordering in np_box_list_ops.
    for i in range(detected_boxlist.num_boxes()):
      gt_id = max_overlap_gt_ids[i]
      if iou[i, gt_id] >= self.matching_iou_threshold:
        if not groundtruth_is_difficult_list[gt_id]:
          # Only the first detection matched to a ground truth box counts as
          # a true positive; duplicates stay false positives.
          if not is_gt_box_detected[gt_id]:
            tp_fp_labels[i] = True
            is_gt_box_detected[gt_id] = True
        else:
          is_matched_to_difficult_box[i] = True
    # Detections matched to "difficult" ground truth are excluded from the
    # returned arrays entirely (neither tp nor fp).
    return scores[~is_matched_to_difficult_box], tp_fp_labels[
        ~is_matched_to_difficult_box]
object_detection/utils/per_image_evaluation_test.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.utils.per_image_evaluation."""
import
numpy
as
np
import
tensorflow
as
tf
from
object_detection.utils
import
per_image_evaluation
class SingleClassTpFpWithDifficultBoxesTest(tf.test.TestCase):
  """Single-class tp/fp labeling when some ground truth boxes are difficult.

  Detections matched to a difficult ground truth box are removed from the
  returned score/label arrays rather than counted as tp or fp.
  """

  def setUp(self):
    num_groundtruth_classes = 1
    matching_iou_threshold = 0.5
    # nms_iou_threshold=1.0 with a huge max-output effectively disables NMS,
    # so only score-sorting affects the outputs.
    nms_iou_threshold = 1.0
    nms_max_output_boxes = 10000
    self.eval = per_image_evaluation.PerImageEvaluation(
        num_groundtruth_classes, matching_iou_threshold, nms_iou_threshold,
        nms_max_output_boxes)

    self.detected_boxes = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]],
                                   dtype=float)
    self.detected_scores = np.array([0.6, 0.8, 0.5], dtype=float)
    self.groundtruth_boxes = np.array([[0, 0, 1, 1], [0, 0, 10, 10]],
                                      dtype=float)

  def test_match_to_not_difficult_box(self):
    # Only the second gt box is difficult; the detection matching the easy
    # gt box [0, 0, 1, 1] becomes the single true positive.
    groundtruth_groundtruth_is_difficult_list = np.array([False, True],
                                                         dtype=bool)
    scores, tp_fp_labels = self.eval._compute_tp_fp_for_single_class(
        self.detected_boxes, self.detected_scores, self.groundtruth_boxes,
        groundtruth_groundtruth_is_difficult_list)
    # Scores come back sorted by descending confidence.
    expected_scores = np.array([0.8, 0.6, 0.5], dtype=float)
    expected_tp_fp_labels = np.array([False, True, False], dtype=bool)
    self.assertTrue(np.allclose(expected_scores, scores))
    self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))

  def test_match_to_difficult_box(self):
    # The first gt box is difficult: the detection matching it is dropped
    # from the outputs (only two entries returned), and nothing is a tp.
    groundtruth_groundtruth_is_difficult_list = np.array([True, False],
                                                         dtype=bool)
    scores, tp_fp_labels = self.eval._compute_tp_fp_for_single_class(
        self.detected_boxes, self.detected_scores, self.groundtruth_boxes,
        groundtruth_groundtruth_is_difficult_list)
    expected_scores = np.array([0.8, 0.5], dtype=float)
    expected_tp_fp_labels = np.array([False, False], dtype=bool)
    self.assertTrue(np.allclose(expected_scores, scores))
    self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
class SingleClassTpFpNoDifficultBoxesTest(tf.test.TestCase):
  """Single-class tp/fp labeling with no difficult ground truth boxes.

  Uses two evaluators that differ only in matching IOU threshold (0.5 vs
  0.1) to show how the threshold changes which detections count as tp.
  """

  def setUp(self):
    num_groundtruth_classes = 1
    matching_iou_threshold1 = 0.5
    matching_iou_threshold2 = 0.1
    # nms_iou_threshold=1.0 with a huge max-output effectively disables NMS.
    nms_iou_threshold = 1.0
    nms_max_output_boxes = 10000
    self.eval1 = per_image_evaluation.PerImageEvaluation(
        num_groundtruth_classes, matching_iou_threshold1, nms_iou_threshold,
        nms_max_output_boxes)

    self.eval2 = per_image_evaluation.PerImageEvaluation(
        num_groundtruth_classes, matching_iou_threshold2, nms_iou_threshold,
        nms_max_output_boxes)

    self.detected_boxes = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3]],
                                   dtype=float)
    self.detected_scores = np.array([0.6, 0.8, 0.5], dtype=float)

  def test_no_true_positives(self):
    # Ground truth is far from every detection, so all labels are False.
    groundtruth_boxes = np.array([[100, 100, 105, 105]], dtype=float)
    groundtruth_groundtruth_is_difficult_list = np.zeros(1, dtype=bool)
    scores, tp_fp_labels = self.eval1._compute_tp_fp_for_single_class(
        self.detected_boxes, self.detected_scores, groundtruth_boxes,
        groundtruth_groundtruth_is_difficult_list)
    expected_scores = np.array([0.8, 0.6, 0.5], dtype=float)
    expected_tp_fp_labels = np.array([False, False, False], dtype=bool)
    self.assertTrue(np.allclose(expected_scores, scores))
    self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))

  def test_one_true_positives_with_large_iou_threshold(self):
    # At threshold 0.5 only the exact-match box [0, 0, 1, 1] (score 0.6)
    # reaches sufficient IOU; the higher-scoring [0, 0, 2, 2] overlaps at
    # only IOU 0.25.
    groundtruth_boxes = np.array([[0, 0, 1, 1]], dtype=float)
    groundtruth_groundtruth_is_difficult_list = np.zeros(1, dtype=bool)
    scores, tp_fp_labels = self.eval1._compute_tp_fp_for_single_class(
        self.detected_boxes, self.detected_scores, groundtruth_boxes,
        groundtruth_groundtruth_is_difficult_list)
    expected_scores = np.array([0.8, 0.6, 0.5], dtype=float)
    expected_tp_fp_labels = np.array([False, True, False], dtype=bool)
    self.assertTrue(np.allclose(expected_scores, scores))
    self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))

  def test_one_true_positives_with_very_small_iou_threshold(self):
    # At threshold 0.1 the highest-scoring detection claims the ground truth
    # box first, so the exact-match detection becomes a duplicate (False).
    groundtruth_boxes = np.array([[0, 0, 1, 1]], dtype=float)
    groundtruth_groundtruth_is_difficult_list = np.zeros(1, dtype=bool)
    scores, tp_fp_labels = self.eval2._compute_tp_fp_for_single_class(
        self.detected_boxes, self.detected_scores, groundtruth_boxes,
        groundtruth_groundtruth_is_difficult_list)
    expected_scores = np.array([0.8, 0.6, 0.5], dtype=float)
    expected_tp_fp_labels = np.array([True, False, False], dtype=bool)
    self.assertTrue(np.allclose(expected_scores, scores))
    self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))

  def test_two_true_positives_with_large_iou_threshold(self):
    # Two ground truth boxes: [0, 0, 1, 1] matches the 0.6-score detection
    # exactly, and [0, 0, 3.5, 3.5] overlaps [0, 0, 3, 3] above 0.5.
    groundtruth_boxes = np.array([[0, 0, 1, 1], [0, 0, 3.5, 3.5]],
                                 dtype=float)
    groundtruth_groundtruth_is_difficult_list = np.zeros(2, dtype=bool)
    scores, tp_fp_labels = self.eval1._compute_tp_fp_for_single_class(
        self.detected_boxes, self.detected_scores, groundtruth_boxes,
        groundtruth_groundtruth_is_difficult_list)
    expected_scores = np.array([0.8, 0.6, 0.5], dtype=float)
    expected_tp_fp_labels = np.array([False, True, True], dtype=bool)
    self.assertTrue(np.allclose(expected_scores, scores))
    self.assertTrue(np.allclose(expected_tp_fp_labels, tp_fp_labels))
class MultiClassesTpFpTest(tf.test.TestCase):
  """End-to-end tp/fp computation across multiple classes in one call.

  Fix over the original: the difficulty mask was built with dtype=float,
  while compute_object_detection_metrics documents a boolean numpy array.
  The float version only worked because 0.0 is falsy; dtype=bool matches
  the API contract with identical test behavior.
  """

  def test_tp_fp(self):
    num_groundtruth_classes = 3
    matching_iou_threshold = 0.5
    # nms_iou_threshold=1.0 with a huge max-output effectively disables NMS.
    nms_iou_threshold = 1.0
    nms_max_output_boxes = 10000
    eval1 = per_image_evaluation.PerImageEvaluation(num_groundtruth_classes,
                                                    matching_iou_threshold,
                                                    nms_iou_threshold,
                                                    nms_max_output_boxes)
    # Rows 1, 3 and 4 are degenerate (y_min >= y_max or x_min >= x_max) and
    # must be filtered out by _remove_invalid_boxes before labeling.
    detected_boxes = np.array([[0, 0, 1, 1], [10, 10, 5, 5], [0, 0, 2, 2],
                               [5, 10, 10, 5], [10, 5, 5, 10], [0, 0, 3, 3]],
                              dtype=float)
    detected_scores = np.array([0.8, 0.1, 0.8, 0.9, 0.7, 0.8], dtype=float)
    detected_class_labels = np.array([0, 1, 1, 2, 0, 2], dtype=int)
    groundtruth_boxes = np.array([[0, 0, 1, 1], [0, 0, 3.5, 3.5]],
                                 dtype=float)
    groundtruth_class_labels = np.array([0, 2], dtype=int)
    # Boolean per the documented API contract (was dtype=float).
    groundtruth_groundtruth_is_difficult_list = np.zeros(2, dtype=bool)
    scores, tp_fp_labels, _ = eval1.compute_object_detection_metrics(
        detected_boxes, detected_scores, detected_class_labels,
        groundtruth_boxes, groundtruth_class_labels,
        groundtruth_groundtruth_is_difficult_list)

    # One valid detection per class, each with score 0.8. Class 0 and class 2
    # match their ground truth boxes; class 1 has no ground truth.
    expected_scores = [np.array([0.8], dtype=float)] * 3
    expected_tp_fp_labels = [np.array([True]), np.array([False]),
                             np.array([True])]
    for i in range(len(expected_scores)):
      self.assertTrue(np.allclose(expected_scores[i], scores[i]))
      self.assertTrue(np.array_equal(expected_tp_fp_labels[i],
                                     tp_fp_labels[i]))
class CorLocTest(tf.test.TestCase):
  """Tests for PerImageEvaluation._compute_cor_loc.

  CorLoc only considers each class's single highest-scoring detection and
  checks whether it overlaps any ground truth box of that class above the
  matching IOU threshold.
  """

  def test_compute_corloc_with_normal_iou_threshold(self):
    num_groundtruth_classes = 3
    matching_iou_threshold = 0.5
    nms_iou_threshold = 1.0
    nms_max_output_boxes = 10000
    eval1 = per_image_evaluation.PerImageEvaluation(num_groundtruth_classes,
                                                    matching_iou_threshold,
                                                    nms_iou_threshold,
                                                    nms_max_output_boxes)
    detected_boxes = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3],
                               [0, 0, 5, 5]], dtype=float)
    detected_scores = np.array([0.9, 0.9, 0.1, 0.9], dtype=float)
    detected_class_labels = np.array([0, 1, 0, 2], dtype=int)
    groundtruth_boxes = np.array([[0, 0, 1, 1], [0, 0, 3, 3], [0, 0, 6, 6]],
                                 dtype=float)
    groundtruth_class_labels = np.array([0, 0, 2], dtype=int)

    is_class_correctly_detected_in_image = eval1._compute_cor_loc(
        detected_boxes, detected_scores, detected_class_labels,
        groundtruth_boxes, groundtruth_class_labels)
    # Class 0: top detection matches exactly. Class 1: no ground truth.
    # Class 2: [0, 0, 5, 5] vs [0, 0, 6, 6] has IOU 25/36 >= 0.5.
    expected_result = np.array([1, 0, 1], dtype=int)
    self.assertTrue(np.array_equal(expected_result,
                                   is_class_correctly_detected_in_image))

  def test_compute_corloc_with_very_large_iou_threshold(self):
    num_groundtruth_classes = 3
    matching_iou_threshold = 0.9
    nms_iou_threshold = 1.0
    nms_max_output_boxes = 10000
    eval1 = per_image_evaluation.PerImageEvaluation(num_groundtruth_classes,
                                                    matching_iou_threshold,
                                                    nms_iou_threshold,
                                                    nms_max_output_boxes)
    detected_boxes = np.array([[0, 0, 1, 1], [0, 0, 2, 2], [0, 0, 3, 3],
                               [0, 0, 5, 5]], dtype=float)
    detected_scores = np.array([0.9, 0.9, 0.1, 0.9], dtype=float)
    detected_class_labels = np.array([0, 1, 0, 2], dtype=int)
    groundtruth_boxes = np.array([[0, 0, 1, 1], [0, 0, 3, 3], [0, 0, 6, 6]],
                                 dtype=float)
    groundtruth_class_labels = np.array([0, 0, 2], dtype=int)

    is_class_correctly_detected_in_image = eval1._compute_cor_loc(
        detected_boxes, detected_scores, detected_class_labels,
        groundtruth_boxes, groundtruth_class_labels)
    # At threshold 0.9 only class 0's exact match survives; class 2's IOU of
    # 25/36 is no longer sufficient.
    expected_result = np.array([1, 0, 0], dtype=int)
    self.assertTrue(np.array_equal(expected_result,
                                   is_class_correctly_detected_in_image))
if
__name__
==
'__main__'
:
tf
.
test
.
main
()
object_detection/utils/shape_utils.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utils used to manipulate tensor shapes."""
import
tensorflow
as
tf
def _is_tensor(t):
  """Returns a boolean indicating whether the input is a tensor.

  Args:
    t: the input to be tested.

  Returns:
    True if t is a tf.Tensor, tf.SparseTensor or tf.Variable, else False.
  """
  tensor_types = (tf.Tensor, tf.SparseTensor, tf.Variable)
  return isinstance(t, tensor_types)
def _set_dim_0(t, d0):
  """Statically fixes the 0-th dimension of the input tensor.

  Args:
    t: the input tensor, assuming the rank is at least 1.
    d0: an integer indicating the 0-th dimension of the input tensor.

  Returns:
    the tensor t with its 0-th dimension set to d0 statically.
  """
  new_shape = t.get_shape().as_list()
  new_shape[0] = d0
  t.set_shape(new_shape)
  return t
def pad_tensor(t, length):
  """Pads the input tensor with 0s along the first dimension up to the length.

  Args:
    t: the input tensor, assuming the rank is at least 1.
    length: a tensor of shape [1] or an integer, indicating the first dimension
      of the input tensor t after padding, assuming length >= t.shape[0].

  Returns:
    padded_t: the padded tensor, whose first dimension is length. If the length
      is an integer, the first dimension of padded_t is set to length
      statically.
  """
  dynamic_shape = tf.shape(t)
  num_pad_rows = tf.expand_dims(length - dynamic_shape[0], 0)
  # For rank > 1 the zero-padding block must carry the same trailing
  # dimensions as t; for rank 1 its shape is just [length - t.shape[0]].
  pad_block_shape = tf.cond(
      tf.greater(tf.rank(t), 1),
      lambda: tf.concat([num_pad_rows, dynamic_shape[1:]], 0),
      lambda: num_pad_rows)
  padded_t = tf.concat([t, tf.zeros(pad_block_shape, dtype=t.dtype)], 0)
  if not _is_tensor(length):
    padded_t = _set_dim_0(padded_t, length)
  return padded_t
def clip_tensor(t, length):
  """Clips the input tensor along the first dimension up to the length.

  Args:
    t: the input tensor, assuming the rank is at least 1.
    length: a tensor of shape [1] or an integer, indicating the first dimension
      of the input tensor t after clipping, assuming length <= t.shape[0].

  Returns:
    clipped_t: the clipped tensor, whose first dimension is length. If the
      length is an integer, the first dimension of clipped_t is set to length
      statically.
  """
  # Keep only the first `length` rows.
  keep_indices = tf.range(length)
  clipped_t = tf.gather(t, keep_indices)
  if _is_tensor(length):
    return clipped_t
  return _set_dim_0(clipped_t, length)
def pad_or_clip_tensor(t, length):
  """Pad or clip the input tensor along the first dimension.

  Args:
    t: the input tensor, assuming the rank is at least 1.
    length: a tensor of shape [1] or an integer, indicating the first dimension
      of the input tensor t after processing.

  Returns:
    processed_t: the processed tensor, whose first dimension is length. If the
      length is an integer, the first dimension of the processed tensor is set
      to length statically.
  """
  needs_clipping = tf.greater(tf.shape(t)[0], length)
  processed_t = tf.cond(
      needs_clipping,
      lambda: clip_tensor(t, length),
      lambda: pad_tensor(t, length))
  if not _is_tensor(length):
    processed_t = _set_dim_0(processed_t, length)
  return processed_t
object_detection/utils/shape_utils_test.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.utils.shape_utils."""
import
tensorflow
as
tf
from
object_detection.utils
import
shape_utils
class UtilTest(tf.test.TestCase):
  """Tests for shape_utils pad_tensor/clip_tensor/pad_or_clip_tensor."""

  def test_pad_tensor_using_integer_input(self):
    t1 = tf.constant([1], dtype=tf.int32)
    pad_t1 = shape_utils.pad_tensor(t1, 2)
    t2 = tf.constant([[0.1, 0.2]], dtype=tf.float32)
    pad_t2 = shape_utils.pad_tensor(t2, 2)

    # An integer length must set the first dimension statically.
    self.assertEqual(2, pad_t1.get_shape()[0])
    self.assertEqual(2, pad_t2.get_shape()[0])

    with self.test_session() as sess:
      pad_t1_result, pad_t2_result = sess.run([pad_t1, pad_t2])
      self.assertAllEqual([1, 0], pad_t1_result)
      self.assertAllClose([[0.1, 0.2], [0, 0]], pad_t2_result)

  def test_pad_tensor_using_tensor_input(self):
    t1 = tf.constant([1], dtype=tf.int32)
    pad_t1 = shape_utils.pad_tensor(t1, tf.constant(2))
    t2 = tf.constant([[0.1, 0.2]], dtype=tf.float32)
    pad_t2 = shape_utils.pad_tensor(t2, tf.constant(2))

    with self.test_session() as sess:
      pad_t1_result, pad_t2_result = sess.run([pad_t1, pad_t2])
      self.assertAllEqual([1, 0], pad_t1_result)
      self.assertAllClose([[0.1, 0.2], [0, 0]], pad_t2_result)

  def test_clip_tensor_using_integer_input(self):
    t1 = tf.constant([1, 2, 3], dtype=tf.int32)
    clip_t1 = shape_utils.clip_tensor(t1, 2)
    t2 = tf.constant([[0.1, 0.2], [0.2, 0.4], [0.5, 0.8]], dtype=tf.float32)
    clip_t2 = shape_utils.clip_tensor(t2, 2)

    # An integer length must set the first dimension statically.
    self.assertEqual(2, clip_t1.get_shape()[0])
    self.assertEqual(2, clip_t2.get_shape()[0])

    with self.test_session() as sess:
      clip_t1_result, clip_t2_result = sess.run([clip_t1, clip_t2])
      self.assertAllEqual([1, 2], clip_t1_result)
      self.assertAllClose([[0.1, 0.2], [0.2, 0.4]], clip_t2_result)

  def test_clip_tensor_using_tensor_input(self):
    t1 = tf.constant([1, 2, 3], dtype=tf.int32)
    clip_t1 = shape_utils.clip_tensor(t1, tf.constant(2))
    t2 = tf.constant([[0.1, 0.2], [0.2, 0.4], [0.5, 0.8]], dtype=tf.float32)
    clip_t2 = shape_utils.clip_tensor(t2, tf.constant(2))

    with self.test_session() as sess:
      clip_t1_result, clip_t2_result = sess.run([clip_t1, clip_t2])
      self.assertAllEqual([1, 2], clip_t1_result)
      self.assertAllClose([[0.1, 0.2], [0.2, 0.4]], clip_t2_result)

  def test_pad_or_clip_tensor_using_integer_input(self):
    # Padding path: inputs shorter than the target length.
    t1 = tf.constant([1], dtype=tf.int32)
    tt1 = shape_utils.pad_or_clip_tensor(t1, 2)
    t2 = tf.constant([[0.1, 0.2]], dtype=tf.float32)
    tt2 = shape_utils.pad_or_clip_tensor(t2, 2)
    # Clipping path: inputs longer than the target length.
    # BUG FIX: tt3/tt4 previously called shape_utils.clip_tensor directly,
    # so the clipping branch of pad_or_clip_tensor was never exercised.
    t3 = tf.constant([1, 2, 3], dtype=tf.int32)
    tt3 = shape_utils.pad_or_clip_tensor(t3, 2)
    t4 = tf.constant([[0.1, 0.2], [0.2, 0.4], [0.5, 0.8]], dtype=tf.float32)
    tt4 = shape_utils.pad_or_clip_tensor(t4, 2)

    self.assertEqual(2, tt1.get_shape()[0])
    self.assertEqual(2, tt2.get_shape()[0])
    self.assertEqual(2, tt3.get_shape()[0])
    self.assertEqual(2, tt4.get_shape()[0])

    with self.test_session() as sess:
      tt1_result, tt2_result, tt3_result, tt4_result = sess.run(
          [tt1, tt2, tt3, tt4])
      self.assertAllEqual([1, 0], tt1_result)
      self.assertAllClose([[0.1, 0.2], [0, 0]], tt2_result)
      self.assertAllEqual([1, 2], tt3_result)
      self.assertAllClose([[0.1, 0.2], [0.2, 0.4]], tt4_result)

  def test_pad_or_clip_tensor_using_tensor_input(self):
    # Padding path: inputs shorter than the target length.
    t1 = tf.constant([1], dtype=tf.int32)
    tt1 = shape_utils.pad_or_clip_tensor(t1, tf.constant(2))
    t2 = tf.constant([[0.1, 0.2]], dtype=tf.float32)
    tt2 = shape_utils.pad_or_clip_tensor(t2, tf.constant(2))
    # Clipping path: inputs longer than the target length.
    # BUG FIX: tt3/tt4 previously called shape_utils.clip_tensor directly,
    # so the clipping branch of pad_or_clip_tensor was never exercised.
    t3 = tf.constant([1, 2, 3], dtype=tf.int32)
    tt3 = shape_utils.pad_or_clip_tensor(t3, tf.constant(2))
    t4 = tf.constant([[0.1, 0.2], [0.2, 0.4], [0.5, 0.8]], dtype=tf.float32)
    tt4 = shape_utils.pad_or_clip_tensor(t4, tf.constant(2))

    with self.test_session() as sess:
      tt1_result, tt2_result, tt3_result, tt4_result = sess.run(
          [tt1, tt2, tt3, tt4])
      self.assertAllEqual([1, 0], tt1_result)
      self.assertAllClose([[0.1, 0.2], [0, 0]], tt2_result)
      self.assertAllEqual([1, 2], tt3_result)
      self.assertAllClose([[0.1, 0.2], [0.2, 0.4]], tt4_result)


if __name__ == '__main__':
  tf.test.main()
object_detection/utils/static_shape.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Helper functions to access TensorShape values.
The rank 4 tensor_shape must be of the form [batch_size, height, width, depth].
"""
def get_batch_size(tensor_shape):
  """Returns batch size from the tensor shape.

  Args:
    tensor_shape: A rank 4 TensorShape ([batch_size, height, width, depth]).

  Returns:
    The value of dimension 0 (the batch size).
  """
  tensor_shape.assert_has_rank(rank=4)
  batch_dim = tensor_shape[0]
  return batch_dim.value
def get_height(tensor_shape):
  """Returns height from the tensor shape.

  Args:
    tensor_shape: A rank 4 TensorShape ([batch_size, height, width, depth]).

  Returns:
    The value of dimension 1 (the height).
  """
  tensor_shape.assert_has_rank(rank=4)
  height_dim = tensor_shape[1]
  return height_dim.value
def get_width(tensor_shape):
  """Returns width from the tensor shape.

  Args:
    tensor_shape: A rank 4 TensorShape ([batch_size, height, width, depth]).

  Returns:
    The value of dimension 2 (the width).
  """
  tensor_shape.assert_has_rank(rank=4)
  width_dim = tensor_shape[2]
  return width_dim.value
def get_depth(tensor_shape):
  """Returns depth from the tensor shape.

  Args:
    tensor_shape: A rank 4 TensorShape ([batch_size, height, width, depth]).

  Returns:
    The value of dimension 3 (the depth).
  """
  tensor_shape.assert_has_rank(rank=4)
  depth_dim = tensor_shape[3]
  return depth_dim.value
object_detection/utils/static_shape_test.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.utils.static_shape."""
import
tensorflow
as
tf
from
object_detection.utils
import
static_shape
class StaticShapeTest(tf.test.TestCase):
  """Tests for the static_shape accessor helpers."""

  def test_return_correct_batchSize(self):
    tensor_shape = tf.TensorShape(dims=[32, 299, 384, 3])
    self.assertEqual(32, static_shape.get_batch_size(tensor_shape))

  def test_return_correct_height(self):
    tensor_shape = tf.TensorShape(dims=[32, 299, 384, 3])
    self.assertEqual(299, static_shape.get_height(tensor_shape))

  def test_return_correct_width(self):
    tensor_shape = tf.TensorShape(dims=[32, 299, 384, 3])
    self.assertEqual(384, static_shape.get_width(tensor_shape))

  def test_return_correct_depth(self):
    tensor_shape = tf.TensorShape(dims=[32, 299, 384, 3])
    self.assertEqual(3, static_shape.get_depth(tensor_shape))

  def test_die_on_tensor_shape_with_rank_three(self):
    tensor_shape = tf.TensorShape(dims=[32, 299, 384])
    # BUG FIX: all four accessor calls previously shared a single
    # assertRaises block, so only the first call ever executed (the raise
    # exits the block and the remaining calls were dead code). Each
    # accessor must independently reject a non-rank-4 shape.
    with self.assertRaises(ValueError):
      static_shape.get_batch_size(tensor_shape)
    with self.assertRaises(ValueError):
      static_shape.get_height(tensor_shape)
    with self.assertRaises(ValueError):
      static_shape.get_width(tensor_shape)
    with self.assertRaises(ValueError):
      static_shape.get_depth(tensor_shape)


if __name__ == '__main__':
  tf.test.main()
object_detection/utils/test_utils.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains functions which are convenient for unit testing."""
import
numpy
as
np
import
tensorflow
as
tf
from
object_detection.core
import
anchor_generator
from
object_detection.core
import
box_coder
from
object_detection.core
import
box_list
from
object_detection.core
import
box_predictor
from
object_detection.core
import
matcher
class MockBoxCoder(box_coder.BoxCoder):
  """Trivial `difference` BoxCoder used in unit tests."""

  @property
  def code_size(self):
    # Four values per box code.
    return 4

  def _encode(self, boxes, anchors):
    # The code is simply the elementwise difference of box and anchor
    # coordinates.
    return boxes.get() - anchors.get()

  def _decode(self, rel_codes, anchors):
    # Invert the encoding by adding the anchor coordinates back.
    decoded_boxes = rel_codes + anchors.get()
    return box_list.BoxList(decoded_boxes)
class MockBoxPredictor(box_predictor.BoxPredictor):
  """Box predictor stub that ignores its input and outputs all zeros."""

  def __init__(self, is_training, num_classes):
    super(MockBoxPredictor, self).__init__(is_training, num_classes)

  def _predict(self, image_features, num_predictions_per_location):
    # Read the static shape once instead of re-fetching it per dimension.
    feature_dims = image_features.get_shape().as_list()
    batch_size = feature_dims[0]
    num_anchors = feature_dims[1] * feature_dims[2]
    code_size = 4
    # Zero-valued term that keeps the outputs connected to image_features
    # in the graph.
    zero = tf.reduce_sum(0 * image_features)
    box_encodings = zero + tf.zeros(
        (batch_size, num_anchors, 1, code_size), dtype=tf.float32)
    class_predictions_with_background = zero + tf.zeros(
        (batch_size, num_anchors, self.num_classes + 1), dtype=tf.float32)
    return {
        box_predictor.BOX_ENCODINGS: box_encodings,
        box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND:
            class_predictions_with_background
    }
class MockAnchorGenerator(anchor_generator.AnchorGenerator):
  """Anchor generator stub that emits all-zero anchor boxes."""

  def name_scope(self):
    return 'MockAnchorGenerator'

  def num_anchors_per_location(self):
    return [1]

  def _generate(self, feature_map_shape_list):
    # One anchor per spatial cell, summed over all feature maps.
    total_anchors = sum(
        fm_shape[0] * fm_shape[1] for fm_shape in feature_map_shape_list)
    return box_list.BoxList(tf.zeros((total_anchors, 4), dtype=tf.float32))
class MockMatcher(matcher.Matcher):
  """Matcher stub: anchor 0 matches groundtruth 0; the rest are unmatched."""

  def _match(self, similarity_matrix):
    # Fixed match vector; -1 marks an unmatched entry.
    match_results = [0, -1, -1, -1]
    return tf.constant(match_results, dtype=tf.int32)
def create_diagonal_gradient_image(height, width, depth):
  """Creates pyramid image. Useful for testing.

  For example, pyramid_image(5, 6, 1) looks like:
  # [[[ 5.  4.  3.  2.  1.  0.]
  #   [ 6.  5.  4.  3.  2.  1.]
  #   [ 7.  6.  5.  4.  3.  2.]
  #   [ 8.  7.  6.  5.  4.  3.]
  #   [ 9.  8.  7.  6.  5.  4.]]]
  Channel i holds the base gradient scaled by 10**i.

  Args:
    height: height of image
    width: width of image
    depth: depth of image

  Returns:
    pyramid image of shape [height, width, depth] and dtype float32.
  """
  row = np.arange(height)
  col = np.arange(width)[::-1]
  # Base gradient: values increase down the rows and toward the left.
  image_layer = np.expand_dims(row, 1) + col  # shape [height, width]
  # PERF FIX: build all channels with one broadcasted multiply instead of
  # concatenating inside a loop, which copied the growing image on every
  # iteration (quadratic in depth). max(depth, 1) preserves the original
  # behavior of returning a single channel for depth <= 1.
  channel_scales = np.power(10, np.arange(max(depth, 1)))
  image = image_layer[:, :, np.newaxis] * channel_scales
  return image.astype(np.float32)
def create_random_boxes(num_boxes, max_height, max_width):
  """Creates random bounding boxes of specific maximum height and width.

  Args:
    num_boxes: number of boxes.
    max_height: maximum height of boxes.
    max_width: maximum width of boxes.

  Returns:
    boxes: numpy array of shape [num_boxes, 4]. Each row is in form
        [y_min, x_min, y_max, x_max].
  """
  # Draw two candidate coordinates per axis; sorting each pair guarantees
  # min <= max in every row.
  y_a = np.random.uniform(size=(1, num_boxes)) * max_height
  y_b = np.random.uniform(size=(1, num_boxes)) * max_height
  x_a = np.random.uniform(size=(1, num_boxes)) * max_width
  x_b = np.random.uniform(size=(1, num_boxes)) * max_width

  y_min = np.minimum(y_a, y_b).ravel()
  x_min = np.minimum(x_a, x_b).ravel()
  y_max = np.maximum(y_a, y_b).ravel()
  x_max = np.maximum(x_a, x_b).ravel()

  boxes = np.stack([y_min, x_min, y_max, x_max], axis=1)
  return boxes.astype(np.float32)
object_detection/utils/test_utils_test.py
0 → 100644
View file @
f282f6ef
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.utils.test_utils."""
import
numpy
as
np
import
tensorflow
as
tf
from
object_detection.utils
import
test_utils
class TestUtilsTest(tf.test.TestCase):
  """Tests for the test_utils helper functions."""

  def test_diagonal_gradient_image(self):
    """Tests if a good pyramid image is created."""
    pyramid_image = test_utils.create_diagonal_gradient_image(3, 4, 2)

    # Sanity check on the first channel only.
    expected_first_channel = np.array([[3, 2, 1, 0],
                                       [4, 3, 2, 1],
                                       [5, 4, 3, 2]], dtype=np.float32)
    self.assertAllEqual(np.squeeze(pyramid_image[:, :, 0]),
                        expected_first_channel)

    # Full check: second channel is the first scaled by 10.
    expected_image = np.array([[[3, 30], [2, 20], [1, 10], [0, 0]],
                               [[4, 40], [3, 30], [2, 20], [1, 10]],
                               [[5, 50], [4, 40], [3, 30], [2, 20]]],
                              dtype=np.float32)
    self.assertAllEqual(pyramid_image, expected_image)

  def test_random_boxes(self):
    """Tests if valid random boxes are created."""
    num_boxes = 1000
    max_height = 3
    max_width = 5
    boxes = test_utils.create_random_boxes(num_boxes, max_height, max_width)

    # Every row must satisfy y_min < y_max and x_min < x_max.
    all_true = np.ones(shape=(num_boxes)) == 1
    self.assertAllEqual(boxes[:, 0] < boxes[:, 2], all_true)
    self.assertAllEqual(boxes[:, 1] < boxes[:, 3], all_true)

    # All coordinates stay within [0, max_height] x [0, max_width].
    self.assertTrue(boxes[:, 0].min() >= 0)
    self.assertTrue(boxes[:, 1].min() >= 0)
    self.assertTrue(boxes[:, 2].max() <= max_height)
    self.assertTrue(boxes[:, 3].max() <= max_width)


if __name__ == '__main__':
  tf.test.main()
Prev
1
…
11
12
13
14
15
16
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment