Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
fd7b6887
Unverified
Commit
fd7b6887
authored
Feb 09, 2018
by
Jonathan Huang
Committed by
GitHub
Feb 09, 2018
Browse files
Merge pull request #3293 from pkulzc/master
Internal changes of object_detection
parents
f98ec55e
1efe98bb
Changes
200
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
2749 additions
and
717 deletions
+2749
-717
research/object_detection/core/matcher_test.py
research/object_detection/core/matcher_test.py
+43
-1
research/object_detection/core/model.py
research/object_detection/core/model.py
+43
-10
research/object_detection/core/post_processing.py
research/object_detection/core/post_processing.py
+39
-9
research/object_detection/core/post_processing_test.py
research/object_detection/core/post_processing_test.py
+119
-0
research/object_detection/core/preprocessor.py
research/object_detection/core/preprocessor.py
+548
-111
research/object_detection/core/preprocessor_cache.py
research/object_detection/core/preprocessor_cache.py
+102
-0
research/object_detection/core/preprocessor_test.py
research/object_detection/core/preprocessor_test.py
+292
-10
research/object_detection/core/standard_fields.py
research/object_detection/core/standard_fields.py
+10
-1
research/object_detection/core/target_assigner.py
research/object_detection/core/target_assigner.py
+89
-86
research/object_detection/core/target_assigner_test.py
research/object_detection/core/target_assigner_test.py
+594
-436
research/object_detection/data/BUILD
research/object_detection/data/BUILD
+9
-0
research/object_detection/data_decoders/BUILD
research/object_detection/data_decoders/BUILD
+6
-4
research/object_detection/data_decoders/tf_example_decoder.py
...arch/object_detection/data_decoders/tf_example_decoder.py
+102
-13
research/object_detection/data_decoders/tf_example_decoder_test.py
...object_detection/data_decoders/tf_example_decoder_test.py
+243
-11
research/object_detection/dataset_tools/BUILD
research/object_detection/dataset_tools/BUILD
+47
-22
research/object_detection/dataset_tools/__init__.py
research/object_detection/dataset_tools/__init__.py
+0
-1
research/object_detection/dataset_tools/create_coco_tf_record.py
...h/object_detection/dataset_tools/create_coco_tf_record.py
+273
-0
research/object_detection/dataset_tools/create_coco_tf_record_test.py
...ect_detection/dataset_tools/create_coco_tf_record_test.py
+188
-0
research/object_detection/dataset_tools/create_kitti_tf_record.py
.../object_detection/dataset_tools/create_kitti_tf_record.py
+1
-1
research/object_detection/dataset_tools/create_kitti_tf_record_test.py
...ct_detection/dataset_tools/create_kitti_tf_record_test.py
+1
-1
No files found.
research/object_detection/core/matcher_test.py
View file @
fd7b6887
...
...
@@ -20,7 +20,7 @@ import tensorflow as tf
from
object_detection.core
import
matcher
class
Anchor
Match
er
Test
(
tf
.
test
.
TestCase
):
class
MatchTest
(
tf
.
test
.
TestCase
):
def
test_get_correct_matched_columnIndices
(
self
):
match_results
=
tf
.
constant
([
3
,
1
,
-
1
,
0
,
-
1
,
5
,
-
2
])
...
...
@@ -145,6 +145,48 @@ class AnchorMatcherTest(tf.test.TestCase):
self
.
assertAllEqual
(
all_indices_sorted
,
np
.
arange
(
num_matches
,
dtype
=
np
.
int32
))
def
test_scalar_gather_based_on_match
(
self
):
match_results
=
tf
.
constant
([
3
,
1
,
-
1
,
0
,
-
1
,
5
,
-
2
])
input_tensor
=
tf
.
constant
([
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
],
dtype
=
tf
.
float32
)
expected_gathered_tensor
=
[
3
,
1
,
100
,
0
,
100
,
5
,
200
]
match
=
matcher
.
Match
(
match_results
)
gathered_tensor
=
match
.
gather_based_on_match
(
input_tensor
,
unmatched_value
=
100.
,
ignored_value
=
200.
)
self
.
assertEquals
(
gathered_tensor
.
dtype
,
tf
.
float32
)
with
self
.
test_session
():
gathered_tensor_out
=
gathered_tensor
.
eval
()
self
.
assertAllEqual
(
expected_gathered_tensor
,
gathered_tensor_out
)
def
test_multidimensional_gather_based_on_match
(
self
):
match_results
=
tf
.
constant
([
1
,
-
1
,
-
2
])
input_tensor
=
tf
.
constant
([[
0
,
0.5
,
0
,
0.5
],
[
0
,
0
,
0.5
,
0.5
]],
dtype
=
tf
.
float32
)
expected_gathered_tensor
=
[[
0
,
0
,
0.5
,
0.5
],
[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
]]
match
=
matcher
.
Match
(
match_results
)
gathered_tensor
=
match
.
gather_based_on_match
(
input_tensor
,
unmatched_value
=
tf
.
zeros
(
4
),
ignored_value
=
tf
.
zeros
(
4
))
self
.
assertEquals
(
gathered_tensor
.
dtype
,
tf
.
float32
)
with
self
.
test_session
():
gathered_tensor_out
=
gathered_tensor
.
eval
()
self
.
assertAllEqual
(
expected_gathered_tensor
,
gathered_tensor_out
)
def
test_multidimensional_gather_based_on_match_with_matmul_gather_op
(
self
):
match_results
=
tf
.
constant
([
1
,
-
1
,
-
2
])
input_tensor
=
tf
.
constant
([[
0
,
0.5
,
0
,
0.5
],
[
0
,
0
,
0.5
,
0.5
]],
dtype
=
tf
.
float32
)
expected_gathered_tensor
=
[[
0
,
0
,
0.5
,
0.5
],
[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
]]
match
=
matcher
.
Match
(
match_results
,
use_matmul_gather
=
True
)
gathered_tensor
=
match
.
gather_based_on_match
(
input_tensor
,
unmatched_value
=
tf
.
zeros
(
4
),
ignored_value
=
tf
.
zeros
(
4
))
self
.
assertEquals
(
gathered_tensor
.
dtype
,
tf
.
float32
)
with
self
.
test_session
()
as
sess
:
self
.
assertTrue
(
all
([
op
.
name
is
not
'Gather'
for
op
in
sess
.
graph
.
get_operations
()]))
gathered_tensor_out
=
gathered_tensor
.
eval
()
self
.
assertAllEqual
(
expected_gathered_tensor
,
gathered_tensor_out
)
if
__name__
==
'__main__'
:
tf
.
test
.
main
()
research/object_detection/core/model.py
View file @
fd7b6887
...
...
@@ -39,6 +39,17 @@ resize/reshaping necessary (see docstring for the preprocess function).
Output classes are always integers in the range [0, num_classes). Any mapping
of these integers to semantic labels is to be handled outside of this class.
Images are resized in the `preprocess` method. All of `preprocess`, `predict`,
and `postprocess` should be reentrant.
The `preprocess` method runs `image_resizer_fn` that returns resized_images and
`true_image_shapes`. Since `image_resizer_fn` can pad the images with zeros,
true_image_shapes indicate the slices that contain the image without padding.
This is useful for padding images to be a fixed size for batching.
The `postprocess` method uses the true image shapes to clip predictions that lie
outside of images.
By default, DetectionModels produce bounding box detections; However, we support
a handful of auxiliary annotations associated with each bounding box, namely,
instance masks and keypoints.
...
...
@@ -106,12 +117,12 @@ class DetectionModel(object):
This function is responsible for any scaling/shifting of input values that
is necessary prior to running the detector on an input image.
It is also responsible for any resizing that might be necessary
as images
are assumed to arrive in arbitrary sizes. While this function
could
conceivably be part of the predict method (below), it is often
convenient
to keep these separate --- for example, we may want to preprocess
on one
device, place onto a queue, and let another device (e.g., the GPU)
handle
prediction.
It is also responsible for any resizing
, padding
that might be necessary
as images
are assumed to arrive in arbitrary sizes. While this function
could
conceivably be part of the predict method (below), it is often
convenient
to keep these separate --- for example, we may want to preprocess
on one
device, place onto a queue, and let another device (e.g., the GPU)
handle
prediction.
A few important notes about the preprocess function:
+ We assume that this operation does not have any trainable variables nor
...
...
@@ -134,11 +145,15 @@ class DetectionModel(object):
Returns:
preprocessed_inputs: a [batch, height_out, width_out, channels] float32
tensor representing a batch of images.
true_image_shapes: int32 tensor of shape [batch, 3] where each row is
of the form [height, width, channels] indicating the shapes
of true images in the resized images, as resized images can be padded
with zeros.
"""
pass
@
abstractmethod
def
predict
(
self
,
preprocessed_inputs
):
def
predict
(
self
,
preprocessed_inputs
,
true_image_shapes
):
"""Predict prediction tensors from inputs tensor.
Outputs of this function can be passed to loss or postprocess functions.
...
...
@@ -146,6 +161,10 @@ class DetectionModel(object):
Args:
preprocessed_inputs: a [batch, height, width, channels] float32 tensor
representing a batch of images.
true_image_shapes: int32 tensor of shape [batch, 3] where each row is
of the form [height, width, channels] indicating the shapes
of true images in the resized images, as resized images can be padded
with zeros.
Returns:
prediction_dict: a dictionary holding prediction tensors to be
...
...
@@ -154,7 +173,7 @@ class DetectionModel(object):
pass
@
abstractmethod
def
postprocess
(
self
,
prediction_dict
,
**
params
):
def
postprocess
(
self
,
prediction_dict
,
true_image_shapes
,
**
params
):
"""Convert predicted output tensors to final detections.
Outputs adhere to the following conventions:
...
...
@@ -172,6 +191,10 @@ class DetectionModel(object):
Args:
prediction_dict: a dictionary holding prediction tensors.
true_image_shapes: int32 tensor of shape [batch, 3] where each row is
of the form [height, width, channels] indicating the shapes
of true images in the resized images, as resized images can be padded
with zeros.
**params: Additional keyword arguments for specific implementations of
DetectionModel.
...
...
@@ -190,7 +213,7 @@ class DetectionModel(object):
pass
@
abstractmethod
def
loss
(
self
,
prediction_dict
):
def
loss
(
self
,
prediction_dict
,
true_image_shapes
):
"""Compute scalar loss tensors with respect to provided groundtruth.
Calling this function requires that groundtruth tensors have been
...
...
@@ -198,6 +221,10 @@ class DetectionModel(object):
Args:
prediction_dict: a dictionary holding predicted tensors
true_image_shapes: int32 tensor of shape [batch, 3] where each row is
of the form [height, width, channels] indicating the shapes
of true images in the resized images, as resized images can be padded
with zeros.
Returns:
a dictionary mapping strings (loss names) to scalar tensors representing
...
...
@@ -209,7 +236,8 @@ class DetectionModel(object):
groundtruth_boxes_list
,
groundtruth_classes_list
,
groundtruth_masks_list
=
None
,
groundtruth_keypoints_list
=
None
):
groundtruth_keypoints_list
=
None
,
groundtruth_weights_list
=
None
):
"""Provide groundtruth tensors.
Args:
...
...
@@ -230,10 +258,15 @@ class DetectionModel(object):
shape [num_boxes, num_keypoints, 2] containing keypoints.
Keypoints are assumed to be provided in normalized coordinates and
missing keypoints should be encoded as NaN.
groundtruth_weights_list: A list of 1-D tf.float32 tensors of shape
[num_boxes] containing weights for groundtruth boxes.
"""
self
.
_groundtruth_lists
[
fields
.
BoxListFields
.
boxes
]
=
groundtruth_boxes_list
self
.
_groundtruth_lists
[
fields
.
BoxListFields
.
classes
]
=
groundtruth_classes_list
if
groundtruth_weights_list
:
self
.
_groundtruth_lists
[
fields
.
BoxListFields
.
weights
]
=
groundtruth_weights_list
if
groundtruth_masks_list
:
self
.
_groundtruth_lists
[
fields
.
BoxListFields
.
masks
]
=
groundtruth_masks_list
...
...
research/object_detection/core/post_processing.py
View file @
fd7b6887
...
...
@@ -20,6 +20,7 @@ import tensorflow as tf
from
object_detection.core
import
box_list
from
object_detection.core
import
box_list_ops
from
object_detection.core
import
standard_fields
as
fields
from
object_detection.utils
import
shape_utils
def
multiclass_non_max_suppression
(
boxes
,
...
...
@@ -31,6 +32,7 @@ def multiclass_non_max_suppression(boxes,
clip_window
=
None
,
change_coordinate_frame
=
False
,
masks
=
None
,
boundaries
=
None
,
additional_fields
=
None
,
scope
=
None
):
"""Multi-class version of non maximum suppression.
...
...
@@ -66,6 +68,9 @@ def multiclass_non_max_suppression(boxes,
masks: (optional) a [k, q, mask_height, mask_width] float32 tensor
containing box masks. `q` can be either number of classes or 1 depending
on whether a separate mask is predicted per class.
boundaries: (optional) a [k, q, boundary_height, boundary_width] float32
tensor containing box boundaries. `q` can be either number of classes or 1
depending on whether a separate boundary is predicted per class.
additional_fields: (optional) If not None, a dictionary that maps keys to
tensors whose first dimensions are all of size `k`. After non-maximum
suppression, all tensors corresponding to the selected boxes will be
...
...
@@ -114,6 +119,8 @@ def multiclass_non_max_suppression(boxes,
per_class_boxes_list
=
tf
.
unstack
(
boxes
,
axis
=
1
)
if
masks
is
not
None
:
per_class_masks_list
=
tf
.
unstack
(
masks
,
axis
=
1
)
if
boundaries
is
not
None
:
per_class_boundaries_list
=
tf
.
unstack
(
boundaries
,
axis
=
1
)
boxes_ids
=
(
range
(
num_classes
)
if
len
(
per_class_boxes_list
)
>
1
else
[
0
]
*
num_classes
)
for
class_idx
,
boxes_idx
in
zip
(
range
(
num_classes
),
boxes_ids
):
...
...
@@ -128,6 +135,10 @@ def multiclass_non_max_suppression(boxes,
per_class_masks
=
per_class_masks_list
[
boxes_idx
]
boxlist_and_class_scores
.
add_field
(
fields
.
BoxListFields
.
masks
,
per_class_masks
)
if
boundaries
is
not
None
:
per_class_boundaries
=
per_class_boundaries_list
[
boxes_idx
]
boxlist_and_class_scores
.
add_field
(
fields
.
BoxListFields
.
boundaries
,
per_class_boundaries
)
if
additional_fields
is
not
None
:
for
key
,
tensor
in
additional_fields
.
items
():
boxlist_and_class_scores
.
add_field
(
key
,
tensor
)
...
...
@@ -194,9 +205,12 @@ def batch_multiclass_non_max_suppression(boxes,
max_size_per_class: maximum number of retained boxes per class.
max_total_size: maximum number of boxes retained over all classes. By
default returns all boxes retained after capping boxes per class.
clip_window: A float32 tensor of the form [y_min, x_min, y_max, x_max]
representing the window to clip boxes to before performing non-max
suppression.
clip_window: A float32 tensor of shape [batch_size, 4] where each entry is
of the form [y_min, x_min, y_max, x_max] representing the window to clip
boxes to before performing non-max suppression. This argument can also be
a tensor of shape [4] in which case, the same clip window is applied to
all images in the batch. If clip_widow is None, all boxes are used to
perform non-max suppression.
change_coordinate_frame: Whether to normalize coordinates after clipping
relative to clip_window (this can only be set to True if a clip_window
is provided)
...
...
@@ -242,7 +256,9 @@ def batch_multiclass_non_max_suppression(boxes,
if
q
!=
1
and
q
!=
num_classes
:
raise
ValueError
(
'third dimension of boxes must be either 1 or equal '
'to the third dimension of scores'
)
if
change_coordinate_frame
and
clip_window
is
None
:
raise
ValueError
(
'if change_coordinate_frame is True, then a clip_window'
'must be specified.'
)
original_masks
=
masks
original_additional_fields
=
additional_fields
with
tf
.
name_scope
(
scope
,
'BatchMultiClassNonMaxSuppression'
):
...
...
@@ -266,6 +282,16 @@ def batch_multiclass_non_max_suppression(boxes,
masks_shape
=
tf
.
stack
([
batch_size
,
num_anchors
,
1
,
0
,
0
])
masks
=
tf
.
zeros
(
masks_shape
)
if
clip_window
is
None
:
clip_window
=
tf
.
stack
([
tf
.
reduce_min
(
boxes
[:,
:,
:,
0
]),
tf
.
reduce_min
(
boxes
[:,
:,
:,
1
]),
tf
.
reduce_max
(
boxes
[:,
:,
:,
2
]),
tf
.
reduce_max
(
boxes
[:,
:,
:,
3
])
])
if
clip_window
.
shape
.
ndims
==
1
:
clip_window
=
tf
.
tile
(
tf
.
expand_dims
(
clip_window
,
0
),
[
batch_size
,
1
])
if
additional_fields
is
None
:
additional_fields
=
{}
...
...
@@ -283,6 +309,9 @@ def batch_multiclass_non_max_suppression(boxes,
per_image_masks - A [num_anchors, q, mask_height, mask_width] float32
tensor containing box masks. `q` can be either number of classes
or 1 depending on whether a separate mask is predicted per class.
per_image_clip_window - A 1D float32 tensor of the form
[ymin, xmin, ymax, xmax] representing the window to clip the boxes
to.
per_image_additional_fields - (optional) A variable number of float32
tensors each with size [num_anchors, ...].
per_image_num_valid_boxes - A tensor of type `int32`. A 1-D tensor of
...
...
@@ -311,9 +340,10 @@ def batch_multiclass_non_max_suppression(boxes,
per_image_boxes
=
args
[
0
]
per_image_scores
=
args
[
1
]
per_image_masks
=
args
[
2
]
per_image_clip_window
=
args
[
3
]
per_image_additional_fields
=
{
key
:
value
for
key
,
value
in
zip
(
additional_fields
,
args
[
3
:
-
1
])
for
key
,
value
in
zip
(
additional_fields
,
args
[
4
:
-
1
])
}
per_image_num_valid_boxes
=
args
[
-
1
]
per_image_boxes
=
tf
.
reshape
(
...
...
@@ -345,7 +375,7 @@ def batch_multiclass_non_max_suppression(boxes,
iou_thresh
,
max_size_per_class
,
max_total_size
,
clip_window
=
clip_window
,
clip_window
=
per_image_
clip_window
,
change_coordinate_frame
=
change_coordinate_frame
,
masks
=
per_image_masks
,
additional_fields
=
per_image_additional_fields
)
...
...
@@ -367,10 +397,10 @@ def batch_multiclass_non_max_suppression(boxes,
num_additional_fields
=
len
(
additional_fields
)
num_nmsed_outputs
=
4
+
num_additional_fields
batch_outputs
=
tf
.
map_fn
(
batch_outputs
=
shape_utils
.
static_or_dynamic_
map_fn
(
_single_image_nms_fn
,
elems
=
([
boxes
,
scores
,
masks
]
+
list
(
additional_fields
.
values
())
+
[
num_valid_boxes
]),
elems
=
([
boxes
,
scores
,
masks
,
clip_window
]
+
list
(
additional_fields
.
values
())
+
[
num_valid_boxes
]),
dtype
=
(
num_nmsed_outputs
*
[
tf
.
float32
]
+
[
tf
.
int32
]),
parallel_iterations
=
parallel_iterations
)
...
...
research/object_detection/core/post_processing_test.py
View file @
fd7b6887
...
...
@@ -571,6 +571,125 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
self
.
assertAllClose
(
nmsed_classes
,
exp_nms_classes
)
self
.
assertAllClose
(
num_detections
,
[
2
,
3
])
def
test_batch_multiclass_nms_with_per_batch_clip_window
(
self
):
boxes
=
tf
.
constant
([[[[
0
,
0
,
1
,
1
],
[
0
,
0
,
4
,
5
]],
[[
0
,
0.1
,
1
,
1.1
],
[
0
,
0.1
,
2
,
1.1
]],
[[
0
,
-
0.1
,
1
,
0.9
],
[
0
,
-
0.1
,
1
,
0.9
]],
[[
0
,
10
,
1
,
11
],
[
0
,
10
,
1
,
11
]]],
[[[
0
,
10.1
,
1
,
11.1
],
[
0
,
10.1
,
1
,
11.1
]],
[[
0
,
100
,
1
,
101
],
[
0
,
100
,
1
,
101
]],
[[
0
,
1000
,
1
,
1002
],
[
0
,
999
,
2
,
1004
]],
[[
0
,
1000
,
1
,
1002.1
],
[
0
,
999
,
2
,
1002.7
]]]],
tf
.
float32
)
scores
=
tf
.
constant
([[[.
9
,
0.01
],
[.
75
,
0.05
],
[.
6
,
0.01
],
[.
95
,
0
]],
[[.
5
,
0.01
],
[.
3
,
0.01
],
[.
01
,
.
85
],
[.
01
,
.
5
]]])
clip_window
=
tf
.
constant
([
0.
,
0.
,
200.
,
200.
])
score_thresh
=
0.1
iou_thresh
=
.
5
max_output_size
=
4
exp_nms_corners
=
np
.
array
([[[
0
,
10
,
1
,
11
],
[
0
,
0
,
1
,
1
],
[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
]],
[[
0
,
10.1
,
1
,
11.1
],
[
0
,
100
,
1
,
101
],
[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
]]])
exp_nms_scores
=
np
.
array
([[.
95
,
.
9
,
0
,
0
],
[.
5
,
.
3
,
0
,
0
]])
exp_nms_classes
=
np
.
array
([[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
]])
(
nmsed_boxes
,
nmsed_scores
,
nmsed_classes
,
nmsed_masks
,
nmsed_additional_fields
,
num_detections
)
=
post_processing
.
batch_multiclass_non_max_suppression
(
boxes
,
scores
,
score_thresh
,
iou_thresh
,
max_size_per_class
=
max_output_size
,
max_total_size
=
max_output_size
,
clip_window
=
clip_window
)
self
.
assertIsNone
(
nmsed_masks
)
self
.
assertIsNone
(
nmsed_additional_fields
)
# Check static shapes
self
.
assertAllEqual
(
nmsed_boxes
.
shape
.
as_list
(),
exp_nms_corners
.
shape
)
self
.
assertAllEqual
(
nmsed_scores
.
shape
.
as_list
(),
exp_nms_scores
.
shape
)
self
.
assertAllEqual
(
nmsed_classes
.
shape
.
as_list
(),
exp_nms_classes
.
shape
)
self
.
assertEqual
(
num_detections
.
shape
.
as_list
(),
[
2
])
with
self
.
test_session
()
as
sess
:
(
nmsed_boxes
,
nmsed_scores
,
nmsed_classes
,
num_detections
)
=
sess
.
run
([
nmsed_boxes
,
nmsed_scores
,
nmsed_classes
,
num_detections
])
self
.
assertAllClose
(
nmsed_boxes
,
exp_nms_corners
)
self
.
assertAllClose
(
nmsed_scores
,
exp_nms_scores
)
self
.
assertAllClose
(
nmsed_classes
,
exp_nms_classes
)
self
.
assertAllClose
(
num_detections
,
[
2
,
2
])
def
test_batch_multiclass_nms_with_per_image_clip_window
(
self
):
boxes
=
tf
.
constant
([[[[
0
,
0
,
1
,
1
],
[
0
,
0
,
4
,
5
]],
[[
0
,
0.1
,
1
,
1.1
],
[
0
,
0.1
,
2
,
1.1
]],
[[
0
,
-
0.1
,
1
,
0.9
],
[
0
,
-
0.1
,
1
,
0.9
]],
[[
0
,
10
,
1
,
11
],
[
0
,
10
,
1
,
11
]]],
[[[
0
,
10.1
,
1
,
11.1
],
[
0
,
10.1
,
1
,
11.1
]],
[[
0
,
100
,
1
,
101
],
[
0
,
100
,
1
,
101
]],
[[
0
,
1000
,
1
,
1002
],
[
0
,
999
,
2
,
1004
]],
[[
0
,
1000
,
1
,
1002.1
],
[
0
,
999
,
2
,
1002.7
]]]],
tf
.
float32
)
scores
=
tf
.
constant
([[[.
9
,
0.01
],
[.
75
,
0.05
],
[.
6
,
0.01
],
[.
95
,
0
]],
[[.
5
,
0.01
],
[.
3
,
0.01
],
[.
01
,
.
85
],
[.
01
,
.
5
]]])
clip_window
=
tf
.
constant
([[
0.
,
0.
,
5.
,
5.
],
[
0.
,
0.
,
200.
,
200.
]])
score_thresh
=
0.1
iou_thresh
=
.
5
max_output_size
=
4
exp_nms_corners
=
np
.
array
([[[
0
,
0
,
1
,
1
],
[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
]],
[[
0
,
10.1
,
1
,
11.1
],
[
0
,
100
,
1
,
101
],
[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
]]])
exp_nms_scores
=
np
.
array
([[.
9
,
0.
,
0.
,
0.
],
[.
5
,
.
3
,
0
,
0
]])
exp_nms_classes
=
np
.
array
([[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
]])
(
nmsed_boxes
,
nmsed_scores
,
nmsed_classes
,
nmsed_masks
,
nmsed_additional_fields
,
num_detections
)
=
post_processing
.
batch_multiclass_non_max_suppression
(
boxes
,
scores
,
score_thresh
,
iou_thresh
,
max_size_per_class
=
max_output_size
,
max_total_size
=
max_output_size
,
clip_window
=
clip_window
)
self
.
assertIsNone
(
nmsed_masks
)
self
.
assertIsNone
(
nmsed_additional_fields
)
# Check static shapes
self
.
assertAllEqual
(
nmsed_boxes
.
shape
.
as_list
(),
exp_nms_corners
.
shape
)
self
.
assertAllEqual
(
nmsed_scores
.
shape
.
as_list
(),
exp_nms_scores
.
shape
)
self
.
assertAllEqual
(
nmsed_classes
.
shape
.
as_list
(),
exp_nms_classes
.
shape
)
self
.
assertEqual
(
num_detections
.
shape
.
as_list
(),
[
2
])
with
self
.
test_session
()
as
sess
:
(
nmsed_boxes
,
nmsed_scores
,
nmsed_classes
,
num_detections
)
=
sess
.
run
([
nmsed_boxes
,
nmsed_scores
,
nmsed_classes
,
num_detections
])
self
.
assertAllClose
(
nmsed_boxes
,
exp_nms_corners
)
self
.
assertAllClose
(
nmsed_scores
,
exp_nms_scores
)
self
.
assertAllClose
(
nmsed_classes
,
exp_nms_classes
)
self
.
assertAllClose
(
num_detections
,
[
1
,
2
])
def
test_batch_multiclass_nms_with_masks
(
self
):
boxes
=
tf
.
constant
([[[[
0
,
0
,
1
,
1
],
[
0
,
0
,
4
,
5
]],
[[
0
,
0.1
,
1
,
1.1
],
[
0
,
0.1
,
2
,
1.1
]],
...
...
research/object_detection/core/preprocessor.py
View file @
fd7b6887
...
...
@@ -35,6 +35,27 @@ in each row there is a box with [ymin xmin ymax xmax].
Boxes are in normalized coordinates meaning
their coordinate values range in [0, 1]
To preprocess multiple images with the same operations in cases where
nondeterministic operations are used, a preprocessor_cache.PreprocessorCache
object can be passed into the preprocess function or individual operations.
All nondeterministic operations except random_jitter_boxes support caching.
E.g.
Let tensor_dict{1,2,3,4,5} be copies of the same inputs.
Let preprocess_options contain nondeterministic operation(s) excluding
random_jitter_boxes.
cache1 = preprocessor_cache.PreprocessorCache()
cache2 = preprocessor_cache.PreprocessorCache()
a = preprocess(tensor_dict1, preprocess_options, preprocess_vars_cache=cache1)
b = preprocess(tensor_dict2, preprocess_options, preprocess_vars_cache=cache1)
c = preprocess(tensor_dict3, preprocess_options, preprocess_vars_cache=cache2)
d = preprocess(tensor_dict4, preprocess_options, preprocess_vars_cache=cache2)
e = preprocess(tensor_dict5, preprocess_options)
Then correspondings tensors of object pairs (a,b) and (c,d)
are guaranteed to be equal element-wise, but the equality of any other object
pair cannot be determined.
Important Note: In tensor_dict, images is a rank 4 tensor, but preprocessing
functions receive a rank 3 tensor for processing the image. Thus, inside the
preprocess function we squeeze the image to become a rank 3 tensor and then
...
...
@@ -42,6 +63,8 @@ we pass it to the functions. At the end of the preprocess we expand the image
back to rank 4.
"""
import
functools
import
inspect
import
sys
import
tensorflow
as
tf
...
...
@@ -50,44 +73,79 @@ from tensorflow.python.ops import control_flow_ops
from
object_detection.core
import
box_list
from
object_detection.core
import
box_list_ops
from
object_detection.core
import
keypoint_ops
from
object_detection.core
import
preprocessor_cache
from
object_detection.core
import
standard_fields
as
fields
from
object_detection.utils
import
shape_utils
def
_apply_with_random_selector
(
x
,
func
,
num_cases
):
def
_apply_with_random_selector
(
x
,
func
,
num_cases
,
preprocess_vars_cache
=
None
,
key
=
''
):
"""Computes func(x, sel), with sel sampled from [0...num_cases-1].
If both preprocess_vars_cache AND key are the same between two calls, sel will
be the same value in both calls.
Args:
x: input Tensor.
func: Python function to apply.
num_cases: Python int32, number of cases to sample sel from.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
key: variable identifier for preprocess_vars_cache.
Returns:
The result of func(x, sel), where func receives the value of the
selector as a python integer, but sel is sampled dynamically.
"""
rand_sel
=
tf
.
random_uniform
([],
maxval
=
num_cases
,
dtype
=
tf
.
int32
)
generator_func
=
functools
.
partial
(
tf
.
random_uniform
,
[],
maxval
=
num_cases
,
dtype
=
tf
.
int32
)
rand_sel
=
_get_or_create_preprocess_rand_vars
(
generator_func
,
preprocessor_cache
.
PreprocessorCache
.
SELECTOR
,
preprocess_vars_cache
,
key
)
# Pass the real x only to one of the func calls.
return
control_flow_ops
.
merge
([
func
(
control_flow_ops
.
switch
(
x
,
tf
.
equal
(
rand_sel
,
case
))[
1
],
case
)
for
case
in
range
(
num_cases
)])[
0
]
def
_apply_with_random_selector_tuples
(
x
,
func
,
num_cases
):
def
_apply_with_random_selector_tuples
(
x
,
func
,
num_cases
,
preprocess_vars_cache
=
None
,
key
=
''
):
"""Computes func(x, sel), with sel sampled from [0...num_cases-1].
If both preprocess_vars_cache AND key are the same between two calls, sel will
be the same value in both calls.
Args:
x: A tuple of input tensors.
func: Python function to apply.
num_cases: Python int32, number of cases to sample sel from.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
key: variable identifier for preprocess_vars_cache.
Returns:
The result of func(x, sel), where func receives the value of the
selector as a python integer, but sel is sampled dynamically.
"""
num_inputs
=
len
(
x
)
rand_sel
=
tf
.
random_uniform
([],
maxval
=
num_cases
,
dtype
=
tf
.
int32
)
# Pass the real x only to one of the func calls.
generator_func
=
functools
.
partial
(
tf
.
random_uniform
,
[],
maxval
=
num_cases
,
dtype
=
tf
.
int32
)
rand_sel
=
_get_or_create_preprocess_rand_vars
(
generator_func
,
preprocessor_cache
.
PreprocessorCache
.
SELECTOR_TUPLES
,
preprocess_vars_cache
,
key
)
# Pass the real x only to one of the func calls.
tuples
=
[
list
()
for
t
in
x
]
for
case
in
range
(
num_cases
):
new_x
=
[
control_flow_ops
.
switch
(
t
,
tf
.
equal
(
rand_sel
,
case
))[
1
]
for
t
in
x
]
...
...
@@ -100,6 +158,37 @@ def _apply_with_random_selector_tuples(x, func, num_cases):
return
tuple
(
tuples
)
def
_get_or_create_preprocess_rand_vars
(
generator_func
,
function_id
,
preprocess_vars_cache
,
key
=
''
):
"""Returns a tensor stored in preprocess_vars_cache or using generator_func.
If the tensor was previously generated and appears in the PreprocessorCache,
the previously generated tensor will be returned. Otherwise, a new tensor
is generated using generator_func and stored in the cache.
Args:
generator_func: A 0-argument function that generates a tensor.
function_id: identifier for the preprocessing function used.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
key: identifier for the variable stored.
Returns:
The generated tensor.
"""
if
preprocess_vars_cache
is
not
None
:
var
=
preprocess_vars_cache
.
get
(
function_id
,
key
)
if
var
is
None
:
var
=
generator_func
()
preprocess_vars_cache
.
update
(
function_id
,
key
,
var
)
else
:
var
=
generator_func
()
return
var
def
_random_integer
(
minval
,
maxval
,
seed
):
"""Returns a random 0-D tensor between minval and maxval.
...
...
@@ -115,6 +204,40 @@ def _random_integer(minval, maxval, seed):
[],
minval
=
minval
,
maxval
=
maxval
,
dtype
=
tf
.
int32
,
seed
=
seed
)
# TODO: This method is needed because the current
# tf.image.rgb_to_grayscale method does not support quantization. Replace with
# tf.image.rgb_to_grayscale after quantization support is added.
def
_rgb_to_grayscale
(
images
,
name
=
None
):
"""Converts one or more images from RGB to Grayscale.
Outputs a tensor of the same `DType` and rank as `images`. The size of the
last dimension of the output is 1, containing the Grayscale value of the
pixels.
Args:
images: The RGB tensor to convert. Last dimension must have size 3 and
should contain RGB values.
name: A name for the operation (optional).
Returns:
The converted grayscale image(s).
"""
with
tf
.
name_scope
(
name
,
'rgb_to_grayscale'
,
[
images
])
as
name
:
images
=
tf
.
convert_to_tensor
(
images
,
name
=
'images'
)
# Remember original dtype to so we can convert back if needed
orig_dtype
=
images
.
dtype
flt_image
=
tf
.
image
.
convert_image_dtype
(
images
,
tf
.
float32
)
# Reference for converting between RGB and grayscale.
# https://en.wikipedia.org/wiki/Luma_%28video%29
rgb_weights
=
[
0.2989
,
0.5870
,
0.1140
]
rank_1
=
tf
.
expand_dims
(
tf
.
rank
(
images
)
-
1
,
0
)
gray_float
=
tf
.
reduce_sum
(
flt_image
*
rgb_weights
,
rank_1
,
keepdims
=
True
)
gray_float
.
set_shape
(
images
.
get_shape
()[:
-
1
].
concatenate
([
1
]))
return
tf
.
image
.
convert_image_dtype
(
gray_float
,
orig_dtype
,
name
=
name
)
def
normalize_image
(
image
,
original_minval
,
original_maxval
,
target_minval
,
target_maxval
):
"""Normalizes pixel values in the image.
...
...
@@ -312,7 +435,8 @@ def random_horizontal_flip(image,
masks
=
None
,
keypoints
=
None
,
keypoint_flip_permutation
=
None
,
seed
=
None
):
seed
=
None
,
preprocess_vars_cache
=
None
):
"""Randomly flips the image and detections horizontally.
The probability of flipping the image is 50%.
...
...
@@ -333,6 +457,10 @@ def random_horizontal_flip(image,
keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip
permutation.
seed: random seed
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
image: image which is the same shape as input image.
...
...
@@ -364,7 +492,12 @@ def random_horizontal_flip(image,
with
tf
.
name_scope
(
'RandomHorizontalFlip'
,
values
=
[
image
,
boxes
]):
result
=
[]
# random variable defining whether to do flip or not
do_a_flip_random
=
tf
.
greater
(
tf
.
random_uniform
([],
seed
=
seed
),
0.5
)
generator_func
=
functools
.
partial
(
tf
.
random_uniform
,
[],
seed
=
seed
)
do_a_flip_random
=
_get_or_create_preprocess_rand_vars
(
generator_func
,
preprocessor_cache
.
PreprocessorCache
.
HORIZONTAL_FLIP
,
preprocess_vars_cache
)
do_a_flip_random
=
tf
.
greater
(
do_a_flip_random
,
0.5
)
# flip image
image
=
tf
.
cond
(
do_a_flip_random
,
lambda
:
_flip_image
(
image
),
lambda
:
image
)
...
...
@@ -399,7 +532,8 @@ def random_vertical_flip(image,
masks
=
None
,
keypoints
=
None
,
keypoint_flip_permutation
=
None
,
seed
=
None
):
seed
=
None
,
preprocess_vars_cache
=
None
):
"""Randomly flips the image and detections vertically.
The probability of flipping the image is 50%.
...
...
@@ -420,6 +554,10 @@ def random_vertical_flip(image,
keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip
permutation.
seed: random seed
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
image: image which is the same shape as input image.
...
...
@@ -451,7 +589,11 @@ def random_vertical_flip(image,
with
tf
.
name_scope
(
'RandomVerticalFlip'
,
values
=
[
image
,
boxes
]):
result
=
[]
# random variable defining whether to do flip or not
do_a_flip_random
=
tf
.
greater
(
tf
.
random_uniform
([],
seed
=
seed
),
0.5
)
generator_func
=
functools
.
partial
(
tf
.
random_uniform
,
[],
seed
=
seed
)
do_a_flip_random
=
_get_or_create_preprocess_rand_vars
(
generator_func
,
preprocessor_cache
.
PreprocessorCache
.
VERTICAL_FLIP
,
preprocess_vars_cache
)
do_a_flip_random
=
tf
.
greater
(
do_a_flip_random
,
0.5
)
# flip image
image
=
tf
.
cond
(
do_a_flip_random
,
lambda
:
_flip_image
(
image
),
lambda
:
image
)
...
...
@@ -485,7 +627,8 @@ def random_rotation90(image,
boxes
=
None
,
masks
=
None
,
keypoints
=
None
,
seed
=
None
):
seed
=
None
,
preprocess_vars_cache
=
None
):
"""Randomly rotates the image and detections 90 degrees counter-clockwise.
The probability of rotating the image is 50%. This can be combined with
...
...
@@ -507,6 +650,10 @@ def random_rotation90(image,
[num_instances, num_keypoints, 2]. The keypoints are in y-x
normalized coordinates.
seed: random seed
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
image: image which is the same shape as input image.
...
...
@@ -532,7 +679,11 @@ def random_rotation90(image,
result
=
[]
# random variable defining whether to rotate by 90 degrees or not
do_a_rot90_random
=
tf
.
greater
(
tf
.
random_uniform
([],
seed
=
seed
),
0.5
)
generator_func
=
functools
.
partial
(
tf
.
random_uniform
,
[],
seed
=
seed
)
do_a_rot90_random
=
_get_or_create_preprocess_rand_vars
(
generator_func
,
preprocessor_cache
.
PreprocessorCache
.
ROTATION90
,
preprocess_vars_cache
)
do_a_rot90_random
=
tf
.
greater
(
do_a_rot90_random
,
0.5
)
# flip image
image
=
tf
.
cond
(
do_a_rot90_random
,
lambda
:
_rot90_image
(
image
),
...
...
@@ -562,7 +713,11 @@ def random_rotation90(image,
return
tuple
(
result
)
def
random_pixel_value_scale
(
image
,
minval
=
0.9
,
maxval
=
1.1
,
seed
=
None
):
def
random_pixel_value_scale
(
image
,
minval
=
0.9
,
maxval
=
1.1
,
seed
=
None
,
preprocess_vars_cache
=
None
):
"""Scales each value in the pixels of the image.
This function scales each pixel independent of the other ones.
...
...
@@ -575,17 +730,24 @@ def random_pixel_value_scale(image, minval=0.9, maxval=1.1, seed=None):
minval: lower ratio of scaling pixel values.
maxval: upper ratio of scaling pixel values.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
image: image which is the same shape as input image.
"""
with
tf
.
name_scope
(
'RandomPixelValueScale'
,
values
=
[
image
]):
color_coef
=
tf
.
random_uniform
(
tf
.
shape
(
image
),
minval
=
minval
,
maxval
=
maxval
,
dtype
=
tf
.
float32
,
seed
=
seed
)
generator_func
=
functools
.
partial
(
tf
.
random_uniform
,
tf
.
shape
(
image
),
minval
=
minval
,
maxval
=
maxval
,
dtype
=
tf
.
float32
,
seed
=
seed
)
color_coef
=
_get_or_create_preprocess_rand_vars
(
generator_func
,
preprocessor_cache
.
PreprocessorCache
.
PIXEL_VALUE_SCALE
,
preprocess_vars_cache
)
image
=
tf
.
multiply
(
image
,
color_coef
)
image
=
tf
.
clip_by_value
(
image
,
0.0
,
1.0
)
...
...
@@ -596,7 +758,8 @@ def random_image_scale(image,
masks
=
None
,
min_scale_ratio
=
0.5
,
max_scale_ratio
=
2.0
,
seed
=
None
):
seed
=
None
,
preprocess_vars_cache
=
None
):
"""Scales the image size.
Args:
...
...
@@ -607,6 +770,10 @@ def random_image_scale(image,
min_scale_ratio: minimum scaling ratio.
max_scale_ratio: maximum scaling ratio.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
image: image which is the same rank as input image.
...
...
@@ -618,10 +785,14 @@ def random_image_scale(image,
image_shape
=
tf
.
shape
(
image
)
image_height
=
image_shape
[
0
]
image_width
=
image_shape
[
1
]
size_coef
=
tf
.
random_uniform
([],
minval
=
min_scale_ratio
,
maxval
=
max_scale_ratio
,
dtype
=
tf
.
float32
,
seed
=
seed
)
generator_func
=
functools
.
partial
(
tf
.
random_uniform
,
[],
minval
=
min_scale_ratio
,
maxval
=
max_scale_ratio
,
dtype
=
tf
.
float32
,
seed
=
seed
)
size_coef
=
_get_or_create_preprocess_rand_vars
(
generator_func
,
preprocessor_cache
.
PreprocessorCache
.
IMAGE_SCALE
,
preprocess_vars_cache
)
image_newysize
=
tf
.
to_int32
(
tf
.
multiply
(
tf
.
to_float
(
image_height
),
size_coef
))
image_newxsize
=
tf
.
to_int32
(
...
...
@@ -636,7 +807,10 @@ def random_image_scale(image,
return
tuple
(
result
)
def
random_rgb_to_gray
(
image
,
probability
=
0.1
,
seed
=
None
):
def
random_rgb_to_gray
(
image
,
probability
=
0.1
,
seed
=
None
,
preprocess_vars_cache
=
None
):
"""Changes the image from RGB to Grayscale with the given probability.
Args:
...
...
@@ -645,18 +819,25 @@ def random_rgb_to_gray(image, probability=0.1, seed=None):
probability: the probability of returning a grayscale image.
The probability should be a number between [0, 1].
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
image: image which is the same shape as input image.
"""
def
_image_to_gray
(
image
):
image_gray1
=
tf
.
image
.
rgb_to_grayscale
(
image
)
image_gray1
=
_
rgb_to_grayscale
(
image
)
image_gray3
=
tf
.
image
.
grayscale_to_rgb
(
image_gray1
)
return
image_gray3
with
tf
.
name_scope
(
'RandomRGBtoGray'
,
values
=
[
image
]):
# random variable defining whether to do flip or not
do_gray_random
=
tf
.
random_uniform
([],
seed
=
seed
)
# random variable defining whether to change to grayscale or not
generator_func
=
functools
.
partial
(
tf
.
random_uniform
,
[],
seed
=
seed
)
do_gray_random
=
_get_or_create_preprocess_rand_vars
(
generator_func
,
preprocessor_cache
.
PreprocessorCache
.
RGB_TO_GRAY
,
preprocess_vars_cache
)
image
=
tf
.
cond
(
tf
.
greater
(
do_gray_random
,
probability
),
lambda
:
image
,
...
...
@@ -665,7 +846,10 @@ def random_rgb_to_gray(image, probability=0.1, seed=None):
return
image
def
random_adjust_brightness
(
image
,
max_delta
=
0.2
):
def
random_adjust_brightness
(
image
,
max_delta
=
0.2
,
seed
=
None
,
preprocess_vars_cache
=
None
):
"""Randomly adjusts brightness.
Makes sure the output image is still between 0 and 1.
...
...
@@ -674,18 +858,34 @@ def random_adjust_brightness(image, max_delta=0.2):
image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
with pixel values varying between [0, 1].
max_delta: how much to change the brightness. A value between [0, 1).
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
image: image which is the same shape as input image.
boxes: boxes which is the same shape as input boxes.
"""
with
tf
.
name_scope
(
'RandomAdjustBrightness'
,
values
=
[
image
]):
image
=
tf
.
image
.
random_brightness
(
image
,
max_delta
)
generator_func
=
functools
.
partial
(
tf
.
random_uniform
,
[],
-
max_delta
,
max_delta
,
seed
=
seed
)
delta
=
_get_or_create_preprocess_rand_vars
(
generator_func
,
preprocessor_cache
.
PreprocessorCache
.
ADJUST_BRIGHTNESS
,
preprocess_vars_cache
)
image
=
tf
.
image
.
adjust_brightness
(
image
,
delta
)
image
=
tf
.
clip_by_value
(
image
,
clip_value_min
=
0.0
,
clip_value_max
=
1.0
)
return
image
def
random_adjust_contrast
(
image
,
min_delta
=
0.8
,
max_delta
=
1.25
):
def
random_adjust_contrast
(
image
,
min_delta
=
0.8
,
max_delta
=
1.25
,
seed
=
None
,
preprocess_vars_cache
=
None
):
"""Randomly adjusts contrast.
Makes sure the output image is still between 0 and 1.
...
...
@@ -697,17 +897,31 @@ def random_adjust_contrast(image, min_delta=0.8, max_delta=1.25):
max_delta: how much to change the contrast. Contrast will change with a
value between min_delta and max_delta. This value will be
multiplied to the current contrast of the image.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
image: image which is the same shape as input image.
"""
with
tf
.
name_scope
(
'RandomAdjustContrast'
,
values
=
[
image
]):
image
=
tf
.
image
.
random_contrast
(
image
,
min_delta
,
max_delta
)
generator_func
=
functools
.
partial
(
tf
.
random_uniform
,
[],
min_delta
,
max_delta
,
seed
=
seed
)
contrast_factor
=
_get_or_create_preprocess_rand_vars
(
generator_func
,
preprocessor_cache
.
PreprocessorCache
.
ADJUST_CONTRAST
,
preprocess_vars_cache
)
image
=
tf
.
image
.
adjust_contrast
(
image
,
contrast_factor
)
image
=
tf
.
clip_by_value
(
image
,
clip_value_min
=
0.0
,
clip_value_max
=
1.0
)
return
image
def
random_adjust_hue
(
image
,
max_delta
=
0.02
):
def
random_adjust_hue
(
image
,
max_delta
=
0.02
,
seed
=
None
,
preprocess_vars_cache
=
None
):
"""Randomly adjusts hue.
Makes sure the output image is still between 0 and 1.
...
...
@@ -716,17 +930,31 @@ def random_adjust_hue(image, max_delta=0.02):
image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
with pixel values varying between [0, 1].
max_delta: change hue randomly with a value between 0 and max_delta.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
image: image which is the same shape as input image.
"""
with
tf
.
name_scope
(
'RandomAdjustHue'
,
values
=
[
image
]):
image
=
tf
.
image
.
random_hue
(
image
,
max_delta
)
generator_func
=
functools
.
partial
(
tf
.
random_uniform
,
[],
-
max_delta
,
max_delta
,
seed
=
seed
)
delta
=
_get_or_create_preprocess_rand_vars
(
generator_func
,
preprocessor_cache
.
PreprocessorCache
.
ADJUST_HUE
,
preprocess_vars_cache
)
image
=
tf
.
image
.
adjust_hue
(
image
,
delta
)
image
=
tf
.
clip_by_value
(
image
,
clip_value_min
=
0.0
,
clip_value_max
=
1.0
)
return
image
def
random_adjust_saturation
(
image
,
min_delta
=
0.8
,
max_delta
=
1.25
):
def
random_adjust_saturation
(
image
,
min_delta
=
0.8
,
max_delta
=
1.25
,
seed
=
None
,
preprocess_vars_cache
=
None
):
"""Randomly adjusts saturation.
Makes sure the output image is still between 0 and 1.
...
...
@@ -738,17 +966,28 @@ def random_adjust_saturation(image, min_delta=0.8, max_delta=1.25):
max_delta: how much to change the saturation. Saturation will change with a
value between min_delta and max_delta. This value will be
multiplied to the current saturation of the image.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
image: image which is the same shape as input image.
"""
with
tf
.
name_scope
(
'RandomAdjustSaturation'
,
values
=
[
image
]):
image
=
tf
.
image
.
random_saturation
(
image
,
min_delta
,
max_delta
)
generator_func
=
functools
.
partial
(
tf
.
random_uniform
,
[],
min_delta
,
max_delta
,
seed
=
seed
)
saturation_factor
=
_get_or_create_preprocess_rand_vars
(
generator_func
,
preprocessor_cache
.
PreprocessorCache
.
ADJUST_SATURATION
,
preprocess_vars_cache
)
image
=
tf
.
image
.
adjust_saturation
(
image
,
saturation_factor
)
image
=
tf
.
clip_by_value
(
image
,
clip_value_min
=
0.0
,
clip_value_max
=
1.0
)
return
image
def
random_distort_color
(
image
,
color_ordering
=
0
):
def
random_distort_color
(
image
,
color_ordering
=
0
,
preprocess_vars_cache
=
None
):
"""Randomly distorts color.
Randomly distorts color using a combination of brightness, hue, contrast
...
...
@@ -758,6 +997,10 @@ def random_distort_color(image, color_ordering=0):
image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
with pixel values varying between [0, 1].
color_ordering: Python int, a type of distortion (valid values: 0, 1).
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
image: image which is the same shape as input image.
...
...
@@ -767,20 +1010,34 @@ def random_distort_color(image, color_ordering=0):
"""
with
tf
.
name_scope
(
'RandomDistortColor'
,
values
=
[
image
]):
if
color_ordering
==
0
:
image
=
tf
.
image
.
random_brightness
(
image
,
max_delta
=
32.
/
255.
)
image
=
tf
.
image
.
random_saturation
(
image
,
lower
=
0.5
,
upper
=
1.5
)
image
=
tf
.
image
.
random_hue
(
image
,
max_delta
=
0.2
)
image
=
tf
.
image
.
random_contrast
(
image
,
lower
=
0.5
,
upper
=
1.5
)
image
=
random_adjust_brightness
(
image
,
max_delta
=
32.
/
255.
,
preprocess_vars_cache
=
preprocess_vars_cache
)
image
=
random_adjust_saturation
(
image
,
min_delta
=
0.5
,
max_delta
=
1.5
,
preprocess_vars_cache
=
preprocess_vars_cache
)
image
=
random_adjust_hue
(
image
,
max_delta
=
0.2
,
preprocess_vars_cache
=
preprocess_vars_cache
)
image
=
random_adjust_contrast
(
image
,
min_delta
=
0.5
,
max_delta
=
1.5
,
preprocess_vars_cache
=
preprocess_vars_cache
)
elif
color_ordering
==
1
:
image
=
tf
.
image
.
random_brightness
(
image
,
max_delta
=
32.
/
255.
)
image
=
tf
.
image
.
random_contrast
(
image
,
lower
=
0.5
,
upper
=
1.5
)
image
=
tf
.
image
.
random_saturation
(
image
,
lower
=
0.5
,
upper
=
1.5
)
image
=
tf
.
image
.
random_hue
(
image
,
max_delta
=
0.2
)
image
=
random_adjust_brightness
(
image
,
max_delta
=
32.
/
255.
,
preprocess_vars_cache
=
preprocess_vars_cache
)
image
=
random_adjust_contrast
(
image
,
min_delta
=
0.5
,
max_delta
=
1.5
,
preprocess_vars_cache
=
preprocess_vars_cache
)
image
=
random_adjust_saturation
(
image
,
min_delta
=
0.5
,
max_delta
=
1.5
,
preprocess_vars_cache
=
preprocess_vars_cache
)
image
=
random_adjust_hue
(
image
,
max_delta
=
0.2
,
preprocess_vars_cache
=
preprocess_vars_cache
)
else
:
raise
ValueError
(
'color_ordering must be in {0, 1}'
)
# The random_* ops do not necessarily clamp.
image
=
tf
.
clip_by_value
(
image
,
0.0
,
1.0
)
return
image
...
...
@@ -845,7 +1102,8 @@ def _strict_random_crop_image(image,
min_object_covered
=
1.0
,
aspect_ratio_range
=
(
0.75
,
1.33
),
area_range
=
(
0.1
,
1.0
),
overlap_thresh
=
0.3
):
overlap_thresh
=
0.3
,
preprocess_vars_cache
=
None
):
"""Performs random crop.
Note: boxes will be clipped to the crop. Keypoint coordinates that are
...
...
@@ -878,6 +1136,10 @@ def _strict_random_crop_image(image,
original image.
overlap_thresh: minimum overlap thresh with new cropped
image to keep the box.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
image: image which is the same rank as input image.
...
...
@@ -900,7 +1162,8 @@ def _strict_random_crop_image(image,
tf
.
clip_by_value
(
boxes
,
clip_value_min
=
0.0
,
clip_value_max
=
1.0
),
1
)
sample_distorted_bounding_box
=
tf
.
image
.
sample_distorted_bounding_box
(
generator_func
=
functools
.
partial
(
tf
.
image
.
sample_distorted_bounding_box
,
image_shape
,
bounding_boxes
=
boxes_expanded
,
min_object_covered
=
min_object_covered
,
...
...
@@ -909,6 +1172,13 @@ def _strict_random_crop_image(image,
max_attempts
=
100
,
use_image_if_no_bounding_boxes
=
True
)
# for ssd cropping, each value of min_object_covered has its own
# cached random variable
sample_distorted_bounding_box
=
_get_or_create_preprocess_rand_vars
(
generator_func
,
preprocessor_cache
.
PreprocessorCache
.
STRICT_CROP_IMAGE
,
preprocess_vars_cache
,
key
=
min_object_covered
)
im_box_begin
,
im_box_size
,
im_box
=
sample_distorted_bounding_box
new_image
=
tf
.
slice
(
image
,
im_box_begin
,
im_box_size
)
...
...
@@ -984,7 +1254,8 @@ def random_crop_image(image,
area_range
=
(
0.1
,
1.0
),
overlap_thresh
=
0.3
,
random_coef
=
0.0
,
seed
=
None
):
seed
=
None
,
preprocess_vars_cache
=
None
):
"""Randomly crops the image.
Given the input image and its bounding boxes, this op randomly
...
...
@@ -1029,6 +1300,10 @@ def random_crop_image(image,
cropped image, and if it is 1.0, we will always get the
original image.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
image: Image shape will be [new_height, new_width, channels].
...
...
@@ -1056,13 +1331,17 @@ def random_crop_image(image,
min_object_covered
=
min_object_covered
,
aspect_ratio_range
=
aspect_ratio_range
,
area_range
=
area_range
,
overlap_thresh
=
overlap_thresh
)
overlap_thresh
=
overlap_thresh
,
preprocess_vars_cache
=
preprocess_vars_cache
)
# avoids tf.cond to make faster RCNN training on borg. See b/140057645.
if
random_coef
<
sys
.
float_info
.
min
:
result
=
strict_random_crop_image_fn
()
else
:
do_a_crop_random
=
tf
.
random_uniform
([],
seed
=
seed
)
generator_func
=
functools
.
partial
(
tf
.
random_uniform
,
[],
seed
=
seed
)
do_a_crop_random
=
_get_or_create_preprocess_rand_vars
(
generator_func
,
preprocessor_cache
.
PreprocessorCache
.
CROP_IMAGE
,
preprocess_vars_cache
)
do_a_crop_random
=
tf
.
greater
(
do_a_crop_random
,
random_coef
)
outputs
=
[
image
,
boxes
,
labels
]
...
...
@@ -1084,7 +1363,8 @@ def random_pad_image(image,
min_image_size
=
None
,
max_image_size
=
None
,
pad_color
=
None
,
seed
=
None
):
seed
=
None
,
preprocess_vars_cache
=
None
):
"""Randomly pads the image.
This function randomly pads the image with zeros. The final size of the
...
...
@@ -1110,8 +1390,11 @@ def random_pad_image(image,
pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32.
if set as None, it will be set to average color of the input
image.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
image: Image shape will be [new_height, new_width, channels].
...
...
@@ -1155,6 +1438,12 @@ def random_pad_image(image,
lambda
:
_random_integer
(
0
,
target_width
-
image_width
,
seed
),
lambda
:
tf
.
constant
(
0
,
dtype
=
tf
.
int32
))
gen_func
=
lambda
:
(
target_height
,
target_width
,
offset_height
,
offset_width
)
params
=
_get_or_create_preprocess_rand_vars
(
gen_func
,
preprocessor_cache
.
PreprocessorCache
.
PAD_IMAGE
,
preprocess_vars_cache
)
target_height
,
target_width
,
offset_height
,
offset_width
=
params
new_image
=
tf
.
image
.
pad_to_bounding_box
(
image
,
offset_height
=
offset_height
,
...
...
@@ -1200,7 +1489,8 @@ def random_crop_pad_image(image,
min_padded_size_ratio
=
(
1.0
,
1.0
),
max_padded_size_ratio
=
(
2.0
,
2.0
),
pad_color
=
None
,
seed
=
None
):
seed
=
None
,
preprocess_vars_cache
=
None
):
"""Randomly crops and pads the image.
Given an input image and its bounding boxes, this op first randomly crops
...
...
@@ -1241,6 +1531,10 @@ def random_crop_pad_image(image,
if set as None, it will be set to average color of the randomly
cropped image.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
padded_image: padded image.
...
...
@@ -1263,7 +1557,8 @@ def random_crop_pad_image(image,
area_range
=
area_range
,
overlap_thresh
=
overlap_thresh
,
random_coef
=
random_coef
,
seed
=
seed
)
seed
=
seed
,
preprocess_vars_cache
=
preprocess_vars_cache
)
cropped_image
,
cropped_boxes
,
cropped_labels
=
result
[:
3
]
...
...
@@ -1280,7 +1575,8 @@ def random_crop_pad_image(image,
min_image_size
=
min_image_size
,
max_image_size
=
max_image_size
,
pad_color
=
pad_color
,
seed
=
seed
)
seed
=
seed
,
preprocess_vars_cache
=
preprocess_vars_cache
)
cropped_padded_output
=
(
padded_image
,
padded_boxes
,
cropped_labels
)
...
...
@@ -1299,7 +1595,8 @@ def random_crop_to_aspect_ratio(image,
keypoints
=
None
,
aspect_ratio
=
1.0
,
overlap_thresh
=
0.3
,
seed
=
None
):
seed
=
None
,
preprocess_vars_cache
=
None
):
"""Randomly crops an image to the specified aspect ratio.
Randomly crops the a portion of the image such that the crop is of the
...
...
@@ -1331,6 +1628,10 @@ def random_crop_to_aspect_ratio(image,
overlap_thresh: minimum overlap thresh with new cropped
image to keep the box.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
image: image which is the same rank as input image.
...
...
@@ -1374,6 +1675,13 @@ def random_crop_to_aspect_ratio(image,
# offset_height is randomly chosen from [0, offset_height - target_height)
offset_height
=
_random_integer
(
0
,
orig_height
-
target_height
+
1
,
seed
)
offset_width
=
_random_integer
(
0
,
orig_width
-
target_width
+
1
,
seed
)
generator_func
=
lambda
:
(
offset_height
,
offset_width
)
offset_height
,
offset_width
=
_get_or_create_preprocess_rand_vars
(
generator_func
,
preprocessor_cache
.
PreprocessorCache
.
CROP_TO_ASPECT_RATIO
,
preprocess_vars_cache
)
new_image
=
tf
.
image
.
crop_to_bounding_box
(
image
,
offset_height
,
offset_width
,
target_height
,
target_width
)
...
...
@@ -1436,7 +1744,8 @@ def random_pad_to_aspect_ratio(image,
aspect_ratio
=
1.0
,
min_padded_size_ratio
=
(
1.0
,
1.0
),
max_padded_size_ratio
=
(
2.0
,
2.0
),
seed
=
None
):
seed
=
None
,
preprocess_vars_cache
=
None
):
"""Randomly zero pads an image to the specified aspect ratio.
Pads the image so that the resulting image will have the specified aspect
...
...
@@ -1464,6 +1773,10 @@ def random_pad_to_aspect_ratio(image,
max_padded_size_ratio: max ratio of padded image height and width to the
input image's height and width.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
image: image which is the same rank as input image.
...
...
@@ -1510,7 +1823,13 @@ def random_pad_to_aspect_ratio(image,
min_scale
=
tf
.
maximum
(
min_height
/
target_height
,
min_width
/
target_width
)
max_scale
=
tf
.
minimum
(
max_height
/
target_height
,
max_width
/
target_width
)
scale
=
tf
.
random_uniform
([],
min_scale
,
max_scale
,
seed
=
seed
)
generator_func
=
functools
.
partial
(
tf
.
random_uniform
,
[],
min_scale
,
max_scale
,
seed
=
seed
)
scale
=
_get_or_create_preprocess_rand_vars
(
generator_func
,
preprocessor_cache
.
PreprocessorCache
.
PAD_TO_ASPECT_RATIO
,
preprocess_vars_cache
)
target_height
=
scale
*
target_height
target_width
=
scale
*
target_width
...
...
@@ -1549,7 +1868,8 @@ def random_black_patches(image,
max_black_patches
=
10
,
probability
=
0.5
,
size_to_image_ratio
=
0.1
,
random_seed
=
None
):
random_seed
=
None
,
preprocess_vars_cache
=
None
):
"""Randomly adds some black patches to the image.
This op adds up to max_black_patches square black patches of a fixed size
...
...
@@ -1566,15 +1886,20 @@ def random_black_patches(image,
box_size = size_to_image_ratio *
min(image_width, image_height)
random_seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
image
"""
def
add_black_patch_to_image
(
image
):
def
add_black_patch_to_image
(
image
,
idx
):
"""Function for adding one patch to the image.
Args:
image: image
idx: counter for number of patches that could have been added
Returns:
image with a randomly added black box
...
...
@@ -1586,10 +1911,19 @@ def random_black_patches(image,
tf
.
multiply
(
tf
.
minimum
(
tf
.
to_float
(
image_height
),
tf
.
to_float
(
image_width
)),
size_to_image_ratio
))
normalized_y_min
=
tf
.
random_uniform
(
[],
minval
=
0.0
,
maxval
=
(
1.0
-
size_to_image_ratio
),
seed
=
random_seed
)
normalized_x_min
=
tf
.
random_uniform
(
[],
minval
=
0.0
,
maxval
=
(
1.0
-
size_to_image_ratio
),
seed
=
random_seed
)
generator_func
=
functools
.
partial
(
tf
.
random_uniform
,
[],
minval
=
0.0
,
maxval
=
(
1.0
-
size_to_image_ratio
),
seed
=
random_seed
)
normalized_y_min
=
_get_or_create_preprocess_rand_vars
(
generator_func
,
preprocessor_cache
.
PreprocessorCache
.
ADD_BLACK_PATCH
,
preprocess_vars_cache
,
key
=
str
(
idx
)
+
'y'
)
normalized_x_min
=
_get_or_create_preprocess_rand_vars
(
generator_func
,
preprocessor_cache
.
PreprocessorCache
.
ADD_BLACK_PATCH
,
preprocess_vars_cache
,
key
=
str
(
idx
)
+
'x'
)
y_min
=
tf
.
to_int32
(
normalized_y_min
*
tf
.
to_float
(
image_height
))
x_min
=
tf
.
to_int32
(
normalized_x_min
*
tf
.
to_float
(
image_width
))
black_box
=
tf
.
ones
([
box_size
,
box_size
,
3
],
dtype
=
tf
.
float32
)
...
...
@@ -1599,13 +1933,17 @@ def random_black_patches(image,
return
image
with
tf
.
name_scope
(
'RandomBlackPatchInImage'
,
values
=
[
image
]):
for
_
in
range
(
max_black_patches
):
random_prob
=
tf
.
random_uniform
(
[],
minval
=
0.0
,
maxval
=
1.0
,
dtype
=
tf
.
float32
,
seed
=
random_seed
)
for
idx
in
range
(
max_black_patches
):
generator_func
=
functools
.
partial
(
tf
.
random_uniform
,
[],
minval
=
0.0
,
maxval
=
1.0
,
dtype
=
tf
.
float32
,
seed
=
random_seed
)
random_prob
=
_get_or_create_preprocess_rand_vars
(
generator_func
,
preprocessor_cache
.
PreprocessorCache
.
BLACK_PATCHES
,
preprocess_vars_cache
,
key
=
idx
)
image
=
tf
.
cond
(
tf
.
greater
(
random_prob
,
probability
),
lambda
:
image
,
lambda
:
add_black_patch_to_image
(
image
))
functools
.
partial
(
add_black_patch_to_image
,
image
=
image
,
idx
=
idx
))
return
image
...
...
@@ -1623,12 +1961,16 @@ def image_to_float(image):
return
image
def
random_resize_method
(
image
,
target_size
):
def
random_resize_method
(
image
,
target_size
,
preprocess_vars_cache
=
None
):
"""Uses a random resize method to resize the image to target size.
Args:
image: a rank 3 tensor.
target_size: a list of [target_height, target_width]
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
resized image.
...
...
@@ -1637,7 +1979,9 @@ def random_resize_method(image, target_size):
resized_image
=
_apply_with_random_selector
(
image
,
lambda
x
,
method
:
tf
.
image
.
resize_images
(
x
,
target_size
,
method
),
num_cases
=
4
)
num_cases
=
4
,
preprocess_vars_cache
=
preprocess_vars_cache
,
key
=
preprocessor_cache
.
PreprocessorCache
.
RESIZE_METHOD
)
return
resized_image
...
...
@@ -1647,6 +1991,7 @@ def _compute_new_static_size(image, min_dimension, max_dimension):
image_shape
=
image
.
get_shape
().
as_list
()
orig_height
=
image_shape
[
0
]
orig_width
=
image_shape
[
1
]
num_channels
=
image_shape
[
2
]
orig_min_dim
=
min
(
orig_height
,
orig_width
)
# Calculates the larger of the possible sizes
large_scale_factor
=
min_dimension
/
float
(
orig_min_dim
)
...
...
@@ -1674,7 +2019,7 @@ def _compute_new_static_size(image, min_dimension, max_dimension):
new_size
=
small_size
else
:
new_size
=
large_size
return
tf
.
constant
(
new_size
)
return
tf
.
constant
(
new_size
+
[
num_channels
]
)
def
_compute_new_dynamic_size
(
image
,
min_dimension
,
max_dimension
):
...
...
@@ -1682,6 +2027,7 @@ def _compute_new_dynamic_size(image, min_dimension, max_dimension):
image_shape
=
tf
.
shape
(
image
)
orig_height
=
tf
.
to_float
(
image_shape
[
0
])
orig_width
=
tf
.
to_float
(
image_shape
[
1
])
num_channels
=
image_shape
[
2
]
orig_min_dim
=
tf
.
minimum
(
orig_height
,
orig_width
)
# Calculates the larger of the possible sizes
min_dimension
=
tf
.
constant
(
min_dimension
,
dtype
=
tf
.
float32
)
...
...
@@ -1711,7 +2057,7 @@ def _compute_new_dynamic_size(image, min_dimension, max_dimension):
lambda
:
small_size
,
lambda
:
large_size
)
else
:
new_size
=
large_size
return
new_size
return
tf
.
stack
(
tf
.
unstack
(
new_size
)
+
[
num_channels
])
def
resize_to_range
(
image
,
...
...
@@ -1719,7 +2065,8 @@ def resize_to_range(image,
min_dimension
=
None
,
max_dimension
=
None
,
method
=
tf
.
image
.
ResizeMethod
.
BILINEAR
,
align_corners
=
False
):
align_corners
=
False
,
pad_to_max_dimension
=
False
):
"""Resizes an image so its dimensions are within the provided value.
The output size can be described by two cases:
...
...
@@ -1740,15 +2087,22 @@ def resize_to_range(image,
BILINEAR.
align_corners: bool. If true, exactly align all 4 corners of the input
and output. Defaults to False.
pad_to_max_dimension: Whether to resize the image and pad it with zeros
so the resulting image is of the spatial size
[max_dimension, max_dimension]. If masks are included they are padded
similarly.
Returns:
A 3D tensor of shape [new_height, new_width, channels],
where the image has been resized (with bilinear interpolation) so that
min(new_height, new_width) == min_dimension or
max(new_height, new_width) == max_dimension.
If masks is not None, also outputs masks:
A 3D tensor of shape [num_instances, new_height, new_width]
Note that the position of the resized_image_shape changes based on whether
masks are present.
resized_image: A 3D tensor of shape [new_height, new_width, channels],
where the image has been resized (with bilinear interpolation) so that
min(new_height, new_width) == min_dimension or
max(new_height, new_width) == max_dimension.
resized_masks: If masks is not None, also outputs masks. A 3D tensor of
shape [num_instances, new_height, new_width].
resized_image_shape: A 1D tensor of shape [3] containing shape of the
resized image.
Raises:
ValueError: if the image is not a 3D tensor.
...
...
@@ -1762,16 +2116,27 @@ def resize_to_range(image,
else
:
new_size
=
_compute_new_dynamic_size
(
image
,
min_dimension
,
max_dimension
)
new_image
=
tf
.
image
.
resize_images
(
image
,
new_size
,
method
=
method
,
align_corners
=
align_corners
)
image
,
new_size
[:
-
1
]
,
method
=
method
,
align_corners
=
align_corners
)
result
=
new_image
if
pad_to_max_dimension
:
new_image
=
tf
.
image
.
pad_to_bounding_box
(
new_image
,
0
,
0
,
max_dimension
,
max_dimension
)
result
=
[
new_image
]
if
masks
is
not
None
:
new_masks
=
tf
.
expand_dims
(
masks
,
3
)
new_masks
=
tf
.
image
.
resize_nearest_neighbor
(
new_masks
,
new_size
,
align_corners
=
align_corners
)
new_masks
=
tf
.
image
.
resize_images
(
new_masks
,
new_size
[:
-
1
],
method
=
tf
.
image
.
ResizeMethod
.
NEAREST_NEIGHBOR
,
align_corners
=
align_corners
)
new_masks
=
tf
.
squeeze
(
new_masks
,
3
)
result
=
[
new_image
,
new_masks
]
if
pad_to_max_dimension
:
new_masks
=
tf
.
image
.
pad_to_bounding_box
(
new_masks
,
0
,
0
,
max_dimension
,
max_dimension
)
result
.
append
(
new_masks
)
result
.
append
(
new_size
)
return
result
...
...
@@ -1789,10 +2154,13 @@ def resize_to_min_dimension(image, masks=None, min_dimension=600):
min_dimension: minimum image dimension.
Returns:
a tuple containing the following:
Resized image. A tensor of size [new_height, new_width, channels].
(optional) Resized masks. A tensor of
size [num_instances, new_height, new_width].
Note that the position of the resized_image_shape changes based on whether
masks are present.
resized_image: A tensor of size [new_height, new_width, channels].
resized_masks: If masks is not None, also outputs masks. A 3D tensor of
shape [num_instances, new_height, new_width]
resized_image_shape: A 1D tensor of shape [3] containing the shape of the
resized image.
Raises:
ValueError: if the image is not a 3D tensor.
...
...
@@ -1803,6 +2171,7 @@ def resize_to_min_dimension(image, masks=None, min_dimension=600):
with
tf
.
name_scope
(
'ResizeGivenMinDimension'
,
values
=
[
image
,
min_dimension
]):
image_height
=
tf
.
shape
(
image
)[
0
]
image_width
=
tf
.
shape
(
image
)[
1
]
num_channels
=
tf
.
shape
(
image
)[
2
]
min_image_dimension
=
tf
.
minimum
(
image_height
,
image_width
)
min_target_dimension
=
tf
.
maximum
(
min_image_dimension
,
min_dimension
)
target_ratio
=
tf
.
to_float
(
min_target_dimension
)
/
tf
.
to_float
(
...
...
@@ -1813,13 +2182,16 @@ def resize_to_min_dimension(image, masks=None, min_dimension=600):
tf
.
expand_dims
(
image
,
axis
=
0
),
size
=
[
target_height
,
target_width
],
align_corners
=
True
)
result
=
tf
.
squeeze
(
image
,
axis
=
0
)
result
=
[
tf
.
squeeze
(
image
,
axis
=
0
)]
if
masks
is
not
None
:
masks
=
tf
.
image
.
resize_nearest_neighbor
(
tf
.
expand_dims
(
masks
,
axis
=
3
),
size
=
[
target_height
,
target_width
],
align_corners
=
True
)
result
=
(
result
,
tf
.
squeeze
(
masks
,
axis
=
3
))
result
.
append
(
tf
.
squeeze
(
masks
,
axis
=
3
))
result
.
append
(
tf
.
stack
([
target_height
,
target_width
,
num_channels
]))
return
result
...
...
@@ -1854,6 +2226,8 @@ def scale_boxes_to_pixel_coordinates(image, boxes, keypoints=None):
return
tuple
(
result
)
# TODO: Investigate if instead the function should return None if
# masks is None.
# pylint: disable=g-doc-return-or-yield
def
resize_image
(
image
,
masks
=
None
,
...
...
@@ -1861,7 +2235,28 @@ def resize_image(image,
new_width
=
1024
,
method
=
tf
.
image
.
ResizeMethod
.
BILINEAR
,
align_corners
=
False
):
"""See `tf.image.resize_images` for detailed doc."""
"""Resizes images to the given height and width.
Args:
image: A 3D tensor of shape [height, width, channels]
masks: (optional) rank 3 float32 tensor with shape
[num_instances, height, width] containing instance masks.
new_height: (optional) (scalar) desired height of the image.
new_width: (optional) (scalar) desired width of the image.
method: (optional) interpolation method used in resizing. Defaults to
BILINEAR.
align_corners: bool. If true, exactly align all 4 corners of the input
and output. Defaults to False.
Returns:
Note that the position of the resized_image_shape changes based on whether
masks are present.
resized_image: A tensor of size [new_height, new_width, channels].
resized_masks: If masks is not None, also outputs masks. A 3D tensor of
shape [num_instances, new_height, new_width]
resized_image_shape: A 1D tensor of shape [3] containing the shape of the
resized image.
"""
with
tf
.
name_scope
(
'ResizeImage'
,
values
=
[
image
,
new_height
,
new_width
,
method
,
align_corners
]):
...
...
@@ -1869,7 +2264,8 @@ def resize_image(image,
image
,
[
new_height
,
new_width
],
method
=
method
,
align_corners
=
align_corners
)
result
=
new_image
image_shape
=
shape_utils
.
combined_static_and_dynamic_shape
(
image
)
result
=
[
new_image
]
if
masks
is
not
None
:
num_instances
=
tf
.
shape
(
masks
)[
0
]
new_size
=
tf
.
constant
([
new_height
,
new_width
],
dtype
=
tf
.
int32
)
...
...
@@ -1886,8 +2282,9 @@ def resize_image(image,
masks
=
tf
.
cond
(
num_instances
>
0
,
resize_masks_branch
,
reshape_masks_branch
)
result
=
[
new_image
,
masks
]
result
.
append
(
masks
)
result
.
append
(
tf
.
stack
([
new_height
,
new_width
,
image_shape
[
2
]]))
return
result
...
...
@@ -1946,7 +2343,7 @@ def rgb_to_gray(image):
Returns:
image: A single channel grayscale image -> [image, height, 1].
"""
return
tf
.
image
.
rgb_to_grayscale
(
image
)
return
_
rgb_to_grayscale
(
image
)
def
ssd_random_crop
(
image
,
...
...
@@ -1960,7 +2357,8 @@ def ssd_random_crop(image,
area_range
=
((
0.1
,
1.0
),)
*
7
,
overlap_thresh
=
(
0.0
,
0.1
,
0.3
,
0.5
,
0.7
,
0.9
,
1.0
),
random_coef
=
(
0.15
,)
*
7
,
seed
=
None
):
seed
=
None
,
preprocess_vars_cache
=
None
):
"""Random crop preprocessing with default parameters as in SSD paper.
Liu et al., SSD: Single shot multibox detector.
...
...
@@ -1994,6 +2392,10 @@ def ssd_random_crop(image,
cropped image, and if it is 1.0, we will always get the
original image.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
image: image which is the same rank as input image.
...
...
@@ -2046,14 +2448,17 @@ def ssd_random_crop(image,
area_range
=
area_range
[
index
],
overlap_thresh
=
overlap_thresh
[
index
],
random_coef
=
random_coef
[
index
],
seed
=
seed
)
seed
=
seed
,
preprocess_vars_cache
=
preprocess_vars_cache
)
result
=
_apply_with_random_selector_tuples
(
tuple
(
t
for
t
in
(
image
,
boxes
,
labels
,
label_scores
,
masks
,
keypoints
)
if
t
is
not
None
),
random_crop_selector
,
num_cases
=
len
(
min_object_covered
))
num_cases
=
len
(
min_object_covered
),
preprocess_vars_cache
=
preprocess_vars_cache
,
key
=
preprocessor_cache
.
PreprocessorCache
.
SSD_CROP_SELECTOR_ID
)
return
result
...
...
@@ -2069,7 +2474,8 @@ def ssd_random_crop_pad(image,
min_padded_size_ratio
=
((
1.0
,
1.0
),)
*
6
,
max_padded_size_ratio
=
((
2.0
,
2.0
),)
*
6
,
pad_color
=
(
None
,)
*
6
,
seed
=
None
):
seed
=
None
,
preprocess_vars_cache
=
None
):
"""Random crop preprocessing with default parameters as in SSD paper.
Liu et al., SSD: Single shot multibox detector.
...
...
@@ -2105,6 +2511,10 @@ def ssd_random_crop_pad(image,
if set as None, it will be set to average color of the randomly
cropped image.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
image: Image shape will be [new_height, new_width, channels].
...
...
@@ -2134,12 +2544,15 @@ def ssd_random_crop_pad(image,
min_padded_size_ratio
=
min_padded_size_ratio
[
index
],
max_padded_size_ratio
=
max_padded_size_ratio
[
index
],
pad_color
=
pad_color
[
index
],
seed
=
seed
)
seed
=
seed
,
preprocess_vars_cache
=
preprocess_vars_cache
)
return
_apply_with_random_selector_tuples
(
tuple
(
t
for
t
in
(
image
,
boxes
,
labels
,
label_scores
)
if
t
is
not
None
),
random_crop_pad_selector
,
num_cases
=
len
(
min_object_covered
))
num_cases
=
len
(
min_object_covered
),
preprocess_vars_cache
=
preprocess_vars_cache
,
key
=
preprocessor_cache
.
PreprocessorCache
.
SSD_CROP_PAD_SELECTOR_ID
)
def
ssd_random_crop_fixed_aspect_ratio
(
...
...
@@ -2154,7 +2567,8 @@ def ssd_random_crop_fixed_aspect_ratio(
area_range
=
((
0.1
,
1.0
),)
*
7
,
overlap_thresh
=
(
0.0
,
0.1
,
0.3
,
0.5
,
0.7
,
0.9
,
1.0
),
random_coef
=
(
0.15
,)
*
7
,
seed
=
None
):
seed
=
None
,
preprocess_vars_cache
=
None
):
"""Random crop preprocessing with default parameters as in SSD paper.
Liu et al., SSD: Single shot multibox detector.
...
...
@@ -2191,6 +2605,10 @@ def ssd_random_crop_fixed_aspect_ratio(
cropped image, and if it is 1.0, we will always get the
original image.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
image: image which is the same rank as input image.
...
...
@@ -2209,7 +2627,8 @@ def ssd_random_crop_fixed_aspect_ratio(
crop_result
=
ssd_random_crop
(
image
,
boxes
,
labels
,
label_scores
,
masks
,
keypoints
,
min_object_covered
,
aspect_ratio_range
,
area_range
,
overlap_thresh
,
random_coef
,
seed
)
aspect_ratio_range
,
area_range
,
overlap_thresh
,
random_coef
,
seed
,
preprocess_vars_cache
)
i
=
3
new_image
,
new_boxes
,
new_labels
=
crop_result
[:
i
]
new_label_scores
=
None
...
...
@@ -2231,7 +2650,8 @@ def ssd_random_crop_fixed_aspect_ratio(
new_masks
,
new_keypoints
,
aspect_ratio
=
aspect_ratio
,
seed
=
seed
)
seed
=
seed
,
preprocess_vars_cache
=
preprocess_vars_cache
)
return
result
...
...
@@ -2251,7 +2671,8 @@ def ssd_random_crop_pad_fixed_aspect_ratio(
random_coef
=
(
0.15
,)
*
7
,
min_padded_size_ratio
=
(
1.0
,
1.0
),
max_padded_size_ratio
=
(
2.0
,
2.0
),
seed
=
None
):
seed
=
None
,
preprocess_vars_cache
=
None
):
"""Random crop and pad preprocessing with default parameters as in SSD paper.
Liu et al., SSD: Single shot multibox detector.
...
...
@@ -2294,6 +2715,10 @@ def ssd_random_crop_pad_fixed_aspect_ratio(
max_padded_size_ratio: max ratio of padded image height and width to the
input image's height and width.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
image: image which is the same rank as input image.
...
...
@@ -2310,7 +2735,8 @@ def ssd_random_crop_pad_fixed_aspect_ratio(
"""
crop_result
=
ssd_random_crop
(
image
,
boxes
,
labels
,
label_scores
,
masks
,
keypoints
,
min_object_covered
,
aspect_ratio_range
,
area_range
,
overlap_thresh
,
random_coef
,
seed
)
aspect_ratio_range
,
area_range
,
overlap_thresh
,
random_coef
,
seed
,
preprocess_vars_cache
)
i
=
3
new_image
,
new_boxes
,
new_labels
=
crop_result
[:
i
]
new_label_scores
=
None
...
...
@@ -2332,7 +2758,8 @@ def ssd_random_crop_pad_fixed_aspect_ratio(
aspect_ratio
=
aspect_ratio
,
min_padded_size_ratio
=
min_padded_size_ratio
,
max_padded_size_ratio
=
max_padded_size_ratio
,
seed
=
seed
)
seed
=
seed
,
preprocess_vars_cache
=
preprocess_vars_cache
)
result
=
list
(
result
)
if
new_label_scores
is
not
None
:
...
...
@@ -2480,7 +2907,10 @@ def get_default_func_arg_map(include_label_scores=False,
return
prep_func_arg_map
def
preprocess
(
tensor_dict
,
preprocess_options
,
func_arg_map
=
None
):
def
preprocess
(
tensor_dict
,
preprocess_options
,
func_arg_map
=
None
,
preprocess_vars_cache
=
None
):
"""Preprocess images and bounding boxes.
Various types of preprocessing (to be implemented) based on the
...
...
@@ -2505,6 +2935,10 @@ def preprocess(tensor_dict, preprocess_options, func_arg_map=None):
their values.
func_arg_map: mapping from preprocessing functions to arguments that they
expect to receive and return.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
tensor_dict: which contains the preprocessed images, bounding boxes, etc.
...
...
@@ -2544,6 +2978,9 @@ def preprocess(tensor_dict, preprocess_options, func_arg_map=None):
return
tensor_dict
[
key
]
if
key
is
not
None
else
None
args
=
[
get_arg
(
a
)
for
a
in
arg_names
]
if
(
preprocess_vars_cache
is
not
None
and
'preprocess_vars_cache'
in
inspect
.
getargspec
(
func
).
args
):
params
[
'preprocess_vars_cache'
]
=
preprocess_vars_cache
results
=
func
(
*
args
,
**
params
)
if
not
isinstance
(
results
,
(
list
,
tuple
)):
results
=
(
results
,)
...
...
research/object_detection/core/preprocessor_cache.py
0 → 100644
View file @
fd7b6887
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Records previous preprocessing operations and allows them to be repeated.
Used with object_detection.core.preprocessor. Passing a PreprocessorCache
into individual data augmentation functions or the general preprocess() function
will store all randomly generated variables in the PreprocessorCache. When
a preprocessor function is called multiple times with the same
PreprocessorCache object, that function will perform the same augmentation
on all calls.
"""
from
collections
import
defaultdict
class
PreprocessorCache
(
object
):
"""Dictionary wrapper storing random variables generated during preprocessing.
"""
# Constant keys representing different preprocessing functions
ROTATION90
=
'rotation90'
HORIZONTAL_FLIP
=
'horizontal_flip'
VERTICAL_FLIP
=
'vertical_flip'
PIXEL_VALUE_SCALE
=
'pixel_value_scale'
IMAGE_SCALE
=
'image_scale'
RGB_TO_GRAY
=
'rgb_to_gray'
ADJUST_BRIGHTNESS
=
'adjust_brightness'
ADJUST_CONTRAST
=
'adjust_contrast'
ADJUST_HUE
=
'adjust_hue'
ADJUST_SATURATION
=
'adjust_saturation'
DISTORT_COLOR
=
'distort_color'
STRICT_CROP_IMAGE
=
'strict_crop_image'
CROP_IMAGE
=
'crop_image'
PAD_IMAGE
=
'pad_image'
CROP_TO_ASPECT_RATIO
=
'crop_to_aspect_ratio'
RESIZE_METHOD
=
'resize_method'
PAD_TO_ASPECT_RATIO
=
'pad_to_aspect_ratio'
BLACK_PATCHES
=
'black_patches'
ADD_BLACK_PATCH
=
'add_black_patch'
SELECTOR
=
'selector'
SELECTOR_TUPLES
=
'selector_tuples'
SSD_CROP_SELECTOR_ID
=
'ssd_crop_selector_id'
SSD_CROP_PAD_SELECTOR_ID
=
'ssd_crop_pad_selector_id'
# 23 permitted function ids
_VALID_FNS
=
[
ROTATION90
,
HORIZONTAL_FLIP
,
VERTICAL_FLIP
,
PIXEL_VALUE_SCALE
,
IMAGE_SCALE
,
RGB_TO_GRAY
,
ADJUST_BRIGHTNESS
,
ADJUST_CONTRAST
,
ADJUST_HUE
,
ADJUST_SATURATION
,
DISTORT_COLOR
,
STRICT_CROP_IMAGE
,
CROP_IMAGE
,
PAD_IMAGE
,
CROP_TO_ASPECT_RATIO
,
RESIZE_METHOD
,
PAD_TO_ASPECT_RATIO
,
BLACK_PATCHES
,
ADD_BLACK_PATCH
,
SELECTOR
,
SELECTOR_TUPLES
,
SSD_CROP_SELECTOR_ID
,
SSD_CROP_PAD_SELECTOR_ID
]
def
__init__
(
self
):
self
.
_history
=
defaultdict
(
dict
)
def
clear
(
self
):
"""Resets cache."""
self
.
_history
=
{}
def
get
(
self
,
function_id
,
key
):
"""Gets stored value given a function id and key.
Args:
function_id: identifier for the preprocessing function used.
key: identifier for the variable stored.
Returns:
value: the corresponding value, expected to be a tensor or
nested structure of tensors.
Raises:
ValueError: if function_id is not one of the 23 valid function ids.
"""
if
function_id
not
in
self
.
_VALID_FNS
:
raise
ValueError
(
'Function id not recognized: %s.'
%
str
(
function_id
))
return
self
.
_history
[
function_id
].
get
(
key
)
def
update
(
self
,
function_id
,
key
,
value
):
"""Adds a value to the dictionary.
Args:
function_id: identifier for the preprocessing function used.
key: identifier for the variable stored.
value: the value to store, expected to be a tensor or nested structure
of tensors.
Raises:
ValueError: if function_id is not one of the 23 valid function ids.
"""
if
function_id
not
in
self
.
_VALID_FNS
:
raise
ValueError
(
'Function id not recognized: %s.'
%
str
(
function_id
))
self
.
_history
[
function_id
][
key
]
=
value
research/object_detection/core/preprocessor_test.py
View file @
fd7b6887
...
...
@@ -21,6 +21,7 @@ import six
import
tensorflow
as
tf
from
object_detection.core
import
preprocessor
from
object_detection.core
import
preprocessor_cache
from
object_detection.core
import
standard_fields
as
fields
if
six
.
PY2
:
...
...
@@ -290,6 +291,15 @@ class PreprocessorTest(tf.test.TestCase):
def
expectedLabelsAfterThresholdingWithMissingScore
(
self
):
return
tf
.
constant
([
2
],
dtype
=
tf
.
float32
)
def
testRgbToGrayscale
(
self
):
images
=
self
.
createTestImages
()
grayscale_images
=
preprocessor
.
_rgb_to_grayscale
(
images
)
expected_images
=
tf
.
image
.
rgb_to_grayscale
(
images
)
with
self
.
test_session
()
as
sess
:
(
grayscale_images
,
expected_images
)
=
sess
.
run
(
[
grayscale_images
,
expected_images
])
self
.
assertAllEqual
(
expected_images
,
grayscale_images
)
def
testNormalizeImage
(
self
):
preprocess_options
=
[(
preprocessor
.
normalize_image
,
{
'original_minval'
:
0
,
...
...
@@ -435,6 +445,55 @@ class PreprocessorTest(tf.test.TestCase):
rotated_mask
,
expected_mask
=
sess
.
run
([
rotated_mask
,
expected_mask
])
self
.
assertAllEqual
(
rotated_mask
.
flatten
(),
expected_mask
.
flatten
())
def
_testPreprocessorCache
(
self
,
preprocess_options
,
test_boxes
=
False
,
test_masks
=
False
,
test_keypoints
=
False
,
num_runs
=
4
):
cache
=
preprocessor_cache
.
PreprocessorCache
()
images
=
self
.
createTestImages
()
boxes
=
self
.
createTestBoxes
()
classes
=
self
.
createTestLabels
()
masks
=
self
.
createTestMasks
()
keypoints
=
self
.
createTestKeypoints
()
preprocessor_arg_map
=
preprocessor
.
get_default_func_arg_map
(
include_instance_masks
=
test_masks
,
include_keypoints
=
test_keypoints
)
out
=
[]
for
i
in
range
(
num_runs
):
tensor_dict
=
{
fields
.
InputDataFields
.
image
:
images
,
}
num_outputs
=
1
if
test_boxes
:
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_boxes
]
=
boxes
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_classes
]
=
classes
num_outputs
+=
1
if
test_masks
:
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_instance_masks
]
=
masks
num_outputs
+=
1
if
test_keypoints
:
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_keypoints
]
=
keypoints
num_outputs
+=
1
out
.
append
(
preprocessor
.
preprocess
(
tensor_dict
,
preprocess_options
,
preprocessor_arg_map
,
cache
))
with
self
.
test_session
()
as
sess
:
to_run
=
[]
for
i
in
range
(
num_runs
):
to_run
.
append
(
out
[
i
][
fields
.
InputDataFields
.
image
])
if
test_boxes
:
to_run
.
append
(
out
[
i
][
fields
.
InputDataFields
.
groundtruth_boxes
])
if
test_masks
:
to_run
.
append
(
out
[
i
][
fields
.
InputDataFields
.
groundtruth_instance_masks
])
if
test_keypoints
:
to_run
.
append
(
out
[
i
][
fields
.
InputDataFields
.
groundtruth_keypoints
])
out_array
=
sess
.
run
(
to_run
)
for
i
in
range
(
num_outputs
,
len
(
out_array
)):
self
.
assertAllClose
(
out_array
[
i
],
out_array
[
i
-
num_outputs
])
def
testRandomHorizontalFlip
(
self
):
preprocess_options
=
[(
preprocessor
.
random_horizontal_flip
,
{})]
images
=
self
.
expectedImagesAfterNormalization
()
...
...
@@ -491,6 +550,16 @@ class PreprocessorTest(tf.test.TestCase):
self
.
assertAllClose
(
boxes_
,
boxes_expected_
)
self
.
assertAllClose
(
images_diff_
,
images_diff_expected_
)
def
testRandomHorizontalFlipWithCache
(
self
):
keypoint_flip_permutation
=
self
.
createKeypointFlipPermutation
()
preprocess_options
=
[
(
preprocessor
.
random_horizontal_flip
,
{
'keypoint_flip_permutation'
:
keypoint_flip_permutation
})]
self
.
_testPreprocessorCache
(
preprocess_options
,
test_boxes
=
True
,
test_masks
=
True
,
test_keypoints
=
True
)
def
testRunRandomHorizontalFlipWithMaskAndKeypoints
(
self
):
preprocess_options
=
[(
preprocessor
.
random_horizontal_flip
,
{})]
image_height
=
3
...
...
@@ -578,6 +647,16 @@ class PreprocessorTest(tf.test.TestCase):
self
.
assertAllClose
(
boxes_
,
boxes_expected_
)
self
.
assertAllClose
(
images_diff_
,
images_diff_expected_
)
def
testRandomVerticalFlipWithCache
(
self
):
keypoint_flip_permutation
=
self
.
createKeypointFlipPermutation
()
preprocess_options
=
[
(
preprocessor
.
random_vertical_flip
,
{
'keypoint_flip_permutation'
:
keypoint_flip_permutation
})]
self
.
_testPreprocessorCache
(
preprocess_options
,
test_boxes
=
True
,
test_masks
=
True
,
test_keypoints
=
True
)
def
testRunRandomVerticalFlipWithMaskAndKeypoints
(
self
):
preprocess_options
=
[(
preprocessor
.
random_vertical_flip
,
{})]
image_height
=
3
...
...
@@ -665,6 +744,13 @@ class PreprocessorTest(tf.test.TestCase):
self
.
assertAllClose
(
boxes_
,
boxes_expected_
)
self
.
assertAllClose
(
images_diff_
,
images_diff_expected_
)
def
testRandomRotation90WithCache
(
self
):
preprocess_options
=
[(
preprocessor
.
random_rotation90
,
{})]
self
.
_testPreprocessorCache
(
preprocess_options
,
test_boxes
=
True
,
test_masks
=
True
,
test_keypoints
=
True
)
def
testRunRandomRotation90WithMaskAndKeypoints
(
self
):
preprocess_options
=
[(
preprocessor
.
random_rotation90
,
{})]
image_height
=
3
...
...
@@ -716,6 +802,20 @@ class PreprocessorTest(tf.test.TestCase):
self
.
assertAllClose
(
values_greater_
,
values_true_
)
self
.
assertAllClose
(
values_less_
,
values_true_
)
def
testRandomPixelValueScaleWithCache
(
self
):
preprocess_options
=
[]
preprocess_options
.
append
((
preprocessor
.
normalize_image
,
{
'original_minval'
:
0
,
'original_maxval'
:
255
,
'target_minval'
:
0
,
'target_maxval'
:
1
}))
preprocess_options
.
append
((
preprocessor
.
random_pixel_value_scale
,
{}))
self
.
_testPreprocessorCache
(
preprocess_options
,
test_boxes
=
True
,
test_masks
=
False
,
test_keypoints
=
False
)
def
testRandomImageScale
(
self
):
preprocess_options
=
[(
preprocessor
.
random_image_scale
,
{})]
images_original
=
self
.
createTestImages
()
...
...
@@ -736,6 +836,13 @@ class PreprocessorTest(tf.test.TestCase):
self
.
assertTrue
(
images_original_shape_
[
2
]
*
2.0
>=
images_scaled_shape_
[
2
])
def
testRandomImageScaleWithCache
(
self
):
preprocess_options
=
[(
preprocessor
.
random_image_scale
,
{})]
self
.
_testPreprocessorCache
(
preprocess_options
,
test_boxes
=
False
,
test_masks
=
False
,
test_keypoints
=
False
)
def
testRandomRGBtoGray
(
self
):
preprocess_options
=
[(
preprocessor
.
random_rgb_to_gray
,
{})]
images_original
=
self
.
createTestImages
()
...
...
@@ -769,6 +876,14 @@ class PreprocessorTest(tf.test.TestCase):
self
.
assertAllClose
(
images_g_diff_
,
image_zero1_
)
self
.
assertAllClose
(
images_b_diff_
,
image_zero1_
)
def
testRandomRGBtoGrayWithCache
(
self
):
preprocess_options
=
[(
preprocessor
.
random_rgb_to_gray
,
{
'probability'
:
0.5
})]
self
.
_testPreprocessorCache
(
preprocess_options
,
test_boxes
=
False
,
test_masks
=
False
,
test_keypoints
=
False
)
def
testRandomAdjustBrightness
(
self
):
preprocessing_options
=
[]
preprocessing_options
.
append
((
preprocessor
.
normalize_image
,
{
...
...
@@ -789,6 +904,20 @@ class PreprocessorTest(tf.test.TestCase):
[
image_original_shape
,
image_bright_shape
])
self
.
assertAllEqual
(
image_original_shape_
,
image_bright_shape_
)
def
testRandomAdjustBrightnessWithCache
(
self
):
preprocess_options
=
[]
preprocess_options
.
append
((
preprocessor
.
normalize_image
,
{
'original_minval'
:
0
,
'original_maxval'
:
255
,
'target_minval'
:
0
,
'target_maxval'
:
1
}))
preprocess_options
.
append
((
preprocessor
.
random_adjust_brightness
,
{}))
self
.
_testPreprocessorCache
(
preprocess_options
,
test_boxes
=
False
,
test_masks
=
False
,
test_keypoints
=
False
)
def
testRandomAdjustContrast
(
self
):
preprocessing_options
=
[]
preprocessing_options
.
append
((
preprocessor
.
normalize_image
,
{
...
...
@@ -809,6 +938,20 @@ class PreprocessorTest(tf.test.TestCase):
[
image_original_shape
,
image_contrast_shape
])
self
.
assertAllEqual
(
image_original_shape_
,
image_contrast_shape_
)
def
testRandomAdjustContrastWithCache
(
self
):
preprocess_options
=
[]
preprocess_options
.
append
((
preprocessor
.
normalize_image
,
{
'original_minval'
:
0
,
'original_maxval'
:
255
,
'target_minval'
:
0
,
'target_maxval'
:
1
}))
preprocess_options
.
append
((
preprocessor
.
random_adjust_contrast
,
{}))
self
.
_testPreprocessorCache
(
preprocess_options
,
test_boxes
=
False
,
test_masks
=
False
,
test_keypoints
=
False
)
def
testRandomAdjustHue
(
self
):
preprocessing_options
=
[]
preprocessing_options
.
append
((
preprocessor
.
normalize_image
,
{
...
...
@@ -829,6 +972,20 @@ class PreprocessorTest(tf.test.TestCase):
[
image_original_shape
,
image_hue_shape
])
self
.
assertAllEqual
(
image_original_shape_
,
image_hue_shape_
)
def
testRandomAdjustHueWithCache
(
self
):
preprocess_options
=
[]
preprocess_options
.
append
((
preprocessor
.
normalize_image
,
{
'original_minval'
:
0
,
'original_maxval'
:
255
,
'target_minval'
:
0
,
'target_maxval'
:
1
}))
preprocess_options
.
append
((
preprocessor
.
random_adjust_hue
,
{}))
self
.
_testPreprocessorCache
(
preprocess_options
,
test_boxes
=
False
,
test_masks
=
False
,
test_keypoints
=
False
)
def
testRandomDistortColor
(
self
):
preprocessing_options
=
[]
preprocessing_options
.
append
((
preprocessor
.
normalize_image
,
{
...
...
@@ -849,6 +1006,20 @@ class PreprocessorTest(tf.test.TestCase):
[
images_original_shape
,
images_distorted_color_shape
])
self
.
assertAllEqual
(
images_original_shape_
,
images_distorted_color_shape_
)
def
testRandomDistortColorWithCache
(
self
):
preprocess_options
=
[]
preprocess_options
.
append
((
preprocessor
.
normalize_image
,
{
'original_minval'
:
0
,
'original_maxval'
:
255
,
'target_minval'
:
0
,
'target_maxval'
:
1
}))
preprocess_options
.
append
((
preprocessor
.
random_distort_color
,
{}))
self
.
_testPreprocessorCache
(
preprocess_options
,
test_boxes
=
False
,
test_masks
=
False
,
test_keypoints
=
False
)
def
testRandomJitterBoxes
(
self
):
preprocessing_options
=
[]
preprocessing_options
.
append
((
preprocessor
.
random_jitter_boxes
,
{}))
...
...
@@ -900,6 +1071,21 @@ class PreprocessorTest(tf.test.TestCase):
self
.
assertAllEqual
(
boxes_rank_
,
distorted_boxes_rank_
)
self
.
assertAllEqual
(
images_rank_
,
distorted_images_rank_
)
def
testRandomCropImageWithCache
(
self
):
preprocess_options
=
[(
preprocessor
.
random_rgb_to_gray
,
{
'probability'
:
0.5
}),
(
preprocessor
.
normalize_image
,
{
'original_minval'
:
0
,
'original_maxval'
:
255
,
'target_minval'
:
0
,
'target_maxval'
:
1
,
}),
(
preprocessor
.
random_crop_image
,
{})]
self
.
_testPreprocessorCache
(
preprocess_options
,
test_boxes
=
True
,
test_masks
=
False
,
test_keypoints
=
False
)
def
testRandomCropImageGrayscale
(
self
):
preprocessing_options
=
[(
preprocessor
.
rgb_to_gray
,
{}),
(
preprocessor
.
normalize_image
,
{
...
...
@@ -1446,6 +1632,13 @@ class PreprocessorTest(tf.test.TestCase):
self
.
expectedKeypointsAfterThresholding
()])
self
.
assertAllClose
(
retained_keypoints_
,
expected_keypoints_
)
def
testRandomCropToAspectRatioWithCache
(
self
):
preprocess_options
=
[(
preprocessor
.
random_crop_to_aspect_ratio
,
{})]
self
.
_testPreprocessorCache
(
preprocess_options
,
test_boxes
=
True
,
test_masks
=
False
,
test_keypoints
=
False
)
def
testRunRandomCropToAspectRatioWithMasks
(
self
):
image
=
self
.
createColorfulTestImage
()
boxes
=
self
.
createTestBoxes
()
...
...
@@ -1536,6 +1729,13 @@ class PreprocessorTest(tf.test.TestCase):
self
.
assertAllClose
(
distorted_keypoints_
.
flatten
(),
expected_keypoints
.
flatten
())
def
testRandomPadToAspectRatioWithCache
(
self
):
preprocess_options
=
[(
preprocessor
.
random_pad_to_aspect_ratio
,
{})]
self
.
_testPreprocessorCache
(
preprocess_options
,
test_boxes
=
True
,
test_masks
=
True
,
test_keypoints
=
True
)
def
testRunRandomPadToAspectRatioWithMasks
(
self
):
image
=
self
.
createColorfulTestImage
()
boxes
=
self
.
createTestBoxes
()
...
...
@@ -1624,6 +1824,17 @@ class PreprocessorTest(tf.test.TestCase):
self
.
assertAllClose
(
distorted_keypoints_
.
flatten
(),
expected_keypoints
.
flatten
())
def
testRandomPadImageWithCache
(
self
):
preprocess_options
=
[(
preprocessor
.
normalize_image
,
{
'original_minval'
:
0
,
'original_maxval'
:
255
,
'target_minval'
:
0
,
'target_maxval'
:
1
,}),
(
preprocessor
.
random_pad_image
,
{})]
self
.
_testPreprocessorCache
(
preprocess_options
,
test_boxes
=
True
,
test_masks
=
True
,
test_keypoints
=
True
)
def
testRandomPadImage
(
self
):
preprocessing_options
=
[(
preprocessor
.
normalize_image
,
{
'original_minval'
:
0
,
...
...
@@ -1670,6 +1881,17 @@ class PreprocessorTest(tf.test.TestCase):
self
.
assertTrue
(
np
.
all
((
boxes_
[:,
3
]
-
boxes_
[:,
1
])
>=
(
padded_boxes_
[:,
3
]
-
padded_boxes_
[:,
1
])))
def
testRandomCropPadImageWithCache
(
self
):
preprocess_options
=
[(
preprocessor
.
normalize_image
,
{
'original_minval'
:
0
,
'original_maxval'
:
255
,
'target_minval'
:
0
,
'target_maxval'
:
1
,}),
(
preprocessor
.
random_crop_pad_image
,
{})]
self
.
_testPreprocessorCache
(
preprocess_options
,
test_boxes
=
True
,
test_masks
=
True
,
test_keypoints
=
True
)
def
testRandomCropPadImageWithRandomCoefOne
(
self
):
preprocessing_options
=
[(
preprocessor
.
normalize_image
,
{
'original_minval'
:
0
,
...
...
@@ -1788,6 +2010,22 @@ class PreprocessorTest(tf.test.TestCase):
self
.
assertEqual
(
images_shape_
[
1
],
padded_images_shape_
[
1
])
self
.
assertEqual
(
2
*
images_shape_
[
2
],
padded_images_shape_
[
2
])
def
testRandomBlackPatchesWithCache
(
self
):
preprocess_options
=
[]
preprocess_options
.
append
((
preprocessor
.
normalize_image
,
{
'original_minval'
:
0
,
'original_maxval'
:
255
,
'target_minval'
:
0
,
'target_maxval'
:
1
}))
preprocess_options
.
append
((
preprocessor
.
random_black_patches
,
{
'size_to_image_ratio'
:
0.5
}))
self
.
_testPreprocessorCache
(
preprocess_options
,
test_boxes
=
True
,
test_masks
=
True
,
test_keypoints
=
True
)
def
testRandomBlackPatches
(
self
):
preprocessing_options
=
[]
preprocessing_options
.
append
((
preprocessor
.
normalize_image
,
{
...
...
@@ -1812,6 +2050,22 @@ class PreprocessorTest(tf.test.TestCase):
[
images_shape
,
blacked_images_shape
])
self
.
assertAllEqual
(
images_shape_
,
blacked_images_shape_
)
def
testRandomResizeMethodWithCache
(
self
):
preprocess_options
=
[]
preprocess_options
.
append
((
preprocessor
.
normalize_image
,
{
'original_minval'
:
0
,
'original_maxval'
:
255
,
'target_minval'
:
0
,
'target_maxval'
:
1
}))
preprocess_options
.
append
((
preprocessor
.
random_resize_method
,
{
'target_size'
:
(
75
,
150
)
}))
self
.
_testPreprocessorCache
(
preprocess_options
,
test_boxes
=
True
,
test_masks
=
True
,
test_keypoints
=
True
)
def
testRandomResizeMethod
(
self
):
preprocessing_options
=
[]
preprocessing_options
.
append
((
preprocessor
.
normalize_image
,
{
...
...
@@ -1853,7 +2107,7 @@ class PreprocessorTest(tf.test.TestCase):
expected_masks_shape_list
):
in_image
=
tf
.
random_uniform
(
in_image_shape
)
in_masks
=
tf
.
random_uniform
(
in_masks_shape
)
out_image
,
out_masks
=
preprocessor
.
resize_image
(
out_image
,
out_masks
,
_
=
preprocessor
.
resize_image
(
in_image
,
in_masks
,
new_height
=
height
,
new_width
=
width
)
out_image_shape
=
tf
.
shape
(
out_image
)
out_masks_shape
=
tf
.
shape
(
out_masks
)
...
...
@@ -1880,7 +2134,7 @@ class PreprocessorTest(tf.test.TestCase):
expected_masks_shape_list
):
in_image
=
tf
.
random_uniform
(
in_image_shape
)
in_masks
=
tf
.
random_uniform
(
in_masks_shape
)
out_image
,
out_masks
=
preprocessor
.
resize_image
(
out_image
,
out_masks
,
_
=
preprocessor
.
resize_image
(
in_image
,
in_masks
,
new_height
=
height
,
new_width
=
width
)
out_image_shape
=
tf
.
shape
(
out_image
)
out_masks_shape
=
tf
.
shape
(
out_masks
)
...
...
@@ -1900,7 +2154,7 @@ class PreprocessorTest(tf.test.TestCase):
for
in_shape
,
expected_shape
in
zip
(
in_shape_list
,
expected_shape_list
):
in_image
=
tf
.
random_uniform
(
in_shape
)
out_image
=
preprocessor
.
resize_to_range
(
out_image
,
_
=
preprocessor
.
resize_to_range
(
in_image
,
min_dimension
=
min_dim
,
max_dimension
=
max_dim
)
self
.
assertAllEqual
(
out_image
.
get_shape
().
as_list
(),
expected_shape
)
...
...
@@ -1913,7 +2167,7 @@ class PreprocessorTest(tf.test.TestCase):
for
in_shape
,
expected_shape
in
zip
(
in_shape_list
,
expected_shape_list
):
in_image
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
(
None
,
None
,
3
))
out_image
=
preprocessor
.
resize_to_range
(
out_image
,
_
=
preprocessor
.
resize_to_range
(
in_image
,
min_dimension
=
min_dim
,
max_dimension
=
max_dim
)
out_image_shape
=
tf
.
shape
(
out_image
)
with
self
.
test_session
()
as
sess
:
...
...
@@ -1938,7 +2192,7 @@ class PreprocessorTest(tf.test.TestCase):
expected_masks_shape_list
):
in_image
=
tf
.
random_uniform
(
in_image_shape
)
in_masks
=
tf
.
random_uniform
(
in_masks_shape
)
out_image
,
out_masks
=
preprocessor
.
resize_to_range
(
out_image
,
out_masks
,
_
=
preprocessor
.
resize_to_range
(
in_image
,
in_masks
,
min_dimension
=
min_dim
,
max_dimension
=
max_dim
)
self
.
assertAllEqual
(
out_masks
.
get_shape
().
as_list
(),
expected_mask_shape
)
self
.
assertAllEqual
(
out_image
.
get_shape
().
as_list
(),
expected_image_shape
)
...
...
@@ -1960,7 +2214,7 @@ class PreprocessorTest(tf.test.TestCase):
in_image
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
(
None
,
None
,
3
))
in_masks
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
(
None
,
None
,
None
))
in_masks
=
tf
.
random_uniform
(
in_masks_shape
)
out_image
,
out_masks
=
preprocessor
.
resize_to_range
(
out_image
,
out_masks
,
_
=
preprocessor
.
resize_to_range
(
in_image
,
in_masks
,
min_dimension
=
min_dim
,
max_dimension
=
max_dim
)
out_image_shape
=
tf
.
shape
(
out_image
)
out_masks_shape
=
tf
.
shape
(
out_masks
)
...
...
@@ -1991,7 +2245,7 @@ class PreprocessorTest(tf.test.TestCase):
expected_masks_shape_list
):
in_image
=
tf
.
random_uniform
(
in_image_shape
)
in_masks
=
tf
.
random_uniform
(
in_masks_shape
)
out_image
,
out_masks
=
preprocessor
.
resize_to_range
(
out_image
,
out_masks
,
_
=
preprocessor
.
resize_to_range
(
in_image
,
in_masks
,
min_dimension
=
min_dim
,
max_dimension
=
max_dim
)
out_image_shape
=
tf
.
shape
(
out_image
)
out_masks_shape
=
tf
.
shape
(
out_masks
)
...
...
@@ -2016,7 +2270,7 @@ class PreprocessorTest(tf.test.TestCase):
for
in_shape
,
expected_shape
in
zip
(
in_shape_list
,
expected_shape_list
):
in_image
=
tf
.
random_uniform
(
in_shape
)
out_image
=
preprocessor
.
resize_to_range
(
out_image
,
_
=
preprocessor
.
resize_to_range
(
in_image
,
min_dimension
=
min_dim
,
max_dimension
=
max_dim
)
out_image_shape
=
tf
.
shape
(
out_image
)
...
...
@@ -2039,7 +2293,7 @@ class PreprocessorTest(tf.test.TestCase):
in_image
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
(
None
,
None
,
3
))
in_masks
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
(
None
,
None
,
None
))
in_masks
=
tf
.
random_uniform
(
in_masks_shape
)
out_image
,
out_masks
=
preprocessor
.
resize_to_min_dimension
(
out_image
,
out_masks
,
_
=
preprocessor
.
resize_to_min_dimension
(
in_image
,
in_masks
,
min_dimension
=
min_dim
)
out_image_shape
=
tf
.
shape
(
out_image
)
out_masks_shape
=
tf
.
shape
(
out_masks
)
...
...
@@ -2069,7 +2323,7 @@ class PreprocessorTest(tf.test.TestCase):
expected_masks_shape_list
):
in_image
=
tf
.
random_uniform
(
in_image_shape
)
in_masks
=
tf
.
random_uniform
(
in_masks_shape
)
out_image
,
out_masks
=
preprocessor
.
resize_to_min_dimension
(
out_image
,
out_masks
,
_
=
preprocessor
.
resize_to_min_dimension
(
in_image
,
in_masks
,
min_dimension
=
min_dim
)
out_image_shape
=
tf
.
shape
(
out_image
)
out_masks_shape
=
tf
.
shape
(
out_masks
)
...
...
@@ -2144,6 +2398,20 @@ class PreprocessorTest(tf.test.TestCase):
self
.
assertAllEqual
([
0
,
1
,
1
,
0
,
1
],
one_hot
)
def
testSSDRandomCropWithCache
(
self
):
preprocess_options
=
[
(
preprocessor
.
normalize_image
,
{
'original_minval'
:
0
,
'original_maxval'
:
255
,
'target_minval'
:
0
,
'target_maxval'
:
1
}),
(
preprocessor
.
ssd_random_crop
,
{})]
self
.
_testPreprocessorCache
(
preprocess_options
,
test_boxes
=
True
,
test_masks
=
False
,
test_keypoints
=
False
)
def
testSSDRandomCrop
(
self
):
preprocessing_options
=
[
(
preprocessor
.
normalize_image
,
{
...
...
@@ -2216,6 +2484,20 @@ class PreprocessorTest(tf.test.TestCase):
self
.
assertAllEqual
(
boxes_rank_
,
distorted_boxes_rank_
)
self
.
assertAllEqual
(
images_rank_
,
distorted_images_rank_
)
def
testSSDRandomCropFixedAspectRatioWithCache
(
self
):
preprocess_options
=
[
(
preprocessor
.
normalize_image
,
{
'original_minval'
:
0
,
'original_maxval'
:
255
,
'target_minval'
:
0
,
'target_maxval'
:
1
}),
(
preprocessor
.
ssd_random_crop_fixed_aspect_ratio
,
{})]
self
.
_testPreprocessorCache
(
preprocess_options
,
test_boxes
=
True
,
test_masks
=
False
,
test_keypoints
=
False
)
def
_testSSDRandomCropFixedAspectRatio
(
self
,
include_label_scores
,
include_instance_masks
,
...
...
research/object_detection/core/standard_fields.py
View file @
fd7b6887
...
...
@@ -57,6 +57,10 @@ class InputDataFields(object):
groundtruth_keypoints: ground truth keypoints.
groundtruth_keypoint_visibilities: ground truth keypoint visibilities.
groundtruth_label_scores: groundtruth label scores.
groundtruth_weights: groundtruth weight factor for bounding boxes.
num_groundtruth_boxes: number of groundtruth boxes.
true_image_shapes: true shapes of images in the resized images, as resized
images can be padded with zeros.
"""
image
=
'image'
original_image
=
'original_image'
...
...
@@ -79,10 +83,13 @@ class InputDataFields(object):
groundtruth_keypoints
=
'groundtruth_keypoints'
groundtruth_keypoint_visibilities
=
'groundtruth_keypoint_visibilities'
groundtruth_label_scores
=
'groundtruth_label_scores'
groundtruth_weights
=
'groundtruth_weights'
num_groundtruth_boxes
=
'num_groundtruth_boxes'
true_image_shape
=
'true_image_shape'
class
DetectionResultFields
(
object
):
"""Naming conve
r
ntions for storing the output of the detector.
"""Naming conventions for storing the output of the detector.
Attributes:
source_id: source of the original image.
...
...
@@ -162,6 +169,7 @@ class TfExampleFields(object):
object_is_crowd: [DEPRECATED, use object_group_of instead]
is the object a single object or a crowd
object_segment_area: the area of the segment.
object_weight: a weight factor for the object's bounding box.
instance_masks: instance segmentation masks.
instance_boundaries: instance boundaries.
instance_classes: Classes for each instance segmentation mask.
...
...
@@ -194,6 +202,7 @@ class TfExampleFields(object):
object_depiction
=
'image/object/depiction'
object_is_crowd
=
'image/object/is_crowd'
object_segment_area
=
'image/object/segment/area'
object_weight
=
'image/object/weight'
instance_masks
=
'image/segmentation/object'
instance_boundaries
=
'image/boundaries/object'
instance_classes
=
'image/segmentation/object/class'
...
...
research/object_detection/core/target_assigner.py
View file @
fd7b6887
...
...
@@ -37,19 +37,19 @@ from object_detection.box_coders import faster_rcnn_box_coder
from
object_detection.box_coders
import
mean_stddev_box_coder
from
object_detection.core
import
box_coder
as
bcoder
from
object_detection.core
import
box_list
from
object_detection.core
import
box_list_ops
from
object_detection.core
import
matcher
as
mat
from
object_detection.core
import
region_similarity_calculator
as
sim_calc
from
object_detection.core
import
standard_fields
as
fields
from
object_detection.matchers
import
argmax_matcher
from
object_detection.matchers
import
bipartite_matcher
from
object_detection.utils
import
shape_utils
class
TargetAssigner
(
object
):
"""Target assigner to compute classification and regression targets."""
def
__init__
(
self
,
similarity_calc
,
matcher
,
box_coder
,
positive_class_weight
=
1.0
,
negative_class_weight
=
1.0
,
unmatched_cls_target
=
None
):
negative_class_weight
=
1.0
,
unmatched_cls_target
=
None
):
"""Construct Object Detection Target Assigner.
Args:
...
...
@@ -58,10 +58,8 @@ class TargetAssigner(object):
anchors.
box_coder: an object_detection.core.BoxCoder used to encode matching
groundtruth boxes with respect to anchors.
positive_class_weight: classification weight to be associated to positive
anchors (default: 1.0)
negative_class_weight: classification weight to be associated to negative
anchors (default: 1.0)
anchors (default: 1.0)
. The weight must be in [0., 1.].
unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k]
which is consistent with the classification target for each
anchor (and can be empty for scalar targets). This shape must thus be
...
...
@@ -82,7 +80,6 @@ class TargetAssigner(object):
self
.
_similarity_calc
=
similarity_calc
self
.
_matcher
=
matcher
self
.
_box_coder
=
box_coder
self
.
_positive_class_weight
=
positive_class_weight
self
.
_negative_class_weight
=
negative_class_weight
if
unmatched_cls_target
is
None
:
self
.
_unmatched_cls_target
=
tf
.
constant
([
0
],
tf
.
float32
)
...
...
@@ -94,7 +91,7 @@ class TargetAssigner(object):
return
self
.
_box_coder
def
assign
(
self
,
anchors
,
groundtruth_boxes
,
groundtruth_labels
=
None
,
**
params
):
groundtruth_weights
=
None
,
**
params
):
"""Assign classification and regression targets to each anchor.
For a given set of anchors and groundtruth detections, match anchors
...
...
@@ -113,6 +110,9 @@ class TargetAssigner(object):
[d_1, ... d_k] can be empty (corresponding to scalar inputs). When set
to None, groundtruth_labels assumes a binary problem where all
ground_truth boxes get a positive label (of 1).
groundtruth_weights: a float tensor of shape [M] indicating the weight to
assign to all anchors match to a particular groundtruth box. The weights
must be in [0., 1.]. If None, all weights are set to 1.
**params: Additional keyword arguments for specific implementations of
the Matcher.
...
...
@@ -140,14 +140,21 @@ class TargetAssigner(object):
groundtruth_labels
=
tf
.
ones
(
tf
.
expand_dims
(
groundtruth_boxes
.
num_boxes
(),
0
))
groundtruth_labels
=
tf
.
expand_dims
(
groundtruth_labels
,
-
1
)
unmatched_shape_assert
=
tf
.
assert_equal
(
tf
.
shape
(
groundtruth_labels
)[
1
:],
tf
.
shape
(
self
.
_unmatched_cls_target
),
message
=
'Unmatched class target shape incompatible '
'with groundtruth labels shape!'
)
labels_and_box_shapes_assert
=
tf
.
assert_equal
(
tf
.
shape
(
groundtruth_labels
)[
0
],
groundtruth_boxes
.
num_boxes
(),
message
=
'Groundtruth boxes and labels have incompatible shapes!'
)
unmatched_shape_assert
=
shape_utils
.
assert_shape_equal
(
shape_utils
.
combined_static_and_dynamic_shape
(
groundtruth_labels
)[
1
:],
shape_utils
.
combined_static_and_dynamic_shape
(
self
.
_unmatched_cls_target
))
labels_and_box_shapes_assert
=
shape_utils
.
assert_shape_equal
(
shape_utils
.
combined_static_and_dynamic_shape
(
groundtruth_labels
)[:
1
],
shape_utils
.
combined_static_and_dynamic_shape
(
groundtruth_boxes
.
get
())[:
1
])
if
groundtruth_weights
is
None
:
num_gt_boxes
=
groundtruth_boxes
.
num_boxes_static
()
if
not
num_gt_boxes
:
num_gt_boxes
=
groundtruth_boxes
.
num_boxes
()
groundtruth_weights
=
tf
.
ones
([
num_gt_boxes
],
dtype
=
tf
.
float32
)
with
tf
.
control_dependencies
(
[
unmatched_shape_assert
,
labels_and_box_shapes_assert
]):
match_quality_matrix
=
self
.
_similarity_calc
.
compare
(
groundtruth_boxes
,
...
...
@@ -158,16 +165,16 @@ class TargetAssigner(object):
match
)
cls_targets
=
self
.
_create_classification_targets
(
groundtruth_labels
,
match
)
reg_weights
=
self
.
_create_regression_weights
(
match
)
cls_weights
=
self
.
_create_classification_weights
(
match
,
self
.
_positive_class_weight
,
self
.
_negative_class
_weight
)
reg_weights
=
self
.
_create_regression_weights
(
match
,
groundtruth_weights
)
cls_weights
=
self
.
_create_classification_weights
(
match
,
groundtruth
_weight
s
)
num_anchors
=
anchors
.
num_boxes_static
()
if
num_anchors
is
not
None
:
reg_targets
=
self
.
_reset_target_shape
(
reg_targets
,
num_anchors
)
cls_targets
=
self
.
_reset_target_shape
(
cls_targets
,
num_anchors
)
reg_weights
=
self
.
_reset_target_shape
(
reg_weights
,
num_anchors
)
cls_weights
=
self
.
_reset_target_shape
(
cls_weights
,
num_anchors
)
num_anchors
=
anchors
.
num_boxes_static
()
if
num_anchors
is
not
None
:
reg_targets
=
self
.
_reset_target_shape
(
reg_targets
,
num_anchors
)
cls_targets
=
self
.
_reset_target_shape
(
cls_targets
,
num_anchors
)
reg_weights
=
self
.
_reset_target_shape
(
reg_weights
,
num_anchors
)
cls_weights
=
self
.
_reset_target_shape
(
cls_weights
,
num_anchors
)
return
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
match
...
...
@@ -198,23 +205,31 @@ class TargetAssigner(object):
Returns:
reg_targets: a float32 tensor with shape [N, box_code_dimension]
"""
matched_anchor_indices
=
match
.
matched_column_indices
()
unmatched_ignored_anchor_indices
=
(
match
.
unmatched_or_ignored_column_indices
())
matched_gt_indices
=
match
.
matched_row_indices
()
matched_anchors
=
box_list_ops
.
gather
(
anchors
,
matched_anchor_indices
)
matched_gt_boxes
=
box_list_ops
.
gather
(
groundtruth_boxes
,
matched_gt_indices
)
matched_reg_targets
=
self
.
_box_coder
.
encode
(
matched_gt_boxes
,
matched_anchors
)
matched_gt_boxes
=
match
.
gather_based_on_match
(
groundtruth_boxes
.
get
(),
unmatched_value
=
tf
.
zeros
(
4
),
ignored_value
=
tf
.
zeros
(
4
))
matched_gt_boxlist
=
box_list
.
BoxList
(
matched_gt_boxes
)
if
groundtruth_boxes
.
has_field
(
fields
.
BoxListFields
.
keypoints
):
groundtruth_keypoints
=
groundtruth_boxes
.
get_field
(
fields
.
BoxListFields
.
keypoints
)
matched_keypoints
=
match
.
gather_based_on_match
(
groundtruth_keypoints
,
unmatched_value
=
tf
.
zeros
(
groundtruth_keypoints
.
get_shape
()[
1
:]),
ignored_value
=
tf
.
zeros
(
groundtruth_keypoints
.
get_shape
()[
1
:]))
matched_gt_boxlist
.
add_field
(
fields
.
BoxListFields
.
keypoints
,
matched_keypoints
)
matched_reg_targets
=
self
.
_box_coder
.
encode
(
matched_gt_boxlist
,
anchors
)
match_results_shape
=
shape_utils
.
combined_static_and_dynamic_shape
(
match
.
match_results
)
# Zero out the unmatched and ignored regression targets.
unmatched_ignored_reg_targets
=
tf
.
tile
(
self
.
_default_regression_target
(),
tf
.
stack
([
tf
.
size
(
unmatched_ignored_anchor_indices
),
1
]))
reg_targets
=
tf
.
dynamic_stitch
(
[
matched_anchor_indices
,
unmatched_ignored_anchor_indices
],
[
matched_reg_targets
,
unmatched_ignored_reg_targets
])
# TODO: summarize the number of matches on average.
self
.
_default_regression_target
(),
[
match_results_shape
[
0
],
1
])
matched_anchors_mask
=
match
.
matched_column_indicator
()
reg_targets
=
tf
.
where
(
matched_anchors_mask
,
matched_reg_targets
,
unmatched_ignored_reg_targets
)
return
reg_targets
def
_default_regression_target
(
self
):
...
...
@@ -245,27 +260,16 @@ class TargetAssigner(object):
and groundtruth boxes.
Returns:
cls_targets:
a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k],
where the
subshape [d_1, ..., d_k] is compatible with groundtruth_labels
which has
shape [num_gt_boxes, d_1, d_2, ... d_k].
a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k],
where the
subshape [d_1, ..., d_k] is compatible with groundtruth_labels
which has
shape [num_gt_boxes, d_1, d_2, ... d_k].
"""
matched_anchor_indices
=
match
.
matched_column_indices
()
unmatched_ignored_anchor_indices
=
(
match
.
unmatched_or_ignored_column_indices
())
matched_gt_indices
=
match
.
matched_row_indices
()
matched_cls_targets
=
tf
.
gather
(
groundtruth_labels
,
matched_gt_indices
)
ones
=
self
.
_unmatched_cls_target
.
shape
.
ndims
*
[
1
]
unmatched_ignored_cls_targets
=
tf
.
tile
(
tf
.
expand_dims
(
self
.
_unmatched_cls_target
,
0
),
tf
.
stack
([
tf
.
size
(
unmatched_ignored_anchor_indices
)]
+
ones
))
cls_targets
=
tf
.
dynamic_stitch
(
[
matched_anchor_indices
,
unmatched_ignored_anchor_indices
],
[
matched_cls_targets
,
unmatched_ignored_cls_targets
])
return
cls_targets
def
_create_regression_weights
(
self
,
match
):
return
match
.
gather_based_on_match
(
groundtruth_labels
,
unmatched_value
=
self
.
_unmatched_cls_target
,
ignored_value
=
self
.
_unmatched_cls_target
)
def
_create_regression_weights
(
self
,
match
,
groundtruth_weights
):
"""Set regression weight for each anchor.
Only positive anchors are set to contribute to the regression loss, so this
...
...
@@ -275,18 +279,18 @@ class TargetAssigner(object):
Args:
match: a matcher.Match object that provides a matching between anchors
and groundtruth boxes.
groundtruth_weights: a float tensor of shape [M] indicating the weight to
assign to all anchors match to a particular groundtruth box.
Returns:
reg_weights: a float32 tensor with shape [num_anchors] representing
regression weights
a float32 tensor with shape [num_anchors] representing regression weights.
"""
re
g_weights
=
tf
.
cast
(
match
.
m
at
c
he
d_column_indicator
(),
tf
.
float32
)
return
reg_weights
re
turn
match
.
g
athe
r_based_on_match
(
groundtruth_weights
,
ignored_value
=
0.
,
unmatched_value
=
0.
)
def
_create_classification_weights
(
self
,
match
,
positive_class_weight
=
1.0
,
negative_class_weight
=
1.0
):
groundtruth_weights
):
"""Create classification weights for each anchor.
Positive (matched) anchors are associated with a weight of
...
...
@@ -299,25 +303,23 @@ class TargetAssigner(object):
Args:
match: a matcher.Match object that provides a matching between anchors
and groundtruth boxes.
positive_class
_weight:
weight to be associated to positive anchors
negative_class_weight: weight to be associated to negative anchors
groundtruth
_weight
s
:
a float tensor of shape [M] indicating the weight to
assign to all anchors match to a particular groundtruth box.
Returns:
cls_weights:
a float32 tensor with shape [num_anchors] representing
classification
weights.
a float32 tensor with shape [num_anchors] representing
classification
weights.
"""
matched_indicator
=
tf
.
cast
(
match
.
matched_column_indicator
(),
tf
.
float32
)
ignore_indicator
=
tf
.
cast
(
match
.
ignored_column_indicator
(),
tf
.
float32
)
unmatched_indicator
=
1.0
-
matched_indicator
-
ignore_indicator
cls_weights
=
(
positive_class_weight
*
matched_indicator
+
negative_class_weight
*
unmatched_indicator
)
return
cls_weights
return
match
.
gather_based_on_match
(
groundtruth_weights
,
ignored_value
=
0.
,
unmatched_value
=
self
.
_negative_class_weight
)
def
get_box_coder
(
self
):
"""Get BoxCoder of this TargetAssigner.
Returns:
BoxCoder:
BoxCoder object.
BoxCoder object.
"""
return
self
.
_box_coder
...
...
@@ -325,7 +327,6 @@ class TargetAssigner(object):
# TODO: This method pulls in all the implementation dependencies into
# core. Therefore its best to have this factory method outside of core.
def
create_target_assigner
(
reference
,
stage
=
None
,
positive_class_weight
=
1.0
,
negative_class_weight
=
1.0
,
unmatched_cls_target
=
None
):
"""Factory function for creating standard target assigners.
...
...
@@ -333,8 +334,6 @@ def create_target_assigner(reference, stage=None,
Args:
reference: string referencing the type of TargetAssigner.
stage: string denoting stage: {proposal, detection}.
positive_class_weight: classification weight to be associated to positive
anchors (default: 1.0)
negative_class_weight: classification weight to be associated to negative
anchors (default: 1.0)
unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k]
...
...
@@ -383,7 +382,6 @@ def create_target_assigner(reference, stage=None,
raise
ValueError
(
'No valid combination of reference and stage.'
)
return
TargetAssigner
(
similarity_calc
,
matcher
,
box_coder
,
positive_class_weight
=
positive_class_weight
,
negative_class_weight
=
negative_class_weight
,
unmatched_cls_target
=
unmatched_cls_target
)
...
...
@@ -391,7 +389,8 @@ def create_target_assigner(reference, stage=None,
def
batch_assign_targets
(
target_assigner
,
anchors_batch
,
gt_box_batch
,
gt_class_targets_batch
):
gt_class_targets_batch
,
gt_weights_batch
=
None
):
"""Batched assignment of classification and regression targets.
Args:
...
...
@@ -404,6 +403,8 @@ def batch_assign_targets(target_assigner,
each tensor has shape [num_gt_boxes_i, classification_target_size] and
num_gt_boxes_i is the number of boxes in the ith boxlist of
gt_box_batch.
gt_weights_batch: A list of 1-D tf.float32 tensors of shape
[num_boxes] containing weights for groundtruth boxes.
Returns:
batch_cls_targets: a tensor with shape [batch_size, num_anchors,
...
...
@@ -437,11 +438,13 @@ def batch_assign_targets(target_assigner,
reg_targets_list
=
[]
reg_weights_list
=
[]
match_list
=
[]
for
anchors
,
gt_boxes
,
gt_class_targets
in
zip
(
anchors_batch
,
gt_box_batch
,
gt_class_targets_batch
):
if
gt_weights_batch
is
None
:
gt_weights_batch
=
[
None
]
*
len
(
gt_class_targets_batch
)
for
anchors
,
gt_boxes
,
gt_class_targets
,
gt_weights
in
zip
(
anchors_batch
,
gt_box_batch
,
gt_class_targets_batch
,
gt_weights_batch
):
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
match
)
=
target_assigner
.
assign
(
anchors
,
gt_boxes
,
gt_class_targets
)
anchors
,
gt_boxes
,
gt_class_targets
,
gt_weights
)
cls_targets_list
.
append
(
cls_targets
)
cls_weights_list
.
append
(
cls_weights
)
reg_targets_list
.
append
(
reg_targets
)
...
...
research/object_detection/core/target_assigner_test.py
View file @
fd7b6887
...
...
@@ -17,135 +17,238 @@
import
numpy
as
np
import
tensorflow
as
tf
from
object_detection.box_coders
import
keypoint_box_coder
from
object_detection.box_coders
import
mean_stddev_box_coder
from
object_detection.core
import
box_list
from
object_detection.core
import
region_similarity_calculator
from
object_detection.core
import
standard_fields
as
fields
from
object_detection.core
import
target_assigner
as
targetassigner
from
object_detection.matchers
import
argmax_matcher
from
object_detection.matchers
import
bipartite_matcher
from
object_detection.utils
import
test_case
class
TargetAssignerTest
(
t
f
.
t
est
.
TestCase
):
class
TargetAssignerTest
(
test
_case
.
TestCase
):
def
test_assign_agnostic
(
self
):
similarity_calc
=
region_similarity_calculator
.
NegSqDistSimilarity
()
matcher
=
bipartite_matcher
.
GreedyBipartiteMatcher
()
box_coder
=
mean_stddev_box_coder
.
MeanStddevBoxCoder
()
target_assigner
=
targetassigner
.
TargetAssigner
(
similarity_calc
,
matcher
,
box_coder
,
unmatched_cls_target
=
None
)
prior_means
=
tf
.
constant
([[
0.0
,
0.0
,
0.5
,
0.5
],
[
0.5
,
0.5
,
1.0
,
0.8
],
[
0
,
0.5
,
.
5
,
1.0
]])
prior_stddevs
=
tf
.
constant
(
3
*
[
4
*
[.
1
]])
priors
=
box_list
.
BoxList
(
prior_means
)
priors
.
add_field
(
'stddev'
,
prior_stddevs
)
box_corners
=
[[
0.0
,
0.0
,
0.5
,
0.5
],
[
0.5
,
0.5
,
0.9
,
0.9
]]
boxes
=
box_list
.
BoxList
(
tf
.
constant
(
box_corners
))
def
graph_fn
(
anchor_means
,
anchor_stddevs
,
groundtruth_box_corners
):
similarity_calc
=
region_similarity_calculator
.
IouSimilarity
()
matcher
=
argmax_matcher
.
ArgMaxMatcher
(
matched_threshold
=
0.5
,
unmatched_threshold
=
0.5
)
box_coder
=
mean_stddev_box_coder
.
MeanStddevBoxCoder
()
target_assigner
=
targetassigner
.
TargetAssigner
(
similarity_calc
,
matcher
,
box_coder
,
unmatched_cls_target
=
None
)
anchors_boxlist
=
box_list
.
BoxList
(
anchor_means
)
anchors_boxlist
.
add_field
(
'stddev'
,
anchor_stddevs
)
groundtruth_boxlist
=
box_list
.
BoxList
(
groundtruth_box_corners
)
result
=
target_assigner
.
assign
(
anchors_boxlist
,
groundtruth_boxlist
)
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
_
)
=
result
return
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
)
anchor_means
=
np
.
array
([[
0.0
,
0.0
,
0.5
,
0.5
],
[
0.5
,
0.5
,
1.0
,
0.8
],
[
0
,
0.5
,
.
5
,
1.0
]],
dtype
=
np
.
float32
)
anchor_stddevs
=
np
.
array
(
3
*
[
4
*
[.
1
]],
dtype
=
np
.
float32
)
groundtruth_box_corners
=
np
.
array
([[
0.0
,
0.0
,
0.5
,
0.5
],
[
0.5
,
0.5
,
0.9
,
0.9
]],
dtype
=
np
.
float32
)
exp_cls_targets
=
[[
1
],
[
1
],
[
0
]]
exp_cls_weights
=
[
1
,
1
,
1
]
exp_reg_targets
=
[[
0
,
0
,
0
,
0
],
[
0
,
0
,
-
1
,
1
],
[
0
,
0
,
0
,
0
]]
exp_reg_weights
=
[
1
,
1
,
0
]
exp_matching_anchors
=
[
0
,
1
]
result
=
target_assigner
.
assign
(
priors
,
boxes
,
num_valid_rows
=
2
)
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
match
)
=
result
with
self
.
test_session
()
as
sess
:
(
cls_targets_out
,
cls_weights_out
,
reg_targets_out
,
reg_weights_out
,
matching_anchors_out
)
=
sess
.
run
(
[
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
match
.
matched_column_indices
()])
self
.
assertAllClose
(
cls_targets_out
,
exp_cls_targets
)
self
.
assertAllClose
(
cls_weights_out
,
exp_cls_weights
)
self
.
assertAllClose
(
reg_targets_out
,
exp_reg_targets
)
self
.
assertAllClose
(
reg_weights_out
,
exp_reg_weights
)
self
.
assertAllClose
(
matching_anchors_out
,
exp_matching_anchors
)
self
.
assertEquals
(
cls_targets_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
cls_weights_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
reg_targets_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
reg_weights_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
matching_anchors_out
.
dtype
,
np
.
int32
)
def
test_assign_with_ignored_matches
(
self
):
(
cls_targets_out
,
cls_weights_out
,
reg_targets_out
,
reg_weights_out
)
=
self
.
execute
(
graph_fn
,
[
anchor_means
,
anchor_stddevs
,
groundtruth_box_corners
])
self
.
assertAllClose
(
cls_targets_out
,
exp_cls_targets
)
self
.
assertAllClose
(
cls_weights_out
,
exp_cls_weights
)
self
.
assertAllClose
(
reg_targets_out
,
exp_reg_targets
)
self
.
assertAllClose
(
reg_weights_out
,
exp_reg_weights
)
self
.
assertEquals
(
cls_targets_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
cls_weights_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
reg_targets_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
reg_weights_out
.
dtype
,
np
.
float32
)
def
test_assign_class_agnostic_with_ignored_matches
(
self
):
# Note: test is very similar to above. The third box matched with an IOU
# of 0.35, which is between the matched and unmatched threshold. This means
# That like above the expected classification targets are [1, 1, 0].
# Unlike above, the third target is ignored and therefore expected
# classification weights are [1, 1, 0].
similarity_calc
=
region_similarity_calculator
.
IouSimilarity
()
matcher
=
argmax_matcher
.
ArgMaxMatcher
(
matched_threshold
=
0.5
,
unmatched_threshold
=
0.3
)
box_coder
=
mean_stddev_box_coder
.
MeanStddevBoxCoder
()
target_assigner
=
targetassigner
.
TargetAssigner
(
similarity_calc
,
matcher
,
box_coder
)
prior_means
=
tf
.
constant
([[
0.0
,
0.0
,
0.5
,
0.5
],
[
0.5
,
0.5
,
1.0
,
0.8
],
[
0.0
,
0.5
,
.
9
,
1.0
]])
prior_stddevs
=
tf
.
constant
(
3
*
[
4
*
[.
1
]])
priors
=
box_list
.
BoxList
(
prior_means
)
priors
.
add_field
(
'stddev'
,
prior_stddevs
)
box_corners
=
[[
0.0
,
0.0
,
0.5
,
0.5
],
[
0.5
,
0.5
,
0.9
,
0.9
]]
boxes
=
box_list
.
BoxList
(
tf
.
constant
(
box_corners
))
def
graph_fn
(
anchor_means
,
anchor_stddevs
,
groundtruth_box_corners
):
similarity_calc
=
region_similarity_calculator
.
IouSimilarity
()
matcher
=
argmax_matcher
.
ArgMaxMatcher
(
matched_threshold
=
0.5
,
unmatched_threshold
=
0.3
)
box_coder
=
mean_stddev_box_coder
.
MeanStddevBoxCoder
()
target_assigner
=
targetassigner
.
TargetAssigner
(
similarity_calc
,
matcher
,
box_coder
,
unmatched_cls_target
=
None
)
anchors_boxlist
=
box_list
.
BoxList
(
anchor_means
)
anchors_boxlist
.
add_field
(
'stddev'
,
anchor_stddevs
)
groundtruth_boxlist
=
box_list
.
BoxList
(
groundtruth_box_corners
)
result
=
target_assigner
.
assign
(
anchors_boxlist
,
groundtruth_boxlist
)
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
_
)
=
result
return
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
)
anchor_means
=
np
.
array
([[
0.0
,
0.0
,
0.5
,
0.5
],
[
0.5
,
0.5
,
1.0
,
0.8
],
[
0.0
,
0.5
,
.
9
,
1.0
]],
dtype
=
np
.
float32
)
anchor_stddevs
=
np
.
array
(
3
*
[
4
*
[.
1
]],
dtype
=
np
.
float32
)
groundtruth_box_corners
=
np
.
array
([[
0.0
,
0.0
,
0.5
,
0.5
],
[
0.5
,
0.5
,
0.9
,
0.9
]],
dtype
=
np
.
float32
)
exp_cls_targets
=
[[
1
],
[
1
],
[
0
]]
exp_cls_weights
=
[
1
,
1
,
0
]
exp_reg_targets
=
[[
0
,
0
,
0
,
0
],
[
0
,
0
,
-
1
,
1
],
[
0
,
0
,
0
,
0
]]
exp_reg_weights
=
[
1
,
1
,
0
]
exp_matching_anchors
=
[
0
,
1
]
result
=
target_assigner
.
assign
(
priors
,
boxes
)
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
match
)
=
result
with
self
.
test_session
()
as
sess
:
(
cls_targets_out
,
cls_weights_out
,
reg_targets_out
,
reg_weights_out
,
matching_anchors_out
)
=
sess
.
run
(
[
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
match
.
matched_column_indices
()])
self
.
assertAllClose
(
cls_targets_out
,
exp_cls_targets
)
self
.
assertAllClose
(
cls_weights_out
,
exp_cls_weights
)
self
.
assertAllClose
(
reg_targets_out
,
exp_reg_targets
)
self
.
assertAllClose
(
reg_weights_out
,
exp_reg_weights
)
self
.
assertAllClose
(
matching_anchors_out
,
exp_matching_anchors
)
self
.
assertEquals
(
cls_targets_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
cls_weights_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
reg_targets_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
reg_weights_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
matching_anchors_out
.
dtype
,
np
.
int32
)
(
cls_targets_out
,
cls_weights_out
,
reg_targets_out
,
reg_weights_out
)
=
self
.
execute
(
graph_fn
,
[
anchor_means
,
anchor_stddevs
,
groundtruth_box_corners
])
self
.
assertAllClose
(
cls_targets_out
,
exp_cls_targets
)
self
.
assertAllClose
(
cls_weights_out
,
exp_cls_weights
)
self
.
assertAllClose
(
reg_targets_out
,
exp_reg_targets
)
self
.
assertAllClose
(
reg_weights_out
,
exp_reg_weights
)
self
.
assertEquals
(
cls_targets_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
cls_weights_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
reg_targets_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
reg_weights_out
.
dtype
,
np
.
float32
)
def test_assign_agnostic_with_keypoints(self):
  """Class-agnostic assignment with keypoint regression targets.

  Uses a KeypointBoxCoder so each matched anchor's regression target holds
  the encoded box corners followed by 6 encoded keypoints (16 values total).
  """

  def graph_fn(anchor_means, groundtruth_box_corners, groundtruth_keypoints):
    similarity_calc = region_similarity_calculator.IouSimilarity()
    matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
                                           unmatched_threshold=0.5)
    box_coder = keypoint_box_coder.KeypointBoxCoder(
        num_keypoints=6, scale_factors=[10.0, 10.0, 5.0, 5.0])
    target_assigner = targetassigner.TargetAssigner(
        similarity_calc, matcher, box_coder, unmatched_cls_target=None)
    anchors_boxlist = box_list.BoxList(anchor_means)
    groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
    groundtruth_boxlist.add_field(fields.BoxListFields.keypoints,
                                  groundtruth_keypoints)
    result = target_assigner.assign(anchors_boxlist, groundtruth_boxlist)
    (cls_targets, cls_weights, reg_targets, reg_weights, _) = result
    return (cls_targets, cls_weights, reg_targets, reg_weights)

  anchor_means = np.array([[0.0, 0.0, 0.5, 0.5],
                           [0.5, 0.5, 1.0, 1.0],
                           [0.0, 0.5, .9, 1.0]], dtype=np.float32)
  groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5],
                                      [0.45, 0.45, 0.95, 0.95]],
                                     dtype=np.float32)
  groundtruth_keypoints = np.array(
      [[[0.1, 0.2], [0.1, 0.3], [0.2, 0.2], [0.2, 0.2], [0.1, 0.1],
        [0.9, 0]],
       [[0, 0.3], [0.2, 0.4], [0.5, 0.6], [0, 0.6], [0.8, 0.2],
        [0.2, 0.4]]],
      dtype=np.float32)
  # Anchors 0 and 1 match the two groundtruth boxes; anchor 2 is unmatched
  # (treated as background, hence classification target 0 with weight 1).
  exp_cls_targets = [[1], [1], [0]]
  exp_cls_weights = [1, 1, 1]
  exp_reg_targets = [[0, 0, 0, 0, -3, -1, -3, 1, -1, -1, -1, -1, -3, -3,
                      13, -5],
                     [-1, -1, 0, 0, -15, -9, -11, -7, -5, -3, -15, -3, 1,
                      -11, -11, -7],
                     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
  exp_reg_weights = [1, 1, 0]
  (cls_targets_out, cls_weights_out, reg_targets_out,
   reg_weights_out) = self.execute(graph_fn, [anchor_means,
                                              groundtruth_box_corners,
                                              groundtruth_keypoints])
  self.assertAllClose(cls_targets_out, exp_cls_targets)
  self.assertAllClose(cls_weights_out, exp_cls_weights)
  self.assertAllClose(reg_targets_out, exp_reg_targets)
  self.assertAllClose(reg_weights_out, exp_reg_weights)
  # assertEqual replaces the deprecated unittest alias assertEquals.
  self.assertEqual(cls_targets_out.dtype, np.float32)
  self.assertEqual(cls_weights_out.dtype, np.float32)
  self.assertEqual(reg_targets_out.dtype, np.float32)
  self.assertEqual(reg_weights_out.dtype, np.float32)
def test_assign_class_agnostic_with_keypoints_and_ignored_matches(self):
  # Note: test is very similar to test_assign_agnostic_with_keypoints. The
  # third anchor overlaps the second groundtruth box with an IOU of roughly
  # 0.4, which lies between the unmatched threshold (0.3) and the matched
  # threshold (0.5). The classification targets are therefore still
  # [1, 1, 0], but unlike above the third anchor is *ignored* rather than
  # treated as a negative, so the classification weights are [1, 1, 0].
  def graph_fn(anchor_means, groundtruth_box_corners, groundtruth_keypoints):
    similarity_calc = region_similarity_calculator.IouSimilarity()
    # BUG FIX: unmatched_threshold was 0.5 (identical to the previous test),
    # which leaves no "ignore" band between the thresholds — contradicting
    # both the test name and the comment above. 0.3 restores the band.
    matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
                                           unmatched_threshold=0.3)
    box_coder = keypoint_box_coder.KeypointBoxCoder(
        num_keypoints=6, scale_factors=[10.0, 10.0, 5.0, 5.0])
    target_assigner = targetassigner.TargetAssigner(
        similarity_calc, matcher, box_coder, unmatched_cls_target=None)
    anchors_boxlist = box_list.BoxList(anchor_means)
    groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
    groundtruth_boxlist.add_field(fields.BoxListFields.keypoints,
                                  groundtruth_keypoints)
    result = target_assigner.assign(anchors_boxlist, groundtruth_boxlist)
    (cls_targets, cls_weights, reg_targets, reg_weights, _) = result
    return (cls_targets, cls_weights, reg_targets, reg_weights)

  anchor_means = np.array([[0.0, 0.0, 0.5, 0.5],
                           [0.5, 0.5, 1.0, 1.0],
                           [0.0, 0.5, .9, 1.0]], dtype=np.float32)
  groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5],
                                      [0.45, 0.45, 0.95, 0.95]],
                                     dtype=np.float32)
  groundtruth_keypoints = np.array(
      [[[0.1, 0.2], [0.1, 0.3], [0.2, 0.2], [0.2, 0.2], [0.1, 0.1],
        [0.9, 0]],
       [[0, 0.3], [0.2, 0.4], [0.5, 0.6], [0, 0.6], [0.8, 0.2],
        [0.2, 0.4]]],
      dtype=np.float32)
  exp_cls_targets = [[1], [1], [0]]
  # BUG FIX: was [1, 1, 1]; an ignored match must carry zero classification
  # weight, per the comment at the top of this test.
  exp_cls_weights = [1, 1, 0]
  exp_reg_targets = [[0, 0, 0, 0, -3, -1, -3, 1, -1, -1, -1, -1, -3, -3,
                      13, -5],
                     [-1, -1, 0, 0, -15, -9, -11, -7, -5, -3, -15, -3, 1,
                      -11, -11, -7],
                     [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
  exp_reg_weights = [1, 1, 0]
  (cls_targets_out, cls_weights_out, reg_targets_out,
   reg_weights_out) = self.execute(graph_fn, [anchor_means,
                                              groundtruth_box_corners,
                                              groundtruth_keypoints])
  self.assertAllClose(cls_targets_out, exp_cls_targets)
  self.assertAllClose(cls_weights_out, exp_cls_weights)
  self.assertAllClose(reg_targets_out, exp_reg_targets)
  self.assertAllClose(reg_weights_out, exp_reg_weights)
  # assertEqual replaces the deprecated unittest alias assertEquals.
  self.assertEqual(cls_targets_out.dtype, np.float32)
  self.assertEqual(cls_weights_out.dtype, np.float32)
  self.assertEqual(reg_targets_out.dtype, np.float32)
  self.assertEqual(reg_weights_out.dtype, np.float32)
def
test_assign_multiclass
(
self
):
similarity_calc
=
region_similarity_calculator
.
NegSqDistSimilarity
()
matcher
=
bipartite_matcher
.
GreedyBipartiteMatcher
()
box_coder
=
mean_stddev_box_coder
.
MeanStddevBoxCoder
()
unmatched_cls_target
=
tf
.
constant
([
1
,
0
,
0
,
0
,
0
,
0
,
0
],
tf
.
float32
)
target_assigner
=
targetassigner
.
TargetAssigner
(
similarity_calc
,
matcher
,
box_coder
,
unmatched_cls_target
=
unmatched_cls_target
)
prior_means
=
tf
.
constant
([[
0.0
,
0.0
,
0.5
,
0.5
],
[
0.5
,
0.5
,
1.0
,
0.8
],
[
0
,
0.5
,
.
5
,
1.0
],
[.
75
,
0
,
1.0
,
.
25
]])
prior_stddevs
=
tf
.
constant
(
4
*
[
4
*
[.
1
]])
priors
=
box_list
.
BoxList
(
prior_means
)
priors
.
add_field
(
'stddev'
,
prior_stddevs
)
box_corners
=
[[
0.0
,
0.0
,
0.5
,
0.5
],
[
0.5
,
0.5
,
0.9
,
0.9
],
[.
75
,
0
,
.
95
,
.
27
]]
boxes
=
box_list
.
BoxList
(
tf
.
constant
(
box_corners
))
groundtruth_labels
=
tf
.
constant
([[
0
,
1
,
0
,
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
,
0
,
1
,
0
],
[
0
,
0
,
0
,
1
,
0
,
0
,
0
]],
tf
.
float32
)
def
graph_fn
(
anchor_means
,
anchor_stddevs
,
groundtruth_box_corners
,
groundtruth_labels
):
similarity_calc
=
region_similarity_calculator
.
IouSimilarity
()
matcher
=
argmax_matcher
.
ArgMaxMatcher
(
matched_threshold
=
0.5
,
unmatched_threshold
=
0.5
)
box_coder
=
mean_stddev_box_coder
.
MeanStddevBoxCoder
()
unmatched_cls_target
=
tf
.
constant
([
1
,
0
,
0
,
0
,
0
,
0
,
0
],
tf
.
float32
)
target_assigner
=
targetassigner
.
TargetAssigner
(
similarity_calc
,
matcher
,
box_coder
,
unmatched_cls_target
=
unmatched_cls_target
)
anchors_boxlist
=
box_list
.
BoxList
(
anchor_means
)
anchors_boxlist
.
add_field
(
'stddev'
,
anchor_stddevs
)
groundtruth_boxlist
=
box_list
.
BoxList
(
groundtruth_box_corners
)
result
=
target_assigner
.
assign
(
anchors_boxlist
,
groundtruth_boxlist
,
groundtruth_labels
)
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
_
)
=
result
return
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
)
anchor_means
=
np
.
array
([[
0.0
,
0.0
,
0.5
,
0.5
],
[
0.5
,
0.5
,
1.0
,
0.8
],
[
0
,
0.5
,
.
5
,
1.0
],
[.
75
,
0
,
1.0
,
.
25
]],
dtype
=
np
.
float32
)
anchor_stddevs
=
np
.
array
(
4
*
[
4
*
[.
1
]],
dtype
=
np
.
float32
)
groundtruth_box_corners
=
np
.
array
([[
0.0
,
0.0
,
0.5
,
0.5
],
[
0.5
,
0.5
,
0.9
,
0.9
],
[.
75
,
0
,
.
95
,
.
27
]],
dtype
=
np
.
float32
)
groundtruth_labels
=
np
.
array
([[
0
,
1
,
0
,
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
,
0
,
1
,
0
],
[
0
,
0
,
0
,
1
,
0
,
0
,
0
]],
dtype
=
np
.
float32
)
exp_cls_targets
=
[[
0
,
1
,
0
,
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
,
0
,
1
,
0
],
...
...
@@ -157,88 +260,98 @@ class TargetAssignerTest(tf.test.TestCase):
[
0
,
0
,
0
,
0
],
[
0
,
0
,
-
.
5
,
.
2
]]
exp_reg_weights
=
[
1
,
1
,
0
,
1
]
exp_matching_anchors
=
[
0
,
1
,
3
]
result
=
target_assigner
.
assign
(
priors
,
boxes
,
groundtruth_labels
,
num_valid_rows
=
3
)
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
match
)
=
result
with
self
.
test_session
()
as
sess
:
(
cls_targets_out
,
cls_weights_out
,
reg_targets_out
,
reg_weights_out
,
matching_anchors_out
)
=
sess
.
run
(
[
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
match
.
matched_column_indices
()])
self
.
assertAllClose
(
cls_targets_out
,
exp_cls_targets
)
self
.
assertAllClose
(
cls_weights_out
,
exp_cls_weights
)
self
.
assertAllClose
(
reg_targets_out
,
exp_reg_targets
)
self
.
assertAllClose
(
reg_weights_out
,
exp_reg_weights
)
self
.
assertAllClose
(
matching_anchors_out
,
exp_matching_anchors
)
self
.
assertEquals
(
cls_targets_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
cls_weights_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
reg_targets_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
reg_weights_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
matching_anchors_out
.
dtype
,
np
.
int32
)
def test_assign_multiclass_unequal_class_weights(self):
  """Checks that negative_class_weight=0.5 is applied to unmatched anchors."""
  similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
  matcher = bipartite_matcher.GreedyBipartiteMatcher()
  box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
  unmatched_cls_target = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32)
  target_assigner = targetassigner.TargetAssigner(
      similarity_calc, matcher, box_coder,
      positive_class_weight=1.0,
      negative_class_weight=0.5,
      unmatched_cls_target=unmatched_cls_target)

  # Four anchors (priors) with uniform coder stddevs.
  prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
                             [0.5, 0.5, 1.0, 0.8],
                             [0, 0.5, .5, 1.0],
                             [.75, 0, 1.0, .25]])
  prior_stddevs = tf.constant(4 * [4 * [.1]])
  priors = box_list.BoxList(prior_means)
  priors.add_field('stddev', prior_stddevs)

  # Three groundtruth boxes with one-hot class labels.
  box_corners = [[0.0, 0.0, 0.5, 0.5],
                 [0.5, 0.5, 0.9, 0.9],
                 [.75, 0, .95, .27]]
  boxes = box_list.BoxList(tf.constant(box_corners))
  groundtruth_labels = tf.constant([[0, 1, 0, 0, 0, 0, 0],
                                    [0, 0, 0, 0, 0, 1, 0],
                                    [0, 0, 0, 1, 0, 0, 0]], tf.float32)

  # Anchor 2 is the only unmatched one, so it alone gets the 0.5 weight.
  exp_cls_weights = [1, 1, .5, 1]
  result = target_assigner.assign(priors, boxes, groundtruth_labels,
                                  num_valid_rows=3)
  (_, cls_weights, _, _, _) = result
  with self.test_session() as sess:
    cls_weights_out = sess.run(cls_weights)
    self.assertAllClose(cls_weights_out, exp_cls_weights)
(
cls_targets_out
,
cls_weights_out
,
reg_targets_out
,
reg_weights_out
)
=
self
.
execute
(
graph_fn
,
[
anchor_means
,
anchor_stddevs
,
groundtruth_box_corners
,
groundtruth_labels
])
self
.
assertAllClose
(
cls_targets_out
,
exp_cls_targets
)
self
.
assertAllClose
(
cls_weights_out
,
exp_cls_weights
)
self
.
assertAllClose
(
reg_targets_out
,
exp_reg_targets
)
self
.
assertAllClose
(
reg_weights_out
,
exp_reg_weights
)
self
.
assertEquals
(
cls_targets_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
cls_weights_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
reg_targets_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
reg_weights_out
.
dtype
,
np
.
float32
)
def test_assign_multiclass_with_groundtruth_weights(self):
  """Checks per-groundtruth weights propagate into cls/reg weights."""

  def graph_fn(anchor_means, anchor_stddevs, groundtruth_box_corners,
               groundtruth_labels, groundtruth_weights):
    similarity_calc = region_similarity_calculator.IouSimilarity()
    matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
                                           unmatched_threshold=0.5)
    box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
    unmatched_cls_target = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32)
    target_assigner = targetassigner.TargetAssigner(
        similarity_calc, matcher, box_coder,
        unmatched_cls_target=unmatched_cls_target)
    anchors_boxlist = box_list.BoxList(anchor_means)
    anchors_boxlist.add_field('stddev', anchor_stddevs)
    groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
    result = target_assigner.assign(anchors_boxlist, groundtruth_boxlist,
                                    groundtruth_labels, groundtruth_weights)
    (_, cls_weights, _, reg_weights, _) = result
    return (cls_weights, reg_weights)

  anchor_means = np.array([[0.0, 0.0, 0.5, 0.5],
                           [0.5, 0.5, 1.0, 0.8],
                           [0, 0.5, .5, 1.0],
                           [.75, 0, 1.0, .25]], dtype=np.float32)
  anchor_stddevs = np.array(4 * [4 * [.1]], dtype=np.float32)
  groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5],
                                      [0.5, 0.5, 0.9, 0.9],
                                      [.75, 0, .95, .27]], dtype=np.float32)
  groundtruth_labels = np.array([[0, 1, 0, 0, 0, 0, 0],
                                 [0, 0, 0, 0, 0, 1, 0],
                                 [0, 0, 0, 1, 0, 0, 0]], dtype=np.float32)
  groundtruth_weights = np.array([0.3, 0., 0.5], dtype=np.float32)

  # Matched anchors inherit their groundtruth box's weight; the unmatched
  # (background) anchor gets classification weight 1 but regression weight 0.
  exp_cls_weights = [0.3, 0., 1, 0.5]
  exp_reg_weights = [0.3, 0., 0., 0.5]
  (cls_weights_out, reg_weights_out) = self.execute(graph_fn, [
      anchor_means, anchor_stddevs, groundtruth_box_corners,
      groundtruth_labels, groundtruth_weights])
  self.assertAllClose(cls_weights_out, exp_cls_weights)
  self.assertAllClose(reg_weights_out, exp_reg_weights)
def
test_assign_multidimensional_class_targets
(
self
):
similarity_calc
=
region_similarity_calculator
.
NegSqDistSimilarity
()
matcher
=
bipartite_matcher
.
GreedyBipartiteMatcher
()
box_coder
=
mean_stddev_box_coder
.
MeanStddevBoxCoder
()
unmatched_cls_target
=
tf
.
constant
([[
0
,
0
],
[
0
,
0
]],
tf
.
float32
)
target_assigner
=
targetassigner
.
TargetAssigner
(
similarity_calc
,
matcher
,
box_coder
,
unmatched_cls_target
=
unmatched_cls_target
)
prior_means
=
tf
.
constant
([[
0.0
,
0.0
,
0.5
,
0.5
],
[
0.5
,
0.5
,
1.0
,
0.8
],
[
0
,
0.5
,
.
5
,
1.0
],
[.
75
,
0
,
1.0
,
.
25
]])
prior_stddevs
=
tf
.
constant
(
4
*
[
4
*
[.
1
]])
priors
=
box_list
.
BoxList
(
prior_means
)
priors
.
add_field
(
'stddev'
,
prior_stddevs
)
box_corners
=
[[
0.0
,
0.0
,
0.5
,
0.5
],
[
0.5
,
0.5
,
0.9
,
0.9
],
[.
75
,
0
,
.
95
,
.
27
]]
boxes
=
box_list
.
BoxList
(
tf
.
constant
(
box_corners
))
groundtruth_labels
=
tf
.
constant
([[[
0
,
1
],
[
1
,
0
]],
[[
1
,
0
],
[
0
,
1
]],
[[
0
,
1
],
[
1
,
.
5
]]],
tf
.
float32
)
def
graph_fn
(
anchor_means
,
anchor_stddevs
,
groundtruth_box_corners
,
groundtruth_labels
):
similarity_calc
=
region_similarity_calculator
.
IouSimilarity
()
matcher
=
argmax_matcher
.
ArgMaxMatcher
(
matched_threshold
=
0.5
,
unmatched_threshold
=
0.5
)
box_coder
=
mean_stddev_box_coder
.
MeanStddevBoxCoder
()
unmatched_cls_target
=
tf
.
constant
([[
0
,
0
],
[
0
,
0
]],
tf
.
float32
)
target_assigner
=
targetassigner
.
TargetAssigner
(
similarity_calc
,
matcher
,
box_coder
,
unmatched_cls_target
=
unmatched_cls_target
)
anchors_boxlist
=
box_list
.
BoxList
(
anchor_means
)
anchors_boxlist
.
add_field
(
'stddev'
,
anchor_stddevs
)
groundtruth_boxlist
=
box_list
.
BoxList
(
groundtruth_box_corners
)
result
=
target_assigner
.
assign
(
anchors_boxlist
,
groundtruth_boxlist
,
groundtruth_labels
)
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
_
)
=
result
return
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
)
anchor_means
=
np
.
array
([[
0.0
,
0.0
,
0.5
,
0.5
],
[
0.5
,
0.5
,
1.0
,
0.8
],
[
0
,
0.5
,
.
5
,
1.0
],
[.
75
,
0
,
1.0
,
.
25
]],
dtype
=
np
.
float32
)
anchor_stddevs
=
np
.
array
(
4
*
[
4
*
[.
1
]],
dtype
=
np
.
float32
)
groundtruth_box_corners
=
np
.
array
([[
0.0
,
0.0
,
0.5
,
0.5
],
[
0.5
,
0.5
,
0.9
,
0.9
],
[.
75
,
0
,
.
95
,
.
27
]],
dtype
=
np
.
float32
)
groundtruth_labels
=
np
.
array
([[[
0
,
1
],
[
1
,
0
]],
[[
1
,
0
],
[
0
,
1
]],
[[
0
,
1
],
[
1
,
.
5
]]],
np
.
float32
)
exp_cls_targets
=
[[[
0
,
1
],
[
1
,
0
]],
[[
1
,
0
],
[
0
,
1
]],
...
...
@@ -250,52 +363,46 @@ class TargetAssignerTest(tf.test.TestCase):
[
0
,
0
,
0
,
0
],
[
0
,
0
,
-
.
5
,
.
2
]]
exp_reg_weights
=
[
1
,
1
,
0
,
1
]
exp_matching_anchors
=
[
0
,
1
,
3
]
result
=
target_assigner
.
assign
(
priors
,
boxes
,
groundtruth_labels
,
num_valid_rows
=
3
)
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
match
)
=
result
with
self
.
test_session
()
as
sess
:
(
cls_targets_out
,
cls_weights_out
,
reg_targets_out
,
reg_weights_out
,
matching_anchors_out
)
=
sess
.
run
(
[
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
match
.
matched_column_indices
()])
self
.
assertAllClose
(
cls_targets_out
,
exp_cls_targets
)
self
.
assertAllClose
(
cls_weights_out
,
exp_cls_weights
)
self
.
assertAllClose
(
reg_targets_out
,
exp_reg_targets
)
self
.
assertAllClose
(
reg_weights_out
,
exp_reg_weights
)
self
.
assertAllClose
(
matching_anchors_out
,
exp_matching_anchors
)
self
.
assertEquals
(
cls_targets_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
cls_weights_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
reg_targets_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
reg_weights_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
matching_anchors_out
.
dtype
,
np
.
int32
)
(
cls_targets_out
,
cls_weights_out
,
reg_targets_out
,
reg_weights_out
)
=
self
.
execute
(
graph_fn
,
[
anchor_means
,
anchor_stddevs
,
groundtruth_box_corners
,
groundtruth_labels
])
self
.
assertAllClose
(
cls_targets_out
,
exp_cls_targets
)
self
.
assertAllClose
(
cls_weights_out
,
exp_cls_weights
)
self
.
assertAllClose
(
reg_targets_out
,
exp_reg_targets
)
self
.
assertAllClose
(
reg_weights_out
,
exp_reg_weights
)
self
.
assertEquals
(
cls_targets_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
cls_weights_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
reg_targets_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
reg_weights_out
.
dtype
,
np
.
float32
)
def
test_assign_empty_groundtruth
(
self
):
similarity_calc
=
region_similarity_calculator
.
NegSqDistSimilarity
()
matcher
=
bipartite_matcher
.
GreedyBipartiteMatcher
()
box_coder
=
mean_stddev_box_coder
.
MeanStddevBoxCoder
()
unmatched_cls_target
=
tf
.
constant
([
0
,
0
,
0
],
tf
.
float32
)
target_assigner
=
targetassigner
.
TargetAssigner
(
similarity_calc
,
matcher
,
box_coder
,
unmatched_cls_target
=
unmatched_cls_target
)
prior_means
=
tf
.
constant
([[
0.0
,
0.0
,
0.5
,
0.5
],
[
0.5
,
0.5
,
1.0
,
0.8
],
[
0
,
0.5
,
.
5
,
1.0
],
[.
75
,
0
,
1.0
,
.
25
]])
prior_stddevs
=
tf
.
constant
(
4
*
[
4
*
[.
1
]])
priors
=
box_list
.
BoxList
(
prior_means
)
priors
.
add_field
(
'stddev'
,
prior_stddevs
)
box_corners_expanded
=
tf
.
constant
([[
0.0
,
0.0
,
0.0
,
0.0
]])
box_corners
=
tf
.
slice
(
box_corners_expanded
,
[
0
,
0
],
[
0
,
4
])
boxes
=
box_list
.
BoxList
(
box_corners
)
groundtruth_labels_expanded
=
tf
.
constant
([[
0
,
0
,
0
]],
tf
.
float32
)
groundtruth_labels
=
tf
.
slice
(
groundtruth_labels_expanded
,
[
0
,
0
],
[
0
,
3
])
def
graph_fn
(
anchor_means
,
anchor_stddevs
,
groundtruth_box_corners
,
groundtruth_labels
):
similarity_calc
=
region_similarity_calculator
.
IouSimilarity
()
matcher
=
argmax_matcher
.
ArgMaxMatcher
(
matched_threshold
=
0.5
,
unmatched_threshold
=
0.5
)
box_coder
=
mean_stddev_box_coder
.
MeanStddevBoxCoder
()
unmatched_cls_target
=
tf
.
constant
([
0
,
0
,
0
],
tf
.
float32
)
anchors_boxlist
=
box_list
.
BoxList
(
anchor_means
)
anchors_boxlist
.
add_field
(
'stddev'
,
anchor_stddevs
)
groundtruth_boxlist
=
box_list
.
BoxList
(
groundtruth_box_corners
)
target_assigner
=
targetassigner
.
TargetAssigner
(
similarity_calc
,
matcher
,
box_coder
,
unmatched_cls_target
=
unmatched_cls_target
)
result
=
target_assigner
.
assign
(
anchors_boxlist
,
groundtruth_boxlist
,
groundtruth_labels
)
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
_
)
=
result
return
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
)
groundtruth_box_corners
=
np
.
zeros
((
0
,
4
),
dtype
=
np
.
float32
)
groundtruth_labels
=
np
.
zeros
((
0
,
3
),
dtype
=
np
.
float32
)
anchor_means
=
np
.
array
([[
0.0
,
0.0
,
0.5
,
0.5
],
[
0.5
,
0.5
,
1.0
,
0.8
],
[
0
,
0.5
,
.
5
,
1.0
],
[.
75
,
0
,
1.0
,
.
25
]],
dtype
=
np
.
float32
)
anchor_stddevs
=
np
.
array
(
4
*
[
4
*
[.
1
]],
dtype
=
np
.
float32
)
exp_cls_targets
=
[[
0
,
0
,
0
],
[
0
,
0
,
0
],
[
0
,
0
,
0
],
...
...
@@ -306,26 +413,18 @@ class TargetAssignerTest(tf.test.TestCase):
[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
]]
exp_reg_weights
=
[
0
,
0
,
0
,
0
]
exp_matching_anchors
=
[]
result
=
target_assigner
.
assign
(
priors
,
boxes
,
groundtruth_labels
)
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
match
)
=
result
with
self
.
test_session
()
as
sess
:
(
cls_targets_out
,
cls_weights_out
,
reg_targets_out
,
reg_weights_out
,
matching_anchors_out
)
=
sess
.
run
(
[
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
match
.
matched_column_indices
()])
self
.
assertAllClose
(
cls_targets_out
,
exp_cls_targets
)
self
.
assertAllClose
(
cls_weights_out
,
exp_cls_weights
)
self
.
assertAllClose
(
reg_targets_out
,
exp_reg_targets
)
self
.
assertAllClose
(
reg_weights_out
,
exp_reg_weights
)
self
.
assertAllClose
(
matching_anchors_out
,
exp_matching_anchors
)
self
.
assertEquals
(
cls_targets_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
cls_weights_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
reg_targets_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
reg_weights_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
matching_anchors_out
.
dtype
,
np
.
int32
)
(
cls_targets_out
,
cls_weights_out
,
reg_targets_out
,
reg_weights_out
)
=
self
.
execute
(
graph_fn
,
[
anchor_means
,
anchor_stddevs
,
groundtruth_box_corners
,
groundtruth_labels
])
self
.
assertAllClose
(
cls_targets_out
,
exp_cls_targets
)
self
.
assertAllClose
(
cls_weights_out
,
exp_cls_weights
)
self
.
assertAllClose
(
reg_targets_out
,
exp_reg_targets
)
self
.
assertAllClose
(
reg_weights_out
,
exp_reg_weights
)
self
.
assertEquals
(
cls_targets_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
cls_weights_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
reg_targets_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
reg_weights_out
.
dtype
,
np
.
float32
)
def
test_raises_error_on_incompatible_groundtruth_boxes_and_labels
(
self
):
similarity_calc
=
region_similarity_calculator
.
NegSqDistSimilarity
()
...
...
@@ -353,14 +452,9 @@ class TargetAssignerTest(tf.test.TestCase):
groundtruth_labels
=
tf
.
constant
([[
0
,
1
,
0
,
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
,
0
,
1
,
0
],
[
0
,
0
,
0
,
1
,
0
,
0
,
0
]],
tf
.
float32
)
result
=
target_assigner
.
assign
(
priors
,
boxes
,
groundtruth_labels
,
num_valid_rows
=
3
)
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
_
)
=
result
with
self
.
test_session
()
as
sess
:
with
self
.
assertRaisesWithPredicateMatch
(
tf
.
errors
.
InvalidArgumentError
,
'Groundtruth boxes and labels have incompatible shapes!'
):
sess
.
run
([
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
])
with
self
.
assertRaisesRegexp
(
ValueError
,
'Unequal shapes'
):
target_assigner
.
assign
(
priors
,
boxes
,
groundtruth_labels
,
num_valid_rows
=
3
)
def
test_raises_error_on_invalid_groundtruth_labels
(
self
):
similarity_calc
=
region_similarity_calculator
.
NegSqDistSimilarity
()
...
...
@@ -380,7 +474,6 @@ class TargetAssignerTest(tf.test.TestCase):
[
0.5
,
0.5
,
0.9
,
0.9
],
[.
75
,
0
,
.
95
,
.
27
]]
boxes
=
box_list
.
BoxList
(
tf
.
constant
(
box_corners
))
groundtruth_labels
=
tf
.
constant
([[[
0
,
1
],
[
1
,
0
]]],
tf
.
float32
)
with
self
.
assertRaises
(
ValueError
):
...
...
@@ -388,61 +481,66 @@ class TargetAssignerTest(tf.test.TestCase):
num_valid_rows
=
3
)
class
BatchTargetAssignerTest
(
t
f
.
t
est
.
TestCase
):
class
BatchTargetAssignerTest
(
test
_case
.
TestCase
):
def _get_agnostic_target_assigner(self):
  """Returns a class-agnostic TargetAssigner (IoU similarity, argmax match)."""
  # BUG FIX: removed dead NegSqDistSimilarity/GreedyBipartiteMatcher
  # assignments that were immediately shadowed by the ones below.
  similarity_calc = region_similarity_calculator.IouSimilarity()
  matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
                                         unmatched_threshold=0.5)
  box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
  return targetassigner.TargetAssigner(
      similarity_calc, matcher, box_coder,
      positive_class_weight=1.0,
      negative_class_weight=1.0,
      unmatched_cls_target=None)
def _get_multi_class_target_assigner(self, num_classes):
  """Returns a TargetAssigner whose unmatched target is one-hot background.

  Args:
    num_classes: number of foreground classes; the unmatched target is
      [1] + num_classes * [0] (background-first one-hot).
  """
  # BUG FIX: removed dead NegSqDistSimilarity/GreedyBipartiteMatcher
  # assignments that were immediately shadowed by the ones below.
  similarity_calc = region_similarity_calculator.IouSimilarity()
  matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
                                         unmatched_threshold=0.5)
  box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
  unmatched_cls_target = tf.constant([1] + num_classes * [0], tf.float32)
  return targetassigner.TargetAssigner(
      similarity_calc, matcher, box_coder,
      positive_class_weight=1.0,
      negative_class_weight=1.0,
      unmatched_cls_target=unmatched_cls_target)
def _get_multi_dimensional_target_assigner(self, target_dimensions):
  """Returns a TargetAssigner with an all-zero unmatched target tensor.

  Args:
    target_dimensions: shape of the per-anchor classification target; the
      unmatched target is a zero tensor of this shape.
  """
  # BUG FIX: removed dead NegSqDistSimilarity/GreedyBipartiteMatcher
  # assignments that were immediately shadowed by the ones below.
  similarity_calc = region_similarity_calculator.IouSimilarity()
  matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
                                         unmatched_threshold=0.5)
  box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
  unmatched_cls_target = tf.constant(np.zeros(target_dimensions),
                                     tf.float32)
  return targetassigner.TargetAssigner(
      similarity_calc, matcher, box_coder,
      positive_class_weight=1.0,
      negative_class_weight=1.0,
      unmatched_cls_target=unmatched_cls_target)
def
test_batch_assign_targets
(
self
):
box_list1
=
box_list
.
BoxList
(
tf
.
constant
([[
0.
,
0.
,
0.2
,
0.2
]]))
box_list2
=
box_list
.
BoxList
(
tf
.
constant
(
[[
0
,
0.25123152
,
1
,
1
],
[
0.015789
,
0.0985
,
0.55789
,
0.3842
]]
))
gt_box_batch
=
[
box_list1
,
box_list2
]
gt_class_targets
=
[
None
,
None
]
prior_means
=
tf
.
constant
([[
0
,
0
,
.
25
,
.
25
],
[
0
,
.
25
,
1
,
1
],
[
0
,
.
1
,
.
5
,
.
5
],
[.
75
,
.
75
,
1
,
1
]])
prior_stddevs
=
tf
.
constant
([[.
1
,
.
1
,
.
1
,
.
1
],
[.
1
,
.
1
,
.
1
,
.
1
],
[.
1
,
.
1
,
.
1
,
.
1
],
[.
1
,
.
1
,
.
1
,
.
1
]])
priors
=
box_list
.
BoxList
(
prior_means
)
priors
.
add_field
(
'stddev'
,
prior_stddevs
)
def
graph_fn
(
anchor_means
,
anchor_stddevs
,
groundtruth_boxlist1
,
groundtruth_boxlist2
):
box_list1
=
box_list
.
BoxList
(
groundtruth_boxlist1
)
box_list2
=
box_list
.
BoxList
(
groundtruth_boxlist2
)
gt_box_batch
=
[
box_list1
,
box_list2
]
gt_class_targets
=
[
None
,
None
]
anchors_boxlist
=
box_list
.
BoxList
(
anchor_means
)
anchors_boxlist
.
add_field
(
'stddev'
,
anchor_stddevs
)
agnostic_target_assigner
=
self
.
_get_agnostic_target_assigner
()
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
_
)
=
targetassigner
.
batch_assign_targets
(
agnostic_target_assigner
,
anchors_boxlist
,
gt_box_batch
,
gt_class_targets
)
return
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
)
groundtruth_boxlist1
=
np
.
array
([[
0.
,
0.
,
0.2
,
0.2
]],
dtype
=
np
.
float32
)
groundtruth_boxlist2
=
np
.
array
([[
0
,
0.25123152
,
1
,
1
],
[
0.015789
,
0.0985
,
0.55789
,
0.3842
]],
dtype
=
np
.
float32
)
anchor_means
=
np
.
array
([[
0
,
0
,
.
25
,
.
25
],
[
0
,
.
25
,
1
,
1
],
[
0
,
.
1
,
.
5
,
.
5
],
[.
75
,
.
75
,
1
,
1
]],
dtype
=
np
.
float32
)
anchor_stddevs
=
np
.
array
([[.
1
,
.
1
,
.
1
,
.
1
],
[.
1
,
.
1
,
.
1
,
.
1
],
[.
1
,
.
1
,
.
1
,
.
1
],
[.
1
,
.
1
,
.
1
,
.
1
]],
dtype
=
np
.
float32
)
exp_reg_targets
=
[[[
0
,
0
,
-
0.5
,
-
0.5
],
[
0
,
0
,
0
,
0
],
...
...
@@ -458,58 +556,128 @@ class BatchTargetAssignerTest(tf.test.TestCase):
[[
0
],
[
1
],
[
1
],
[
0
]]]
exp_reg_weights
=
[[
1
,
0
,
0
,
0
],
[
0
,
1
,
1
,
0
]]
exp_match_0
=
[
0
]
exp_match_1
=
[
1
,
2
]
agnostic_target_assigner
=
self
.
_get_agnostic_target_assigner
()
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
match_list
)
=
targetassigner
.
batch_assign_targets
(
agnostic_target_assigner
,
priors
,
gt_box_batch
,
gt_class_targets
)
self
.
assertTrue
(
isinstance
(
match_list
,
list
)
and
len
(
match_list
)
==
2
)
with
self
.
test_session
()
as
sess
:
(
cls_targets_out
,
cls_weights_out
,
reg_targets_out
,
reg_weights_out
,
match_out_0
,
match_out_1
)
=
sess
.
run
([
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
]
+
[
match
.
matched_column_indices
()
for
match
in
match_list
])
self
.
assertAllClose
(
cls_targets_out
,
exp_cls_targets
)
self
.
assertAllClose
(
cls_weights_out
,
exp_cls_weights
)
self
.
assertAllClose
(
reg_targets_out
,
exp_reg_targets
)
self
.
assertAllClose
(
reg_weights_out
,
exp_reg_weights
)
self
.
assertAllClose
(
match_out_0
,
exp_match_0
)
self
.
assertAllClose
(
match_out_1
,
exp_match_1
)
def
test_batch_assign_multiclass_targets
(
self
):
box_list1
=
box_list
.
BoxList
(
tf
.
constant
([[
0.
,
0.
,
0.2
,
0.2
]]))
box_list2
=
box_list
.
BoxList
(
tf
.
constant
(
[[
0
,
0.25123152
,
1
,
1
],
[
0.015789
,
0.0985
,
0.55789
,
0.3842
]]
))
gt_box_batch
=
[
box_list1
,
box_list2
]
(
cls_targets_out
,
cls_weights_out
,
reg_targets_out
,
reg_weights_out
)
=
self
.
execute
(
graph_fn
,
[
anchor_means
,
anchor_stddevs
,
groundtruth_boxlist1
,
groundtruth_boxlist2
])
self
.
assertAllClose
(
cls_targets_out
,
exp_cls_targets
)
self
.
assertAllClose
(
cls_weights_out
,
exp_cls_weights
)
self
.
assertAllClose
(
reg_targets_out
,
exp_reg_targets
)
self
.
assertAllClose
(
reg_weights_out
,
exp_reg_weights
)
class_targets1
=
tf
.
constant
([[
0
,
1
,
0
,
0
]],
tf
.
float32
)
class_targets2
=
tf
.
constant
([[
0
,
0
,
0
,
1
],
[
0
,
0
,
1
,
0
]],
tf
.
float32
)
def
test_batch_assign_multiclass_targets
(
self
):
def
graph_fn
(
anchor_means
,
anchor_stddevs
,
groundtruth_boxlist1
,
groundtruth_boxlist2
,
class_targets1
,
class_targets2
):
box_list1
=
box_list
.
BoxList
(
groundtruth_boxlist1
)
box_list2
=
box_list
.
BoxList
(
groundtruth_boxlist2
)
gt_box_batch
=
[
box_list1
,
box_list2
]
gt_class_targets
=
[
class_targets1
,
class_targets2
]
anchors_boxlist
=
box_list
.
BoxList
(
anchor_means
)
anchors_boxlist
.
add_field
(
'stddev'
,
anchor_stddevs
)
multiclass_target_assigner
=
self
.
_get_multi_class_target_assigner
(
num_classes
=
3
)
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
_
)
=
targetassigner
.
batch_assign_targets
(
multiclass_target_assigner
,
anchors_boxlist
,
gt_box_batch
,
gt_class_targets
)
return
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
)
groundtruth_boxlist1
=
np
.
array
([[
0.
,
0.
,
0.2
,
0.2
]],
dtype
=
np
.
float32
)
groundtruth_boxlist2
=
np
.
array
([[
0
,
0.25123152
,
1
,
1
],
[
0.015789
,
0.0985
,
0.55789
,
0.3842
]],
dtype
=
np
.
float32
)
class_targets1
=
np
.
array
([[
0
,
1
,
0
,
0
]],
dtype
=
np
.
float32
)
class_targets2
=
np
.
array
([[
0
,
0
,
0
,
1
],
[
0
,
0
,
1
,
0
]],
dtype
=
np
.
float32
)
anchor_means
=
np
.
array
([[
0
,
0
,
.
25
,
.
25
],
[
0
,
.
25
,
1
,
1
],
[
0
,
.
1
,
.
5
,
.
5
],
[.
75
,
.
75
,
1
,
1
]],
dtype
=
np
.
float32
)
anchor_stddevs
=
np
.
array
([[.
1
,
.
1
,
.
1
,
.
1
],
[.
1
,
.
1
,
.
1
,
.
1
],
[.
1
,
.
1
,
.
1
,
.
1
],
[.
1
,
.
1
,
.
1
,
.
1
]],
dtype
=
np
.
float32
)
gt_class_targets
=
[
class_targets1
,
class_targets2
]
exp_reg_targets
=
[[[
0
,
0
,
-
0.5
,
-
0.5
],
[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
,],
[
0
,
0
,
0
,
0
,],],
[[
0
,
0
,
0
,
0
,],
[
0
,
0.01231521
,
0
,
0
],
[
0.15789001
,
-
0.01500003
,
0.57889998
,
-
1.15799987
],
[
0
,
0
,
0
,
0
]]]
exp_cls_weights
=
[[
1
,
1
,
1
,
1
],
[
1
,
1
,
1
,
1
]]
exp_cls_targets
=
[[[
0
,
1
,
0
,
0
],
[
1
,
0
,
0
,
0
],
[
1
,
0
,
0
,
0
],
[
1
,
0
,
0
,
0
]],
[[
1
,
0
,
0
,
0
],
[
0
,
0
,
0
,
1
],
[
0
,
0
,
1
,
0
],
[
1
,
0
,
0
,
0
]]]
exp_reg_weights
=
[[
1
,
0
,
0
,
0
],
[
0
,
1
,
1
,
0
]]
prior_means
=
tf
.
constant
([[
0
,
0
,
.
25
,
.
25
],
[
0
,
.
25
,
1
,
1
],
[
0
,
.
1
,
.
5
,
.
5
],
[.
75
,
.
75
,
1
,
1
]])
prior_stddevs
=
tf
.
constant
([[.
1
,
.
1
,
.
1
,
.
1
],
[.
1
,
.
1
,
.
1
,
.
1
],
[.
1
,
.
1
,
.
1
,
.
1
],
[.
1
,
.
1
,
.
1
,
.
1
]])
priors
=
box_list
.
BoxList
(
prior_means
)
priors
.
add_field
(
'stddev'
,
prior_stddevs
)
(
cls_targets_out
,
cls_weights_out
,
reg_targets_out
,
reg_weights_out
)
=
self
.
execute
(
graph_fn
,
[
anchor_means
,
anchor_stddevs
,
groundtruth_boxlist1
,
groundtruth_boxlist2
,
class_targets1
,
class_targets2
])
self
.
assertAllClose
(
cls_targets_out
,
exp_cls_targets
)
self
.
assertAllClose
(
cls_weights_out
,
exp_cls_weights
)
self
.
assertAllClose
(
reg_targets_out
,
exp_reg_targets
)
self
.
assertAllClose
(
reg_weights_out
,
exp_reg_weights
)
def
test_batch_assign_multiclass_targets_with_padded_groundtruth
(
self
):
def
graph_fn
(
anchor_means
,
anchor_stddevs
,
groundtruth_boxlist1
,
groundtruth_boxlist2
,
class_targets1
,
class_targets2
,
groundtruth_weights1
,
groundtruth_weights2
):
box_list1
=
box_list
.
BoxList
(
groundtruth_boxlist1
)
box_list2
=
box_list
.
BoxList
(
groundtruth_boxlist2
)
gt_box_batch
=
[
box_list1
,
box_list2
]
gt_class_targets
=
[
class_targets1
,
class_targets2
]
gt_weights
=
[
groundtruth_weights1
,
groundtruth_weights2
]
anchors_boxlist
=
box_list
.
BoxList
(
anchor_means
)
anchors_boxlist
.
add_field
(
'stddev'
,
anchor_stddevs
)
multiclass_target_assigner
=
self
.
_get_multi_class_target_assigner
(
num_classes
=
3
)
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
_
)
=
targetassigner
.
batch_assign_targets
(
multiclass_target_assigner
,
anchors_boxlist
,
gt_box_batch
,
gt_class_targets
,
gt_weights
)
return
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
)
groundtruth_boxlist1
=
np
.
array
([[
0.
,
0.
,
0.2
,
0.2
],
[
0.
,
0.
,
0.
,
0.
]],
dtype
=
np
.
float32
)
groundtruth_weights1
=
np
.
array
([
1
,
0
],
dtype
=
np
.
float32
)
groundtruth_boxlist2
=
np
.
array
([[
0
,
0.25123152
,
1
,
1
],
[
0.015789
,
0.0985
,
0.55789
,
0.3842
],
[
0
,
0
,
0
,
0
]],
dtype
=
np
.
float32
)
groundtruth_weights2
=
np
.
array
([
1
,
1
,
0
],
dtype
=
np
.
float32
)
class_targets1
=
np
.
array
([[
0
,
1
,
0
,
0
],
[
0
,
0
,
0
,
0
]],
dtype
=
np
.
float32
)
class_targets2
=
np
.
array
([[
0
,
0
,
0
,
1
],
[
0
,
0
,
1
,
0
],
[
0
,
0
,
0
,
0
]],
dtype
=
np
.
float32
)
anchor_means
=
np
.
array
([[
0
,
0
,
.
25
,
.
25
],
[
0
,
.
25
,
1
,
1
],
[
0
,
.
1
,
.
5
,
.
5
],
[.
75
,
.
75
,
1
,
1
]],
dtype
=
np
.
float32
)
anchor_stddevs
=
np
.
array
([[.
1
,
.
1
,
.
1
,
.
1
],
[.
1
,
.
1
,
.
1
,
.
1
],
[.
1
,
.
1
,
.
1
,
.
1
],
[.
1
,
.
1
,
.
1
,
.
1
]],
dtype
=
np
.
float32
)
exp_reg_targets
=
[[[
0
,
0
,
-
0.5
,
-
0.5
],
[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
]
],
[[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
,
],
[
0
,
0
,
0
,
0
,],
],
[[
0
,
0
,
0
,
0
,
],
[
0
,
0.01231521
,
0
,
0
],
[
0.15789001
,
-
0.01500003
,
0.57889998
,
-
1.15799987
],
[
0
,
0
,
0
,
0
]]]
...
...
@@ -525,68 +693,70 @@ class BatchTargetAssignerTest(tf.test.TestCase):
[
1
,
0
,
0
,
0
]]]
exp_reg_weights
=
[[
1
,
0
,
0
,
0
],
[
0
,
1
,
1
,
0
]]
exp_match_0
=
[
0
]
exp_match_1
=
[
1
,
2
]
multiclass_target_assigner
=
self
.
_get_multi_class_target_assigner
(
num_classes
=
3
)
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
match_list
)
=
targetassigner
.
batch_assign_targets
(
multiclass_target_assigner
,
priors
,
gt_box_batch
,
gt_class_targets
)
self
.
assertTrue
(
isinstance
(
match_list
,
list
)
and
len
(
match_list
)
==
2
)
with
self
.
test_session
()
as
sess
:
(
cls_targets_out
,
cls_weights_out
,
reg_targets_out
,
reg_weights_out
,
match_out_0
,
match_out_1
)
=
sess
.
run
([
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
]
+
[
match
.
matched_column_indices
()
for
match
in
match_list
])
self
.
assertAllClose
(
cls_targets_out
,
exp_cls_targets
)
self
.
assertAllClose
(
cls_weights_out
,
exp_cls_weights
)
self
.
assertAllClose
(
reg_targets_out
,
exp_reg_targets
)
self
.
assertAllClose
(
reg_weights_out
,
exp_reg_weights
)
self
.
assertAllClose
(
match_out_0
,
exp_match_0
)
self
.
assertAllClose
(
match_out_1
,
exp_match_1
)
(
cls_targets_out
,
cls_weights_out
,
reg_targets_out
,
reg_weights_out
)
=
self
.
execute
(
graph_fn
,
[
anchor_means
,
anchor_stddevs
,
groundtruth_boxlist1
,
groundtruth_boxlist2
,
class_targets1
,
class_targets2
,
groundtruth_weights1
,
groundtruth_weights2
])
self
.
assertAllClose
(
cls_targets_out
,
exp_cls_targets
)
self
.
assertAllClose
(
cls_weights_out
,
exp_cls_weights
)
self
.
assertAllClose
(
reg_targets_out
,
exp_reg_targets
)
self
.
assertAllClose
(
reg_weights_out
,
exp_reg_weights
)
def
test_batch_assign_multidimensional_targets
(
self
):
box_list1
=
box_list
.
BoxList
(
tf
.
constant
([[
0.
,
0.
,
0.2
,
0.2
]]))
box_list2
=
box_list
.
BoxList
(
tf
.
constant
(
[[
0
,
0.25123152
,
1
,
1
],
[
0.015789
,
0.0985
,
0.55789
,
0.3842
]]
))
gt_box_batch
=
[
box_list1
,
box_list2
]
class_targets1
=
tf
.
constant
([[[
0
,
1
,
1
],
[
1
,
1
,
0
]]],
tf
.
float32
)
class_targets2
=
tf
.
constant
([[[
0
,
1
,
1
],
[
1
,
1
,
0
]],
[[
0
,
0
,
1
],
[
0
,
0
,
1
]]],
tf
.
float32
)
gt_class_targets
=
[
class_targets1
,
class_targets2
]
prior_means
=
tf
.
constant
([[
0
,
0
,
.
25
,
.
25
],
[
0
,
.
25
,
1
,
1
],
[
0
,
.
1
,
.
5
,
.
5
],
[.
75
,
.
75
,
1
,
1
]])
prior_stddevs
=
tf
.
constant
([[.
1
,
.
1
,
.
1
,
.
1
],
[.
1
,
.
1
,
.
1
,
.
1
],
[.
1
,
.
1
,
.
1
,
.
1
],
[.
1
,
.
1
,
.
1
,
.
1
]])
priors
=
box_list
.
BoxList
(
prior_means
)
priors
.
add_field
(
'stddev'
,
prior_stddevs
)
def
graph_fn
(
anchor_means
,
anchor_stddevs
,
groundtruth_boxlist1
,
groundtruth_boxlist2
,
class_targets1
,
class_targets2
):
box_list1
=
box_list
.
BoxList
(
groundtruth_boxlist1
)
box_list2
=
box_list
.
BoxList
(
groundtruth_boxlist2
)
gt_box_batch
=
[
box_list1
,
box_list2
]
gt_class_targets
=
[
class_targets1
,
class_targets2
]
anchors_boxlist
=
box_list
.
BoxList
(
anchor_means
)
anchors_boxlist
.
add_field
(
'stddev'
,
anchor_stddevs
)
multiclass_target_assigner
=
self
.
_get_multi_dimensional_target_assigner
(
target_dimensions
=
(
2
,
3
))
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
_
)
=
targetassigner
.
batch_assign_targets
(
multiclass_target_assigner
,
anchors_boxlist
,
gt_box_batch
,
gt_class_targets
)
return
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
)
groundtruth_boxlist1
=
np
.
array
([[
0.
,
0.
,
0.2
,
0.2
]],
dtype
=
np
.
float32
)
groundtruth_boxlist2
=
np
.
array
([[
0
,
0.25123152
,
1
,
1
],
[
0.015789
,
0.0985
,
0.55789
,
0.3842
]],
dtype
=
np
.
float32
)
class_targets1
=
np
.
array
([[
0
,
1
,
0
,
0
]],
dtype
=
np
.
float32
)
class_targets2
=
np
.
array
([[
0
,
0
,
0
,
1
],
[
0
,
0
,
1
,
0
]],
dtype
=
np
.
float32
)
class_targets1
=
np
.
array
([[[
0
,
1
,
1
],
[
1
,
1
,
0
]]],
dtype
=
np
.
float32
)
class_targets2
=
np
.
array
([[[
0
,
1
,
1
],
[
1
,
1
,
0
]],
[[
0
,
0
,
1
],
[
0
,
0
,
1
]]],
dtype
=
np
.
float32
)
anchor_means
=
np
.
array
([[
0
,
0
,
.
25
,
.
25
],
[
0
,
.
25
,
1
,
1
],
[
0
,
.
1
,
.
5
,
.
5
],
[.
75
,
.
75
,
1
,
1
]],
dtype
=
np
.
float32
)
anchor_stddevs
=
np
.
array
([[.
1
,
.
1
,
.
1
,
.
1
],
[.
1
,
.
1
,
.
1
,
.
1
],
[.
1
,
.
1
,
.
1
,
.
1
],
[.
1
,
.
1
,
.
1
,
.
1
]],
dtype
=
np
.
float32
)
exp_reg_targets
=
[[[
0
,
0
,
-
0.5
,
-
0.5
],
[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
]
],
[[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
,
],
[
0
,
0
,
0
,
0
,],
],
[[
0
,
0
,
0
,
0
,
],
[
0
,
0.01231521
,
0
,
0
],
[
0.15789001
,
-
0.01500003
,
0.57889998
,
-
1.15799987
],
[
0
,
0
,
0
,
0
]]]
exp_cls_weights
=
[[
1
,
1
,
1
,
1
],
[
1
,
1
,
1
,
1
]]
exp_cls_targets
=
[[[[
0.
,
1.
,
1.
],
[
1.
,
1.
,
0.
]],
[[
0.
,
0.
,
0.
],
...
...
@@ -605,72 +775,60 @@ class BatchTargetAssignerTest(tf.test.TestCase):
[
0.
,
0.
,
0.
]]]]
exp_reg_weights
=
[[
1
,
0
,
0
,
0
],
[
0
,
1
,
1
,
0
]]
exp_match_0
=
[
0
]
exp_match_1
=
[
1
,
2
]
multiclass_target_assigner
=
self
.
_get_multi_dimensional_target_assigner
(
target_dimensions
=
(
2
,
3
))
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
match_list
)
=
targetassigner
.
batch_assign_targets
(
multiclass_target_assigner
,
priors
,
gt_box_batch
,
gt_class_targets
)
self
.
assertTrue
(
isinstance
(
match_list
,
list
)
and
len
(
match_list
)
==
2
)
with
self
.
test_session
()
as
sess
:
(
cls_targets_out
,
cls_weights_out
,
reg_targets_out
,
reg_weights_out
,
match_out_0
,
match_out_1
)
=
sess
.
run
([
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
]
+
[
match
.
matched_column_indices
()
for
match
in
match_list
])
self
.
assertAllClose
(
cls_targets_out
,
exp_cls_targets
)
self
.
assertAllClose
(
cls_weights_out
,
exp_cls_weights
)
self
.
assertAllClose
(
reg_targets_out
,
exp_reg_targets
)
self
.
assertAllClose
(
reg_weights_out
,
exp_reg_weights
)
self
.
assertAllClose
(
match_out_0
,
exp_match_0
)
self
.
assertAllClose
(
match_out_1
,
exp_match_1
)
(
cls_targets_out
,
cls_weights_out
,
reg_targets_out
,
reg_weights_out
)
=
self
.
execute
(
graph_fn
,
[
anchor_means
,
anchor_stddevs
,
groundtruth_boxlist1
,
groundtruth_boxlist2
,
class_targets1
,
class_targets2
])
self
.
assertAllClose
(
cls_targets_out
,
exp_cls_targets
)
self
.
assertAllClose
(
cls_weights_out
,
exp_cls_weights
)
self
.
assertAllClose
(
reg_targets_out
,
exp_reg_targets
)
self
.
assertAllClose
(
reg_weights_out
,
exp_reg_weights
)
def
test_batch_assign_empty_groundtruth
(
self
):
box_coords_expanded
=
tf
.
zeros
((
1
,
4
),
tf
.
float32
)
box_coords
=
tf
.
slice
(
box_coords_expanded
,
[
0
,
0
],
[
0
,
4
])
box_list1
=
box_list
.
BoxList
(
box_coords
)
gt_box_batch
=
[
box_list1
]
prior_means
=
tf
.
constant
([[
0
,
0
,
.
25
,
.
25
],
[
0
,
.
25
,
1
,
1
]])
prior_stddevs
=
tf
.
constant
([[.
1
,
.
1
,
.
1
,
.
1
],
[.
1
,
.
1
,
.
1
,
.
1
]])
priors
=
box_list
.
BoxList
(
prior_means
)
priors
.
add_field
(
'stddev'
,
prior_stddevs
)
def
graph_fn
(
anchor_means
,
anchor_stddevs
,
groundtruth_box_corners
,
gt_class_targets
):
groundtruth_boxlist
=
box_list
.
BoxList
(
groundtruth_box_corners
)
gt_box_batch
=
[
groundtruth_boxlist
]
gt_class_targets_batch
=
[
gt_class_targets
]
anchors_boxlist
=
box_list
.
BoxList
(
anchor_means
)
anchors_boxlist
.
add_field
(
'stddev'
,
anchor_stddevs
)
multiclass_target_assigner
=
self
.
_get_multi_class_target_assigner
(
num_classes
=
3
)
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
_
)
=
targetassigner
.
batch_assign_targets
(
multiclass_target_assigner
,
anchors_boxlist
,
gt_box_batch
,
gt_class_targets_batch
)
return
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
)
groundtruth_box_corners
=
np
.
zeros
((
0
,
4
),
dtype
=
np
.
float32
)
anchor_means
=
np
.
array
([[
0
,
0
,
.
25
,
.
25
],
[
0
,
.
25
,
1
,
1
]],
dtype
=
np
.
float32
)
anchor_stddevs
=
np
.
array
([[.
1
,
.
1
,
.
1
,
.
1
],
[.
1
,
.
1
,
.
1
,
.
1
]],
dtype
=
np
.
float32
)
exp_reg_targets
=
[[[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
]]]
exp_cls_weights
=
[[
1
,
1
]]
exp_cls_targets
=
[[[
1
,
0
,
0
,
0
],
[
1
,
0
,
0
,
0
]]]
exp_reg_weights
=
[[
0
,
0
]]
exp_match_0
=
[]
num_classes
=
3
pad
=
1
gt_class_targets
=
tf
.
zeros
((
0
,
num_classes
+
pad
))
gt_class_targets_batch
=
[
gt_class_targets
]
multiclass_target_assigner
=
self
.
_get_multi_class_target_assigner
(
num_classes
=
3
)
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
match_list
)
=
targetassigner
.
batch_assign_targets
(
multiclass_target_assigner
,
priors
,
gt_box_batch
,
gt_class_targets_batch
)
self
.
assertTrue
(
isinstance
(
match_list
,
list
)
and
len
(
match_list
)
==
1
)
with
self
.
test_session
()
as
sess
:
(
cls_targets_out
,
cls_weights_out
,
reg_targets_out
,
reg_weights_out
,
match_out_0
)
=
sess
.
run
([
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
]
+
[
match
.
matched_column_indices
()
for
match
in
match_list
])
self
.
assertAllClose
(
cls_targets_out
,
exp_cls_targets
)
self
.
assertAllClose
(
cls_weights_out
,
exp_cls_weights
)
self
.
assertAllClose
(
reg_targets_out
,
exp_reg_targets
)
self
.
assertAllClose
(
reg_weights_out
,
exp_reg_weights
)
self
.
assertAllClose
(
match_out_0
,
exp_match_0
)
gt_class_targets
=
np
.
zeros
((
0
,
num_classes
+
pad
),
dtype
=
np
.
float32
)
(
cls_targets_out
,
cls_weights_out
,
reg_targets_out
,
reg_weights_out
)
=
self
.
execute
(
graph_fn
,
[
anchor_means
,
anchor_stddevs
,
groundtruth_box_corners
,
gt_class_targets
])
self
.
assertAllClose
(
cls_targets_out
,
exp_cls_targets
)
self
.
assertAllClose
(
cls_weights_out
,
exp_cls_weights
)
self
.
assertAllClose
(
reg_targets_out
,
exp_reg_targets
)
self
.
assertAllClose
(
reg_weights_out
,
exp_reg_weights
)
class
CreateTargetAssignerTest
(
tf
.
test
.
TestCase
):
...
...
research/object_detection/data/BUILD
0 → 100644
View file @
fd7b6887
package
(
default_visibility
=
[
"//visibility:public"
],
)
licenses
([
"notice"
])
exports_files
([
"pet_label_map.pbtxt"
,
])
research/object_detection/data_decoders/BUILD
View file @
fd7b6887
...
...
@@ -12,9 +12,10 @@ py_library(
srcs
=
[
"tf_example_decoder.py"
],
deps
=
[
"//tensorflow"
,
"//tensorflow_models/object_detection/core:data_decoder"
,
"//tensorflow_models/object_detection/core:standard_fields"
,
"//tensorflow_models/object_detection/utils:label_map_util"
,
"//tensorflow/models/research/object_detection/core:data_decoder"
,
"//tensorflow/models/research/object_detection/core:standard_fields"
,
"//tensorflow/models/research/object_detection/protos:input_reader_py_pb2"
,
"//tensorflow/models/research/object_detection/utils:label_map_util"
,
],
)
...
...
@@ -24,6 +25,7 @@ py_test(
deps
=
[
":tf_example_decoder"
,
"//tensorflow"
,
"//tensorflow_models/object_detection/core:standard_fields"
,
"//tensorflow/models/research/object_detection/core:standard_fields"
,
"//tensorflow/models/research/object_detection/protos:input_reader_py_pb2"
,
],
)
research/object_detection/data_decoders/tf_example_decoder.py
View file @
fd7b6887
...
...
@@ -22,6 +22,7 @@ import tensorflow as tf
from
object_detection.core
import
data_decoder
from
object_detection.core
import
standard_fields
as
fields
from
object_detection.protos
import
input_reader_pb2
from
object_detection.utils
import
label_map_util
slim_example_decoder
=
tf
.
contrib
.
slim
.
tfexample_decoder
...
...
@@ -32,12 +33,15 @@ class TfExampleDecoder(data_decoder.DataDecoder):
def
__init__
(
self
,
load_instance_masks
=
False
,
instance_mask_type
=
input_reader_pb2
.
NUMERICAL_MASKS
,
label_map_proto_file
=
None
,
use_display_name
=
False
):
"""Constructor sets keys_to_features and items_to_handlers.
Args:
load_instance_masks: whether or not to load and handle instance masks.
instance_mask_type: type of instance masks. Options are provided in
input_reader.proto. This is only used if `load_instance_masks` is True.
label_map_proto_file: a file path to a
object_detection.protos.StringIntLabelMap proto. If provided, then the
mapped IDs of 'image/object/class/text' will take precedence over the
...
...
@@ -46,6 +50,11 @@ class TfExampleDecoder(data_decoder.DataDecoder):
use_display_name: whether or not to use the `display_name` for label
mapping (instead of `name`). Only used if label_map_proto_file is
provided.
Raises:
ValueError: If `instance_mask_type` option is not one of
input_reader_pb2.DEFAULT, input_reader_pb2.NUMERICAL, or
input_reader_pb2.PNG_MASKS.
"""
self
.
keys_to_features
=
{
'image/encoded'
:
...
...
@@ -83,6 +92,8 @@ class TfExampleDecoder(data_decoder.DataDecoder):
tf
.
VarLenFeature
(
tf
.
int64
),
'image/object/group_of'
:
tf
.
VarLenFeature
(
tf
.
int64
),
'image/object/weight'
:
tf
.
VarLenFeature
(
tf
.
float32
),
}
self
.
items_to_handlers
=
{
fields
.
InputDataFields
.
image
:
slim_example_decoder
.
Image
(
...
...
@@ -104,19 +115,47 @@ class TfExampleDecoder(data_decoder.DataDecoder):
fields
.
InputDataFields
.
groundtruth_difficult
:
(
slim_example_decoder
.
Tensor
(
'image/object/difficult'
)),
fields
.
InputDataFields
.
groundtruth_group_of
:
(
slim_example_decoder
.
Tensor
(
'image/object/group_of'
))
slim_example_decoder
.
Tensor
(
'image/object/group_of'
)),
fields
.
InputDataFields
.
groundtruth_weights
:
(
slim_example_decoder
.
Tensor
(
'image/object/weight'
)),
}
if
load_instance_masks
:
self
.
keys_to_features
[
'image/object/mask'
]
=
tf
.
VarLenFeature
(
tf
.
float32
)
self
.
items_to_handlers
[
fields
.
InputDataFields
.
groundtruth_instance_masks
]
=
(
slim_example_decoder
.
ItemHandlerCallback
(
[
'image/object/mask'
,
'image/height'
,
'image/width'
],
self
.
_reshape_instance_masks
))
# TODO: Add label_handler that decodes from 'image/object/class/text'
# primarily after the recent tf.contrib.slim changes make into a release
# supported by cloudml.
label_handler
=
slim_example_decoder
.
Tensor
(
'image/object/class/label'
)
if
instance_mask_type
in
(
input_reader_pb2
.
DEFAULT
,
input_reader_pb2
.
NUMERICAL_MASKS
):
self
.
keys_to_features
[
'image/object/mask'
]
=
(
tf
.
VarLenFeature
(
tf
.
float32
))
self
.
items_to_handlers
[
fields
.
InputDataFields
.
groundtruth_instance_masks
]
=
(
slim_example_decoder
.
ItemHandlerCallback
(
[
'image/object/mask'
,
'image/height'
,
'image/width'
],
self
.
_reshape_instance_masks
))
elif
instance_mask_type
==
input_reader_pb2
.
PNG_MASKS
:
self
.
keys_to_features
[
'image/object/mask'
]
=
tf
.
VarLenFeature
(
tf
.
string
)
self
.
items_to_handlers
[
fields
.
InputDataFields
.
groundtruth_instance_masks
]
=
(
slim_example_decoder
.
ItemHandlerCallback
(
[
'image/object/mask'
,
'image/height'
,
'image/width'
],
self
.
_decode_png_instance_masks
))
else
:
raise
ValueError
(
'Did not recognize the `instance_mask_type` option.'
)
if
label_map_proto_file
:
label_map
=
label_map_util
.
get_label_map_dict
(
label_map_proto_file
,
use_display_name
)
# We use a default_value of -1, but we expect all labels to be contained
# in the label map.
table
=
tf
.
contrib
.
lookup
.
HashTable
(
initializer
=
tf
.
contrib
.
lookup
.
KeyValueTensorInitializer
(
keys
=
tf
.
constant
(
list
(
label_map
.
keys
())),
values
=
tf
.
constant
(
list
(
label_map
.
values
()),
dtype
=
tf
.
int64
)),
default_value
=-
1
)
# If the label_map_proto is provided, try to use it in conjunction with
# the class text, and fall back to a materialized ID.
label_handler
=
slim_example_decoder
.
BackupHandler
(
slim_example_decoder
.
LookupTensor
(
'image/object/class/text'
,
table
,
default_value
=
''
),
slim_example_decoder
.
Tensor
(
'image/object/class/label'
))
else
:
label_handler
=
slim_example_decoder
.
Tensor
(
'image/object/class/label'
)
self
.
items_to_handlers
[
fields
.
InputDataFields
.
groundtruth_classes
]
=
label_handler
...
...
@@ -140,16 +179,21 @@ class TfExampleDecoder(data_decoder.DataDecoder):
[None, 4] containing box corners.
fields.InputDataFields.groundtruth_classes - 1D int64 tensor of shape
[None] containing classes for the boxes.
fields.InputDataFields.groundtruth_weights - 1D float32 tensor of
shape [None] indicating the weights of groundtruth boxes.
fields.InputDataFields.num_groundtruth_boxes - int32 scalar indicating
the number of groundtruth_boxes.
fields.InputDataFields.groundtruth_area - 1D float32 tensor of shape
[None] containing containing object mask area in pixel squared.
fields.InputDataFields.groundtruth_is_crowd - 1D bool tensor of shape
[None] indicating if the boxes enclose a crowd.
Optional:
fields.InputDataFields.groundtruth_difficult - 1D bool tensor of shape
[None] indicating if the boxes represent `difficult` instances.
fields.InputDataFields.groundtruth_group_of - 1D bool tensor of shape
[None] indicating if the boxes represent `group_of` instances.
fields.InputDataFields.groundtruth_instance_masks - 3D
int64
tensor of
fields.InputDataFields.groundtruth_instance_masks - 3D
float32
tensor of
shape [None, None, None] containing instance masks.
"""
serialized_example
=
tf
.
reshape
(
tf_example_string_tensor
,
shape
=
[])
...
...
@@ -161,13 +205,27 @@ class TfExampleDecoder(data_decoder.DataDecoder):
is_crowd
=
fields
.
InputDataFields
.
groundtruth_is_crowd
tensor_dict
[
is_crowd
]
=
tf
.
cast
(
tensor_dict
[
is_crowd
],
dtype
=
tf
.
bool
)
tensor_dict
[
fields
.
InputDataFields
.
image
].
set_shape
([
None
,
None
,
3
])
tensor_dict
[
fields
.
InputDataFields
.
num_groundtruth_boxes
]
=
tf
.
shape
(
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_boxes
])[
0
]
def
default_groundtruth_weights
():
return
tf
.
ones
(
[
tf
.
shape
(
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_boxes
])[
0
]],
dtype
=
tf
.
float32
)
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_weights
]
=
tf
.
cond
(
tf
.
greater
(
tf
.
shape
(
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_weights
])[
0
],
0
),
lambda
:
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_weights
],
default_groundtruth_weights
)
return
tensor_dict
def
_reshape_instance_masks
(
self
,
keys_to_tensors
):
"""Reshape instance segmentation masks.
The instance segmentation masks are reshaped to [num_instances, height,
width]
and cast to boolean type to save memory
.
width].
Args:
keys_to_tensors: a dictionary from keys to tensors.
...
...
@@ -184,3 +242,34 @@ class TfExampleDecoder(data_decoder.DataDecoder):
masks
=
tf
.
sparse_tensor_to_dense
(
masks
)
masks
=
tf
.
reshape
(
tf
.
to_float
(
tf
.
greater
(
masks
,
0.0
)),
to_shape
)
return
tf
.
cast
(
masks
,
tf
.
float32
)
def
_decode_png_instance_masks
(
self
,
keys_to_tensors
):
"""Decode PNG instance segmentation masks and stack into dense tensor.
The instance segmentation masks are reshaped to [num_instances, height,
width].
Args:
keys_to_tensors: a dictionary from keys to tensors.
Returns:
A 3-D float tensor of shape [num_instances, height, width] with values
in {0, 1}.
"""
def
decode_png_mask
(
image_buffer
):
image
=
tf
.
squeeze
(
tf
.
image
.
decode_image
(
image_buffer
,
channels
=
1
),
axis
=
2
)
image
.
set_shape
([
None
,
None
])
image
=
tf
.
to_float
(
tf
.
greater
(
image
,
0
))
return
image
png_masks
=
keys_to_tensors
[
'image/object/mask'
]
height
=
keys_to_tensors
[
'image/height'
]
width
=
keys_to_tensors
[
'image/width'
]
if
isinstance
(
png_masks
,
tf
.
SparseTensor
):
png_masks
=
tf
.
sparse_tensor_to_dense
(
png_masks
,
default_value
=
''
)
return
tf
.
cond
(
tf
.
greater
(
tf
.
size
(
png_masks
),
0
),
lambda
:
tf
.
map_fn
(
decode_png_mask
,
png_masks
,
dtype
=
tf
.
float32
),
lambda
:
tf
.
zeros
(
tf
.
to_int32
(
tf
.
stack
([
0
,
height
,
width
]))))
research/object_detection/data_decoders/tf_example_decoder_test.py
View file @
fd7b6887
...
...
@@ -21,6 +21,7 @@ import tensorflow as tf
from
object_detection.core
import
standard_fields
as
fields
from
object_detection.data_decoders
import
tf_example_decoder
from
object_detection.protos
import
input_reader_pb2
class
TfExampleDecoderTest
(
tf
.
test
.
TestCase
):
...
...
@@ -57,7 +58,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
return
tf
.
train
.
Feature
(
bytes_list
=
tf
.
train
.
BytesList
(
value
=
[
value
]))
def
testDecodeJpegImage
(
self
):
image_tensor
=
np
.
random
.
randint
(
25
5
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
image_tensor
=
np
.
random
.
randint
(
25
6
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
encoded_jpeg
=
self
.
_EncodeImage
(
image_tensor
)
decoded_jpeg
=
self
.
_DecodeImage
(
encoded_jpeg
)
example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
{
...
...
@@ -78,7 +79,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
self
.
assertEqual
(
'image_id'
,
tensor_dict
[
fields
.
InputDataFields
.
source_id
])
def
testDecodeImageKeyAndFilename
(
self
):
image_tensor
=
np
.
random
.
randint
(
25
5
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
image_tensor
=
np
.
random
.
randint
(
25
6
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
encoded_jpeg
=
self
.
_EncodeImage
(
image_tensor
)
example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
{
'image/encoded'
:
self
.
_BytesFeature
(
encoded_jpeg
),
...
...
@@ -96,7 +97,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
self
.
assertEqual
(
'filename'
,
tensor_dict
[
fields
.
InputDataFields
.
filename
])
def
testDecodePngImage
(
self
):
image_tensor
=
np
.
random
.
randint
(
25
5
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
image_tensor
=
np
.
random
.
randint
(
25
6
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
encoded_png
=
self
.
_EncodeImage
(
image_tensor
,
encoding_type
=
'png'
)
decoded_png
=
self
.
_DecodeImage
(
encoded_png
,
encoding_type
=
'png'
)
example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
{
...
...
@@ -116,8 +117,62 @@ class TfExampleDecoderTest(tf.test.TestCase):
self
.
assertAllEqual
(
decoded_png
,
tensor_dict
[
fields
.
InputDataFields
.
image
])
self
.
assertEqual
(
'image_id'
,
tensor_dict
[
fields
.
InputDataFields
.
source_id
])
def
testDecodePngInstanceMasks
(
self
):
image_tensor
=
np
.
random
.
randint
(
256
,
size
=
(
10
,
10
,
3
)).
astype
(
np
.
uint8
)
encoded_jpeg
=
self
.
_EncodeImage
(
image_tensor
)
mask_1
=
np
.
random
.
randint
(
0
,
2
,
size
=
(
10
,
10
,
1
)).
astype
(
np
.
uint8
)
mask_2
=
np
.
random
.
randint
(
0
,
2
,
size
=
(
10
,
10
,
1
)).
astype
(
np
.
uint8
)
encoded_png_1
=
self
.
_EncodeImage
(
mask_1
,
encoding_type
=
'png'
)
decoded_png_1
=
np
.
squeeze
(
mask_1
.
astype
(
np
.
float32
))
encoded_png_2
=
self
.
_EncodeImage
(
mask_2
,
encoding_type
=
'png'
)
decoded_png_2
=
np
.
squeeze
(
mask_2
.
astype
(
np
.
float32
))
encoded_masks
=
[
encoded_png_1
,
encoded_png_2
]
decoded_masks
=
np
.
stack
([
decoded_png_1
,
decoded_png_2
])
example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
{
'image/encoded'
:
self
.
_BytesFeature
(
encoded_jpeg
),
'image/format'
:
self
.
_BytesFeature
(
'jpeg'
),
'image/object/mask'
:
self
.
_BytesFeature
(
encoded_masks
)
})).
SerializeToString
()
example_decoder
=
tf_example_decoder
.
TfExampleDecoder
(
load_instance_masks
=
True
,
instance_mask_type
=
input_reader_pb2
.
PNG_MASKS
)
tensor_dict
=
example_decoder
.
decode
(
tf
.
convert_to_tensor
(
example
))
with
self
.
test_session
()
as
sess
:
tensor_dict
=
sess
.
run
(
tensor_dict
)
self
.
assertAllEqual
(
decoded_masks
,
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_instance_masks
])
def
testDecodeEmptyPngInstanceMasks
(
self
):
image_tensor
=
np
.
random
.
randint
(
256
,
size
=
(
10
,
10
,
3
)).
astype
(
np
.
uint8
)
encoded_jpeg
=
self
.
_EncodeImage
(
image_tensor
)
encoded_masks
=
[]
example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
{
'image/encoded'
:
self
.
_BytesFeature
(
encoded_jpeg
),
'image/format'
:
self
.
_BytesFeature
(
'jpeg'
),
'image/object/mask'
:
self
.
_BytesFeature
(
encoded_masks
),
'image/height'
:
self
.
_Int64Feature
([
10
]),
'image/width'
:
self
.
_Int64Feature
([
10
]),
})).
SerializeToString
()
example_decoder
=
tf_example_decoder
.
TfExampleDecoder
(
load_instance_masks
=
True
,
instance_mask_type
=
input_reader_pb2
.
PNG_MASKS
)
tensor_dict
=
example_decoder
.
decode
(
tf
.
convert_to_tensor
(
example
))
with
self
.
test_session
()
as
sess
:
tensor_dict
=
sess
.
run
(
tensor_dict
)
self
.
assertAllEqual
(
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_instance_masks
].
shape
,
[
0
,
10
,
10
])
def
testDecodeBoundingBox
(
self
):
image_tensor
=
np
.
random
.
randint
(
25
5
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
image_tensor
=
np
.
random
.
randint
(
25
6
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
encoded_jpeg
=
self
.
_EncodeImage
(
image_tensor
)
bbox_ymins
=
[
0.0
,
4.0
]
bbox_xmins
=
[
1.0
,
5.0
]
...
...
@@ -144,9 +199,39 @@ class TfExampleDecoderTest(tf.test.TestCase):
bbox_ymaxs
,
bbox_xmaxs
]).
transpose
()
self
.
assertAllEqual
(
expected_boxes
,
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_boxes
])
self
.
assertAllEqual
(
2
,
tensor_dict
[
fields
.
InputDataFields
.
num_groundtruth_boxes
])
def
testDecodeDefaultGroundtruthWeights
(
self
):
image_tensor
=
np
.
random
.
randint
(
256
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
encoded_jpeg
=
self
.
_EncodeImage
(
image_tensor
)
bbox_ymins
=
[
0.0
,
4.0
]
bbox_xmins
=
[
1.0
,
5.0
]
bbox_ymaxs
=
[
2.0
,
6.0
]
bbox_xmaxs
=
[
3.0
,
7.0
]
example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
{
'image/encoded'
:
self
.
_BytesFeature
(
encoded_jpeg
),
'image/format'
:
self
.
_BytesFeature
(
'jpeg'
),
'image/object/bbox/ymin'
:
self
.
_FloatFeature
(
bbox_ymins
),
'image/object/bbox/xmin'
:
self
.
_FloatFeature
(
bbox_xmins
),
'image/object/bbox/ymax'
:
self
.
_FloatFeature
(
bbox_ymaxs
),
'image/object/bbox/xmax'
:
self
.
_FloatFeature
(
bbox_xmaxs
),
})).
SerializeToString
()
example_decoder
=
tf_example_decoder
.
TfExampleDecoder
()
tensor_dict
=
example_decoder
.
decode
(
tf
.
convert_to_tensor
(
example
))
self
.
assertAllEqual
((
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_boxes
].
get_shape
().
as_list
()),
[
None
,
4
])
with
self
.
test_session
()
as
sess
:
tensor_dict
=
sess
.
run
(
tensor_dict
)
self
.
assertAllClose
(
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_weights
],
np
.
ones
(
2
,
dtype
=
np
.
float32
))
def
testDecodeObjectLabel
(
self
):
image_tensor
=
np
.
random
.
randint
(
25
5
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
image_tensor
=
np
.
random
.
randint
(
25
6
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
encoded_jpeg
=
self
.
_EncodeImage
(
image_tensor
)
bbox_classes
=
[
0
,
1
]
example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
{
...
...
@@ -168,8 +253,131 @@ class TfExampleDecoderTest(tf.test.TestCase):
self
.
assertAllEqual
(
bbox_classes
,
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_classes
])
def
testDecodeObjectLabelNoText
(
self
):
image_tensor
=
np
.
random
.
randint
(
256
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
encoded_jpeg
=
self
.
_EncodeImage
(
image_tensor
)
bbox_classes
=
[
1
,
2
]
example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
{
'image/encoded'
:
self
.
_BytesFeature
(
encoded_jpeg
),
'image/format'
:
self
.
_BytesFeature
(
'jpeg'
),
'image/object/class/label'
:
self
.
_Int64Feature
(
bbox_classes
),
})).
SerializeToString
()
label_map_string
=
"""
item {
id:1
name:'cat'
}
item {
id:2
name:'dog'
}
"""
label_map_path
=
os
.
path
.
join
(
self
.
get_temp_dir
(),
'label_map.pbtxt'
)
with
tf
.
gfile
.
Open
(
label_map_path
,
'wb'
)
as
f
:
f
.
write
(
label_map_string
)
example_decoder
=
tf_example_decoder
.
TfExampleDecoder
(
label_map_proto_file
=
label_map_path
)
tensor_dict
=
example_decoder
.
decode
(
tf
.
convert_to_tensor
(
example
))
self
.
assertAllEqual
((
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_classes
].
get_shape
().
as_list
()),
[
None
])
init
=
tf
.
tables_initializer
()
with
self
.
test_session
()
as
sess
:
sess
.
run
(
init
)
tensor_dict
=
sess
.
run
(
tensor_dict
)
self
.
assertAllEqual
(
bbox_classes
,
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_classes
])
def
testDecodeObjectLabelUnrecognizedName
(
self
):
image_tensor
=
np
.
random
.
randint
(
256
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
encoded_jpeg
=
self
.
_EncodeImage
(
image_tensor
)
bbox_classes_text
=
[
'cat'
,
'cheetah'
]
example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
{
'image/encoded'
:
self
.
_BytesFeature
(
encoded_jpeg
),
'image/format'
:
self
.
_BytesFeature
(
'jpeg'
),
'image/object/class/text'
:
self
.
_BytesFeature
(
bbox_classes_text
),
})).
SerializeToString
()
label_map_string
=
"""
item {
id:2
name:'cat'
}
item {
id:1
name:'dog'
}
"""
label_map_path
=
os
.
path
.
join
(
self
.
get_temp_dir
(),
'label_map.pbtxt'
)
with
tf
.
gfile
.
Open
(
label_map_path
,
'wb'
)
as
f
:
f
.
write
(
label_map_string
)
example_decoder
=
tf_example_decoder
.
TfExampleDecoder
(
label_map_proto_file
=
label_map_path
)
tensor_dict
=
example_decoder
.
decode
(
tf
.
convert_to_tensor
(
example
))
self
.
assertAllEqual
((
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_classes
]
.
get_shape
().
as_list
()),
[
None
])
with
self
.
test_session
()
as
sess
:
sess
.
run
(
tf
.
tables_initializer
())
tensor_dict
=
sess
.
run
(
tensor_dict
)
self
.
assertAllEqual
([
2
,
-
1
],
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_classes
])
def
testDecodeObjectLabelWithMapping
(
self
):
image_tensor
=
np
.
random
.
randint
(
256
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
encoded_jpeg
=
self
.
_EncodeImage
(
image_tensor
)
bbox_classes_text
=
[
'cat'
,
'dog'
]
example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
{
'image/encoded'
:
self
.
_BytesFeature
(
encoded_jpeg
),
'image/format'
:
self
.
_BytesFeature
(
'jpeg'
),
'image/object/class/text'
:
self
.
_BytesFeature
(
bbox_classes_text
),
})).
SerializeToString
()
label_map_string
=
"""
item {
id:3
name:'cat'
}
item {
id:1
name:'dog'
}
"""
label_map_path
=
os
.
path
.
join
(
self
.
get_temp_dir
(),
'label_map.pbtxt'
)
with
tf
.
gfile
.
Open
(
label_map_path
,
'wb'
)
as
f
:
f
.
write
(
label_map_string
)
example_decoder
=
tf_example_decoder
.
TfExampleDecoder
(
label_map_proto_file
=
label_map_path
)
tensor_dict
=
example_decoder
.
decode
(
tf
.
convert_to_tensor
(
example
))
self
.
assertAllEqual
((
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_classes
]
.
get_shape
().
as_list
()),
[
None
])
with
self
.
test_session
()
as
sess
:
sess
.
run
(
tf
.
tables_initializer
())
tensor_dict
=
sess
.
run
(
tensor_dict
)
self
.
assertAllEqual
([
3
,
1
],
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_classes
])
def
testDecodeObjectArea
(
self
):
image_tensor
=
np
.
random
.
randint
(
25
5
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
image_tensor
=
np
.
random
.
randint
(
25
6
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
encoded_jpeg
=
self
.
_EncodeImage
(
image_tensor
)
object_area
=
[
100.
,
174.
]
example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
{
...
...
@@ -190,7 +398,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_area
])
def
testDecodeObjectIsCrowd
(
self
):
image_tensor
=
np
.
random
.
randint
(
25
5
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
image_tensor
=
np
.
random
.
randint
(
25
6
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
encoded_jpeg
=
self
.
_EncodeImage
(
image_tensor
)
object_is_crowd
=
[
0
,
1
]
example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
{
...
...
@@ -213,7 +421,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
fields
.
InputDataFields
.
groundtruth_is_crowd
])
def
testDecodeObjectDifficult
(
self
):
image_tensor
=
np
.
random
.
randint
(
25
5
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
image_tensor
=
np
.
random
.
randint
(
25
6
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
encoded_jpeg
=
self
.
_EncodeImage
(
image_tensor
)
object_difficult
=
[
0
,
1
]
example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
{
...
...
@@ -236,7 +444,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
fields
.
InputDataFields
.
groundtruth_difficult
])
def
testDecodeObjectGroupOf
(
self
):
image_tensor
=
np
.
random
.
randint
(
25
5
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
image_tensor
=
np
.
random
.
randint
(
25
6
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
encoded_jpeg
=
self
.
_EncodeImage
(
image_tensor
)
object_group_of
=
[
0
,
1
]
example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
...
...
@@ -259,13 +467,37 @@ class TfExampleDecoderTest(tf.test.TestCase):
[
bool
(
item
)
for
item
in
object_group_of
],
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_group_of
])
def
testDecodeObjectWeight
(
self
):
image_tensor
=
np
.
random
.
randint
(
256
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
encoded_jpeg
=
self
.
_EncodeImage
(
image_tensor
)
object_weights
=
[
0.75
,
1.0
]
example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
{
'image/encoded'
:
self
.
_BytesFeature
(
encoded_jpeg
),
'image/format'
:
self
.
_BytesFeature
(
'jpeg'
),
'image/object/weight'
:
self
.
_FloatFeature
(
object_weights
),
})).
SerializeToString
()
example_decoder
=
tf_example_decoder
.
TfExampleDecoder
()
tensor_dict
=
example_decoder
.
decode
(
tf
.
convert_to_tensor
(
example
))
self
.
assertAllEqual
((
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_weights
].
get_shape
().
as_list
()),
[
None
])
with
self
.
test_session
()
as
sess
:
tensor_dict
=
sess
.
run
(
tensor_dict
)
self
.
assertAllEqual
(
object_weights
,
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_weights
])
def
testDecodeInstanceSegmentation
(
self
):
num_instances
=
4
image_height
=
5
image_width
=
3
# Randomly generate image.
image_tensor
=
np
.
random
.
randint
(
25
5
,
size
=
(
image_height
,
image_tensor
=
np
.
random
.
randint
(
25
6
,
size
=
(
image_height
,
image_width
,
3
)).
astype
(
np
.
uint8
)
encoded_jpeg
=
self
.
_EncodeImage
(
image_tensor
)
...
...
@@ -316,7 +548,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
image_height
=
5
image_width
=
3
# Randomly generate image.
image_tensor
=
np
.
random
.
randint
(
25
5
,
size
=
(
image_height
,
image_tensor
=
np
.
random
.
randint
(
25
6
,
size
=
(
image_height
,
image_width
,
3
)).
astype
(
np
.
uint8
)
encoded_jpeg
=
self
.
_EncodeImage
(
image_tensor
)
...
...
research/object_detection/dataset_tools/BUILD
View file @
fd7b6887
# Tensorflow Object Detection API:
main runnable
s.
# Tensorflow Object Detection API:
dataset tool
s.
package
(
default_visibility
=
[
"//visibility:public"
],
...
...
@@ -8,18 +8,43 @@ licenses(["notice"])
# Apache 2.0
py_binary
(
name
=
"create_coco_tf_record"
,
srcs
=
[
"create_coco_tf_record.py"
,
],
deps
=
[
"//PIL:pil"
,
"//pycocotools"
,
"//tensorflow"
,
"//tensorflow/models/research/object_detection/utils:dataset_util"
,
"//tensorflow/models/research/object_detection/utils:label_map_util"
,
],
)
py_test
(
name
=
"create_coco_tf_record_test"
,
srcs
=
[
"create_coco_tf_record_test.py"
,
],
deps
=
[
":create_coco_tf_record"
,
"//tensorflow"
,
],
)
py_binary
(
name
=
"create_kitti_tf_record"
,
srcs
=
[
"create_kitti_tf_record.py"
,
],
deps
=
[
"//
third_party/py/
PIL:pil"
,
"//
third_party/py/
lxml"
,
"//PIL:pil"
,
"//lxml"
,
"//tensorflow"
,
"//tensorflow
_
models/object_detection/utils:dataset_util"
,
"//tensorflow
_
models/object_detection/utils:label_map_util"
,
"//tensorflow
_
models/object_detection/utils:np_box_ops"
,
"//tensorflow
/
models/
research/
object_detection/utils:dataset_util"
,
"//tensorflow
/
models/
research/
object_detection/utils:label_map_util"
,
"//tensorflow
/
models/
research/
object_detection/utils:np_box_ops"
,
],
)
...
...
@@ -40,11 +65,11 @@ py_binary(
"create_pascal_tf_record.py"
,
],
deps
=
[
"//
third_party/py/
PIL:pil"
,
"//
third_party/py/
lxml"
,
"//PIL:pil"
,
"//lxml"
,
"//tensorflow"
,
"//tensorflow
_
models/object_detection/utils:dataset_util"
,
"//tensorflow
_
models/object_detection/utils:label_map_util"
,
"//tensorflow
/
models/
research/
object_detection/utils:dataset_util"
,
"//tensorflow
/
models/
research/
object_detection/utils:label_map_util"
,
],
)
...
...
@@ -65,11 +90,11 @@ py_binary(
"create_pet_tf_record.py"
,
],
deps
=
[
"//
third_party/py/
PIL:pil"
,
"//
third_party/py/
lxml"
,
"//PIL:pil"
,
"//lxml"
,
"//tensorflow"
,
"//tensorflow
_
models/object_detection/utils:dataset_util"
,
"//tensorflow
_
models/object_detection/utils:label_map_util"
,
"//tensorflow
/
models/
research/
object_detection/utils:dataset_util"
,
"//tensorflow
/
models/
research/
object_detection/utils:label_map_util"
,
],
)
...
...
@@ -78,8 +103,8 @@ py_library(
srcs
=
[
"oid_tfrecord_creation.py"
],
deps
=
[
"//tensorflow"
,
"//tensorflow
_
models/object_detection/core:standard_fields"
,
"//tensorflow
_
models/object_detection/utils:dataset_util"
,
"//tensorflow
/
models/
research/
object_detection/core:standard_fields"
,
"//tensorflow
/
models/
research/
object_detection/utils:dataset_util"
,
],
)
...
...
@@ -88,9 +113,9 @@ py_test(
srcs
=
[
"oid_tfrecord_creation_test.py"
],
deps
=
[
":oid_tfrecord_creation"
,
"//
third_party/py/
contextlib2"
,
"//
third_party/py/
pandas"
,
"//
third_party/py/
tensorflow"
,
"//contextlib2"
,
"//pandas"
,
"//tensorflow"
,
],
)
...
...
@@ -99,9 +124,9 @@ py_binary(
srcs
=
[
"create_oid_tf_record.py"
],
deps
=
[
":oid_tfrecord_creation"
,
"//
third_party/py/
contextlib2"
,
"//
third_party/py/
pandas"
,
"//contextlib2"
,
"//pandas"
,
"//tensorflow"
,
"//tensorflow
_
models/object_detection/utils:label_map_util"
,
"//tensorflow
/
models/
research/
object_detection/utils:label_map_util"
,
],
)
research/object_detection/dataset_tools/__init__.py
View file @
fd7b6887
research/object_detection/dataset_tools/create_coco_tf_record.py
0 → 100644
View file @
fd7b6887
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r
"""Convert raw COCO dataset to TFRecord for object_detection.
Example usage:
python create_coco_tf_record.py --logtostderr \
--train_image_dir="${TRAIN_IMAGE_DIR}" \
--val_image_dir="${VAL_IMAGE_DIR}" \
--test_image_dir="${TEST_IMAGE_DIR}" \
--train_annotations_file="${TRAIN_ANNOTATIONS_FILE}" \
--val_annotations_file="${VAL_ANNOTATIONS_FILE}" \
--testdev_annotations_file="${TESTDEV_ANNOTATIONS_FILE}" \
--output_dir="${OUTPUT_DIR}"
"""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
hashlib
import
io
import
json
import
os
import
numpy
as
np
import
PIL.Image
from
pycocotools
import
mask
import
tensorflow
as
tf
from
object_detection.utils
import
dataset_util
from
object_detection.utils
import
label_map_util
# Command-line flags controlling which COCO splits are converted and where
# the resulting TFRecord files are written.
flags = tf.app.flags
tf.flags.DEFINE_boolean('include_masks', False,
                        'Whether to include instance segmentations masks '
                        '(PNG encoded) in the result. default: False.')
tf.flags.DEFINE_string('train_image_dir', '', 'Training image directory.')
tf.flags.DEFINE_string('val_image_dir', '', 'Validation image directory.')
tf.flags.DEFINE_string('test_image_dir', '', 'Test image directory.')
tf.flags.DEFINE_string('train_annotations_file', '',
                       'Training annotations JSON file.')
tf.flags.DEFINE_string('val_annotations_file', '',
                       'Validation annotations JSON file.')
tf.flags.DEFINE_string('testdev_annotations_file', '',
                       'Test-dev annotations JSON file.')
tf.flags.DEFINE_string('output_dir', '/tmp/', 'Output data directory.')

FLAGS = flags.FLAGS

tf.logging.set_verbosity(tf.logging.INFO)
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=False):
  """Converts image and annotations to a tf.Example proto.

  Args:
    image: dict with keys:
      [u'license', u'file_name', u'coco_url', u'height', u'width',
      u'date_captured', u'flickr_url', u'id']
    annotations_list:
      list of dicts with keys:
      [u'segmentation', u'area', u'iscrowd', u'image_id',
      u'bbox', u'category_id', u'id']
      Notice that bounding box coordinates in the official COCO dataset are
      given as [x, y, width, height] tuples using absolute coordinates where
      x, y represent the top-left (0-indexed) corner. This function converts
      to the format expected by the Tensorflow Object Detection API (which is
      [ymin, xmin, ymax, xmax] with coordinates normalized relative to image
      size).
    image_dir: directory containing the image files.
    category_index: a dict containing COCO category information keyed
      by the 'id' field of each category. See the
      label_map_util.create_category_index function.
    include_masks: Whether to include instance segmentations masks
      (PNG encoded) in the result. default: False.

  Returns:
    key: SHA256 hex digest of the encoded image bytes.
    example: The converted tf.Example
    num_annotations_skipped: Number of (invalid) annotations that were ignored.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
  image_height = image['height']
  image_width = image['width']
  filename = image['file_name']
  image_id = image['id']

  full_path = os.path.join(image_dir, filename)
  with tf.gfile.GFile(full_path, 'rb') as fid:
    encoded_jpg = fid.read()
  encoded_jpg_io = io.BytesIO(encoded_jpg)
  # Open (lazily) to surface unreadable image bytes; the decoded pixels are
  # never used — only the encoded bytes go into the example. Kept in a local
  # distinct from the `image` dict parameter to avoid shadowing it.
  _ = PIL.Image.open(encoded_jpg_io)
  key = hashlib.sha256(encoded_jpg).hexdigest()

  xmin = []
  xmax = []
  ymin = []
  ymax = []
  is_crowd = []
  category_names = []
  category_ids = []
  area = []
  encoded_mask_png = []
  num_annotations_skipped = 0
  for object_annotations in annotations_list:
    (x, y, width, height) = tuple(object_annotations['bbox'])
    # Skip degenerate boxes and boxes extending beyond the image bounds.
    if width <= 0 or height <= 0:
      num_annotations_skipped += 1
      continue
    if x + width > image_width or y + height > image_height:
      num_annotations_skipped += 1
      continue
    # Convert [x, y, width, height] absolute coords to normalized corners.
    xmin.append(float(x) / image_width)
    xmax.append(float(x + width) / image_width)
    ymin.append(float(y) / image_height)
    ymax.append(float(y + height) / image_height)
    is_crowd.append(object_annotations['iscrowd'])
    category_id = int(object_annotations['category_id'])
    category_ids.append(category_id)
    # NOTE(review): category_names is collected but never written to the
    # feature dict below — preserved as-is; confirm whether a
    # 'image/object/class/text' feature was intended.
    category_names.append(category_index[category_id]['name'].encode('utf8'))
    area.append(object_annotations['area'])

    if include_masks:
      run_len_encoding = mask.frPyObjects(object_annotations['segmentation'],
                                          image_height, image_width)
      binary_mask = mask.decode(run_len_encoding)
      if not object_annotations['iscrowd']:
        # Polygon segmentations decode to one channel per polygon; collapse
        # them into a single binary mask.
        binary_mask = np.amax(binary_mask, axis=2)
      pil_image = PIL.Image.fromarray(binary_mask)
      output_io = io.BytesIO()
      pil_image.save(output_io, format='PNG')
      encoded_mask_png.append(output_io.getvalue())
  feature_dict = {
      'image/height':
          dataset_util.int64_feature(image_height),
      'image/width':
          dataset_util.int64_feature(image_width),
      'image/filename':
          dataset_util.bytes_feature(filename.encode('utf8')),
      'image/source_id':
          dataset_util.bytes_feature(str(image_id).encode('utf8')),
      'image/key/sha256':
          dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded':
          dataset_util.bytes_feature(encoded_jpg),
      'image/format':
          dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin':
          dataset_util.float_list_feature(xmin),
      'image/object/bbox/xmax':
          dataset_util.float_list_feature(xmax),
      'image/object/bbox/ymin':
          dataset_util.float_list_feature(ymin),
      'image/object/bbox/ymax':
          dataset_util.float_list_feature(ymax),
      'image/object/class/label':
          dataset_util.int64_list_feature(category_ids),
      'image/object/is_crowd':
          dataset_util.int64_list_feature(is_crowd),
      'image/object/area':
          dataset_util.float_list_feature(area),
  }
  if include_masks:
    feature_dict['image/object/mask'] = (
        dataset_util.bytes_list_feature(encoded_mask_png))
  example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
  return key, example, num_annotations_skipped
def _create_tf_record_from_coco_annotations(
    annotations_file, image_dir, output_path, include_masks):
  """Loads COCO annotation json files and converts to tf.Record format.

  Args:
    annotations_file: JSON file containing bounding box annotations.
    image_dir: Directory containing the image files.
    output_path: Path to output tf.Record file.
    include_masks: Whether to include instance segmentations masks
      (PNG encoded) in the result. default: False.
  """
  with tf.gfile.GFile(annotations_file, 'r') as fid:
    groundtruth_data = json.load(fid)
    images = groundtruth_data['images']
    category_index = label_map_util.create_category_index(
        groundtruth_data['categories'])

    # Index annotations by image id so the per-image lookup below is O(1).
    annotations_index = {}
    if 'annotations' in groundtruth_data:
      tf.logging.info(
          'Found groundtruth annotations. Building annotations index.')
      for annotation in groundtruth_data['annotations']:
        annotations_index.setdefault(annotation['image_id'],
                                     []).append(annotation)
    # Images without any annotations still get an (empty) entry so the
    # writing loop below never KeyErrors.
    missing_annotation_count = 0
    for image in images:
      if image['id'] not in annotations_index:
        missing_annotation_count += 1
        annotations_index[image['id']] = []
    tf.logging.info('%d images are missing annotations.',
                    missing_annotation_count)

    tf.logging.info('writing to output path: %s', output_path)
    total_num_annotations_skipped = 0
    # Context manager guarantees the record file is flushed and closed even
    # if conversion raises part-way through (the previous code leaked the
    # writer on error).
    with tf.python_io.TFRecordWriter(output_path) as writer:
      for idx, image in enumerate(images):
        if idx % 100 == 0:
          tf.logging.info('On image %d of %d', idx, len(images))
        annotations_list = annotations_index[image['id']]
        _, tf_example, num_annotations_skipped = create_tf_example(
            image, annotations_list, image_dir, category_index, include_masks)
        total_num_annotations_skipped += num_annotations_skipped
        writer.write(tf_example.SerializeToString())
    tf.logging.info('Finished writing, skipped %d annotations.',
                    total_num_annotations_skipped)
def main(_):
  """Converts the train/val/test-dev COCO splits to TFRecord files.

  Raises:
    ValueError: if any of the required input flags is left empty.
  """
  # Validate required flags explicitly rather than with `assert`, which is
  # silently stripped when Python runs with optimizations (-O).
  required_flags = [
      'train_image_dir',
      'val_image_dir',
      'test_image_dir',
      'train_annotations_file',
      'val_annotations_file',
      'testdev_annotations_file',
  ]
  for flag_name in required_flags:
    if not getattr(FLAGS, flag_name):
      raise ValueError('`{}` missing.'.format(flag_name))

  if not tf.gfile.IsDirectory(FLAGS.output_dir):
    tf.gfile.MakeDirs(FLAGS.output_dir)
  train_output_path = os.path.join(FLAGS.output_dir, 'coco_train.record')
  val_output_path = os.path.join(FLAGS.output_dir, 'coco_val.record')
  testdev_output_path = os.path.join(FLAGS.output_dir, 'coco_testdev.record')

  _create_tf_record_from_coco_annotations(
      FLAGS.train_annotations_file,
      FLAGS.train_image_dir,
      train_output_path,
      FLAGS.include_masks)
  _create_tf_record_from_coco_annotations(
      FLAGS.val_annotations_file,
      FLAGS.val_image_dir,
      val_output_path,
      FLAGS.include_masks)
  _create_tf_record_from_coco_annotations(
      FLAGS.testdev_annotations_file,
      FLAGS.test_image_dir,
      testdev_output_path,
      FLAGS.include_masks)


if __name__ == '__main__':
  tf.app.run()
research/object_detection/dataset_tools/create_coco_tf_record_test.py
0 → 100644
View file @
fd7b6887
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Test for create_coco_tf_record.py."""
import
io
import
os
import
numpy
as
np
import
PIL.Image
import
tensorflow
as
tf
from
object_detection.dataset_tools
import
create_coco_tf_record
class CreateCocoTFRecordTest(tf.test.TestCase):
  """Tests for create_coco_tf_record.create_tf_example."""

  def _assertProtoEqual(self, proto_field, expectation):
    """Helper function to assert if a proto field equals some value.

    Args:
      proto_field: The protobuf field to compare.
      expectation: The expected value of the protobuf field.
    """
    proto_list = [p for p in proto_field]
    self.assertListEqual(proto_list, expectation)

  def test_create_tf_example(self):
    image_file_name = 'tmp_image.jpg'
    # PIL.Image.fromarray with mode 'RGB' requires a uint8 HxWx3 array;
    # np.random.rand produces float64 in [0, 1) which is invalid for it.
    image_data = np.random.randint(256, size=(256, 256, 3)).astype(np.uint8)
    tmp_dir = self.get_temp_dir()
    save_path = os.path.join(tmp_dir, image_file_name)
    image = PIL.Image.fromarray(image_data, 'RGB')
    image.save(save_path)

    image = {
        'file_name': image_file_name,
        'height': 256,
        'width': 256,
        'id': 11,
    }

    annotations_list = [{
        'area': .5,
        'iscrowd': False,
        'image_id': 11,
        'bbox': [64, 64, 128, 128],
        'category_id': 2,
        'id': 1000,
    }]

    image_dir = tmp_dir
    category_index = {
        1: {
            'name': 'dog',
            'id': 1
        },
        2: {
            'name': 'cat',
            'id': 2
        },
        3: {
            'name': 'human',
            'id': 3
        }
    }

    (_, example,
     num_annotations_skipped) = create_coco_tf_record.create_tf_example(
         image, annotations_list, image_dir, category_index)

    self.assertEqual(num_annotations_skipped, 0)
    self._assertProtoEqual(
        example.features.feature['image/height'].int64_list.value, [256])
    self._assertProtoEqual(
        example.features.feature['image/width'].int64_list.value, [256])
    self._assertProtoEqual(
        example.features.feature['image/filename'].bytes_list.value,
        [image_file_name])
    self._assertProtoEqual(
        example.features.feature['image/source_id'].bytes_list.value,
        [str(image['id'])])
    self._assertProtoEqual(
        example.features.feature['image/format'].bytes_list.value, ['jpeg'])
    # Boxes are [x, y, w, h] = [64, 64, 128, 128] on a 256x256 image,
    # so normalized corners are 0.25 and 0.75.
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/xmin'].float_list.value,
        [0.25])
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/ymin'].float_list.value,
        [0.25])
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/xmax'].float_list.value,
        [0.75])
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/ymax'].float_list.value,
        [0.75])

  def test_create_tf_example_with_instance_masks(self):
    image_file_name = 'tmp_image.jpg'
    # Use a valid uint8 RGB array (np.random.rand floats are rejected by
    # PIL.Image.fromarray in 'RGB' mode).
    image_data = np.random.randint(256, size=(8, 8, 3)).astype(np.uint8)
    tmp_dir = self.get_temp_dir()
    save_path = os.path.join(tmp_dir, image_file_name)
    image = PIL.Image.fromarray(image_data, 'RGB')
    image.save(save_path)

    image = {
        'file_name': image_file_name,
        'height': 8,
        'width': 8,
        'id': 11,
    }

    annotations_list = [{
        'area': .5,
        'iscrowd': False,
        'image_id': 11,
        'bbox': [0, 0, 8, 8],
        'segmentation': [[4, 0, 0, 0, 0, 4], [8, 4, 4, 8, 8, 8]],
        'category_id': 1,
        'id': 1000,
    }]

    image_dir = tmp_dir
    category_index = {
        1: {
            'name': 'dog',
            'id': 1
        },
    }

    (_, example,
     num_annotations_skipped) = create_coco_tf_record.create_tf_example(
         image, annotations_list, image_dir, category_index,
         include_masks=True)

    self.assertEqual(num_annotations_skipped, 0)
    self._assertProtoEqual(
        example.features.feature['image/height'].int64_list.value, [8])
    self._assertProtoEqual(
        example.features.feature['image/width'].int64_list.value, [8])
    self._assertProtoEqual(
        example.features.feature['image/filename'].bytes_list.value,
        [image_file_name])
    self._assertProtoEqual(
        example.features.feature['image/source_id'].bytes_list.value,
        [str(image['id'])])
    self._assertProtoEqual(
        example.features.feature['image/format'].bytes_list.value, ['jpeg'])
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/xmin'].float_list.value,
        [0])
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/ymin'].float_list.value,
        [0])
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/xmax'].float_list.value,
        [1])
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/ymax'].float_list.value,
        [1])
    # Decode the PNG-encoded instance mask back into a numpy array and
    # compare against the rasterization of the two triangle polygons.
    encoded_mask_pngs = [
        io.BytesIO(encoded_masks) for encoded_masks in example.features.feature[
            'image/object/mask'].bytes_list.value
    ]
    pil_masks = [
        np.array(PIL.Image.open(encoded_mask_png))
        for encoded_mask_png in encoded_mask_pngs
    ]
    self.assertTrue(len(pil_masks) == 1)
    self.assertAllEqual(pil_masks[0],
                        [[1, 1, 1, 0, 0, 0, 0, 0],
                         [1, 1, 0, 0, 0, 0, 0, 0],
                         [1, 0, 0, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 1],
                         [0, 0, 0, 0, 0, 0, 1, 1],
                         [0, 0, 0, 0, 0, 1, 1, 1],
                         [0, 0, 0, 0, 1, 1, 1, 1]])
# Run the test suite when this module is executed directly.
if __name__ == '__main__':
  tf.test.main()
research/object_detection/dataset_tools/create_kitti_tf_record.py
View file @
fd7b6887
...
...
@@ -120,7 +120,7 @@ def convert_kitti_to_tfrecords(data_dir, output_path, classes_to_use,
# Filter all bounding boxes of this frame that are of a legal class, and
# don't overlap with a dontcare region.
# TODO
(talremez)
filter out targets that are truncated or heavily occluded.
# TODO filter out targets that are truncated or heavily occluded.
annotation_for_image
=
filter_annotations
(
img_anno
,
classes_to_use
)
example
=
prepare_example
(
image_path
,
annotation_for_image
,
label_map_dict
)
...
...
research/object_detection/dataset_tools/create_kitti_tf_record_test.py
View file @
fd7b6887
...
...
@@ -24,7 +24,7 @@ import tensorflow as tf
from
object_detection.dataset_tools
import
create_kitti_tf_record
class
DictToTFExample
Test
(
tf
.
test
.
TestCase
):
class
CreateKittiTFRecord
Test
(
tf
.
test
.
TestCase
):
def
_assertProtoEqual
(
self
,
proto_field
,
expectation
):
"""Helper function to assert if a proto field equals some value.
...
...
Prev
1
2
3
4
5
6
7
…
10
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment