Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
fd7b6887
Unverified
Commit
fd7b6887
authored
Feb 09, 2018
by
Jonathan Huang
Committed by
GitHub
Feb 09, 2018
Browse files
Merge pull request #3293 from pkulzc/master
Internal changes of object_detection
parents
f98ec55e
1efe98bb
Changes
200
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
2749 additions
and
717 deletions
+2749
-717
research/object_detection/core/matcher_test.py
research/object_detection/core/matcher_test.py
+43
-1
research/object_detection/core/model.py
research/object_detection/core/model.py
+43
-10
research/object_detection/core/post_processing.py
research/object_detection/core/post_processing.py
+39
-9
research/object_detection/core/post_processing_test.py
research/object_detection/core/post_processing_test.py
+119
-0
research/object_detection/core/preprocessor.py
research/object_detection/core/preprocessor.py
+548
-111
research/object_detection/core/preprocessor_cache.py
research/object_detection/core/preprocessor_cache.py
+102
-0
research/object_detection/core/preprocessor_test.py
research/object_detection/core/preprocessor_test.py
+292
-10
research/object_detection/core/standard_fields.py
research/object_detection/core/standard_fields.py
+10
-1
research/object_detection/core/target_assigner.py
research/object_detection/core/target_assigner.py
+89
-86
research/object_detection/core/target_assigner_test.py
research/object_detection/core/target_assigner_test.py
+594
-436
research/object_detection/data/BUILD
research/object_detection/data/BUILD
+9
-0
research/object_detection/data_decoders/BUILD
research/object_detection/data_decoders/BUILD
+6
-4
research/object_detection/data_decoders/tf_example_decoder.py
...arch/object_detection/data_decoders/tf_example_decoder.py
+102
-13
research/object_detection/data_decoders/tf_example_decoder_test.py
...object_detection/data_decoders/tf_example_decoder_test.py
+243
-11
research/object_detection/dataset_tools/BUILD
research/object_detection/dataset_tools/BUILD
+47
-22
research/object_detection/dataset_tools/__init__.py
research/object_detection/dataset_tools/__init__.py
+0
-1
research/object_detection/dataset_tools/create_coco_tf_record.py
...h/object_detection/dataset_tools/create_coco_tf_record.py
+273
-0
research/object_detection/dataset_tools/create_coco_tf_record_test.py
...ect_detection/dataset_tools/create_coco_tf_record_test.py
+188
-0
research/object_detection/dataset_tools/create_kitti_tf_record.py
.../object_detection/dataset_tools/create_kitti_tf_record.py
+1
-1
research/object_detection/dataset_tools/create_kitti_tf_record_test.py
...ct_detection/dataset_tools/create_kitti_tf_record_test.py
+1
-1
No files found.
research/object_detection/core/matcher_test.py
View file @
fd7b6887
...
@@ -20,7 +20,7 @@ import tensorflow as tf
...
@@ -20,7 +20,7 @@ import tensorflow as tf
from
object_detection.core
import
matcher
from
object_detection.core
import
matcher
class
Anchor
Match
er
Test
(
tf
.
test
.
TestCase
):
class
MatchTest
(
tf
.
test
.
TestCase
):
def
test_get_correct_matched_columnIndices
(
self
):
def
test_get_correct_matched_columnIndices
(
self
):
match_results
=
tf
.
constant
([
3
,
1
,
-
1
,
0
,
-
1
,
5
,
-
2
])
match_results
=
tf
.
constant
([
3
,
1
,
-
1
,
0
,
-
1
,
5
,
-
2
])
...
@@ -145,6 +145,48 @@ class AnchorMatcherTest(tf.test.TestCase):
...
@@ -145,6 +145,48 @@ class AnchorMatcherTest(tf.test.TestCase):
self
.
assertAllEqual
(
all_indices_sorted
,
self
.
assertAllEqual
(
all_indices_sorted
,
np
.
arange
(
num_matches
,
dtype
=
np
.
int32
))
np
.
arange
(
num_matches
,
dtype
=
np
.
int32
))
def
test_scalar_gather_based_on_match
(
self
):
match_results
=
tf
.
constant
([
3
,
1
,
-
1
,
0
,
-
1
,
5
,
-
2
])
input_tensor
=
tf
.
constant
([
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
],
dtype
=
tf
.
float32
)
expected_gathered_tensor
=
[
3
,
1
,
100
,
0
,
100
,
5
,
200
]
match
=
matcher
.
Match
(
match_results
)
gathered_tensor
=
match
.
gather_based_on_match
(
input_tensor
,
unmatched_value
=
100.
,
ignored_value
=
200.
)
self
.
assertEquals
(
gathered_tensor
.
dtype
,
tf
.
float32
)
with
self
.
test_session
():
gathered_tensor_out
=
gathered_tensor
.
eval
()
self
.
assertAllEqual
(
expected_gathered_tensor
,
gathered_tensor_out
)
def
test_multidimensional_gather_based_on_match
(
self
):
match_results
=
tf
.
constant
([
1
,
-
1
,
-
2
])
input_tensor
=
tf
.
constant
([[
0
,
0.5
,
0
,
0.5
],
[
0
,
0
,
0.5
,
0.5
]],
dtype
=
tf
.
float32
)
expected_gathered_tensor
=
[[
0
,
0
,
0.5
,
0.5
],
[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
]]
match
=
matcher
.
Match
(
match_results
)
gathered_tensor
=
match
.
gather_based_on_match
(
input_tensor
,
unmatched_value
=
tf
.
zeros
(
4
),
ignored_value
=
tf
.
zeros
(
4
))
self
.
assertEquals
(
gathered_tensor
.
dtype
,
tf
.
float32
)
with
self
.
test_session
():
gathered_tensor_out
=
gathered_tensor
.
eval
()
self
.
assertAllEqual
(
expected_gathered_tensor
,
gathered_tensor_out
)
def
test_multidimensional_gather_based_on_match_with_matmul_gather_op
(
self
):
match_results
=
tf
.
constant
([
1
,
-
1
,
-
2
])
input_tensor
=
tf
.
constant
([[
0
,
0.5
,
0
,
0.5
],
[
0
,
0
,
0.5
,
0.5
]],
dtype
=
tf
.
float32
)
expected_gathered_tensor
=
[[
0
,
0
,
0.5
,
0.5
],
[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
]]
match
=
matcher
.
Match
(
match_results
,
use_matmul_gather
=
True
)
gathered_tensor
=
match
.
gather_based_on_match
(
input_tensor
,
unmatched_value
=
tf
.
zeros
(
4
),
ignored_value
=
tf
.
zeros
(
4
))
self
.
assertEquals
(
gathered_tensor
.
dtype
,
tf
.
float32
)
with
self
.
test_session
()
as
sess
:
self
.
assertTrue
(
all
([
op
.
name
is
not
'Gather'
for
op
in
sess
.
graph
.
get_operations
()]))
gathered_tensor_out
=
gathered_tensor
.
eval
()
self
.
assertAllEqual
(
expected_gathered_tensor
,
gathered_tensor_out
)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
tf
.
test
.
main
()
tf
.
test
.
main
()
research/object_detection/core/model.py
View file @
fd7b6887
...
@@ -39,6 +39,17 @@ resize/reshaping necessary (see docstring for the preprocess function).
...
@@ -39,6 +39,17 @@ resize/reshaping necessary (see docstring for the preprocess function).
Output classes are always integers in the range [0, num_classes). Any mapping
Output classes are always integers in the range [0, num_classes). Any mapping
of these integers to semantic labels is to be handled outside of this class.
of these integers to semantic labels is to be handled outside of this class.
Images are resized in the `preprocess` method. All of `preprocess`, `predict`,
and `postprocess` should be reentrant.
The `preprocess` method runs `image_resizer_fn` that returns resized_images and
`true_image_shapes`. Since `image_resizer_fn` can pad the images with zeros,
true_image_shapes indicate the slices that contain the image without padding.
This is useful for padding images to be a fixed size for batching.
The `postprocess` method uses the true image shapes to clip predictions that lie
outside of images.
By default, DetectionModels produce bounding box detections; However, we support
By default, DetectionModels produce bounding box detections; However, we support
a handful of auxiliary annotations associated with each bounding box, namely,
a handful of auxiliary annotations associated with each bounding box, namely,
instance masks and keypoints.
instance masks and keypoints.
...
@@ -106,12 +117,12 @@ class DetectionModel(object):
...
@@ -106,12 +117,12 @@ class DetectionModel(object):
This function is responsible for any scaling/shifting of input values that
This function is responsible for any scaling/shifting of input values that
is necessary prior to running the detector on an input image.
is necessary prior to running the detector on an input image.
It is also responsible for any resizing that might be necessary
as images
It is also responsible for any resizing
, padding
that might be necessary
are assumed to arrive in arbitrary sizes. While this function
could
as images
are assumed to arrive in arbitrary sizes. While this function
conceivably be part of the predict method (below), it is often
convenient
could
conceivably be part of the predict method (below), it is often
to keep these separate --- for example, we may want to preprocess
on one
convenient
to keep these separate --- for example, we may want to preprocess
device, place onto a queue, and let another device (e.g., the GPU)
handle
on one
device, place onto a queue, and let another device (e.g., the GPU)
prediction.
handle
prediction.
A few important notes about the preprocess function:
A few important notes about the preprocess function:
+ We assume that this operation does not have any trainable variables nor
+ We assume that this operation does not have any trainable variables nor
...
@@ -134,11 +145,15 @@ class DetectionModel(object):
...
@@ -134,11 +145,15 @@ class DetectionModel(object):
Returns:
Returns:
preprocessed_inputs: a [batch, height_out, width_out, channels] float32
preprocessed_inputs: a [batch, height_out, width_out, channels] float32
tensor representing a batch of images.
tensor representing a batch of images.
true_image_shapes: int32 tensor of shape [batch, 3] where each row is
of the form [height, width, channels] indicating the shapes
of true images in the resized images, as resized images can be padded
with zeros.
"""
"""
pass
pass
@
abstractmethod
@
abstractmethod
def
predict
(
self
,
preprocessed_inputs
):
def
predict
(
self
,
preprocessed_inputs
,
true_image_shapes
):
"""Predict prediction tensors from inputs tensor.
"""Predict prediction tensors from inputs tensor.
Outputs of this function can be passed to loss or postprocess functions.
Outputs of this function can be passed to loss or postprocess functions.
...
@@ -146,6 +161,10 @@ class DetectionModel(object):
...
@@ -146,6 +161,10 @@ class DetectionModel(object):
Args:
Args:
preprocessed_inputs: a [batch, height, width, channels] float32 tensor
preprocessed_inputs: a [batch, height, width, channels] float32 tensor
representing a batch of images.
representing a batch of images.
true_image_shapes: int32 tensor of shape [batch, 3] where each row is
of the form [height, width, channels] indicating the shapes
of true images in the resized images, as resized images can be padded
with zeros.
Returns:
Returns:
prediction_dict: a dictionary holding prediction tensors to be
prediction_dict: a dictionary holding prediction tensors to be
...
@@ -154,7 +173,7 @@ class DetectionModel(object):
...
@@ -154,7 +173,7 @@ class DetectionModel(object):
pass
pass
@
abstractmethod
@
abstractmethod
def
postprocess
(
self
,
prediction_dict
,
**
params
):
def
postprocess
(
self
,
prediction_dict
,
true_image_shapes
,
**
params
):
"""Convert predicted output tensors to final detections.
"""Convert predicted output tensors to final detections.
Outputs adhere to the following conventions:
Outputs adhere to the following conventions:
...
@@ -172,6 +191,10 @@ class DetectionModel(object):
...
@@ -172,6 +191,10 @@ class DetectionModel(object):
Args:
Args:
prediction_dict: a dictionary holding prediction tensors.
prediction_dict: a dictionary holding prediction tensors.
true_image_shapes: int32 tensor of shape [batch, 3] where each row is
of the form [height, width, channels] indicating the shapes
of true images in the resized images, as resized images can be padded
with zeros.
**params: Additional keyword arguments for specific implementations of
**params: Additional keyword arguments for specific implementations of
DetectionModel.
DetectionModel.
...
@@ -190,7 +213,7 @@ class DetectionModel(object):
...
@@ -190,7 +213,7 @@ class DetectionModel(object):
pass
pass
@
abstractmethod
@
abstractmethod
def
loss
(
self
,
prediction_dict
):
def
loss
(
self
,
prediction_dict
,
true_image_shapes
):
"""Compute scalar loss tensors with respect to provided groundtruth.
"""Compute scalar loss tensors with respect to provided groundtruth.
Calling this function requires that groundtruth tensors have been
Calling this function requires that groundtruth tensors have been
...
@@ -198,6 +221,10 @@ class DetectionModel(object):
...
@@ -198,6 +221,10 @@ class DetectionModel(object):
Args:
Args:
prediction_dict: a dictionary holding predicted tensors
prediction_dict: a dictionary holding predicted tensors
true_image_shapes: int32 tensor of shape [batch, 3] where each row is
of the form [height, width, channels] indicating the shapes
of true images in the resized images, as resized images can be padded
with zeros.
Returns:
Returns:
a dictionary mapping strings (loss names) to scalar tensors representing
a dictionary mapping strings (loss names) to scalar tensors representing
...
@@ -209,7 +236,8 @@ class DetectionModel(object):
...
@@ -209,7 +236,8 @@ class DetectionModel(object):
groundtruth_boxes_list
,
groundtruth_boxes_list
,
groundtruth_classes_list
,
groundtruth_classes_list
,
groundtruth_masks_list
=
None
,
groundtruth_masks_list
=
None
,
groundtruth_keypoints_list
=
None
):
groundtruth_keypoints_list
=
None
,
groundtruth_weights_list
=
None
):
"""Provide groundtruth tensors.
"""Provide groundtruth tensors.
Args:
Args:
...
@@ -230,10 +258,15 @@ class DetectionModel(object):
...
@@ -230,10 +258,15 @@ class DetectionModel(object):
shape [num_boxes, num_keypoints, 2] containing keypoints.
shape [num_boxes, num_keypoints, 2] containing keypoints.
Keypoints are assumed to be provided in normalized coordinates and
Keypoints are assumed to be provided in normalized coordinates and
missing keypoints should be encoded as NaN.
missing keypoints should be encoded as NaN.
groundtruth_weights_list: A list of 1-D tf.float32 tensors of shape
[num_boxes] containing weights for groundtruth boxes.
"""
"""
self
.
_groundtruth_lists
[
fields
.
BoxListFields
.
boxes
]
=
groundtruth_boxes_list
self
.
_groundtruth_lists
[
fields
.
BoxListFields
.
boxes
]
=
groundtruth_boxes_list
self
.
_groundtruth_lists
[
self
.
_groundtruth_lists
[
fields
.
BoxListFields
.
classes
]
=
groundtruth_classes_list
fields
.
BoxListFields
.
classes
]
=
groundtruth_classes_list
if
groundtruth_weights_list
:
self
.
_groundtruth_lists
[
fields
.
BoxListFields
.
weights
]
=
groundtruth_weights_list
if
groundtruth_masks_list
:
if
groundtruth_masks_list
:
self
.
_groundtruth_lists
[
self
.
_groundtruth_lists
[
fields
.
BoxListFields
.
masks
]
=
groundtruth_masks_list
fields
.
BoxListFields
.
masks
]
=
groundtruth_masks_list
...
...
research/object_detection/core/post_processing.py
View file @
fd7b6887
...
@@ -20,6 +20,7 @@ import tensorflow as tf
...
@@ -20,6 +20,7 @@ import tensorflow as tf
from
object_detection.core
import
box_list
from
object_detection.core
import
box_list
from
object_detection.core
import
box_list_ops
from
object_detection.core
import
box_list_ops
from
object_detection.core
import
standard_fields
as
fields
from
object_detection.core
import
standard_fields
as
fields
from
object_detection.utils
import
shape_utils
def
multiclass_non_max_suppression
(
boxes
,
def
multiclass_non_max_suppression
(
boxes
,
...
@@ -31,6 +32,7 @@ def multiclass_non_max_suppression(boxes,
...
@@ -31,6 +32,7 @@ def multiclass_non_max_suppression(boxes,
clip_window
=
None
,
clip_window
=
None
,
change_coordinate_frame
=
False
,
change_coordinate_frame
=
False
,
masks
=
None
,
masks
=
None
,
boundaries
=
None
,
additional_fields
=
None
,
additional_fields
=
None
,
scope
=
None
):
scope
=
None
):
"""Multi-class version of non maximum suppression.
"""Multi-class version of non maximum suppression.
...
@@ -66,6 +68,9 @@ def multiclass_non_max_suppression(boxes,
...
@@ -66,6 +68,9 @@ def multiclass_non_max_suppression(boxes,
masks: (optional) a [k, q, mask_height, mask_width] float32 tensor
masks: (optional) a [k, q, mask_height, mask_width] float32 tensor
containing box masks. `q` can be either number of classes or 1 depending
containing box masks. `q` can be either number of classes or 1 depending
on whether a separate mask is predicted per class.
on whether a separate mask is predicted per class.
boundaries: (optional) a [k, q, boundary_height, boundary_width] float32
tensor containing box boundaries. `q` can be either number of classes or 1
depending on whether a separate boundary is predicted per class.
additional_fields: (optional) If not None, a dictionary that maps keys to
additional_fields: (optional) If not None, a dictionary that maps keys to
tensors whose first dimensions are all of size `k`. After non-maximum
tensors whose first dimensions are all of size `k`. After non-maximum
suppression, all tensors corresponding to the selected boxes will be
suppression, all tensors corresponding to the selected boxes will be
...
@@ -114,6 +119,8 @@ def multiclass_non_max_suppression(boxes,
...
@@ -114,6 +119,8 @@ def multiclass_non_max_suppression(boxes,
per_class_boxes_list
=
tf
.
unstack
(
boxes
,
axis
=
1
)
per_class_boxes_list
=
tf
.
unstack
(
boxes
,
axis
=
1
)
if
masks
is
not
None
:
if
masks
is
not
None
:
per_class_masks_list
=
tf
.
unstack
(
masks
,
axis
=
1
)
per_class_masks_list
=
tf
.
unstack
(
masks
,
axis
=
1
)
if
boundaries
is
not
None
:
per_class_boundaries_list
=
tf
.
unstack
(
boundaries
,
axis
=
1
)
boxes_ids
=
(
range
(
num_classes
)
if
len
(
per_class_boxes_list
)
>
1
boxes_ids
=
(
range
(
num_classes
)
if
len
(
per_class_boxes_list
)
>
1
else
[
0
]
*
num_classes
)
else
[
0
]
*
num_classes
)
for
class_idx
,
boxes_idx
in
zip
(
range
(
num_classes
),
boxes_ids
):
for
class_idx
,
boxes_idx
in
zip
(
range
(
num_classes
),
boxes_ids
):
...
@@ -128,6 +135,10 @@ def multiclass_non_max_suppression(boxes,
...
@@ -128,6 +135,10 @@ def multiclass_non_max_suppression(boxes,
per_class_masks
=
per_class_masks_list
[
boxes_idx
]
per_class_masks
=
per_class_masks_list
[
boxes_idx
]
boxlist_and_class_scores
.
add_field
(
fields
.
BoxListFields
.
masks
,
boxlist_and_class_scores
.
add_field
(
fields
.
BoxListFields
.
masks
,
per_class_masks
)
per_class_masks
)
if
boundaries
is
not
None
:
per_class_boundaries
=
per_class_boundaries_list
[
boxes_idx
]
boxlist_and_class_scores
.
add_field
(
fields
.
BoxListFields
.
boundaries
,
per_class_boundaries
)
if
additional_fields
is
not
None
:
if
additional_fields
is
not
None
:
for
key
,
tensor
in
additional_fields
.
items
():
for
key
,
tensor
in
additional_fields
.
items
():
boxlist_and_class_scores
.
add_field
(
key
,
tensor
)
boxlist_and_class_scores
.
add_field
(
key
,
tensor
)
...
@@ -194,9 +205,12 @@ def batch_multiclass_non_max_suppression(boxes,
...
@@ -194,9 +205,12 @@ def batch_multiclass_non_max_suppression(boxes,
max_size_per_class: maximum number of retained boxes per class.
max_size_per_class: maximum number of retained boxes per class.
max_total_size: maximum number of boxes retained over all classes. By
max_total_size: maximum number of boxes retained over all classes. By
default returns all boxes retained after capping boxes per class.
default returns all boxes retained after capping boxes per class.
clip_window: A float32 tensor of the form [y_min, x_min, y_max, x_max]
clip_window: A float32 tensor of shape [batch_size, 4] where each entry is
representing the window to clip boxes to before performing non-max
of the form [y_min, x_min, y_max, x_max] representing the window to clip
suppression.
boxes to before performing non-max suppression. This argument can also be
a tensor of shape [4] in which case, the same clip window is applied to
all images in the batch. If clip_widow is None, all boxes are used to
perform non-max suppression.
change_coordinate_frame: Whether to normalize coordinates after clipping
change_coordinate_frame: Whether to normalize coordinates after clipping
relative to clip_window (this can only be set to True if a clip_window
relative to clip_window (this can only be set to True if a clip_window
is provided)
is provided)
...
@@ -242,7 +256,9 @@ def batch_multiclass_non_max_suppression(boxes,
...
@@ -242,7 +256,9 @@ def batch_multiclass_non_max_suppression(boxes,
if
q
!=
1
and
q
!=
num_classes
:
if
q
!=
1
and
q
!=
num_classes
:
raise
ValueError
(
'third dimension of boxes must be either 1 or equal '
raise
ValueError
(
'third dimension of boxes must be either 1 or equal '
'to the third dimension of scores'
)
'to the third dimension of scores'
)
if
change_coordinate_frame
and
clip_window
is
None
:
raise
ValueError
(
'if change_coordinate_frame is True, then a clip_window'
'must be specified.'
)
original_masks
=
masks
original_masks
=
masks
original_additional_fields
=
additional_fields
original_additional_fields
=
additional_fields
with
tf
.
name_scope
(
scope
,
'BatchMultiClassNonMaxSuppression'
):
with
tf
.
name_scope
(
scope
,
'BatchMultiClassNonMaxSuppression'
):
...
@@ -266,6 +282,16 @@ def batch_multiclass_non_max_suppression(boxes,
...
@@ -266,6 +282,16 @@ def batch_multiclass_non_max_suppression(boxes,
masks_shape
=
tf
.
stack
([
batch_size
,
num_anchors
,
1
,
0
,
0
])
masks_shape
=
tf
.
stack
([
batch_size
,
num_anchors
,
1
,
0
,
0
])
masks
=
tf
.
zeros
(
masks_shape
)
masks
=
tf
.
zeros
(
masks_shape
)
if
clip_window
is
None
:
clip_window
=
tf
.
stack
([
tf
.
reduce_min
(
boxes
[:,
:,
:,
0
]),
tf
.
reduce_min
(
boxes
[:,
:,
:,
1
]),
tf
.
reduce_max
(
boxes
[:,
:,
:,
2
]),
tf
.
reduce_max
(
boxes
[:,
:,
:,
3
])
])
if
clip_window
.
shape
.
ndims
==
1
:
clip_window
=
tf
.
tile
(
tf
.
expand_dims
(
clip_window
,
0
),
[
batch_size
,
1
])
if
additional_fields
is
None
:
if
additional_fields
is
None
:
additional_fields
=
{}
additional_fields
=
{}
...
@@ -283,6 +309,9 @@ def batch_multiclass_non_max_suppression(boxes,
...
@@ -283,6 +309,9 @@ def batch_multiclass_non_max_suppression(boxes,
per_image_masks - A [num_anchors, q, mask_height, mask_width] float32
per_image_masks - A [num_anchors, q, mask_height, mask_width] float32
tensor containing box masks. `q` can be either number of classes
tensor containing box masks. `q` can be either number of classes
or 1 depending on whether a separate mask is predicted per class.
or 1 depending on whether a separate mask is predicted per class.
per_image_clip_window - A 1D float32 tensor of the form
[ymin, xmin, ymax, xmax] representing the window to clip the boxes
to.
per_image_additional_fields - (optional) A variable number of float32
per_image_additional_fields - (optional) A variable number of float32
tensors each with size [num_anchors, ...].
tensors each with size [num_anchors, ...].
per_image_num_valid_boxes - A tensor of type `int32`. A 1-D tensor of
per_image_num_valid_boxes - A tensor of type `int32`. A 1-D tensor of
...
@@ -311,9 +340,10 @@ def batch_multiclass_non_max_suppression(boxes,
...
@@ -311,9 +340,10 @@ def batch_multiclass_non_max_suppression(boxes,
per_image_boxes
=
args
[
0
]
per_image_boxes
=
args
[
0
]
per_image_scores
=
args
[
1
]
per_image_scores
=
args
[
1
]
per_image_masks
=
args
[
2
]
per_image_masks
=
args
[
2
]
per_image_clip_window
=
args
[
3
]
per_image_additional_fields
=
{
per_image_additional_fields
=
{
key
:
value
key
:
value
for
key
,
value
in
zip
(
additional_fields
,
args
[
3
:
-
1
])
for
key
,
value
in
zip
(
additional_fields
,
args
[
4
:
-
1
])
}
}
per_image_num_valid_boxes
=
args
[
-
1
]
per_image_num_valid_boxes
=
args
[
-
1
]
per_image_boxes
=
tf
.
reshape
(
per_image_boxes
=
tf
.
reshape
(
...
@@ -345,7 +375,7 @@ def batch_multiclass_non_max_suppression(boxes,
...
@@ -345,7 +375,7 @@ def batch_multiclass_non_max_suppression(boxes,
iou_thresh
,
iou_thresh
,
max_size_per_class
,
max_size_per_class
,
max_total_size
,
max_total_size
,
clip_window
=
clip_window
,
clip_window
=
per_image_
clip_window
,
change_coordinate_frame
=
change_coordinate_frame
,
change_coordinate_frame
=
change_coordinate_frame
,
masks
=
per_image_masks
,
masks
=
per_image_masks
,
additional_fields
=
per_image_additional_fields
)
additional_fields
=
per_image_additional_fields
)
...
@@ -367,10 +397,10 @@ def batch_multiclass_non_max_suppression(boxes,
...
@@ -367,10 +397,10 @@ def batch_multiclass_non_max_suppression(boxes,
num_additional_fields
=
len
(
additional_fields
)
num_additional_fields
=
len
(
additional_fields
)
num_nmsed_outputs
=
4
+
num_additional_fields
num_nmsed_outputs
=
4
+
num_additional_fields
batch_outputs
=
tf
.
map_fn
(
batch_outputs
=
shape_utils
.
static_or_dynamic_
map_fn
(
_single_image_nms_fn
,
_single_image_nms_fn
,
elems
=
([
boxes
,
scores
,
masks
]
+
list
(
additional_fields
.
values
())
+
elems
=
([
boxes
,
scores
,
masks
,
clip_window
]
+
[
num_valid_boxes
]),
list
(
additional_fields
.
values
())
+
[
num_valid_boxes
]),
dtype
=
(
num_nmsed_outputs
*
[
tf
.
float32
]
+
[
tf
.
int32
]),
dtype
=
(
num_nmsed_outputs
*
[
tf
.
float32
]
+
[
tf
.
int32
]),
parallel_iterations
=
parallel_iterations
)
parallel_iterations
=
parallel_iterations
)
...
...
research/object_detection/core/post_processing_test.py
View file @
fd7b6887
...
@@ -571,6 +571,125 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
...
@@ -571,6 +571,125 @@ class MulticlassNonMaxSuppressionTest(tf.test.TestCase):
self
.
assertAllClose
(
nmsed_classes
,
exp_nms_classes
)
self
.
assertAllClose
(
nmsed_classes
,
exp_nms_classes
)
self
.
assertAllClose
(
num_detections
,
[
2
,
3
])
self
.
assertAllClose
(
num_detections
,
[
2
,
3
])
def
test_batch_multiclass_nms_with_per_batch_clip_window
(
self
):
boxes
=
tf
.
constant
([[[[
0
,
0
,
1
,
1
],
[
0
,
0
,
4
,
5
]],
[[
0
,
0.1
,
1
,
1.1
],
[
0
,
0.1
,
2
,
1.1
]],
[[
0
,
-
0.1
,
1
,
0.9
],
[
0
,
-
0.1
,
1
,
0.9
]],
[[
0
,
10
,
1
,
11
],
[
0
,
10
,
1
,
11
]]],
[[[
0
,
10.1
,
1
,
11.1
],
[
0
,
10.1
,
1
,
11.1
]],
[[
0
,
100
,
1
,
101
],
[
0
,
100
,
1
,
101
]],
[[
0
,
1000
,
1
,
1002
],
[
0
,
999
,
2
,
1004
]],
[[
0
,
1000
,
1
,
1002.1
],
[
0
,
999
,
2
,
1002.7
]]]],
tf
.
float32
)
scores
=
tf
.
constant
([[[.
9
,
0.01
],
[.
75
,
0.05
],
[.
6
,
0.01
],
[.
95
,
0
]],
[[.
5
,
0.01
],
[.
3
,
0.01
],
[.
01
,
.
85
],
[.
01
,
.
5
]]])
clip_window
=
tf
.
constant
([
0.
,
0.
,
200.
,
200.
])
score_thresh
=
0.1
iou_thresh
=
.
5
max_output_size
=
4
exp_nms_corners
=
np
.
array
([[[
0
,
10
,
1
,
11
],
[
0
,
0
,
1
,
1
],
[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
]],
[[
0
,
10.1
,
1
,
11.1
],
[
0
,
100
,
1
,
101
],
[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
]]])
exp_nms_scores
=
np
.
array
([[.
95
,
.
9
,
0
,
0
],
[.
5
,
.
3
,
0
,
0
]])
exp_nms_classes
=
np
.
array
([[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
]])
(
nmsed_boxes
,
nmsed_scores
,
nmsed_classes
,
nmsed_masks
,
nmsed_additional_fields
,
num_detections
)
=
post_processing
.
batch_multiclass_non_max_suppression
(
boxes
,
scores
,
score_thresh
,
iou_thresh
,
max_size_per_class
=
max_output_size
,
max_total_size
=
max_output_size
,
clip_window
=
clip_window
)
self
.
assertIsNone
(
nmsed_masks
)
self
.
assertIsNone
(
nmsed_additional_fields
)
# Check static shapes
self
.
assertAllEqual
(
nmsed_boxes
.
shape
.
as_list
(),
exp_nms_corners
.
shape
)
self
.
assertAllEqual
(
nmsed_scores
.
shape
.
as_list
(),
exp_nms_scores
.
shape
)
self
.
assertAllEqual
(
nmsed_classes
.
shape
.
as_list
(),
exp_nms_classes
.
shape
)
self
.
assertEqual
(
num_detections
.
shape
.
as_list
(),
[
2
])
with
self
.
test_session
()
as
sess
:
(
nmsed_boxes
,
nmsed_scores
,
nmsed_classes
,
num_detections
)
=
sess
.
run
([
nmsed_boxes
,
nmsed_scores
,
nmsed_classes
,
num_detections
])
self
.
assertAllClose
(
nmsed_boxes
,
exp_nms_corners
)
self
.
assertAllClose
(
nmsed_scores
,
exp_nms_scores
)
self
.
assertAllClose
(
nmsed_classes
,
exp_nms_classes
)
self
.
assertAllClose
(
num_detections
,
[
2
,
2
])
def
test_batch_multiclass_nms_with_per_image_clip_window
(
self
):
boxes
=
tf
.
constant
([[[[
0
,
0
,
1
,
1
],
[
0
,
0
,
4
,
5
]],
[[
0
,
0.1
,
1
,
1.1
],
[
0
,
0.1
,
2
,
1.1
]],
[[
0
,
-
0.1
,
1
,
0.9
],
[
0
,
-
0.1
,
1
,
0.9
]],
[[
0
,
10
,
1
,
11
],
[
0
,
10
,
1
,
11
]]],
[[[
0
,
10.1
,
1
,
11.1
],
[
0
,
10.1
,
1
,
11.1
]],
[[
0
,
100
,
1
,
101
],
[
0
,
100
,
1
,
101
]],
[[
0
,
1000
,
1
,
1002
],
[
0
,
999
,
2
,
1004
]],
[[
0
,
1000
,
1
,
1002.1
],
[
0
,
999
,
2
,
1002.7
]]]],
tf
.
float32
)
scores
=
tf
.
constant
([[[.
9
,
0.01
],
[.
75
,
0.05
],
[.
6
,
0.01
],
[.
95
,
0
]],
[[.
5
,
0.01
],
[.
3
,
0.01
],
[.
01
,
.
85
],
[.
01
,
.
5
]]])
clip_window
=
tf
.
constant
([[
0.
,
0.
,
5.
,
5.
],
[
0.
,
0.
,
200.
,
200.
]])
score_thresh
=
0.1
iou_thresh
=
.
5
max_output_size
=
4
exp_nms_corners
=
np
.
array
([[[
0
,
0
,
1
,
1
],
[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
]],
[[
0
,
10.1
,
1
,
11.1
],
[
0
,
100
,
1
,
101
],
[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
]]])
exp_nms_scores
=
np
.
array
([[.
9
,
0.
,
0.
,
0.
],
[.
5
,
.
3
,
0
,
0
]])
exp_nms_classes
=
np
.
array
([[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
]])
(
nmsed_boxes
,
nmsed_scores
,
nmsed_classes
,
nmsed_masks
,
nmsed_additional_fields
,
num_detections
)
=
post_processing
.
batch_multiclass_non_max_suppression
(
boxes
,
scores
,
score_thresh
,
iou_thresh
,
max_size_per_class
=
max_output_size
,
max_total_size
=
max_output_size
,
clip_window
=
clip_window
)
self
.
assertIsNone
(
nmsed_masks
)
self
.
assertIsNone
(
nmsed_additional_fields
)
# Check static shapes
self
.
assertAllEqual
(
nmsed_boxes
.
shape
.
as_list
(),
exp_nms_corners
.
shape
)
self
.
assertAllEqual
(
nmsed_scores
.
shape
.
as_list
(),
exp_nms_scores
.
shape
)
self
.
assertAllEqual
(
nmsed_classes
.
shape
.
as_list
(),
exp_nms_classes
.
shape
)
self
.
assertEqual
(
num_detections
.
shape
.
as_list
(),
[
2
])
with
self
.
test_session
()
as
sess
:
(
nmsed_boxes
,
nmsed_scores
,
nmsed_classes
,
num_detections
)
=
sess
.
run
([
nmsed_boxes
,
nmsed_scores
,
nmsed_classes
,
num_detections
])
self
.
assertAllClose
(
nmsed_boxes
,
exp_nms_corners
)
self
.
assertAllClose
(
nmsed_scores
,
exp_nms_scores
)
self
.
assertAllClose
(
nmsed_classes
,
exp_nms_classes
)
self
.
assertAllClose
(
num_detections
,
[
1
,
2
])
def
test_batch_multiclass_nms_with_masks
(
self
):
def
test_batch_multiclass_nms_with_masks
(
self
):
boxes
=
tf
.
constant
([[[[
0
,
0
,
1
,
1
],
[
0
,
0
,
4
,
5
]],
boxes
=
tf
.
constant
([[[[
0
,
0
,
1
,
1
],
[
0
,
0
,
4
,
5
]],
[[
0
,
0.1
,
1
,
1.1
],
[
0
,
0.1
,
2
,
1.1
]],
[[
0
,
0.1
,
1
,
1.1
],
[
0
,
0.1
,
2
,
1.1
]],
...
...
research/object_detection/core/preprocessor.py
View file @
fd7b6887
...
@@ -35,6 +35,27 @@ in each row there is a box with [ymin xmin ymax xmax].
...
@@ -35,6 +35,27 @@ in each row there is a box with [ymin xmin ymax xmax].
Boxes are in normalized coordinates meaning
Boxes are in normalized coordinates meaning
their coordinate values range in [0, 1]
their coordinate values range in [0, 1]
To preprocess multiple images with the same operations in cases where
nondeterministic operations are used, a preprocessor_cache.PreprocessorCache
object can be passed into the preprocess function or individual operations.
All nondeterministic operations except random_jitter_boxes support caching.
E.g.
Let tensor_dict{1,2,3,4,5} be copies of the same inputs.
Let preprocess_options contain nondeterministic operation(s) excluding
random_jitter_boxes.
cache1 = preprocessor_cache.PreprocessorCache()
cache2 = preprocessor_cache.PreprocessorCache()
a = preprocess(tensor_dict1, preprocess_options, preprocess_vars_cache=cache1)
b = preprocess(tensor_dict2, preprocess_options, preprocess_vars_cache=cache1)
c = preprocess(tensor_dict3, preprocess_options, preprocess_vars_cache=cache2)
d = preprocess(tensor_dict4, preprocess_options, preprocess_vars_cache=cache2)
e = preprocess(tensor_dict5, preprocess_options)
Then correspondings tensors of object pairs (a,b) and (c,d)
are guaranteed to be equal element-wise, but the equality of any other object
pair cannot be determined.
Important Note: In tensor_dict, images is a rank 4 tensor, but preprocessing
Important Note: In tensor_dict, images is a rank 4 tensor, but preprocessing
functions receive a rank 3 tensor for processing the image. Thus, inside the
functions receive a rank 3 tensor for processing the image. Thus, inside the
preprocess function we squeeze the image to become a rank 3 tensor and then
preprocess function we squeeze the image to become a rank 3 tensor and then
...
@@ -42,6 +63,8 @@ we pass it to the functions. At the end of the preprocess we expand the image
...
@@ -42,6 +63,8 @@ we pass it to the functions. At the end of the preprocess we expand the image
back to rank 4.
back to rank 4.
"""
"""
import
functools
import
inspect
import
sys
import
sys
import
tensorflow
as
tf
import
tensorflow
as
tf
...
@@ -50,44 +73,79 @@ from tensorflow.python.ops import control_flow_ops
...
@@ -50,44 +73,79 @@ from tensorflow.python.ops import control_flow_ops
from
object_detection.core
import
box_list
from
object_detection.core
import
box_list
from
object_detection.core
import
box_list_ops
from
object_detection.core
import
box_list_ops
from
object_detection.core
import
keypoint_ops
from
object_detection.core
import
keypoint_ops
from
object_detection.core
import
preprocessor_cache
from
object_detection.core
import
standard_fields
as
fields
from
object_detection.core
import
standard_fields
as
fields
from
object_detection.utils
import
shape_utils
def
_apply_with_random_selector
(
x
,
func
,
num_cases
):
def
_apply_with_random_selector
(
x
,
func
,
num_cases
,
preprocess_vars_cache
=
None
,
key
=
''
):
"""Computes func(x, sel), with sel sampled from [0...num_cases-1].
"""Computes func(x, sel), with sel sampled from [0...num_cases-1].
If both preprocess_vars_cache AND key are the same between two calls, sel will
be the same value in both calls.
Args:
Args:
x: input Tensor.
x: input Tensor.
func: Python function to apply.
func: Python function to apply.
num_cases: Python int32, number of cases to sample sel from.
num_cases: Python int32, number of cases to sample sel from.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
key: variable identifier for preprocess_vars_cache.
Returns:
Returns:
The result of func(x, sel), where func receives the value of the
The result of func(x, sel), where func receives the value of the
selector as a python integer, but sel is sampled dynamically.
selector as a python integer, but sel is sampled dynamically.
"""
"""
rand_sel
=
tf
.
random_uniform
([],
maxval
=
num_cases
,
dtype
=
tf
.
int32
)
generator_func
=
functools
.
partial
(
tf
.
random_uniform
,
[],
maxval
=
num_cases
,
dtype
=
tf
.
int32
)
rand_sel
=
_get_or_create_preprocess_rand_vars
(
generator_func
,
preprocessor_cache
.
PreprocessorCache
.
SELECTOR
,
preprocess_vars_cache
,
key
)
# Pass the real x only to one of the func calls.
# Pass the real x only to one of the func calls.
return
control_flow_ops
.
merge
([
func
(
return
control_flow_ops
.
merge
([
func
(
control_flow_ops
.
switch
(
x
,
tf
.
equal
(
rand_sel
,
case
))[
1
],
case
)
control_flow_ops
.
switch
(
x
,
tf
.
equal
(
rand_sel
,
case
))[
1
],
case
)
for
case
in
range
(
num_cases
)])[
0
]
for
case
in
range
(
num_cases
)])[
0
]
def
_apply_with_random_selector_tuples
(
x
,
func
,
num_cases
):
def
_apply_with_random_selector_tuples
(
x
,
func
,
num_cases
,
preprocess_vars_cache
=
None
,
key
=
''
):
"""Computes func(x, sel), with sel sampled from [0...num_cases-1].
"""Computes func(x, sel), with sel sampled from [0...num_cases-1].
If both preprocess_vars_cache AND key are the same between two calls, sel will
be the same value in both calls.
Args:
Args:
x: A tuple of input tensors.
x: A tuple of input tensors.
func: Python function to apply.
func: Python function to apply.
num_cases: Python int32, number of cases to sample sel from.
num_cases: Python int32, number of cases to sample sel from.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
key: variable identifier for preprocess_vars_cache.
Returns:
Returns:
The result of func(x, sel), where func receives the value of the
The result of func(x, sel), where func receives the value of the
selector as a python integer, but sel is sampled dynamically.
selector as a python integer, but sel is sampled dynamically.
"""
"""
num_inputs
=
len
(
x
)
num_inputs
=
len
(
x
)
rand_sel
=
tf
.
random_uniform
([],
maxval
=
num_cases
,
dtype
=
tf
.
int32
)
generator_func
=
functools
.
partial
(
# Pass the real x only to one of the func calls.
tf
.
random_uniform
,
[],
maxval
=
num_cases
,
dtype
=
tf
.
int32
)
rand_sel
=
_get_or_create_preprocess_rand_vars
(
generator_func
,
preprocessor_cache
.
PreprocessorCache
.
SELECTOR_TUPLES
,
preprocess_vars_cache
,
key
)
# Pass the real x only to one of the func calls.
tuples
=
[
list
()
for
t
in
x
]
tuples
=
[
list
()
for
t
in
x
]
for
case
in
range
(
num_cases
):
for
case
in
range
(
num_cases
):
new_x
=
[
control_flow_ops
.
switch
(
t
,
tf
.
equal
(
rand_sel
,
case
))[
1
]
for
t
in
x
]
new_x
=
[
control_flow_ops
.
switch
(
t
,
tf
.
equal
(
rand_sel
,
case
))[
1
]
for
t
in
x
]
...
@@ -100,6 +158,37 @@ def _apply_with_random_selector_tuples(x, func, num_cases):
...
@@ -100,6 +158,37 @@ def _apply_with_random_selector_tuples(x, func, num_cases):
return
tuple
(
tuples
)
return
tuple
(
tuples
)
def
_get_or_create_preprocess_rand_vars
(
generator_func
,
function_id
,
preprocess_vars_cache
,
key
=
''
):
"""Returns a tensor stored in preprocess_vars_cache or using generator_func.
If the tensor was previously generated and appears in the PreprocessorCache,
the previously generated tensor will be returned. Otherwise, a new tensor
is generated using generator_func and stored in the cache.
Args:
generator_func: A 0-argument function that generates a tensor.
function_id: identifier for the preprocessing function used.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
key: identifier for the variable stored.
Returns:
The generated tensor.
"""
if
preprocess_vars_cache
is
not
None
:
var
=
preprocess_vars_cache
.
get
(
function_id
,
key
)
if
var
is
None
:
var
=
generator_func
()
preprocess_vars_cache
.
update
(
function_id
,
key
,
var
)
else
:
var
=
generator_func
()
return
var
def
_random_integer
(
minval
,
maxval
,
seed
):
def
_random_integer
(
minval
,
maxval
,
seed
):
"""Returns a random 0-D tensor between minval and maxval.
"""Returns a random 0-D tensor between minval and maxval.
...
@@ -115,6 +204,40 @@ def _random_integer(minval, maxval, seed):
...
@@ -115,6 +204,40 @@ def _random_integer(minval, maxval, seed):
[],
minval
=
minval
,
maxval
=
maxval
,
dtype
=
tf
.
int32
,
seed
=
seed
)
[],
minval
=
minval
,
maxval
=
maxval
,
dtype
=
tf
.
int32
,
seed
=
seed
)
# TODO: This method is needed because the current
# tf.image.rgb_to_grayscale method does not support quantization. Replace with
# tf.image.rgb_to_grayscale after quantization support is added.
def
_rgb_to_grayscale
(
images
,
name
=
None
):
"""Converts one or more images from RGB to Grayscale.
Outputs a tensor of the same `DType` and rank as `images`. The size of the
last dimension of the output is 1, containing the Grayscale value of the
pixels.
Args:
images: The RGB tensor to convert. Last dimension must have size 3 and
should contain RGB values.
name: A name for the operation (optional).
Returns:
The converted grayscale image(s).
"""
with
tf
.
name_scope
(
name
,
'rgb_to_grayscale'
,
[
images
])
as
name
:
images
=
tf
.
convert_to_tensor
(
images
,
name
=
'images'
)
# Remember original dtype to so we can convert back if needed
orig_dtype
=
images
.
dtype
flt_image
=
tf
.
image
.
convert_image_dtype
(
images
,
tf
.
float32
)
# Reference for converting between RGB and grayscale.
# https://en.wikipedia.org/wiki/Luma_%28video%29
rgb_weights
=
[
0.2989
,
0.5870
,
0.1140
]
rank_1
=
tf
.
expand_dims
(
tf
.
rank
(
images
)
-
1
,
0
)
gray_float
=
tf
.
reduce_sum
(
flt_image
*
rgb_weights
,
rank_1
,
keepdims
=
True
)
gray_float
.
set_shape
(
images
.
get_shape
()[:
-
1
].
concatenate
([
1
]))
return
tf
.
image
.
convert_image_dtype
(
gray_float
,
orig_dtype
,
name
=
name
)
def
normalize_image
(
image
,
original_minval
,
original_maxval
,
target_minval
,
def
normalize_image
(
image
,
original_minval
,
original_maxval
,
target_minval
,
target_maxval
):
target_maxval
):
"""Normalizes pixel values in the image.
"""Normalizes pixel values in the image.
...
@@ -312,7 +435,8 @@ def random_horizontal_flip(image,
...
@@ -312,7 +435,8 @@ def random_horizontal_flip(image,
masks
=
None
,
masks
=
None
,
keypoints
=
None
,
keypoints
=
None
,
keypoint_flip_permutation
=
None
,
keypoint_flip_permutation
=
None
,
seed
=
None
):
seed
=
None
,
preprocess_vars_cache
=
None
):
"""Randomly flips the image and detections horizontally.
"""Randomly flips the image and detections horizontally.
The probability of flipping the image is 50%.
The probability of flipping the image is 50%.
...
@@ -333,6 +457,10 @@ def random_horizontal_flip(image,
...
@@ -333,6 +457,10 @@ def random_horizontal_flip(image,
keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip
keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip
permutation.
permutation.
seed: random seed
seed: random seed
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
Returns:
image: image which is the same shape as input image.
image: image which is the same shape as input image.
...
@@ -364,7 +492,12 @@ def random_horizontal_flip(image,
...
@@ -364,7 +492,12 @@ def random_horizontal_flip(image,
with
tf
.
name_scope
(
'RandomHorizontalFlip'
,
values
=
[
image
,
boxes
]):
with
tf
.
name_scope
(
'RandomHorizontalFlip'
,
values
=
[
image
,
boxes
]):
result
=
[]
result
=
[]
# random variable defining whether to do flip or not
# random variable defining whether to do flip or not
do_a_flip_random
=
tf
.
greater
(
tf
.
random_uniform
([],
seed
=
seed
),
0.5
)
generator_func
=
functools
.
partial
(
tf
.
random_uniform
,
[],
seed
=
seed
)
do_a_flip_random
=
_get_or_create_preprocess_rand_vars
(
generator_func
,
preprocessor_cache
.
PreprocessorCache
.
HORIZONTAL_FLIP
,
preprocess_vars_cache
)
do_a_flip_random
=
tf
.
greater
(
do_a_flip_random
,
0.5
)
# flip image
# flip image
image
=
tf
.
cond
(
do_a_flip_random
,
lambda
:
_flip_image
(
image
),
lambda
:
image
)
image
=
tf
.
cond
(
do_a_flip_random
,
lambda
:
_flip_image
(
image
),
lambda
:
image
)
...
@@ -399,7 +532,8 @@ def random_vertical_flip(image,
...
@@ -399,7 +532,8 @@ def random_vertical_flip(image,
masks
=
None
,
masks
=
None
,
keypoints
=
None
,
keypoints
=
None
,
keypoint_flip_permutation
=
None
,
keypoint_flip_permutation
=
None
,
seed
=
None
):
seed
=
None
,
preprocess_vars_cache
=
None
):
"""Randomly flips the image and detections vertically.
"""Randomly flips the image and detections vertically.
The probability of flipping the image is 50%.
The probability of flipping the image is 50%.
...
@@ -420,6 +554,10 @@ def random_vertical_flip(image,
...
@@ -420,6 +554,10 @@ def random_vertical_flip(image,
keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip
keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip
permutation.
permutation.
seed: random seed
seed: random seed
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
Returns:
image: image which is the same shape as input image.
image: image which is the same shape as input image.
...
@@ -451,7 +589,11 @@ def random_vertical_flip(image,
...
@@ -451,7 +589,11 @@ def random_vertical_flip(image,
with
tf
.
name_scope
(
'RandomVerticalFlip'
,
values
=
[
image
,
boxes
]):
with
tf
.
name_scope
(
'RandomVerticalFlip'
,
values
=
[
image
,
boxes
]):
result
=
[]
result
=
[]
# random variable defining whether to do flip or not
# random variable defining whether to do flip or not
do_a_flip_random
=
tf
.
greater
(
tf
.
random_uniform
([],
seed
=
seed
),
0.5
)
generator_func
=
functools
.
partial
(
tf
.
random_uniform
,
[],
seed
=
seed
)
do_a_flip_random
=
_get_or_create_preprocess_rand_vars
(
generator_func
,
preprocessor_cache
.
PreprocessorCache
.
VERTICAL_FLIP
,
preprocess_vars_cache
)
do_a_flip_random
=
tf
.
greater
(
do_a_flip_random
,
0.5
)
# flip image
# flip image
image
=
tf
.
cond
(
do_a_flip_random
,
lambda
:
_flip_image
(
image
),
lambda
:
image
)
image
=
tf
.
cond
(
do_a_flip_random
,
lambda
:
_flip_image
(
image
),
lambda
:
image
)
...
@@ -485,7 +627,8 @@ def random_rotation90(image,
...
@@ -485,7 +627,8 @@ def random_rotation90(image,
boxes
=
None
,
boxes
=
None
,
masks
=
None
,
masks
=
None
,
keypoints
=
None
,
keypoints
=
None
,
seed
=
None
):
seed
=
None
,
preprocess_vars_cache
=
None
):
"""Randomly rotates the image and detections 90 degrees counter-clockwise.
"""Randomly rotates the image and detections 90 degrees counter-clockwise.
The probability of rotating the image is 50%. This can be combined with
The probability of rotating the image is 50%. This can be combined with
...
@@ -507,6 +650,10 @@ def random_rotation90(image,
...
@@ -507,6 +650,10 @@ def random_rotation90(image,
[num_instances, num_keypoints, 2]. The keypoints are in y-x
[num_instances, num_keypoints, 2]. The keypoints are in y-x
normalized coordinates.
normalized coordinates.
seed: random seed
seed: random seed
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
Returns:
image: image which is the same shape as input image.
image: image which is the same shape as input image.
...
@@ -532,7 +679,11 @@ def random_rotation90(image,
...
@@ -532,7 +679,11 @@ def random_rotation90(image,
result
=
[]
result
=
[]
# random variable defining whether to rotate by 90 degrees or not
# random variable defining whether to rotate by 90 degrees or not
do_a_rot90_random
=
tf
.
greater
(
tf
.
random_uniform
([],
seed
=
seed
),
0.5
)
generator_func
=
functools
.
partial
(
tf
.
random_uniform
,
[],
seed
=
seed
)
do_a_rot90_random
=
_get_or_create_preprocess_rand_vars
(
generator_func
,
preprocessor_cache
.
PreprocessorCache
.
ROTATION90
,
preprocess_vars_cache
)
do_a_rot90_random
=
tf
.
greater
(
do_a_rot90_random
,
0.5
)
# flip image
# flip image
image
=
tf
.
cond
(
do_a_rot90_random
,
lambda
:
_rot90_image
(
image
),
image
=
tf
.
cond
(
do_a_rot90_random
,
lambda
:
_rot90_image
(
image
),
...
@@ -562,7 +713,11 @@ def random_rotation90(image,
...
@@ -562,7 +713,11 @@ def random_rotation90(image,
return
tuple
(
result
)
return
tuple
(
result
)
def
random_pixel_value_scale
(
image
,
minval
=
0.9
,
maxval
=
1.1
,
seed
=
None
):
def
random_pixel_value_scale
(
image
,
minval
=
0.9
,
maxval
=
1.1
,
seed
=
None
,
preprocess_vars_cache
=
None
):
"""Scales each value in the pixels of the image.
"""Scales each value in the pixels of the image.
This function scales each pixel independent of the other ones.
This function scales each pixel independent of the other ones.
...
@@ -575,17 +730,24 @@ def random_pixel_value_scale(image, minval=0.9, maxval=1.1, seed=None):
...
@@ -575,17 +730,24 @@ def random_pixel_value_scale(image, minval=0.9, maxval=1.1, seed=None):
minval: lower ratio of scaling pixel values.
minval: lower ratio of scaling pixel values.
maxval: upper ratio of scaling pixel values.
maxval: upper ratio of scaling pixel values.
seed: random seed.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
Returns:
image: image which is the same shape as input image.
image: image which is the same shape as input image.
"""
"""
with
tf
.
name_scope
(
'RandomPixelValueScale'
,
values
=
[
image
]):
with
tf
.
name_scope
(
'RandomPixelValueScale'
,
values
=
[
image
]):
color_coef
=
tf
.
random_uniform
(
generator_func
=
functools
.
partial
(
tf
.
shape
(
image
),
tf
.
random_uniform
,
tf
.
shape
(
image
),
minval
=
minval
,
minval
=
minval
,
maxval
=
maxval
,
maxval
=
maxval
,
dtype
=
tf
.
float32
,
seed
=
seed
)
dtype
=
tf
.
float32
,
color_coef
=
_get_or_create_preprocess_rand_vars
(
seed
=
seed
)
generator_func
,
preprocessor_cache
.
PreprocessorCache
.
PIXEL_VALUE_SCALE
,
preprocess_vars_cache
)
image
=
tf
.
multiply
(
image
,
color_coef
)
image
=
tf
.
multiply
(
image
,
color_coef
)
image
=
tf
.
clip_by_value
(
image
,
0.0
,
1.0
)
image
=
tf
.
clip_by_value
(
image
,
0.0
,
1.0
)
...
@@ -596,7 +758,8 @@ def random_image_scale(image,
...
@@ -596,7 +758,8 @@ def random_image_scale(image,
masks
=
None
,
masks
=
None
,
min_scale_ratio
=
0.5
,
min_scale_ratio
=
0.5
,
max_scale_ratio
=
2.0
,
max_scale_ratio
=
2.0
,
seed
=
None
):
seed
=
None
,
preprocess_vars_cache
=
None
):
"""Scales the image size.
"""Scales the image size.
Args:
Args:
...
@@ -607,6 +770,10 @@ def random_image_scale(image,
...
@@ -607,6 +770,10 @@ def random_image_scale(image,
min_scale_ratio: minimum scaling ratio.
min_scale_ratio: minimum scaling ratio.
max_scale_ratio: maximum scaling ratio.
max_scale_ratio: maximum scaling ratio.
seed: random seed.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
Returns:
image: image which is the same rank as input image.
image: image which is the same rank as input image.
...
@@ -618,10 +785,14 @@ def random_image_scale(image,
...
@@ -618,10 +785,14 @@ def random_image_scale(image,
image_shape
=
tf
.
shape
(
image
)
image_shape
=
tf
.
shape
(
image
)
image_height
=
image_shape
[
0
]
image_height
=
image_shape
[
0
]
image_width
=
image_shape
[
1
]
image_width
=
image_shape
[
1
]
size_coef
=
tf
.
random_uniform
([],
generator_func
=
functools
.
partial
(
minval
=
min_scale_ratio
,
tf
.
random_uniform
,
[],
maxval
=
max_scale_ratio
,
minval
=
min_scale_ratio
,
maxval
=
max_scale_ratio
,
dtype
=
tf
.
float32
,
seed
=
seed
)
dtype
=
tf
.
float32
,
seed
=
seed
)
size_coef
=
_get_or_create_preprocess_rand_vars
(
generator_func
,
preprocessor_cache
.
PreprocessorCache
.
IMAGE_SCALE
,
preprocess_vars_cache
)
image_newysize
=
tf
.
to_int32
(
image_newysize
=
tf
.
to_int32
(
tf
.
multiply
(
tf
.
to_float
(
image_height
),
size_coef
))
tf
.
multiply
(
tf
.
to_float
(
image_height
),
size_coef
))
image_newxsize
=
tf
.
to_int32
(
image_newxsize
=
tf
.
to_int32
(
...
@@ -636,7 +807,10 @@ def random_image_scale(image,
...
@@ -636,7 +807,10 @@ def random_image_scale(image,
return
tuple
(
result
)
return
tuple
(
result
)
def
random_rgb_to_gray
(
image
,
probability
=
0.1
,
seed
=
None
):
def
random_rgb_to_gray
(
image
,
probability
=
0.1
,
seed
=
None
,
preprocess_vars_cache
=
None
):
"""Changes the image from RGB to Grayscale with the given probability.
"""Changes the image from RGB to Grayscale with the given probability.
Args:
Args:
...
@@ -645,18 +819,25 @@ def random_rgb_to_gray(image, probability=0.1, seed=None):
...
@@ -645,18 +819,25 @@ def random_rgb_to_gray(image, probability=0.1, seed=None):
probability: the probability of returning a grayscale image.
probability: the probability of returning a grayscale image.
The probability should be a number between [0, 1].
The probability should be a number between [0, 1].
seed: random seed.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
Returns:
image: image which is the same shape as input image.
image: image which is the same shape as input image.
"""
"""
def
_image_to_gray
(
image
):
def
_image_to_gray
(
image
):
image_gray1
=
tf
.
image
.
rgb_to_grayscale
(
image
)
image_gray1
=
_
rgb_to_grayscale
(
image
)
image_gray3
=
tf
.
image
.
grayscale_to_rgb
(
image_gray1
)
image_gray3
=
tf
.
image
.
grayscale_to_rgb
(
image_gray1
)
return
image_gray3
return
image_gray3
with
tf
.
name_scope
(
'RandomRGBtoGray'
,
values
=
[
image
]):
with
tf
.
name_scope
(
'RandomRGBtoGray'
,
values
=
[
image
]):
# random variable defining whether to do flip or not
# random variable defining whether to change to grayscale or not
do_gray_random
=
tf
.
random_uniform
([],
seed
=
seed
)
generator_func
=
functools
.
partial
(
tf
.
random_uniform
,
[],
seed
=
seed
)
do_gray_random
=
_get_or_create_preprocess_rand_vars
(
generator_func
,
preprocessor_cache
.
PreprocessorCache
.
RGB_TO_GRAY
,
preprocess_vars_cache
)
image
=
tf
.
cond
(
image
=
tf
.
cond
(
tf
.
greater
(
do_gray_random
,
probability
),
lambda
:
image
,
tf
.
greater
(
do_gray_random
,
probability
),
lambda
:
image
,
...
@@ -665,7 +846,10 @@ def random_rgb_to_gray(image, probability=0.1, seed=None):
...
@@ -665,7 +846,10 @@ def random_rgb_to_gray(image, probability=0.1, seed=None):
return
image
return
image
def
random_adjust_brightness
(
image
,
max_delta
=
0.2
):
def
random_adjust_brightness
(
image
,
max_delta
=
0.2
,
seed
=
None
,
preprocess_vars_cache
=
None
):
"""Randomly adjusts brightness.
"""Randomly adjusts brightness.
Makes sure the output image is still between 0 and 1.
Makes sure the output image is still between 0 and 1.
...
@@ -674,18 +858,34 @@ def random_adjust_brightness(image, max_delta=0.2):
...
@@ -674,18 +858,34 @@ def random_adjust_brightness(image, max_delta=0.2):
image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
with pixel values varying between [0, 1].
with pixel values varying between [0, 1].
max_delta: how much to change the brightness. A value between [0, 1).
max_delta: how much to change the brightness. A value between [0, 1).
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
Returns:
image: image which is the same shape as input image.
image: image which is the same shape as input image.
boxes: boxes which is the same shape as input boxes.
boxes: boxes which is the same shape as input boxes.
"""
"""
with
tf
.
name_scope
(
'RandomAdjustBrightness'
,
values
=
[
image
]):
with
tf
.
name_scope
(
'RandomAdjustBrightness'
,
values
=
[
image
]):
image
=
tf
.
image
.
random_brightness
(
image
,
max_delta
)
generator_func
=
functools
.
partial
(
tf
.
random_uniform
,
[],
-
max_delta
,
max_delta
,
seed
=
seed
)
delta
=
_get_or_create_preprocess_rand_vars
(
generator_func
,
preprocessor_cache
.
PreprocessorCache
.
ADJUST_BRIGHTNESS
,
preprocess_vars_cache
)
image
=
tf
.
image
.
adjust_brightness
(
image
,
delta
)
image
=
tf
.
clip_by_value
(
image
,
clip_value_min
=
0.0
,
clip_value_max
=
1.0
)
image
=
tf
.
clip_by_value
(
image
,
clip_value_min
=
0.0
,
clip_value_max
=
1.0
)
return
image
return
image
def
random_adjust_contrast
(
image
,
min_delta
=
0.8
,
max_delta
=
1.25
):
def
random_adjust_contrast
(
image
,
min_delta
=
0.8
,
max_delta
=
1.25
,
seed
=
None
,
preprocess_vars_cache
=
None
):
"""Randomly adjusts contrast.
"""Randomly adjusts contrast.
Makes sure the output image is still between 0 and 1.
Makes sure the output image is still between 0 and 1.
...
@@ -697,17 +897,31 @@ def random_adjust_contrast(image, min_delta=0.8, max_delta=1.25):
...
@@ -697,17 +897,31 @@ def random_adjust_contrast(image, min_delta=0.8, max_delta=1.25):
max_delta: how much to change the contrast. Contrast will change with a
max_delta: how much to change the contrast. Contrast will change with a
value between min_delta and max_delta. This value will be
value between min_delta and max_delta. This value will be
multiplied to the current contrast of the image.
multiplied to the current contrast of the image.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
Returns:
image: image which is the same shape as input image.
image: image which is the same shape as input image.
"""
"""
with
tf
.
name_scope
(
'RandomAdjustContrast'
,
values
=
[
image
]):
with
tf
.
name_scope
(
'RandomAdjustContrast'
,
values
=
[
image
]):
image
=
tf
.
image
.
random_contrast
(
image
,
min_delta
,
max_delta
)
generator_func
=
functools
.
partial
(
tf
.
random_uniform
,
[],
min_delta
,
max_delta
,
seed
=
seed
)
contrast_factor
=
_get_or_create_preprocess_rand_vars
(
generator_func
,
preprocessor_cache
.
PreprocessorCache
.
ADJUST_CONTRAST
,
preprocess_vars_cache
)
image
=
tf
.
image
.
adjust_contrast
(
image
,
contrast_factor
)
image
=
tf
.
clip_by_value
(
image
,
clip_value_min
=
0.0
,
clip_value_max
=
1.0
)
image
=
tf
.
clip_by_value
(
image
,
clip_value_min
=
0.0
,
clip_value_max
=
1.0
)
return
image
return
image
def
random_adjust_hue
(
image
,
max_delta
=
0.02
):
def
random_adjust_hue
(
image
,
max_delta
=
0.02
,
seed
=
None
,
preprocess_vars_cache
=
None
):
"""Randomly adjusts hue.
"""Randomly adjusts hue.
Makes sure the output image is still between 0 and 1.
Makes sure the output image is still between 0 and 1.
...
@@ -716,17 +930,31 @@ def random_adjust_hue(image, max_delta=0.02):
...
@@ -716,17 +930,31 @@ def random_adjust_hue(image, max_delta=0.02):
image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
with pixel values varying between [0, 1].
with pixel values varying between [0, 1].
max_delta: change hue randomly with a value between 0 and max_delta.
max_delta: change hue randomly with a value between 0 and max_delta.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
Returns:
image: image which is the same shape as input image.
image: image which is the same shape as input image.
"""
"""
with
tf
.
name_scope
(
'RandomAdjustHue'
,
values
=
[
image
]):
with
tf
.
name_scope
(
'RandomAdjustHue'
,
values
=
[
image
]):
image
=
tf
.
image
.
random_hue
(
image
,
max_delta
)
generator_func
=
functools
.
partial
(
tf
.
random_uniform
,
[],
-
max_delta
,
max_delta
,
seed
=
seed
)
delta
=
_get_or_create_preprocess_rand_vars
(
generator_func
,
preprocessor_cache
.
PreprocessorCache
.
ADJUST_HUE
,
preprocess_vars_cache
)
image
=
tf
.
image
.
adjust_hue
(
image
,
delta
)
image
=
tf
.
clip_by_value
(
image
,
clip_value_min
=
0.0
,
clip_value_max
=
1.0
)
image
=
tf
.
clip_by_value
(
image
,
clip_value_min
=
0.0
,
clip_value_max
=
1.0
)
return
image
return
image
def
random_adjust_saturation
(
image
,
min_delta
=
0.8
,
max_delta
=
1.25
):
def
random_adjust_saturation
(
image
,
min_delta
=
0.8
,
max_delta
=
1.25
,
seed
=
None
,
preprocess_vars_cache
=
None
):
"""Randomly adjusts saturation.
"""Randomly adjusts saturation.
Makes sure the output image is still between 0 and 1.
Makes sure the output image is still between 0 and 1.
...
@@ -738,17 +966,28 @@ def random_adjust_saturation(image, min_delta=0.8, max_delta=1.25):
...
@@ -738,17 +966,28 @@ def random_adjust_saturation(image, min_delta=0.8, max_delta=1.25):
max_delta: how much to change the saturation. Saturation will change with a
max_delta: how much to change the saturation. Saturation will change with a
value between min_delta and max_delta. This value will be
value between min_delta and max_delta. This value will be
multiplied to the current saturation of the image.
multiplied to the current saturation of the image.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
Returns:
image: image which is the same shape as input image.
image: image which is the same shape as input image.
"""
"""
with
tf
.
name_scope
(
'RandomAdjustSaturation'
,
values
=
[
image
]):
with
tf
.
name_scope
(
'RandomAdjustSaturation'
,
values
=
[
image
]):
image
=
tf
.
image
.
random_saturation
(
image
,
min_delta
,
max_delta
)
generator_func
=
functools
.
partial
(
tf
.
random_uniform
,
[],
min_delta
,
max_delta
,
seed
=
seed
)
saturation_factor
=
_get_or_create_preprocess_rand_vars
(
generator_func
,
preprocessor_cache
.
PreprocessorCache
.
ADJUST_SATURATION
,
preprocess_vars_cache
)
image
=
tf
.
image
.
adjust_saturation
(
image
,
saturation_factor
)
image
=
tf
.
clip_by_value
(
image
,
clip_value_min
=
0.0
,
clip_value_max
=
1.0
)
image
=
tf
.
clip_by_value
(
image
,
clip_value_min
=
0.0
,
clip_value_max
=
1.0
)
return
image
return
image
def
random_distort_color
(
image
,
color_ordering
=
0
):
def
random_distort_color
(
image
,
color_ordering
=
0
,
preprocess_vars_cache
=
None
):
"""Randomly distorts color.
"""Randomly distorts color.
Randomly distorts color using a combination of brightness, hue, contrast
Randomly distorts color using a combination of brightness, hue, contrast
...
@@ -758,6 +997,10 @@ def random_distort_color(image, color_ordering=0):
...
@@ -758,6 +997,10 @@ def random_distort_color(image, color_ordering=0):
image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
with pixel values varying between [0, 1].
with pixel values varying between [0, 1].
color_ordering: Python int, a type of distortion (valid values: 0, 1).
color_ordering: Python int, a type of distortion (valid values: 0, 1).
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
Returns:
image: image which is the same shape as input image.
image: image which is the same shape as input image.
...
@@ -767,20 +1010,34 @@ def random_distort_color(image, color_ordering=0):
...
@@ -767,20 +1010,34 @@ def random_distort_color(image, color_ordering=0):
"""
"""
with
tf
.
name_scope
(
'RandomDistortColor'
,
values
=
[
image
]):
with
tf
.
name_scope
(
'RandomDistortColor'
,
values
=
[
image
]):
if
color_ordering
==
0
:
if
color_ordering
==
0
:
image
=
tf
.
image
.
random_brightness
(
image
,
max_delta
=
32.
/
255.
)
image
=
random_adjust_brightness
(
image
=
tf
.
image
.
random_saturation
(
image
,
lower
=
0.5
,
upper
=
1.5
)
image
,
max_delta
=
32.
/
255.
,
image
=
tf
.
image
.
random_hue
(
image
,
max_delta
=
0.2
)
preprocess_vars_cache
=
preprocess_vars_cache
)
image
=
tf
.
image
.
random_contrast
(
image
,
lower
=
0.5
,
upper
=
1.5
)
image
=
random_adjust_saturation
(
image
,
min_delta
=
0.5
,
max_delta
=
1.5
,
preprocess_vars_cache
=
preprocess_vars_cache
)
image
=
random_adjust_hue
(
image
,
max_delta
=
0.2
,
preprocess_vars_cache
=
preprocess_vars_cache
)
image
=
random_adjust_contrast
(
image
,
min_delta
=
0.5
,
max_delta
=
1.5
,
preprocess_vars_cache
=
preprocess_vars_cache
)
elif
color_ordering
==
1
:
elif
color_ordering
==
1
:
image
=
tf
.
image
.
random_brightness
(
image
,
max_delta
=
32.
/
255.
)
image
=
random_adjust_brightness
(
image
=
tf
.
image
.
random_contrast
(
image
,
lower
=
0.5
,
upper
=
1.5
)
image
,
max_delta
=
32.
/
255.
,
image
=
tf
.
image
.
random_saturation
(
image
,
lower
=
0.5
,
upper
=
1.5
)
preprocess_vars_cache
=
preprocess_vars_cache
)
image
=
tf
.
image
.
random_hue
(
image
,
max_delta
=
0.2
)
image
=
random_adjust_contrast
(
image
,
min_delta
=
0.5
,
max_delta
=
1.5
,
preprocess_vars_cache
=
preprocess_vars_cache
)
image
=
random_adjust_saturation
(
image
,
min_delta
=
0.5
,
max_delta
=
1.5
,
preprocess_vars_cache
=
preprocess_vars_cache
)
image
=
random_adjust_hue
(
image
,
max_delta
=
0.2
,
preprocess_vars_cache
=
preprocess_vars_cache
)
else
:
else
:
raise
ValueError
(
'color_ordering must be in {0, 1}'
)
raise
ValueError
(
'color_ordering must be in {0, 1}'
)
# The random_* ops do not necessarily clamp.
image
=
tf
.
clip_by_value
(
image
,
0.0
,
1.0
)
return
image
return
image
...
@@ -845,7 +1102,8 @@ def _strict_random_crop_image(image,
...
@@ -845,7 +1102,8 @@ def _strict_random_crop_image(image,
min_object_covered
=
1.0
,
min_object_covered
=
1.0
,
aspect_ratio_range
=
(
0.75
,
1.33
),
aspect_ratio_range
=
(
0.75
,
1.33
),
area_range
=
(
0.1
,
1.0
),
area_range
=
(
0.1
,
1.0
),
overlap_thresh
=
0.3
):
overlap_thresh
=
0.3
,
preprocess_vars_cache
=
None
):
"""Performs random crop.
"""Performs random crop.
Note: boxes will be clipped to the crop. Keypoint coordinates that are
Note: boxes will be clipped to the crop. Keypoint coordinates that are
...
@@ -878,6 +1136,10 @@ def _strict_random_crop_image(image,
...
@@ -878,6 +1136,10 @@ def _strict_random_crop_image(image,
original image.
original image.
overlap_thresh: minimum overlap thresh with new cropped
overlap_thresh: minimum overlap thresh with new cropped
image to keep the box.
image to keep the box.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
Returns:
image: image which is the same rank as input image.
image: image which is the same rank as input image.
...
@@ -900,7 +1162,8 @@ def _strict_random_crop_image(image,
...
@@ -900,7 +1162,8 @@ def _strict_random_crop_image(image,
tf
.
clip_by_value
(
tf
.
clip_by_value
(
boxes
,
clip_value_min
=
0.0
,
clip_value_max
=
1.0
),
1
)
boxes
,
clip_value_min
=
0.0
,
clip_value_max
=
1.0
),
1
)
sample_distorted_bounding_box
=
tf
.
image
.
sample_distorted_bounding_box
(
generator_func
=
functools
.
partial
(
tf
.
image
.
sample_distorted_bounding_box
,
image_shape
,
image_shape
,
bounding_boxes
=
boxes_expanded
,
bounding_boxes
=
boxes_expanded
,
min_object_covered
=
min_object_covered
,
min_object_covered
=
min_object_covered
,
...
@@ -909,6 +1172,13 @@ def _strict_random_crop_image(image,
...
@@ -909,6 +1172,13 @@ def _strict_random_crop_image(image,
max_attempts
=
100
,
max_attempts
=
100
,
use_image_if_no_bounding_boxes
=
True
)
use_image_if_no_bounding_boxes
=
True
)
# for ssd cropping, each value of min_object_covered has its own
# cached random variable
sample_distorted_bounding_box
=
_get_or_create_preprocess_rand_vars
(
generator_func
,
preprocessor_cache
.
PreprocessorCache
.
STRICT_CROP_IMAGE
,
preprocess_vars_cache
,
key
=
min_object_covered
)
im_box_begin
,
im_box_size
,
im_box
=
sample_distorted_bounding_box
im_box_begin
,
im_box_size
,
im_box
=
sample_distorted_bounding_box
new_image
=
tf
.
slice
(
image
,
im_box_begin
,
im_box_size
)
new_image
=
tf
.
slice
(
image
,
im_box_begin
,
im_box_size
)
...
@@ -984,7 +1254,8 @@ def random_crop_image(image,
...
@@ -984,7 +1254,8 @@ def random_crop_image(image,
area_range
=
(
0.1
,
1.0
),
area_range
=
(
0.1
,
1.0
),
overlap_thresh
=
0.3
,
overlap_thresh
=
0.3
,
random_coef
=
0.0
,
random_coef
=
0.0
,
seed
=
None
):
seed
=
None
,
preprocess_vars_cache
=
None
):
"""Randomly crops the image.
"""Randomly crops the image.
Given the input image and its bounding boxes, this op randomly
Given the input image and its bounding boxes, this op randomly
...
@@ -1029,6 +1300,10 @@ def random_crop_image(image,
...
@@ -1029,6 +1300,10 @@ def random_crop_image(image,
cropped image, and if it is 1.0, we will always get the
cropped image, and if it is 1.0, we will always get the
original image.
original image.
seed: random seed.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
Returns:
image: Image shape will be [new_height, new_width, channels].
image: Image shape will be [new_height, new_width, channels].
...
@@ -1056,13 +1331,17 @@ def random_crop_image(image,
...
@@ -1056,13 +1331,17 @@ def random_crop_image(image,
min_object_covered
=
min_object_covered
,
min_object_covered
=
min_object_covered
,
aspect_ratio_range
=
aspect_ratio_range
,
aspect_ratio_range
=
aspect_ratio_range
,
area_range
=
area_range
,
area_range
=
area_range
,
overlap_thresh
=
overlap_thresh
)
overlap_thresh
=
overlap_thresh
,
preprocess_vars_cache
=
preprocess_vars_cache
)
# avoids tf.cond to make faster RCNN training on borg. See b/140057645.
# avoids tf.cond to make faster RCNN training on borg. See b/140057645.
if
random_coef
<
sys
.
float_info
.
min
:
if
random_coef
<
sys
.
float_info
.
min
:
result
=
strict_random_crop_image_fn
()
result
=
strict_random_crop_image_fn
()
else
:
else
:
do_a_crop_random
=
tf
.
random_uniform
([],
seed
=
seed
)
generator_func
=
functools
.
partial
(
tf
.
random_uniform
,
[],
seed
=
seed
)
do_a_crop_random
=
_get_or_create_preprocess_rand_vars
(
generator_func
,
preprocessor_cache
.
PreprocessorCache
.
CROP_IMAGE
,
preprocess_vars_cache
)
do_a_crop_random
=
tf
.
greater
(
do_a_crop_random
,
random_coef
)
do_a_crop_random
=
tf
.
greater
(
do_a_crop_random
,
random_coef
)
outputs
=
[
image
,
boxes
,
labels
]
outputs
=
[
image
,
boxes
,
labels
]
...
@@ -1084,7 +1363,8 @@ def random_pad_image(image,
...
@@ -1084,7 +1363,8 @@ def random_pad_image(image,
min_image_size
=
None
,
min_image_size
=
None
,
max_image_size
=
None
,
max_image_size
=
None
,
pad_color
=
None
,
pad_color
=
None
,
seed
=
None
):
seed
=
None
,
preprocess_vars_cache
=
None
):
"""Randomly pads the image.
"""Randomly pads the image.
This function randomly pads the image with zeros. The final size of the
This function randomly pads the image with zeros. The final size of the
...
@@ -1110,8 +1390,11 @@ def random_pad_image(image,
...
@@ -1110,8 +1390,11 @@ def random_pad_image(image,
pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32.
pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32.
if set as None, it will be set to average color of the input
if set as None, it will be set to average color of the input
image.
image.
seed: random seed.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
Returns:
image: Image shape will be [new_height, new_width, channels].
image: Image shape will be [new_height, new_width, channels].
...
@@ -1155,6 +1438,12 @@ def random_pad_image(image,
...
@@ -1155,6 +1438,12 @@ def random_pad_image(image,
lambda
:
_random_integer
(
0
,
target_width
-
image_width
,
seed
),
lambda
:
_random_integer
(
0
,
target_width
-
image_width
,
seed
),
lambda
:
tf
.
constant
(
0
,
dtype
=
tf
.
int32
))
lambda
:
tf
.
constant
(
0
,
dtype
=
tf
.
int32
))
gen_func
=
lambda
:
(
target_height
,
target_width
,
offset_height
,
offset_width
)
params
=
_get_or_create_preprocess_rand_vars
(
gen_func
,
preprocessor_cache
.
PreprocessorCache
.
PAD_IMAGE
,
preprocess_vars_cache
)
target_height
,
target_width
,
offset_height
,
offset_width
=
params
new_image
=
tf
.
image
.
pad_to_bounding_box
(
new_image
=
tf
.
image
.
pad_to_bounding_box
(
image
,
image
,
offset_height
=
offset_height
,
offset_height
=
offset_height
,
...
@@ -1200,7 +1489,8 @@ def random_crop_pad_image(image,
...
@@ -1200,7 +1489,8 @@ def random_crop_pad_image(image,
min_padded_size_ratio
=
(
1.0
,
1.0
),
min_padded_size_ratio
=
(
1.0
,
1.0
),
max_padded_size_ratio
=
(
2.0
,
2.0
),
max_padded_size_ratio
=
(
2.0
,
2.0
),
pad_color
=
None
,
pad_color
=
None
,
seed
=
None
):
seed
=
None
,
preprocess_vars_cache
=
None
):
"""Randomly crops and pads the image.
"""Randomly crops and pads the image.
Given an input image and its bounding boxes, this op first randomly crops
Given an input image and its bounding boxes, this op first randomly crops
...
@@ -1241,6 +1531,10 @@ def random_crop_pad_image(image,
...
@@ -1241,6 +1531,10 @@ def random_crop_pad_image(image,
if set as None, it will be set to average color of the randomly
if set as None, it will be set to average color of the randomly
cropped image.
cropped image.
seed: random seed.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
Returns:
padded_image: padded image.
padded_image: padded image.
...
@@ -1263,7 +1557,8 @@ def random_crop_pad_image(image,
...
@@ -1263,7 +1557,8 @@ def random_crop_pad_image(image,
area_range
=
area_range
,
area_range
=
area_range
,
overlap_thresh
=
overlap_thresh
,
overlap_thresh
=
overlap_thresh
,
random_coef
=
random_coef
,
random_coef
=
random_coef
,
seed
=
seed
)
seed
=
seed
,
preprocess_vars_cache
=
preprocess_vars_cache
)
cropped_image
,
cropped_boxes
,
cropped_labels
=
result
[:
3
]
cropped_image
,
cropped_boxes
,
cropped_labels
=
result
[:
3
]
...
@@ -1280,7 +1575,8 @@ def random_crop_pad_image(image,
...
@@ -1280,7 +1575,8 @@ def random_crop_pad_image(image,
min_image_size
=
min_image_size
,
min_image_size
=
min_image_size
,
max_image_size
=
max_image_size
,
max_image_size
=
max_image_size
,
pad_color
=
pad_color
,
pad_color
=
pad_color
,
seed
=
seed
)
seed
=
seed
,
preprocess_vars_cache
=
preprocess_vars_cache
)
cropped_padded_output
=
(
padded_image
,
padded_boxes
,
cropped_labels
)
cropped_padded_output
=
(
padded_image
,
padded_boxes
,
cropped_labels
)
...
@@ -1299,7 +1595,8 @@ def random_crop_to_aspect_ratio(image,
...
@@ -1299,7 +1595,8 @@ def random_crop_to_aspect_ratio(image,
keypoints
=
None
,
keypoints
=
None
,
aspect_ratio
=
1.0
,
aspect_ratio
=
1.0
,
overlap_thresh
=
0.3
,
overlap_thresh
=
0.3
,
seed
=
None
):
seed
=
None
,
preprocess_vars_cache
=
None
):
"""Randomly crops an image to the specified aspect ratio.
"""Randomly crops an image to the specified aspect ratio.
Randomly crops the a portion of the image such that the crop is of the
Randomly crops the a portion of the image such that the crop is of the
...
@@ -1331,6 +1628,10 @@ def random_crop_to_aspect_ratio(image,
...
@@ -1331,6 +1628,10 @@ def random_crop_to_aspect_ratio(image,
overlap_thresh: minimum overlap thresh with new cropped
overlap_thresh: minimum overlap thresh with new cropped
image to keep the box.
image to keep the box.
seed: random seed.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
Returns:
image: image which is the same rank as input image.
image: image which is the same rank as input image.
...
@@ -1374,6 +1675,13 @@ def random_crop_to_aspect_ratio(image,
...
@@ -1374,6 +1675,13 @@ def random_crop_to_aspect_ratio(image,
# offset_height is randomly chosen from [0, offset_height - target_height)
# offset_height is randomly chosen from [0, offset_height - target_height)
offset_height
=
_random_integer
(
0
,
orig_height
-
target_height
+
1
,
seed
)
offset_height
=
_random_integer
(
0
,
orig_height
-
target_height
+
1
,
seed
)
offset_width
=
_random_integer
(
0
,
orig_width
-
target_width
+
1
,
seed
)
offset_width
=
_random_integer
(
0
,
orig_width
-
target_width
+
1
,
seed
)
generator_func
=
lambda
:
(
offset_height
,
offset_width
)
offset_height
,
offset_width
=
_get_or_create_preprocess_rand_vars
(
generator_func
,
preprocessor_cache
.
PreprocessorCache
.
CROP_TO_ASPECT_RATIO
,
preprocess_vars_cache
)
new_image
=
tf
.
image
.
crop_to_bounding_box
(
new_image
=
tf
.
image
.
crop_to_bounding_box
(
image
,
offset_height
,
offset_width
,
target_height
,
target_width
)
image
,
offset_height
,
offset_width
,
target_height
,
target_width
)
...
@@ -1436,7 +1744,8 @@ def random_pad_to_aspect_ratio(image,
...
@@ -1436,7 +1744,8 @@ def random_pad_to_aspect_ratio(image,
aspect_ratio
=
1.0
,
aspect_ratio
=
1.0
,
min_padded_size_ratio
=
(
1.0
,
1.0
),
min_padded_size_ratio
=
(
1.0
,
1.0
),
max_padded_size_ratio
=
(
2.0
,
2.0
),
max_padded_size_ratio
=
(
2.0
,
2.0
),
seed
=
None
):
seed
=
None
,
preprocess_vars_cache
=
None
):
"""Randomly zero pads an image to the specified aspect ratio.
"""Randomly zero pads an image to the specified aspect ratio.
Pads the image so that the resulting image will have the specified aspect
Pads the image so that the resulting image will have the specified aspect
...
@@ -1464,6 +1773,10 @@ def random_pad_to_aspect_ratio(image,
...
@@ -1464,6 +1773,10 @@ def random_pad_to_aspect_ratio(image,
max_padded_size_ratio: max ratio of padded image height and width to the
max_padded_size_ratio: max ratio of padded image height and width to the
input image's height and width.
input image's height and width.
seed: random seed.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
Returns:
image: image which is the same rank as input image.
image: image which is the same rank as input image.
...
@@ -1510,7 +1823,13 @@ def random_pad_to_aspect_ratio(image,
...
@@ -1510,7 +1823,13 @@ def random_pad_to_aspect_ratio(image,
min_scale
=
tf
.
maximum
(
min_height
/
target_height
,
min_width
/
target_width
)
min_scale
=
tf
.
maximum
(
min_height
/
target_height
,
min_width
/
target_width
)
max_scale
=
tf
.
minimum
(
max_height
/
target_height
,
max_width
/
target_width
)
max_scale
=
tf
.
minimum
(
max_height
/
target_height
,
max_width
/
target_width
)
scale
=
tf
.
random_uniform
([],
min_scale
,
max_scale
,
seed
=
seed
)
generator_func
=
functools
.
partial
(
tf
.
random_uniform
,
[],
min_scale
,
max_scale
,
seed
=
seed
)
scale
=
_get_or_create_preprocess_rand_vars
(
generator_func
,
preprocessor_cache
.
PreprocessorCache
.
PAD_TO_ASPECT_RATIO
,
preprocess_vars_cache
)
target_height
=
scale
*
target_height
target_height
=
scale
*
target_height
target_width
=
scale
*
target_width
target_width
=
scale
*
target_width
...
@@ -1549,7 +1868,8 @@ def random_black_patches(image,
...
@@ -1549,7 +1868,8 @@ def random_black_patches(image,
max_black_patches
=
10
,
max_black_patches
=
10
,
probability
=
0.5
,
probability
=
0.5
,
size_to_image_ratio
=
0.1
,
size_to_image_ratio
=
0.1
,
random_seed
=
None
):
random_seed
=
None
,
preprocess_vars_cache
=
None
):
"""Randomly adds some black patches to the image.
"""Randomly adds some black patches to the image.
This op adds up to max_black_patches square black patches of a fixed size
This op adds up to max_black_patches square black patches of a fixed size
...
@@ -1566,15 +1886,20 @@ def random_black_patches(image,
...
@@ -1566,15 +1886,20 @@ def random_black_patches(image,
box_size = size_to_image_ratio *
box_size = size_to_image_ratio *
min(image_width, image_height)
min(image_width, image_height)
random_seed: random seed.
random_seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
Returns:
image
image
"""
"""
def
add_black_patch_to_image
(
image
):
def
add_black_patch_to_image
(
image
,
idx
):
"""Function for adding one patch to the image.
"""Function for adding one patch to the image.
Args:
Args:
image: image
image: image
idx: counter for number of patches that could have been added
Returns:
Returns:
image with a randomly added black box
image with a randomly added black box
...
@@ -1586,10 +1911,19 @@ def random_black_patches(image,
...
@@ -1586,10 +1911,19 @@ def random_black_patches(image,
tf
.
multiply
(
tf
.
multiply
(
tf
.
minimum
(
tf
.
to_float
(
image_height
),
tf
.
to_float
(
image_width
)),
tf
.
minimum
(
tf
.
to_float
(
image_height
),
tf
.
to_float
(
image_width
)),
size_to_image_ratio
))
size_to_image_ratio
))
normalized_y_min
=
tf
.
random_uniform
(
[],
minval
=
0.0
,
maxval
=
(
1.0
-
size_to_image_ratio
),
seed
=
random_seed
)
generator_func
=
functools
.
partial
(
tf
.
random_uniform
,
[],
minval
=
0.0
,
normalized_x_min
=
tf
.
random_uniform
(
maxval
=
(
1.0
-
size_to_image_ratio
),
[],
minval
=
0.0
,
maxval
=
(
1.0
-
size_to_image_ratio
),
seed
=
random_seed
)
seed
=
random_seed
)
normalized_y_min
=
_get_or_create_preprocess_rand_vars
(
generator_func
,
preprocessor_cache
.
PreprocessorCache
.
ADD_BLACK_PATCH
,
preprocess_vars_cache
,
key
=
str
(
idx
)
+
'y'
)
normalized_x_min
=
_get_or_create_preprocess_rand_vars
(
generator_func
,
preprocessor_cache
.
PreprocessorCache
.
ADD_BLACK_PATCH
,
preprocess_vars_cache
,
key
=
str
(
idx
)
+
'x'
)
y_min
=
tf
.
to_int32
(
normalized_y_min
*
tf
.
to_float
(
image_height
))
y_min
=
tf
.
to_int32
(
normalized_y_min
*
tf
.
to_float
(
image_height
))
x_min
=
tf
.
to_int32
(
normalized_x_min
*
tf
.
to_float
(
image_width
))
x_min
=
tf
.
to_int32
(
normalized_x_min
*
tf
.
to_float
(
image_width
))
black_box
=
tf
.
ones
([
box_size
,
box_size
,
3
],
dtype
=
tf
.
float32
)
black_box
=
tf
.
ones
([
box_size
,
box_size
,
3
],
dtype
=
tf
.
float32
)
...
@@ -1599,13 +1933,17 @@ def random_black_patches(image,
...
@@ -1599,13 +1933,17 @@ def random_black_patches(image,
return
image
return
image
with
tf
.
name_scope
(
'RandomBlackPatchInImage'
,
values
=
[
image
]):
with
tf
.
name_scope
(
'RandomBlackPatchInImage'
,
values
=
[
image
]):
for
_
in
range
(
max_black_patches
):
for
idx
in
range
(
max_black_patches
):
random_prob
=
tf
.
random_uniform
(
generator_func
=
functools
.
partial
(
tf
.
random_uniform
,
[],
[],
minval
=
0.0
,
maxval
=
1.0
,
dtype
=
tf
.
float32
,
seed
=
random_seed
)
minval
=
0.0
,
maxval
=
1.0
,
dtype
=
tf
.
float32
,
seed
=
random_seed
)
random_prob
=
_get_or_create_preprocess_rand_vars
(
generator_func
,
preprocessor_cache
.
PreprocessorCache
.
BLACK_PATCHES
,
preprocess_vars_cache
,
key
=
idx
)
image
=
tf
.
cond
(
image
=
tf
.
cond
(
tf
.
greater
(
random_prob
,
probability
),
lambda
:
image
,
tf
.
greater
(
random_prob
,
probability
),
lambda
:
image
,
lambda
:
add_black_patch_to_image
(
image
))
functools
.
partial
(
add_black_patch_to_image
,
image
=
image
,
idx
=
idx
))
return
image
return
image
...
@@ -1623,12 +1961,16 @@ def image_to_float(image):
...
@@ -1623,12 +1961,16 @@ def image_to_float(image):
return
image
return
image
def
random_resize_method
(
image
,
target_size
):
def
random_resize_method
(
image
,
target_size
,
preprocess_vars_cache
=
None
):
"""Uses a random resize method to resize the image to target size.
"""Uses a random resize method to resize the image to target size.
Args:
Args:
image: a rank 3 tensor.
image: a rank 3 tensor.
target_size: a list of [target_height, target_width]
target_size: a list of [target_height, target_width]
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
Returns:
resized image.
resized image.
...
@@ -1637,7 +1979,9 @@ def random_resize_method(image, target_size):
...
@@ -1637,7 +1979,9 @@ def random_resize_method(image, target_size):
resized_image
=
_apply_with_random_selector
(
resized_image
=
_apply_with_random_selector
(
image
,
image
,
lambda
x
,
method
:
tf
.
image
.
resize_images
(
x
,
target_size
,
method
),
lambda
x
,
method
:
tf
.
image
.
resize_images
(
x
,
target_size
,
method
),
num_cases
=
4
)
num_cases
=
4
,
preprocess_vars_cache
=
preprocess_vars_cache
,
key
=
preprocessor_cache
.
PreprocessorCache
.
RESIZE_METHOD
)
return
resized_image
return
resized_image
...
@@ -1647,6 +1991,7 @@ def _compute_new_static_size(image, min_dimension, max_dimension):
...
@@ -1647,6 +1991,7 @@ def _compute_new_static_size(image, min_dimension, max_dimension):
image_shape
=
image
.
get_shape
().
as_list
()
image_shape
=
image
.
get_shape
().
as_list
()
orig_height
=
image_shape
[
0
]
orig_height
=
image_shape
[
0
]
orig_width
=
image_shape
[
1
]
orig_width
=
image_shape
[
1
]
num_channels
=
image_shape
[
2
]
orig_min_dim
=
min
(
orig_height
,
orig_width
)
orig_min_dim
=
min
(
orig_height
,
orig_width
)
# Calculates the larger of the possible sizes
# Calculates the larger of the possible sizes
large_scale_factor
=
min_dimension
/
float
(
orig_min_dim
)
large_scale_factor
=
min_dimension
/
float
(
orig_min_dim
)
...
@@ -1674,7 +2019,7 @@ def _compute_new_static_size(image, min_dimension, max_dimension):
...
@@ -1674,7 +2019,7 @@ def _compute_new_static_size(image, min_dimension, max_dimension):
new_size
=
small_size
new_size
=
small_size
else
:
else
:
new_size
=
large_size
new_size
=
large_size
return
tf
.
constant
(
new_size
)
return
tf
.
constant
(
new_size
+
[
num_channels
]
)
def
_compute_new_dynamic_size
(
image
,
min_dimension
,
max_dimension
):
def
_compute_new_dynamic_size
(
image
,
min_dimension
,
max_dimension
):
...
@@ -1682,6 +2027,7 @@ def _compute_new_dynamic_size(image, min_dimension, max_dimension):
...
@@ -1682,6 +2027,7 @@ def _compute_new_dynamic_size(image, min_dimension, max_dimension):
image_shape
=
tf
.
shape
(
image
)
image_shape
=
tf
.
shape
(
image
)
orig_height
=
tf
.
to_float
(
image_shape
[
0
])
orig_height
=
tf
.
to_float
(
image_shape
[
0
])
orig_width
=
tf
.
to_float
(
image_shape
[
1
])
orig_width
=
tf
.
to_float
(
image_shape
[
1
])
num_channels
=
image_shape
[
2
]
orig_min_dim
=
tf
.
minimum
(
orig_height
,
orig_width
)
orig_min_dim
=
tf
.
minimum
(
orig_height
,
orig_width
)
# Calculates the larger of the possible sizes
# Calculates the larger of the possible sizes
min_dimension
=
tf
.
constant
(
min_dimension
,
dtype
=
tf
.
float32
)
min_dimension
=
tf
.
constant
(
min_dimension
,
dtype
=
tf
.
float32
)
...
@@ -1711,7 +2057,7 @@ def _compute_new_dynamic_size(image, min_dimension, max_dimension):
...
@@ -1711,7 +2057,7 @@ def _compute_new_dynamic_size(image, min_dimension, max_dimension):
lambda
:
small_size
,
lambda
:
large_size
)
lambda
:
small_size
,
lambda
:
large_size
)
else
:
else
:
new_size
=
large_size
new_size
=
large_size
return
new_size
return
tf
.
stack
(
tf
.
unstack
(
new_size
)
+
[
num_channels
])
def
resize_to_range
(
image
,
def
resize_to_range
(
image
,
...
@@ -1719,7 +2065,8 @@ def resize_to_range(image,
...
@@ -1719,7 +2065,8 @@ def resize_to_range(image,
min_dimension
=
None
,
min_dimension
=
None
,
max_dimension
=
None
,
max_dimension
=
None
,
method
=
tf
.
image
.
ResizeMethod
.
BILINEAR
,
method
=
tf
.
image
.
ResizeMethod
.
BILINEAR
,
align_corners
=
False
):
align_corners
=
False
,
pad_to_max_dimension
=
False
):
"""Resizes an image so its dimensions are within the provided value.
"""Resizes an image so its dimensions are within the provided value.
The output size can be described by two cases:
The output size can be described by two cases:
...
@@ -1740,15 +2087,22 @@ def resize_to_range(image,
...
@@ -1740,15 +2087,22 @@ def resize_to_range(image,
BILINEAR.
BILINEAR.
align_corners: bool. If true, exactly align all 4 corners of the input
align_corners: bool. If true, exactly align all 4 corners of the input
and output. Defaults to False.
and output. Defaults to False.
pad_to_max_dimension: Whether to resize the image and pad it with zeros
so the resulting image is of the spatial size
[max_dimension, max_dimension]. If masks are included they are padded
similarly.
Returns:
Returns:
A 3D tensor of shape [new_height, new_width, channels],
Note that the position of the resized_image_shape changes based on whether
where the image has been resized (with bilinear interpolation) so that
masks are present.
min(new_height, new_width) == min_dimension or
resized_image: A 3D tensor of shape [new_height, new_width, channels],
max(new_height, new_width) == max_dimension.
where the image has been resized (with bilinear interpolation) so that
min(new_height, new_width) == min_dimension or
If masks is not None, also outputs masks:
max(new_height, new_width) == max_dimension.
A 3D tensor of shape [num_instances, new_height, new_width]
resized_masks: If masks is not None, also outputs masks. A 3D tensor of
shape [num_instances, new_height, new_width].
resized_image_shape: A 1D tensor of shape [3] containing shape of the
resized image.
Raises:
Raises:
ValueError: if the image is not a 3D tensor.
ValueError: if the image is not a 3D tensor.
...
@@ -1762,16 +2116,27 @@ def resize_to_range(image,
...
@@ -1762,16 +2116,27 @@ def resize_to_range(image,
else
:
else
:
new_size
=
_compute_new_dynamic_size
(
image
,
min_dimension
,
max_dimension
)
new_size
=
_compute_new_dynamic_size
(
image
,
min_dimension
,
max_dimension
)
new_image
=
tf
.
image
.
resize_images
(
new_image
=
tf
.
image
.
resize_images
(
image
,
new_size
,
method
=
method
,
align_corners
=
align_corners
)
image
,
new_size
[:
-
1
]
,
method
=
method
,
align_corners
=
align_corners
)
result
=
new_image
if
pad_to_max_dimension
:
new_image
=
tf
.
image
.
pad_to_bounding_box
(
new_image
,
0
,
0
,
max_dimension
,
max_dimension
)
result
=
[
new_image
]
if
masks
is
not
None
:
if
masks
is
not
None
:
new_masks
=
tf
.
expand_dims
(
masks
,
3
)
new_masks
=
tf
.
expand_dims
(
masks
,
3
)
new_masks
=
tf
.
image
.
resize_nearest_neighbor
(
new_masks
=
tf
.
image
.
resize_images
(
new_masks
,
new_size
,
align_corners
=
align_corners
)
new_masks
,
new_size
[:
-
1
],
method
=
tf
.
image
.
ResizeMethod
.
NEAREST_NEIGHBOR
,
align_corners
=
align_corners
)
new_masks
=
tf
.
squeeze
(
new_masks
,
3
)
new_masks
=
tf
.
squeeze
(
new_masks
,
3
)
result
=
[
new_image
,
new_masks
]
if
pad_to_max_dimension
:
new_masks
=
tf
.
image
.
pad_to_bounding_box
(
new_masks
,
0
,
0
,
max_dimension
,
max_dimension
)
result
.
append
(
new_masks
)
result
.
append
(
new_size
)
return
result
return
result
...
@@ -1789,10 +2154,13 @@ def resize_to_min_dimension(image, masks=None, min_dimension=600):
...
@@ -1789,10 +2154,13 @@ def resize_to_min_dimension(image, masks=None, min_dimension=600):
min_dimension: minimum image dimension.
min_dimension: minimum image dimension.
Returns:
Returns:
a tuple containing the following:
Note that the position of the resized_image_shape changes based on whether
Resized image. A tensor of size [new_height, new_width, channels].
masks are present.
(optional) Resized masks. A tensor of
resized_image: A tensor of size [new_height, new_width, channels].
size [num_instances, new_height, new_width].
resized_masks: If masks is not None, also outputs masks. A 3D tensor of
shape [num_instances, new_height, new_width]
resized_image_shape: A 1D tensor of shape [3] containing the shape of the
resized image.
Raises:
Raises:
ValueError: if the image is not a 3D tensor.
ValueError: if the image is not a 3D tensor.
...
@@ -1803,6 +2171,7 @@ def resize_to_min_dimension(image, masks=None, min_dimension=600):
...
@@ -1803,6 +2171,7 @@ def resize_to_min_dimension(image, masks=None, min_dimension=600):
with
tf
.
name_scope
(
'ResizeGivenMinDimension'
,
values
=
[
image
,
min_dimension
]):
with
tf
.
name_scope
(
'ResizeGivenMinDimension'
,
values
=
[
image
,
min_dimension
]):
image_height
=
tf
.
shape
(
image
)[
0
]
image_height
=
tf
.
shape
(
image
)[
0
]
image_width
=
tf
.
shape
(
image
)[
1
]
image_width
=
tf
.
shape
(
image
)[
1
]
num_channels
=
tf
.
shape
(
image
)[
2
]
min_image_dimension
=
tf
.
minimum
(
image_height
,
image_width
)
min_image_dimension
=
tf
.
minimum
(
image_height
,
image_width
)
min_target_dimension
=
tf
.
maximum
(
min_image_dimension
,
min_dimension
)
min_target_dimension
=
tf
.
maximum
(
min_image_dimension
,
min_dimension
)
target_ratio
=
tf
.
to_float
(
min_target_dimension
)
/
tf
.
to_float
(
target_ratio
=
tf
.
to_float
(
min_target_dimension
)
/
tf
.
to_float
(
...
@@ -1813,13 +2182,16 @@ def resize_to_min_dimension(image, masks=None, min_dimension=600):
...
@@ -1813,13 +2182,16 @@ def resize_to_min_dimension(image, masks=None, min_dimension=600):
tf
.
expand_dims
(
image
,
axis
=
0
),
tf
.
expand_dims
(
image
,
axis
=
0
),
size
=
[
target_height
,
target_width
],
size
=
[
target_height
,
target_width
],
align_corners
=
True
)
align_corners
=
True
)
result
=
tf
.
squeeze
(
image
,
axis
=
0
)
result
=
[
tf
.
squeeze
(
image
,
axis
=
0
)]
if
masks
is
not
None
:
if
masks
is
not
None
:
masks
=
tf
.
image
.
resize_nearest_neighbor
(
masks
=
tf
.
image
.
resize_nearest_neighbor
(
tf
.
expand_dims
(
masks
,
axis
=
3
),
tf
.
expand_dims
(
masks
,
axis
=
3
),
size
=
[
target_height
,
target_width
],
size
=
[
target_height
,
target_width
],
align_corners
=
True
)
align_corners
=
True
)
result
=
(
result
,
tf
.
squeeze
(
masks
,
axis
=
3
))
result
.
append
(
tf
.
squeeze
(
masks
,
axis
=
3
))
result
.
append
(
tf
.
stack
([
target_height
,
target_width
,
num_channels
]))
return
result
return
result
...
@@ -1854,6 +2226,8 @@ def scale_boxes_to_pixel_coordinates(image, boxes, keypoints=None):
...
@@ -1854,6 +2226,8 @@ def scale_boxes_to_pixel_coordinates(image, boxes, keypoints=None):
return
tuple
(
result
)
return
tuple
(
result
)
# TODO: Investigate if instead the function should return None if
# masks is None.
# pylint: disable=g-doc-return-or-yield
# pylint: disable=g-doc-return-or-yield
def
resize_image
(
image
,
def
resize_image
(
image
,
masks
=
None
,
masks
=
None
,
...
@@ -1861,7 +2235,28 @@ def resize_image(image,
...
@@ -1861,7 +2235,28 @@ def resize_image(image,
new_width
=
1024
,
new_width
=
1024
,
method
=
tf
.
image
.
ResizeMethod
.
BILINEAR
,
method
=
tf
.
image
.
ResizeMethod
.
BILINEAR
,
align_corners
=
False
):
align_corners
=
False
):
"""See `tf.image.resize_images` for detailed doc."""
"""Resizes images to the given height and width.
Args:
image: A 3D tensor of shape [height, width, channels]
masks: (optional) rank 3 float32 tensor with shape
[num_instances, height, width] containing instance masks.
new_height: (optional) (scalar) desired height of the image.
new_width: (optional) (scalar) desired width of the image.
method: (optional) interpolation method used in resizing. Defaults to
BILINEAR.
align_corners: bool. If true, exactly align all 4 corners of the input
and output. Defaults to False.
Returns:
Note that the position of the resized_image_shape changes based on whether
masks are present.
resized_image: A tensor of size [new_height, new_width, channels].
resized_masks: If masks is not None, also outputs masks. A 3D tensor of
shape [num_instances, new_height, new_width]
resized_image_shape: A 1D tensor of shape [3] containing the shape of the
resized image.
"""
with
tf
.
name_scope
(
with
tf
.
name_scope
(
'ResizeImage'
,
'ResizeImage'
,
values
=
[
image
,
new_height
,
new_width
,
method
,
align_corners
]):
values
=
[
image
,
new_height
,
new_width
,
method
,
align_corners
]):
...
@@ -1869,7 +2264,8 @@ def resize_image(image,
...
@@ -1869,7 +2264,8 @@ def resize_image(image,
image
,
[
new_height
,
new_width
],
image
,
[
new_height
,
new_width
],
method
=
method
,
method
=
method
,
align_corners
=
align_corners
)
align_corners
=
align_corners
)
result
=
new_image
image_shape
=
shape_utils
.
combined_static_and_dynamic_shape
(
image
)
result
=
[
new_image
]
if
masks
is
not
None
:
if
masks
is
not
None
:
num_instances
=
tf
.
shape
(
masks
)[
0
]
num_instances
=
tf
.
shape
(
masks
)[
0
]
new_size
=
tf
.
constant
([
new_height
,
new_width
],
dtype
=
tf
.
int32
)
new_size
=
tf
.
constant
([
new_height
,
new_width
],
dtype
=
tf
.
int32
)
...
@@ -1886,8 +2282,9 @@ def resize_image(image,
...
@@ -1886,8 +2282,9 @@ def resize_image(image,
masks
=
tf
.
cond
(
num_instances
>
0
,
resize_masks_branch
,
masks
=
tf
.
cond
(
num_instances
>
0
,
resize_masks_branch
,
reshape_masks_branch
)
reshape_masks_branch
)
result
=
[
new_image
,
masks
]
result
.
append
(
masks
)
result
.
append
(
tf
.
stack
([
new_height
,
new_width
,
image_shape
[
2
]]))
return
result
return
result
...
@@ -1946,7 +2343,7 @@ def rgb_to_gray(image):
...
@@ -1946,7 +2343,7 @@ def rgb_to_gray(image):
Returns:
Returns:
image: A single channel grayscale image -> [image, height, 1].
image: A single channel grayscale image -> [image, height, 1].
"""
"""
return
tf
.
image
.
rgb_to_grayscale
(
image
)
return
_
rgb_to_grayscale
(
image
)
def
ssd_random_crop
(
image
,
def
ssd_random_crop
(
image
,
...
@@ -1960,7 +2357,8 @@ def ssd_random_crop(image,
...
@@ -1960,7 +2357,8 @@ def ssd_random_crop(image,
area_range
=
((
0.1
,
1.0
),)
*
7
,
area_range
=
((
0.1
,
1.0
),)
*
7
,
overlap_thresh
=
(
0.0
,
0.1
,
0.3
,
0.5
,
0.7
,
0.9
,
1.0
),
overlap_thresh
=
(
0.0
,
0.1
,
0.3
,
0.5
,
0.7
,
0.9
,
1.0
),
random_coef
=
(
0.15
,)
*
7
,
random_coef
=
(
0.15
,)
*
7
,
seed
=
None
):
seed
=
None
,
preprocess_vars_cache
=
None
):
"""Random crop preprocessing with default parameters as in SSD paper.
"""Random crop preprocessing with default parameters as in SSD paper.
Liu et al., SSD: Single shot multibox detector.
Liu et al., SSD: Single shot multibox detector.
...
@@ -1994,6 +2392,10 @@ def ssd_random_crop(image,
...
@@ -1994,6 +2392,10 @@ def ssd_random_crop(image,
cropped image, and if it is 1.0, we will always get the
cropped image, and if it is 1.0, we will always get the
original image.
original image.
seed: random seed.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
Returns:
image: image which is the same rank as input image.
image: image which is the same rank as input image.
...
@@ -2046,14 +2448,17 @@ def ssd_random_crop(image,
...
@@ -2046,14 +2448,17 @@ def ssd_random_crop(image,
area_range
=
area_range
[
index
],
area_range
=
area_range
[
index
],
overlap_thresh
=
overlap_thresh
[
index
],
overlap_thresh
=
overlap_thresh
[
index
],
random_coef
=
random_coef
[
index
],
random_coef
=
random_coef
[
index
],
seed
=
seed
)
seed
=
seed
,
preprocess_vars_cache
=
preprocess_vars_cache
)
result
=
_apply_with_random_selector_tuples
(
result
=
_apply_with_random_selector_tuples
(
tuple
(
tuple
(
t
for
t
in
(
image
,
boxes
,
labels
,
label_scores
,
masks
,
keypoints
)
t
for
t
in
(
image
,
boxes
,
labels
,
label_scores
,
masks
,
keypoints
)
if
t
is
not
None
),
if
t
is
not
None
),
random_crop_selector
,
random_crop_selector
,
num_cases
=
len
(
min_object_covered
))
num_cases
=
len
(
min_object_covered
),
preprocess_vars_cache
=
preprocess_vars_cache
,
key
=
preprocessor_cache
.
PreprocessorCache
.
SSD_CROP_SELECTOR_ID
)
return
result
return
result
...
@@ -2069,7 +2474,8 @@ def ssd_random_crop_pad(image,
...
@@ -2069,7 +2474,8 @@ def ssd_random_crop_pad(image,
min_padded_size_ratio
=
((
1.0
,
1.0
),)
*
6
,
min_padded_size_ratio
=
((
1.0
,
1.0
),)
*
6
,
max_padded_size_ratio
=
((
2.0
,
2.0
),)
*
6
,
max_padded_size_ratio
=
((
2.0
,
2.0
),)
*
6
,
pad_color
=
(
None
,)
*
6
,
pad_color
=
(
None
,)
*
6
,
seed
=
None
):
seed
=
None
,
preprocess_vars_cache
=
None
):
"""Random crop preprocessing with default parameters as in SSD paper.
"""Random crop preprocessing with default parameters as in SSD paper.
Liu et al., SSD: Single shot multibox detector.
Liu et al., SSD: Single shot multibox detector.
...
@@ -2105,6 +2511,10 @@ def ssd_random_crop_pad(image,
...
@@ -2105,6 +2511,10 @@ def ssd_random_crop_pad(image,
if set as None, it will be set to average color of the randomly
if set as None, it will be set to average color of the randomly
cropped image.
cropped image.
seed: random seed.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
Returns:
image: Image shape will be [new_height, new_width, channels].
image: Image shape will be [new_height, new_width, channels].
...
@@ -2134,12 +2544,15 @@ def ssd_random_crop_pad(image,
...
@@ -2134,12 +2544,15 @@ def ssd_random_crop_pad(image,
min_padded_size_ratio
=
min_padded_size_ratio
[
index
],
min_padded_size_ratio
=
min_padded_size_ratio
[
index
],
max_padded_size_ratio
=
max_padded_size_ratio
[
index
],
max_padded_size_ratio
=
max_padded_size_ratio
[
index
],
pad_color
=
pad_color
[
index
],
pad_color
=
pad_color
[
index
],
seed
=
seed
)
seed
=
seed
,
preprocess_vars_cache
=
preprocess_vars_cache
)
return
_apply_with_random_selector_tuples
(
return
_apply_with_random_selector_tuples
(
tuple
(
t
for
t
in
(
image
,
boxes
,
labels
,
label_scores
)
if
t
is
not
None
),
tuple
(
t
for
t
in
(
image
,
boxes
,
labels
,
label_scores
)
if
t
is
not
None
),
random_crop_pad_selector
,
random_crop_pad_selector
,
num_cases
=
len
(
min_object_covered
))
num_cases
=
len
(
min_object_covered
),
preprocess_vars_cache
=
preprocess_vars_cache
,
key
=
preprocessor_cache
.
PreprocessorCache
.
SSD_CROP_PAD_SELECTOR_ID
)
def
ssd_random_crop_fixed_aspect_ratio
(
def
ssd_random_crop_fixed_aspect_ratio
(
...
@@ -2154,7 +2567,8 @@ def ssd_random_crop_fixed_aspect_ratio(
...
@@ -2154,7 +2567,8 @@ def ssd_random_crop_fixed_aspect_ratio(
area_range
=
((
0.1
,
1.0
),)
*
7
,
area_range
=
((
0.1
,
1.0
),)
*
7
,
overlap_thresh
=
(
0.0
,
0.1
,
0.3
,
0.5
,
0.7
,
0.9
,
1.0
),
overlap_thresh
=
(
0.0
,
0.1
,
0.3
,
0.5
,
0.7
,
0.9
,
1.0
),
random_coef
=
(
0.15
,)
*
7
,
random_coef
=
(
0.15
,)
*
7
,
seed
=
None
):
seed
=
None
,
preprocess_vars_cache
=
None
):
"""Random crop preprocessing with default parameters as in SSD paper.
"""Random crop preprocessing with default parameters as in SSD paper.
Liu et al., SSD: Single shot multibox detector.
Liu et al., SSD: Single shot multibox detector.
...
@@ -2191,6 +2605,10 @@ def ssd_random_crop_fixed_aspect_ratio(
...
@@ -2191,6 +2605,10 @@ def ssd_random_crop_fixed_aspect_ratio(
cropped image, and if it is 1.0, we will always get the
cropped image, and if it is 1.0, we will always get the
original image.
original image.
seed: random seed.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
Returns:
image: image which is the same rank as input image.
image: image which is the same rank as input image.
...
@@ -2209,7 +2627,8 @@ def ssd_random_crop_fixed_aspect_ratio(
...
@@ -2209,7 +2627,8 @@ def ssd_random_crop_fixed_aspect_ratio(
crop_result
=
ssd_random_crop
(
crop_result
=
ssd_random_crop
(
image
,
boxes
,
labels
,
label_scores
,
masks
,
keypoints
,
min_object_covered
,
image
,
boxes
,
labels
,
label_scores
,
masks
,
keypoints
,
min_object_covered
,
aspect_ratio_range
,
area_range
,
overlap_thresh
,
random_coef
,
seed
)
aspect_ratio_range
,
area_range
,
overlap_thresh
,
random_coef
,
seed
,
preprocess_vars_cache
)
i
=
3
i
=
3
new_image
,
new_boxes
,
new_labels
=
crop_result
[:
i
]
new_image
,
new_boxes
,
new_labels
=
crop_result
[:
i
]
new_label_scores
=
None
new_label_scores
=
None
...
@@ -2231,7 +2650,8 @@ def ssd_random_crop_fixed_aspect_ratio(
...
@@ -2231,7 +2650,8 @@ def ssd_random_crop_fixed_aspect_ratio(
new_masks
,
new_masks
,
new_keypoints
,
new_keypoints
,
aspect_ratio
=
aspect_ratio
,
aspect_ratio
=
aspect_ratio
,
seed
=
seed
)
seed
=
seed
,
preprocess_vars_cache
=
preprocess_vars_cache
)
return
result
return
result
...
@@ -2251,7 +2671,8 @@ def ssd_random_crop_pad_fixed_aspect_ratio(
...
@@ -2251,7 +2671,8 @@ def ssd_random_crop_pad_fixed_aspect_ratio(
random_coef
=
(
0.15
,)
*
7
,
random_coef
=
(
0.15
,)
*
7
,
min_padded_size_ratio
=
(
1.0
,
1.0
),
min_padded_size_ratio
=
(
1.0
,
1.0
),
max_padded_size_ratio
=
(
2.0
,
2.0
),
max_padded_size_ratio
=
(
2.0
,
2.0
),
seed
=
None
):
seed
=
None
,
preprocess_vars_cache
=
None
):
"""Random crop and pad preprocessing with default parameters as in SSD paper.
"""Random crop and pad preprocessing with default parameters as in SSD paper.
Liu et al., SSD: Single shot multibox detector.
Liu et al., SSD: Single shot multibox detector.
...
@@ -2294,6 +2715,10 @@ def ssd_random_crop_pad_fixed_aspect_ratio(
...
@@ -2294,6 +2715,10 @@ def ssd_random_crop_pad_fixed_aspect_ratio(
max_padded_size_ratio: max ratio of padded image height and width to the
max_padded_size_ratio: max ratio of padded image height and width to the
input image's height and width.
input image's height and width.
seed: random seed.
seed: random seed.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
Returns:
image: image which is the same rank as input image.
image: image which is the same rank as input image.
...
@@ -2310,7 +2735,8 @@ def ssd_random_crop_pad_fixed_aspect_ratio(
...
@@ -2310,7 +2735,8 @@ def ssd_random_crop_pad_fixed_aspect_ratio(
"""
"""
crop_result
=
ssd_random_crop
(
crop_result
=
ssd_random_crop
(
image
,
boxes
,
labels
,
label_scores
,
masks
,
keypoints
,
min_object_covered
,
image
,
boxes
,
labels
,
label_scores
,
masks
,
keypoints
,
min_object_covered
,
aspect_ratio_range
,
area_range
,
overlap_thresh
,
random_coef
,
seed
)
aspect_ratio_range
,
area_range
,
overlap_thresh
,
random_coef
,
seed
,
preprocess_vars_cache
)
i
=
3
i
=
3
new_image
,
new_boxes
,
new_labels
=
crop_result
[:
i
]
new_image
,
new_boxes
,
new_labels
=
crop_result
[:
i
]
new_label_scores
=
None
new_label_scores
=
None
...
@@ -2332,7 +2758,8 @@ def ssd_random_crop_pad_fixed_aspect_ratio(
...
@@ -2332,7 +2758,8 @@ def ssd_random_crop_pad_fixed_aspect_ratio(
aspect_ratio
=
aspect_ratio
,
aspect_ratio
=
aspect_ratio
,
min_padded_size_ratio
=
min_padded_size_ratio
,
min_padded_size_ratio
=
min_padded_size_ratio
,
max_padded_size_ratio
=
max_padded_size_ratio
,
max_padded_size_ratio
=
max_padded_size_ratio
,
seed
=
seed
)
seed
=
seed
,
preprocess_vars_cache
=
preprocess_vars_cache
)
result
=
list
(
result
)
result
=
list
(
result
)
if
new_label_scores
is
not
None
:
if
new_label_scores
is
not
None
:
...
@@ -2480,7 +2907,10 @@ def get_default_func_arg_map(include_label_scores=False,
...
@@ -2480,7 +2907,10 @@ def get_default_func_arg_map(include_label_scores=False,
return
prep_func_arg_map
return
prep_func_arg_map
def
preprocess
(
tensor_dict
,
preprocess_options
,
func_arg_map
=
None
):
def
preprocess
(
tensor_dict
,
preprocess_options
,
func_arg_map
=
None
,
preprocess_vars_cache
=
None
):
"""Preprocess images and bounding boxes.
"""Preprocess images and bounding boxes.
Various types of preprocessing (to be implemented) based on the
Various types of preprocessing (to be implemented) based on the
...
@@ -2505,6 +2935,10 @@ def preprocess(tensor_dict, preprocess_options, func_arg_map=None):
...
@@ -2505,6 +2935,10 @@ def preprocess(tensor_dict, preprocess_options, func_arg_map=None):
their values.
their values.
func_arg_map: mapping from preprocessing functions to arguments that they
func_arg_map: mapping from preprocessing functions to arguments that they
expect to receive and return.
expect to receive and return.
preprocess_vars_cache: PreprocessorCache object that records previously
performed augmentations. Updated in-place. If this
function is called multiple times with the same
non-null cache, it will perform deterministically.
Returns:
Returns:
tensor_dict: which contains the preprocessed images, bounding boxes, etc.
tensor_dict: which contains the preprocessed images, bounding boxes, etc.
...
@@ -2544,6 +2978,9 @@ def preprocess(tensor_dict, preprocess_options, func_arg_map=None):
...
@@ -2544,6 +2978,9 @@ def preprocess(tensor_dict, preprocess_options, func_arg_map=None):
return
tensor_dict
[
key
]
if
key
is
not
None
else
None
return
tensor_dict
[
key
]
if
key
is
not
None
else
None
args
=
[
get_arg
(
a
)
for
a
in
arg_names
]
args
=
[
get_arg
(
a
)
for
a
in
arg_names
]
if
(
preprocess_vars_cache
is
not
None
and
'preprocess_vars_cache'
in
inspect
.
getargspec
(
func
).
args
):
params
[
'preprocess_vars_cache'
]
=
preprocess_vars_cache
results
=
func
(
*
args
,
**
params
)
results
=
func
(
*
args
,
**
params
)
if
not
isinstance
(
results
,
(
list
,
tuple
)):
if
not
isinstance
(
results
,
(
list
,
tuple
)):
results
=
(
results
,)
results
=
(
results
,)
...
...
research/object_detection/core/preprocessor_cache.py
0 → 100644
View file @
fd7b6887
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Records previous preprocessing operations and allows them to be repeated.
Used with object_detection.core.preprocessor. Passing a PreprocessorCache
into individual data augmentation functions or the general preprocess() function
will store all randomly generated variables in the PreprocessorCache. When
a preprocessor function is called multiple times with the same
PreprocessorCache object, that function will perform the same augmentation
on all calls.
"""
from
collections
import
defaultdict
class PreprocessorCache(object):
  """Dictionary wrapper storing random variables generated during preprocessing.

  update() records the random value drawn by a preprocessing function under a
  (function_id, key) pair; get() retrieves it so that a later invocation of
  the same function can replay the identical augmentation.
  """

  # Constant keys representing different preprocessing functions
  ROTATION90 = 'rotation90'
  HORIZONTAL_FLIP = 'horizontal_flip'
  VERTICAL_FLIP = 'vertical_flip'
  PIXEL_VALUE_SCALE = 'pixel_value_scale'
  IMAGE_SCALE = 'image_scale'
  RGB_TO_GRAY = 'rgb_to_gray'
  ADJUST_BRIGHTNESS = 'adjust_brightness'
  ADJUST_CONTRAST = 'adjust_contrast'
  ADJUST_HUE = 'adjust_hue'
  ADJUST_SATURATION = 'adjust_saturation'
  DISTORT_COLOR = 'distort_color'
  STRICT_CROP_IMAGE = 'strict_crop_image'
  CROP_IMAGE = 'crop_image'
  PAD_IMAGE = 'pad_image'
  CROP_TO_ASPECT_RATIO = 'crop_to_aspect_ratio'
  RESIZE_METHOD = 'resize_method'
  PAD_TO_ASPECT_RATIO = 'pad_to_aspect_ratio'
  BLACK_PATCHES = 'black_patches'
  ADD_BLACK_PATCH = 'add_black_patch'
  SELECTOR = 'selector'
  SELECTOR_TUPLES = 'selector_tuples'
  SSD_CROP_SELECTOR_ID = 'ssd_crop_selector_id'
  SSD_CROP_PAD_SELECTOR_ID = 'ssd_crop_pad_selector_id'

  # 23 permitted function ids
  _VALID_FNS = [ROTATION90, HORIZONTAL_FLIP, VERTICAL_FLIP, PIXEL_VALUE_SCALE,
                IMAGE_SCALE, RGB_TO_GRAY, ADJUST_BRIGHTNESS, ADJUST_CONTRAST,
                ADJUST_HUE, ADJUST_SATURATION, DISTORT_COLOR, STRICT_CROP_IMAGE,
                CROP_IMAGE, PAD_IMAGE, CROP_TO_ASPECT_RATIO, RESIZE_METHOD,
                PAD_TO_ASPECT_RATIO, BLACK_PATCHES, ADD_BLACK_PATCH, SELECTOR,
                SELECTOR_TUPLES, SSD_CROP_SELECTOR_ID,
                SSD_CROP_PAD_SELECTOR_ID]

  def __init__(self):
    # Maps function_id -> {key: value}; defaultdict so get() never needs to
    # special-case a function_id that has not been written yet.
    self._history = defaultdict(dict)

  def clear(self):
    """Resets cache."""
    # BUG FIX: previously reset to a plain dict ({}), which made any
    # subsequent get() raise KeyError for a function_id not re-populated
    # since the clear.  Reset to the same defaultdict(dict) used by
    # __init__ so get() keeps returning None for unknown keys.
    self._history = defaultdict(dict)

  def get(self, function_id, key):
    """Gets stored value given a function id and key.

    Args:
      function_id: identifier for the preprocessing function used.
      key: identifier for the variable stored.
    Returns:
      value: the corresponding value, expected to be a tensor or
             nested structure of tensors; None if no value is recorded.
    Raises:
      ValueError: if function_id is not one of the 23 valid function ids.
    """
    if function_id not in self._VALID_FNS:
      raise ValueError('Function id not recognized: %s.' % str(function_id))
    return self._history[function_id].get(key)

  def update(self, function_id, key, value):
    """Adds a value to the dictionary.

    Args:
      function_id: identifier for the preprocessing function used.
      key: identifier for the variable stored.
      value: the value to store, expected to be a tensor or nested structure
             of tensors.
    Raises:
      ValueError: if function_id is not one of the 23 valid function ids.
    """
    if function_id not in self._VALID_FNS:
      raise ValueError('Function id not recognized: %s.' % str(function_id))
    self._history[function_id][key] = value
research/object_detection/core/preprocessor_test.py
View file @
fd7b6887
...
@@ -21,6 +21,7 @@ import six
...
@@ -21,6 +21,7 @@ import six
import
tensorflow
as
tf
import
tensorflow
as
tf
from
object_detection.core
import
preprocessor
from
object_detection.core
import
preprocessor
from
object_detection.core
import
preprocessor_cache
from
object_detection.core
import
standard_fields
as
fields
from
object_detection.core
import
standard_fields
as
fields
if
six
.
PY2
:
if
six
.
PY2
:
...
@@ -290,6 +291,15 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -290,6 +291,15 @@ class PreprocessorTest(tf.test.TestCase):
def
expectedLabelsAfterThresholdingWithMissingScore
(
self
):
def
expectedLabelsAfterThresholdingWithMissingScore
(
self
):
return
tf
.
constant
([
2
],
dtype
=
tf
.
float32
)
return
tf
.
constant
([
2
],
dtype
=
tf
.
float32
)
def testRgbToGrayscale(self):
  """Checks preprocessor._rgb_to_grayscale matches tf.image.rgb_to_grayscale."""
  images = self.createTestImages()
  actual = preprocessor._rgb_to_grayscale(images)
  expected = tf.image.rgb_to_grayscale(images)
  with self.test_session() as sess:
    actual_out, expected_out = sess.run([actual, expected])
  self.assertAllEqual(expected_out, actual_out)
def
testNormalizeImage
(
self
):
def
testNormalizeImage
(
self
):
preprocess_options
=
[(
preprocessor
.
normalize_image
,
{
preprocess_options
=
[(
preprocessor
.
normalize_image
,
{
'original_minval'
:
0
,
'original_minval'
:
0
,
...
@@ -435,6 +445,55 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -435,6 +445,55 @@ class PreprocessorTest(tf.test.TestCase):
rotated_mask
,
expected_mask
=
sess
.
run
([
rotated_mask
,
expected_mask
])
rotated_mask
,
expected_mask
=
sess
.
run
([
rotated_mask
,
expected_mask
])
self
.
assertAllEqual
(
rotated_mask
.
flatten
(),
expected_mask
.
flatten
())
self
.
assertAllEqual
(
rotated_mask
.
flatten
(),
expected_mask
.
flatten
())
def _testPreprocessorCache(self,
                           preprocess_options,
                           test_boxes=False,
                           test_masks=False,
                           test_keypoints=False,
                           num_runs=4):
  """Asserts that repeated preprocessing with one cache is deterministic.

  Feeds identical inputs through preprocessor.preprocess() num_runs times,
  sharing a single PreprocessorCache, then checks that every run's output
  tensors match the previous run's tensor-for-tensor.
  """
  cache = preprocessor_cache.PreprocessorCache()
  images = self.createTestImages()
  boxes = self.createTestBoxes()
  classes = self.createTestLabels()
  masks = self.createTestMasks()
  keypoints = self.createTestKeypoints()
  preprocessor_arg_map = preprocessor.get_default_func_arg_map(
      include_instance_masks=test_masks,
      include_keypoints=test_keypoints)
  outputs = []
  for _ in range(num_runs):
    tensor_dict = {fields.InputDataFields.image: images}
    num_outputs = 1
    if test_boxes:
      tensor_dict[fields.InputDataFields.groundtruth_boxes] = boxes
      tensor_dict[fields.InputDataFields.groundtruth_classes] = classes
      num_outputs += 1
    if test_masks:
      tensor_dict[fields.InputDataFields.groundtruth_instance_masks] = masks
      num_outputs += 1
    if test_keypoints:
      tensor_dict[fields.InputDataFields.groundtruth_keypoints] = keypoints
      num_outputs += 1
    outputs.append(preprocessor.preprocess(tensor_dict, preprocess_options,
                                           preprocessor_arg_map, cache))
  with self.test_session() as sess:
    fetches = []
    for run_idx in range(num_runs):
      fetches.append(outputs[run_idx][fields.InputDataFields.image])
      if test_boxes:
        fetches.append(
            outputs[run_idx][fields.InputDataFields.groundtruth_boxes])
      if test_masks:
        fetches.append(
            outputs[run_idx][fields.InputDataFields.groundtruth_instance_masks])
      if test_keypoints:
        fetches.append(
            outputs[run_idx][fields.InputDataFields.groundtruth_keypoints])
    fetched = sess.run(fetches)
    # Each run contributes num_outputs tensors; compare against previous run.
    for idx in range(num_outputs, len(fetched)):
      self.assertAllClose(fetched[idx], fetched[idx - num_outputs])
def
testRandomHorizontalFlip
(
self
):
def
testRandomHorizontalFlip
(
self
):
preprocess_options
=
[(
preprocessor
.
random_horizontal_flip
,
{})]
preprocess_options
=
[(
preprocessor
.
random_horizontal_flip
,
{})]
images
=
self
.
expectedImagesAfterNormalization
()
images
=
self
.
expectedImagesAfterNormalization
()
...
@@ -491,6 +550,16 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -491,6 +550,16 @@ class PreprocessorTest(tf.test.TestCase):
self
.
assertAllClose
(
boxes_
,
boxes_expected_
)
self
.
assertAllClose
(
boxes_
,
boxes_expected_
)
self
.
assertAllClose
(
images_diff_
,
images_diff_expected_
)
self
.
assertAllClose
(
images_diff_
,
images_diff_expected_
)
def testRandomHorizontalFlipWithCache(self):
  """Horizontal flip must replay identically when the cache is reused."""
  keypoint_flip_permutation = self.createKeypointFlipPermutation()
  flip_kwargs = {'keypoint_flip_permutation': keypoint_flip_permutation}
  preprocess_options = [(preprocessor.random_horizontal_flip, flip_kwargs)]
  self._testPreprocessorCache(preprocess_options,
                              test_boxes=True,
                              test_masks=True,
                              test_keypoints=True)
def
testRunRandomHorizontalFlipWithMaskAndKeypoints
(
self
):
def
testRunRandomHorizontalFlipWithMaskAndKeypoints
(
self
):
preprocess_options
=
[(
preprocessor
.
random_horizontal_flip
,
{})]
preprocess_options
=
[(
preprocessor
.
random_horizontal_flip
,
{})]
image_height
=
3
image_height
=
3
...
@@ -578,6 +647,16 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -578,6 +647,16 @@ class PreprocessorTest(tf.test.TestCase):
self
.
assertAllClose
(
boxes_
,
boxes_expected_
)
self
.
assertAllClose
(
boxes_
,
boxes_expected_
)
self
.
assertAllClose
(
images_diff_
,
images_diff_expected_
)
self
.
assertAllClose
(
images_diff_
,
images_diff_expected_
)
def testRandomVerticalFlipWithCache(self):
  """Vertical flip must replay identically when the cache is reused."""
  keypoint_flip_permutation = self.createKeypointFlipPermutation()
  flip_kwargs = {'keypoint_flip_permutation': keypoint_flip_permutation}
  preprocess_options = [(preprocessor.random_vertical_flip, flip_kwargs)]
  self._testPreprocessorCache(preprocess_options,
                              test_boxes=True,
                              test_masks=True,
                              test_keypoints=True)
def
testRunRandomVerticalFlipWithMaskAndKeypoints
(
self
):
def
testRunRandomVerticalFlipWithMaskAndKeypoints
(
self
):
preprocess_options
=
[(
preprocessor
.
random_vertical_flip
,
{})]
preprocess_options
=
[(
preprocessor
.
random_vertical_flip
,
{})]
image_height
=
3
image_height
=
3
...
@@ -665,6 +744,13 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -665,6 +744,13 @@ class PreprocessorTest(tf.test.TestCase):
self
.
assertAllClose
(
boxes_
,
boxes_expected_
)
self
.
assertAllClose
(
boxes_
,
boxes_expected_
)
self
.
assertAllClose
(
images_diff_
,
images_diff_expected_
)
self
.
assertAllClose
(
images_diff_
,
images_diff_expected_
)
def testRandomRotation90WithCache(self):
  """90-degree rotation must replay identically when the cache is reused."""
  preprocess_options = [(preprocessor.random_rotation90, {})]
  self._testPreprocessorCache(preprocess_options,
                              test_boxes=True,
                              test_masks=True,
                              test_keypoints=True)
def
testRunRandomRotation90WithMaskAndKeypoints
(
self
):
def
testRunRandomRotation90WithMaskAndKeypoints
(
self
):
preprocess_options
=
[(
preprocessor
.
random_rotation90
,
{})]
preprocess_options
=
[(
preprocessor
.
random_rotation90
,
{})]
image_height
=
3
image_height
=
3
...
@@ -716,6 +802,20 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -716,6 +802,20 @@ class PreprocessorTest(tf.test.TestCase):
self
.
assertAllClose
(
values_greater_
,
values_true_
)
self
.
assertAllClose
(
values_greater_
,
values_true_
)
self
.
assertAllClose
(
values_less_
,
values_true_
)
self
.
assertAllClose
(
values_less_
,
values_true_
)
def testRandomPixelValueScaleWithCache(self):
  """Pixel-value scaling must replay identically when the cache is reused."""
  preprocess_options = [
      (preprocessor.normalize_image, {'original_minval': 0,
                                      'original_maxval': 255,
                                      'target_minval': 0,
                                      'target_maxval': 1}),
      (preprocessor.random_pixel_value_scale, {}),
  ]
  self._testPreprocessorCache(preprocess_options,
                              test_boxes=True,
                              test_masks=False,
                              test_keypoints=False)
def
testRandomImageScale
(
self
):
def
testRandomImageScale
(
self
):
preprocess_options
=
[(
preprocessor
.
random_image_scale
,
{})]
preprocess_options
=
[(
preprocessor
.
random_image_scale
,
{})]
images_original
=
self
.
createTestImages
()
images_original
=
self
.
createTestImages
()
...
@@ -736,6 +836,13 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -736,6 +836,13 @@ class PreprocessorTest(tf.test.TestCase):
self
.
assertTrue
(
self
.
assertTrue
(
images_original_shape_
[
2
]
*
2.0
>=
images_scaled_shape_
[
2
])
images_original_shape_
[
2
]
*
2.0
>=
images_scaled_shape_
[
2
])
def testRandomImageScaleWithCache(self):
  """Random image scaling must replay identically when the cache is reused."""
  preprocess_options = [(preprocessor.random_image_scale, {})]
  self._testPreprocessorCache(preprocess_options,
                              test_boxes=False,
                              test_masks=False,
                              test_keypoints=False)
def
testRandomRGBtoGray
(
self
):
def
testRandomRGBtoGray
(
self
):
preprocess_options
=
[(
preprocessor
.
random_rgb_to_gray
,
{})]
preprocess_options
=
[(
preprocessor
.
random_rgb_to_gray
,
{})]
images_original
=
self
.
createTestImages
()
images_original
=
self
.
createTestImages
()
...
@@ -769,6 +876,14 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -769,6 +876,14 @@ class PreprocessorTest(tf.test.TestCase):
self
.
assertAllClose
(
images_g_diff_
,
image_zero1_
)
self
.
assertAllClose
(
images_g_diff_
,
image_zero1_
)
self
.
assertAllClose
(
images_b_diff_
,
image_zero1_
)
self
.
assertAllClose
(
images_b_diff_
,
image_zero1_
)
def testRandomRGBtoGrayWithCache(self):
  """Random RGB-to-gray must replay identically when the cache is reused."""
  gray_kwargs = {'probability': 0.5}
  preprocess_options = [(preprocessor.random_rgb_to_gray, gray_kwargs)]
  self._testPreprocessorCache(preprocess_options,
                              test_boxes=False,
                              test_masks=False,
                              test_keypoints=False)
def
testRandomAdjustBrightness
(
self
):
def
testRandomAdjustBrightness
(
self
):
preprocessing_options
=
[]
preprocessing_options
=
[]
preprocessing_options
.
append
((
preprocessor
.
normalize_image
,
{
preprocessing_options
.
append
((
preprocessor
.
normalize_image
,
{
...
@@ -789,6 +904,20 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -789,6 +904,20 @@ class PreprocessorTest(tf.test.TestCase):
[
image_original_shape
,
image_bright_shape
])
[
image_original_shape
,
image_bright_shape
])
self
.
assertAllEqual
(
image_original_shape_
,
image_bright_shape_
)
self
.
assertAllEqual
(
image_original_shape_
,
image_bright_shape_
)
def testRandomAdjustBrightnessWithCache(self):
  """Brightness adjustment must replay identically when the cache is reused."""
  preprocess_options = [
      (preprocessor.normalize_image, {'original_minval': 0,
                                      'original_maxval': 255,
                                      'target_minval': 0,
                                      'target_maxval': 1}),
      (preprocessor.random_adjust_brightness, {}),
  ]
  self._testPreprocessorCache(preprocess_options,
                              test_boxes=False,
                              test_masks=False,
                              test_keypoints=False)
def
testRandomAdjustContrast
(
self
):
def
testRandomAdjustContrast
(
self
):
preprocessing_options
=
[]
preprocessing_options
=
[]
preprocessing_options
.
append
((
preprocessor
.
normalize_image
,
{
preprocessing_options
.
append
((
preprocessor
.
normalize_image
,
{
...
@@ -809,6 +938,20 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -809,6 +938,20 @@ class PreprocessorTest(tf.test.TestCase):
[
image_original_shape
,
image_contrast_shape
])
[
image_original_shape
,
image_contrast_shape
])
self
.
assertAllEqual
(
image_original_shape_
,
image_contrast_shape_
)
self
.
assertAllEqual
(
image_original_shape_
,
image_contrast_shape_
)
def testRandomAdjustContrastWithCache(self):
  """Contrast adjustment must replay identically when the cache is reused."""
  preprocess_options = [
      (preprocessor.normalize_image, {'original_minval': 0,
                                      'original_maxval': 255,
                                      'target_minval': 0,
                                      'target_maxval': 1}),
      (preprocessor.random_adjust_contrast, {}),
  ]
  self._testPreprocessorCache(preprocess_options,
                              test_boxes=False,
                              test_masks=False,
                              test_keypoints=False)
def
testRandomAdjustHue
(
self
):
def
testRandomAdjustHue
(
self
):
preprocessing_options
=
[]
preprocessing_options
=
[]
preprocessing_options
.
append
((
preprocessor
.
normalize_image
,
{
preprocessing_options
.
append
((
preprocessor
.
normalize_image
,
{
...
@@ -829,6 +972,20 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -829,6 +972,20 @@ class PreprocessorTest(tf.test.TestCase):
[
image_original_shape
,
image_hue_shape
])
[
image_original_shape
,
image_hue_shape
])
self
.
assertAllEqual
(
image_original_shape_
,
image_hue_shape_
)
self
.
assertAllEqual
(
image_original_shape_
,
image_hue_shape_
)
def testRandomAdjustHueWithCache(self):
  """Hue adjustment must replay identically when the cache is reused."""
  preprocess_options = [
      (preprocessor.normalize_image, {'original_minval': 0,
                                      'original_maxval': 255,
                                      'target_minval': 0,
                                      'target_maxval': 1}),
      (preprocessor.random_adjust_hue, {}),
  ]
  self._testPreprocessorCache(preprocess_options,
                              test_boxes=False,
                              test_masks=False,
                              test_keypoints=False)
def
testRandomDistortColor
(
self
):
def
testRandomDistortColor
(
self
):
preprocessing_options
=
[]
preprocessing_options
=
[]
preprocessing_options
.
append
((
preprocessor
.
normalize_image
,
{
preprocessing_options
.
append
((
preprocessor
.
normalize_image
,
{
...
@@ -849,6 +1006,20 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -849,6 +1006,20 @@ class PreprocessorTest(tf.test.TestCase):
[
images_original_shape
,
images_distorted_color_shape
])
[
images_original_shape
,
images_distorted_color_shape
])
self
.
assertAllEqual
(
images_original_shape_
,
images_distorted_color_shape_
)
self
.
assertAllEqual
(
images_original_shape_
,
images_distorted_color_shape_
)
def testRandomDistortColorWithCache(self):
  """Color distortion must replay identically when the cache is reused."""
  preprocess_options = [
      (preprocessor.normalize_image, {'original_minval': 0,
                                      'original_maxval': 255,
                                      'target_minval': 0,
                                      'target_maxval': 1}),
      (preprocessor.random_distort_color, {}),
  ]
  self._testPreprocessorCache(preprocess_options,
                              test_boxes=False,
                              test_masks=False,
                              test_keypoints=False)
def
testRandomJitterBoxes
(
self
):
def
testRandomJitterBoxes
(
self
):
preprocessing_options
=
[]
preprocessing_options
=
[]
preprocessing_options
.
append
((
preprocessor
.
random_jitter_boxes
,
{}))
preprocessing_options
.
append
((
preprocessor
.
random_jitter_boxes
,
{}))
...
@@ -900,6 +1071,21 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -900,6 +1071,21 @@ class PreprocessorTest(tf.test.TestCase):
self
.
assertAllEqual
(
boxes_rank_
,
distorted_boxes_rank_
)
self
.
assertAllEqual
(
boxes_rank_
,
distorted_boxes_rank_
)
self
.
assertAllEqual
(
images_rank_
,
distorted_images_rank_
)
self
.
assertAllEqual
(
images_rank_
,
distorted_images_rank_
)
def testRandomCropImageWithCache(self):
  """Random crop must replay identically when the cache is reused."""
  preprocess_options = [
      (preprocessor.random_rgb_to_gray, {'probability': 0.5}),
      (preprocessor.normalize_image, {'original_minval': 0,
                                      'original_maxval': 255,
                                      'target_minval': 0,
                                      'target_maxval': 1,}),
      (preprocessor.random_crop_image, {}),
  ]
  self._testPreprocessorCache(preprocess_options,
                              test_boxes=True,
                              test_masks=False,
                              test_keypoints=False)
def
testRandomCropImageGrayscale
(
self
):
def
testRandomCropImageGrayscale
(
self
):
preprocessing_options
=
[(
preprocessor
.
rgb_to_gray
,
{}),
preprocessing_options
=
[(
preprocessor
.
rgb_to_gray
,
{}),
(
preprocessor
.
normalize_image
,
{
(
preprocessor
.
normalize_image
,
{
...
@@ -1446,6 +1632,13 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -1446,6 +1632,13 @@ class PreprocessorTest(tf.test.TestCase):
self
.
expectedKeypointsAfterThresholding
()])
self
.
expectedKeypointsAfterThresholding
()])
self
.
assertAllClose
(
retained_keypoints_
,
expected_keypoints_
)
self
.
assertAllClose
(
retained_keypoints_
,
expected_keypoints_
)
def testRandomCropToAspectRatioWithCache(self):
  """Crop-to-aspect-ratio must replay identically when the cache is reused."""
  preprocess_options = [(preprocessor.random_crop_to_aspect_ratio, {})]
  self._testPreprocessorCache(preprocess_options,
                              test_boxes=True,
                              test_masks=False,
                              test_keypoints=False)
def
testRunRandomCropToAspectRatioWithMasks
(
self
):
def
testRunRandomCropToAspectRatioWithMasks
(
self
):
image
=
self
.
createColorfulTestImage
()
image
=
self
.
createColorfulTestImage
()
boxes
=
self
.
createTestBoxes
()
boxes
=
self
.
createTestBoxes
()
...
@@ -1536,6 +1729,13 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -1536,6 +1729,13 @@ class PreprocessorTest(tf.test.TestCase):
self
.
assertAllClose
(
distorted_keypoints_
.
flatten
(),
self
.
assertAllClose
(
distorted_keypoints_
.
flatten
(),
expected_keypoints
.
flatten
())
expected_keypoints
.
flatten
())
def
testRandomPadToAspectRatioWithCache
(
self
):
preprocess_options
=
[(
preprocessor
.
random_pad_to_aspect_ratio
,
{})]
self
.
_testPreprocessorCache
(
preprocess_options
,
test_boxes
=
True
,
test_masks
=
True
,
test_keypoints
=
True
)
def
testRunRandomPadToAspectRatioWithMasks
(
self
):
def
testRunRandomPadToAspectRatioWithMasks
(
self
):
image
=
self
.
createColorfulTestImage
()
image
=
self
.
createColorfulTestImage
()
boxes
=
self
.
createTestBoxes
()
boxes
=
self
.
createTestBoxes
()
...
@@ -1624,6 +1824,17 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -1624,6 +1824,17 @@ class PreprocessorTest(tf.test.TestCase):
self
.
assertAllClose
(
distorted_keypoints_
.
flatten
(),
self
.
assertAllClose
(
distorted_keypoints_
.
flatten
(),
expected_keypoints
.
flatten
())
expected_keypoints
.
flatten
())
def
testRandomPadImageWithCache
(
self
):
preprocess_options
=
[(
preprocessor
.
normalize_image
,
{
'original_minval'
:
0
,
'original_maxval'
:
255
,
'target_minval'
:
0
,
'target_maxval'
:
1
,}),
(
preprocessor
.
random_pad_image
,
{})]
self
.
_testPreprocessorCache
(
preprocess_options
,
test_boxes
=
True
,
test_masks
=
True
,
test_keypoints
=
True
)
def
testRandomPadImage
(
self
):
def
testRandomPadImage
(
self
):
preprocessing_options
=
[(
preprocessor
.
normalize_image
,
{
preprocessing_options
=
[(
preprocessor
.
normalize_image
,
{
'original_minval'
:
0
,
'original_minval'
:
0
,
...
@@ -1670,6 +1881,17 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -1670,6 +1881,17 @@ class PreprocessorTest(tf.test.TestCase):
self
.
assertTrue
(
np
.
all
((
boxes_
[:,
3
]
-
boxes_
[:,
1
])
>=
(
self
.
assertTrue
(
np
.
all
((
boxes_
[:,
3
]
-
boxes_
[:,
1
])
>=
(
padded_boxes_
[:,
3
]
-
padded_boxes_
[:,
1
])))
padded_boxes_
[:,
3
]
-
padded_boxes_
[:,
1
])))
def
testRandomCropPadImageWithCache
(
self
):
preprocess_options
=
[(
preprocessor
.
normalize_image
,
{
'original_minval'
:
0
,
'original_maxval'
:
255
,
'target_minval'
:
0
,
'target_maxval'
:
1
,}),
(
preprocessor
.
random_crop_pad_image
,
{})]
self
.
_testPreprocessorCache
(
preprocess_options
,
test_boxes
=
True
,
test_masks
=
True
,
test_keypoints
=
True
)
def
testRandomCropPadImageWithRandomCoefOne
(
self
):
def
testRandomCropPadImageWithRandomCoefOne
(
self
):
preprocessing_options
=
[(
preprocessor
.
normalize_image
,
{
preprocessing_options
=
[(
preprocessor
.
normalize_image
,
{
'original_minval'
:
0
,
'original_minval'
:
0
,
...
@@ -1788,6 +2010,22 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -1788,6 +2010,22 @@ class PreprocessorTest(tf.test.TestCase):
self
.
assertEqual
(
images_shape_
[
1
],
padded_images_shape_
[
1
])
self
.
assertEqual
(
images_shape_
[
1
],
padded_images_shape_
[
1
])
self
.
assertEqual
(
2
*
images_shape_
[
2
],
padded_images_shape_
[
2
])
self
.
assertEqual
(
2
*
images_shape_
[
2
],
padded_images_shape_
[
2
])
def
testRandomBlackPatchesWithCache
(
self
):
preprocess_options
=
[]
preprocess_options
.
append
((
preprocessor
.
normalize_image
,
{
'original_minval'
:
0
,
'original_maxval'
:
255
,
'target_minval'
:
0
,
'target_maxval'
:
1
}))
preprocess_options
.
append
((
preprocessor
.
random_black_patches
,
{
'size_to_image_ratio'
:
0.5
}))
self
.
_testPreprocessorCache
(
preprocess_options
,
test_boxes
=
True
,
test_masks
=
True
,
test_keypoints
=
True
)
def
testRandomBlackPatches
(
self
):
def
testRandomBlackPatches
(
self
):
preprocessing_options
=
[]
preprocessing_options
=
[]
preprocessing_options
.
append
((
preprocessor
.
normalize_image
,
{
preprocessing_options
.
append
((
preprocessor
.
normalize_image
,
{
...
@@ -1812,6 +2050,22 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -1812,6 +2050,22 @@ class PreprocessorTest(tf.test.TestCase):
[
images_shape
,
blacked_images_shape
])
[
images_shape
,
blacked_images_shape
])
self
.
assertAllEqual
(
images_shape_
,
blacked_images_shape_
)
self
.
assertAllEqual
(
images_shape_
,
blacked_images_shape_
)
def
testRandomResizeMethodWithCache
(
self
):
preprocess_options
=
[]
preprocess_options
.
append
((
preprocessor
.
normalize_image
,
{
'original_minval'
:
0
,
'original_maxval'
:
255
,
'target_minval'
:
0
,
'target_maxval'
:
1
}))
preprocess_options
.
append
((
preprocessor
.
random_resize_method
,
{
'target_size'
:
(
75
,
150
)
}))
self
.
_testPreprocessorCache
(
preprocess_options
,
test_boxes
=
True
,
test_masks
=
True
,
test_keypoints
=
True
)
def
testRandomResizeMethod
(
self
):
def
testRandomResizeMethod
(
self
):
preprocessing_options
=
[]
preprocessing_options
=
[]
preprocessing_options
.
append
((
preprocessor
.
normalize_image
,
{
preprocessing_options
.
append
((
preprocessor
.
normalize_image
,
{
...
@@ -1853,7 +2107,7 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -1853,7 +2107,7 @@ class PreprocessorTest(tf.test.TestCase):
expected_masks_shape_list
):
expected_masks_shape_list
):
in_image
=
tf
.
random_uniform
(
in_image_shape
)
in_image
=
tf
.
random_uniform
(
in_image_shape
)
in_masks
=
tf
.
random_uniform
(
in_masks_shape
)
in_masks
=
tf
.
random_uniform
(
in_masks_shape
)
out_image
,
out_masks
=
preprocessor
.
resize_image
(
out_image
,
out_masks
,
_
=
preprocessor
.
resize_image
(
in_image
,
in_masks
,
new_height
=
height
,
new_width
=
width
)
in_image
,
in_masks
,
new_height
=
height
,
new_width
=
width
)
out_image_shape
=
tf
.
shape
(
out_image
)
out_image_shape
=
tf
.
shape
(
out_image
)
out_masks_shape
=
tf
.
shape
(
out_masks
)
out_masks_shape
=
tf
.
shape
(
out_masks
)
...
@@ -1880,7 +2134,7 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -1880,7 +2134,7 @@ class PreprocessorTest(tf.test.TestCase):
expected_masks_shape_list
):
expected_masks_shape_list
):
in_image
=
tf
.
random_uniform
(
in_image_shape
)
in_image
=
tf
.
random_uniform
(
in_image_shape
)
in_masks
=
tf
.
random_uniform
(
in_masks_shape
)
in_masks
=
tf
.
random_uniform
(
in_masks_shape
)
out_image
,
out_masks
=
preprocessor
.
resize_image
(
out_image
,
out_masks
,
_
=
preprocessor
.
resize_image
(
in_image
,
in_masks
,
new_height
=
height
,
new_width
=
width
)
in_image
,
in_masks
,
new_height
=
height
,
new_width
=
width
)
out_image_shape
=
tf
.
shape
(
out_image
)
out_image_shape
=
tf
.
shape
(
out_image
)
out_masks_shape
=
tf
.
shape
(
out_masks
)
out_masks_shape
=
tf
.
shape
(
out_masks
)
...
@@ -1900,7 +2154,7 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -1900,7 +2154,7 @@ class PreprocessorTest(tf.test.TestCase):
for
in_shape
,
expected_shape
in
zip
(
in_shape_list
,
expected_shape_list
):
for
in_shape
,
expected_shape
in
zip
(
in_shape_list
,
expected_shape_list
):
in_image
=
tf
.
random_uniform
(
in_shape
)
in_image
=
tf
.
random_uniform
(
in_shape
)
out_image
=
preprocessor
.
resize_to_range
(
out_image
,
_
=
preprocessor
.
resize_to_range
(
in_image
,
min_dimension
=
min_dim
,
max_dimension
=
max_dim
)
in_image
,
min_dimension
=
min_dim
,
max_dimension
=
max_dim
)
self
.
assertAllEqual
(
out_image
.
get_shape
().
as_list
(),
expected_shape
)
self
.
assertAllEqual
(
out_image
.
get_shape
().
as_list
(),
expected_shape
)
...
@@ -1913,7 +2167,7 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -1913,7 +2167,7 @@ class PreprocessorTest(tf.test.TestCase):
for
in_shape
,
expected_shape
in
zip
(
in_shape_list
,
expected_shape_list
):
for
in_shape
,
expected_shape
in
zip
(
in_shape_list
,
expected_shape_list
):
in_image
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
(
None
,
None
,
3
))
in_image
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
(
None
,
None
,
3
))
out_image
=
preprocessor
.
resize_to_range
(
out_image
,
_
=
preprocessor
.
resize_to_range
(
in_image
,
min_dimension
=
min_dim
,
max_dimension
=
max_dim
)
in_image
,
min_dimension
=
min_dim
,
max_dimension
=
max_dim
)
out_image_shape
=
tf
.
shape
(
out_image
)
out_image_shape
=
tf
.
shape
(
out_image
)
with
self
.
test_session
()
as
sess
:
with
self
.
test_session
()
as
sess
:
...
@@ -1938,7 +2192,7 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -1938,7 +2192,7 @@ class PreprocessorTest(tf.test.TestCase):
expected_masks_shape_list
):
expected_masks_shape_list
):
in_image
=
tf
.
random_uniform
(
in_image_shape
)
in_image
=
tf
.
random_uniform
(
in_image_shape
)
in_masks
=
tf
.
random_uniform
(
in_masks_shape
)
in_masks
=
tf
.
random_uniform
(
in_masks_shape
)
out_image
,
out_masks
=
preprocessor
.
resize_to_range
(
out_image
,
out_masks
,
_
=
preprocessor
.
resize_to_range
(
in_image
,
in_masks
,
min_dimension
=
min_dim
,
max_dimension
=
max_dim
)
in_image
,
in_masks
,
min_dimension
=
min_dim
,
max_dimension
=
max_dim
)
self
.
assertAllEqual
(
out_masks
.
get_shape
().
as_list
(),
expected_mask_shape
)
self
.
assertAllEqual
(
out_masks
.
get_shape
().
as_list
(),
expected_mask_shape
)
self
.
assertAllEqual
(
out_image
.
get_shape
().
as_list
(),
expected_image_shape
)
self
.
assertAllEqual
(
out_image
.
get_shape
().
as_list
(),
expected_image_shape
)
...
@@ -1960,7 +2214,7 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -1960,7 +2214,7 @@ class PreprocessorTest(tf.test.TestCase):
in_image
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
(
None
,
None
,
3
))
in_image
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
(
None
,
None
,
3
))
in_masks
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
(
None
,
None
,
None
))
in_masks
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
(
None
,
None
,
None
))
in_masks
=
tf
.
random_uniform
(
in_masks_shape
)
in_masks
=
tf
.
random_uniform
(
in_masks_shape
)
out_image
,
out_masks
=
preprocessor
.
resize_to_range
(
out_image
,
out_masks
,
_
=
preprocessor
.
resize_to_range
(
in_image
,
in_masks
,
min_dimension
=
min_dim
,
max_dimension
=
max_dim
)
in_image
,
in_masks
,
min_dimension
=
min_dim
,
max_dimension
=
max_dim
)
out_image_shape
=
tf
.
shape
(
out_image
)
out_image_shape
=
tf
.
shape
(
out_image
)
out_masks_shape
=
tf
.
shape
(
out_masks
)
out_masks_shape
=
tf
.
shape
(
out_masks
)
...
@@ -1991,7 +2245,7 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -1991,7 +2245,7 @@ class PreprocessorTest(tf.test.TestCase):
expected_masks_shape_list
):
expected_masks_shape_list
):
in_image
=
tf
.
random_uniform
(
in_image_shape
)
in_image
=
tf
.
random_uniform
(
in_image_shape
)
in_masks
=
tf
.
random_uniform
(
in_masks_shape
)
in_masks
=
tf
.
random_uniform
(
in_masks_shape
)
out_image
,
out_masks
=
preprocessor
.
resize_to_range
(
out_image
,
out_masks
,
_
=
preprocessor
.
resize_to_range
(
in_image
,
in_masks
,
min_dimension
=
min_dim
,
max_dimension
=
max_dim
)
in_image
,
in_masks
,
min_dimension
=
min_dim
,
max_dimension
=
max_dim
)
out_image_shape
=
tf
.
shape
(
out_image
)
out_image_shape
=
tf
.
shape
(
out_image
)
out_masks_shape
=
tf
.
shape
(
out_masks
)
out_masks_shape
=
tf
.
shape
(
out_masks
)
...
@@ -2016,7 +2270,7 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -2016,7 +2270,7 @@ class PreprocessorTest(tf.test.TestCase):
for
in_shape
,
expected_shape
in
zip
(
in_shape_list
,
expected_shape_list
):
for
in_shape
,
expected_shape
in
zip
(
in_shape_list
,
expected_shape_list
):
in_image
=
tf
.
random_uniform
(
in_shape
)
in_image
=
tf
.
random_uniform
(
in_shape
)
out_image
=
preprocessor
.
resize_to_range
(
out_image
,
_
=
preprocessor
.
resize_to_range
(
in_image
,
min_dimension
=
min_dim
,
max_dimension
=
max_dim
)
in_image
,
min_dimension
=
min_dim
,
max_dimension
=
max_dim
)
out_image_shape
=
tf
.
shape
(
out_image
)
out_image_shape
=
tf
.
shape
(
out_image
)
...
@@ -2039,7 +2293,7 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -2039,7 +2293,7 @@ class PreprocessorTest(tf.test.TestCase):
in_image
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
(
None
,
None
,
3
))
in_image
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
(
None
,
None
,
3
))
in_masks
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
(
None
,
None
,
None
))
in_masks
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
(
None
,
None
,
None
))
in_masks
=
tf
.
random_uniform
(
in_masks_shape
)
in_masks
=
tf
.
random_uniform
(
in_masks_shape
)
out_image
,
out_masks
=
preprocessor
.
resize_to_min_dimension
(
out_image
,
out_masks
,
_
=
preprocessor
.
resize_to_min_dimension
(
in_image
,
in_masks
,
min_dimension
=
min_dim
)
in_image
,
in_masks
,
min_dimension
=
min_dim
)
out_image_shape
=
tf
.
shape
(
out_image
)
out_image_shape
=
tf
.
shape
(
out_image
)
out_masks_shape
=
tf
.
shape
(
out_masks
)
out_masks_shape
=
tf
.
shape
(
out_masks
)
...
@@ -2069,7 +2323,7 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -2069,7 +2323,7 @@ class PreprocessorTest(tf.test.TestCase):
expected_masks_shape_list
):
expected_masks_shape_list
):
in_image
=
tf
.
random_uniform
(
in_image_shape
)
in_image
=
tf
.
random_uniform
(
in_image_shape
)
in_masks
=
tf
.
random_uniform
(
in_masks_shape
)
in_masks
=
tf
.
random_uniform
(
in_masks_shape
)
out_image
,
out_masks
=
preprocessor
.
resize_to_min_dimension
(
out_image
,
out_masks
,
_
=
preprocessor
.
resize_to_min_dimension
(
in_image
,
in_masks
,
min_dimension
=
min_dim
)
in_image
,
in_masks
,
min_dimension
=
min_dim
)
out_image_shape
=
tf
.
shape
(
out_image
)
out_image_shape
=
tf
.
shape
(
out_image
)
out_masks_shape
=
tf
.
shape
(
out_masks
)
out_masks_shape
=
tf
.
shape
(
out_masks
)
...
@@ -2144,6 +2398,20 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -2144,6 +2398,20 @@ class PreprocessorTest(tf.test.TestCase):
self
.
assertAllEqual
([
0
,
1
,
1
,
0
,
1
],
one_hot
)
self
.
assertAllEqual
([
0
,
1
,
1
,
0
,
1
],
one_hot
)
def
testSSDRandomCropWithCache
(
self
):
preprocess_options
=
[
(
preprocessor
.
normalize_image
,
{
'original_minval'
:
0
,
'original_maxval'
:
255
,
'target_minval'
:
0
,
'target_maxval'
:
1
}),
(
preprocessor
.
ssd_random_crop
,
{})]
self
.
_testPreprocessorCache
(
preprocess_options
,
test_boxes
=
True
,
test_masks
=
False
,
test_keypoints
=
False
)
def
testSSDRandomCrop
(
self
):
def
testSSDRandomCrop
(
self
):
preprocessing_options
=
[
preprocessing_options
=
[
(
preprocessor
.
normalize_image
,
{
(
preprocessor
.
normalize_image
,
{
...
@@ -2216,6 +2484,20 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -2216,6 +2484,20 @@ class PreprocessorTest(tf.test.TestCase):
self
.
assertAllEqual
(
boxes_rank_
,
distorted_boxes_rank_
)
self
.
assertAllEqual
(
boxes_rank_
,
distorted_boxes_rank_
)
self
.
assertAllEqual
(
images_rank_
,
distorted_images_rank_
)
self
.
assertAllEqual
(
images_rank_
,
distorted_images_rank_
)
def
testSSDRandomCropFixedAspectRatioWithCache
(
self
):
preprocess_options
=
[
(
preprocessor
.
normalize_image
,
{
'original_minval'
:
0
,
'original_maxval'
:
255
,
'target_minval'
:
0
,
'target_maxval'
:
1
}),
(
preprocessor
.
ssd_random_crop_fixed_aspect_ratio
,
{})]
self
.
_testPreprocessorCache
(
preprocess_options
,
test_boxes
=
True
,
test_masks
=
False
,
test_keypoints
=
False
)
def
_testSSDRandomCropFixedAspectRatio
(
self
,
def
_testSSDRandomCropFixedAspectRatio
(
self
,
include_label_scores
,
include_label_scores
,
include_instance_masks
,
include_instance_masks
,
...
...
research/object_detection/core/standard_fields.py
View file @
fd7b6887
...
@@ -57,6 +57,10 @@ class InputDataFields(object):
...
@@ -57,6 +57,10 @@ class InputDataFields(object):
groundtruth_keypoints: ground truth keypoints.
groundtruth_keypoints: ground truth keypoints.
groundtruth_keypoint_visibilities: ground truth keypoint visibilities.
groundtruth_keypoint_visibilities: ground truth keypoint visibilities.
groundtruth_label_scores: groundtruth label scores.
groundtruth_label_scores: groundtruth label scores.
groundtruth_weights: groundtruth weight factor for bounding boxes.
num_groundtruth_boxes: number of groundtruth boxes.
true_image_shapes: true shapes of images in the resized images, as resized
images can be padded with zeros.
"""
"""
image
=
'image'
image
=
'image'
original_image
=
'original_image'
original_image
=
'original_image'
...
@@ -79,10 +83,13 @@ class InputDataFields(object):
...
@@ -79,10 +83,13 @@ class InputDataFields(object):
groundtruth_keypoints
=
'groundtruth_keypoints'
groundtruth_keypoints
=
'groundtruth_keypoints'
groundtruth_keypoint_visibilities
=
'groundtruth_keypoint_visibilities'
groundtruth_keypoint_visibilities
=
'groundtruth_keypoint_visibilities'
groundtruth_label_scores
=
'groundtruth_label_scores'
groundtruth_label_scores
=
'groundtruth_label_scores'
groundtruth_weights
=
'groundtruth_weights'
num_groundtruth_boxes
=
'num_groundtruth_boxes'
true_image_shape
=
'true_image_shape'
class
DetectionResultFields
(
object
):
class
DetectionResultFields
(
object
):
"""Naming conve
r
ntions for storing the output of the detector.
"""Naming conventions for storing the output of the detector.
Attributes:
Attributes:
source_id: source of the original image.
source_id: source of the original image.
...
@@ -162,6 +169,7 @@ class TfExampleFields(object):
...
@@ -162,6 +169,7 @@ class TfExampleFields(object):
object_is_crowd: [DEPRECATED, use object_group_of instead]
object_is_crowd: [DEPRECATED, use object_group_of instead]
is the object a single object or a crowd
is the object a single object or a crowd
object_segment_area: the area of the segment.
object_segment_area: the area of the segment.
object_weight: a weight factor for the object's bounding box.
instance_masks: instance segmentation masks.
instance_masks: instance segmentation masks.
instance_boundaries: instance boundaries.
instance_boundaries: instance boundaries.
instance_classes: Classes for each instance segmentation mask.
instance_classes: Classes for each instance segmentation mask.
...
@@ -194,6 +202,7 @@ class TfExampleFields(object):
...
@@ -194,6 +202,7 @@ class TfExampleFields(object):
object_depiction
=
'image/object/depiction'
object_depiction
=
'image/object/depiction'
object_is_crowd
=
'image/object/is_crowd'
object_is_crowd
=
'image/object/is_crowd'
object_segment_area
=
'image/object/segment/area'
object_segment_area
=
'image/object/segment/area'
object_weight
=
'image/object/weight'
instance_masks
=
'image/segmentation/object'
instance_masks
=
'image/segmentation/object'
instance_boundaries
=
'image/boundaries/object'
instance_boundaries
=
'image/boundaries/object'
instance_classes
=
'image/segmentation/object/class'
instance_classes
=
'image/segmentation/object/class'
...
...
research/object_detection/core/target_assigner.py
View file @
fd7b6887
...
@@ -37,19 +37,19 @@ from object_detection.box_coders import faster_rcnn_box_coder
...
@@ -37,19 +37,19 @@ from object_detection.box_coders import faster_rcnn_box_coder
from
object_detection.box_coders
import
mean_stddev_box_coder
from
object_detection.box_coders
import
mean_stddev_box_coder
from
object_detection.core
import
box_coder
as
bcoder
from
object_detection.core
import
box_coder
as
bcoder
from
object_detection.core
import
box_list
from
object_detection.core
import
box_list
from
object_detection.core
import
box_list_ops
from
object_detection.core
import
matcher
as
mat
from
object_detection.core
import
matcher
as
mat
from
object_detection.core
import
region_similarity_calculator
as
sim_calc
from
object_detection.core
import
region_similarity_calculator
as
sim_calc
from
object_detection.core
import
standard_fields
as
fields
from
object_detection.matchers
import
argmax_matcher
from
object_detection.matchers
import
argmax_matcher
from
object_detection.matchers
import
bipartite_matcher
from
object_detection.matchers
import
bipartite_matcher
from
object_detection.utils
import
shape_utils
class
TargetAssigner
(
object
):
class
TargetAssigner
(
object
):
"""Target assigner to compute classification and regression targets."""
"""Target assigner to compute classification and regression targets."""
def
__init__
(
self
,
similarity_calc
,
matcher
,
box_coder
,
def
__init__
(
self
,
similarity_calc
,
matcher
,
box_coder
,
positive_class_weight
=
1.0
,
negative_class_weight
=
1.0
,
negative_class_weight
=
1.0
,
unmatched_cls_target
=
None
):
unmatched_cls_target
=
None
):
"""Construct Object Detection Target Assigner.
"""Construct Object Detection Target Assigner.
Args:
Args:
...
@@ -58,10 +58,8 @@ class TargetAssigner(object):
...
@@ -58,10 +58,8 @@ class TargetAssigner(object):
anchors.
anchors.
box_coder: an object_detection.core.BoxCoder used to encode matching
box_coder: an object_detection.core.BoxCoder used to encode matching
groundtruth boxes with respect to anchors.
groundtruth boxes with respect to anchors.
positive_class_weight: classification weight to be associated to positive
anchors (default: 1.0)
negative_class_weight: classification weight to be associated to negative
negative_class_weight: classification weight to be associated to negative
anchors (default: 1.0)
anchors (default: 1.0)
. The weight must be in [0., 1.].
unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k]
unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k]
which is consistent with the classification target for each
which is consistent with the classification target for each
anchor (and can be empty for scalar targets). This shape must thus be
anchor (and can be empty for scalar targets). This shape must thus be
...
@@ -82,7 +80,6 @@ class TargetAssigner(object):
...
@@ -82,7 +80,6 @@ class TargetAssigner(object):
self
.
_similarity_calc
=
similarity_calc
self
.
_similarity_calc
=
similarity_calc
self
.
_matcher
=
matcher
self
.
_matcher
=
matcher
self
.
_box_coder
=
box_coder
self
.
_box_coder
=
box_coder
self
.
_positive_class_weight
=
positive_class_weight
self
.
_negative_class_weight
=
negative_class_weight
self
.
_negative_class_weight
=
negative_class_weight
if
unmatched_cls_target
is
None
:
if
unmatched_cls_target
is
None
:
self
.
_unmatched_cls_target
=
tf
.
constant
([
0
],
tf
.
float32
)
self
.
_unmatched_cls_target
=
tf
.
constant
([
0
],
tf
.
float32
)
...
@@ -94,7 +91,7 @@ class TargetAssigner(object):
...
@@ -94,7 +91,7 @@ class TargetAssigner(object):
return
self
.
_box_coder
return
self
.
_box_coder
def
assign
(
self
,
anchors
,
groundtruth_boxes
,
groundtruth_labels
=
None
,
def
assign
(
self
,
anchors
,
groundtruth_boxes
,
groundtruth_labels
=
None
,
**
params
):
groundtruth_weights
=
None
,
**
params
):
"""Assign classification and regression targets to each anchor.
"""Assign classification and regression targets to each anchor.
For a given set of anchors and groundtruth detections, match anchors
For a given set of anchors and groundtruth detections, match anchors
...
@@ -113,6 +110,9 @@ class TargetAssigner(object):
...
@@ -113,6 +110,9 @@ class TargetAssigner(object):
[d_1, ... d_k] can be empty (corresponding to scalar inputs). When set
[d_1, ... d_k] can be empty (corresponding to scalar inputs). When set
to None, groundtruth_labels assumes a binary problem where all
to None, groundtruth_labels assumes a binary problem where all
ground_truth boxes get a positive label (of 1).
ground_truth boxes get a positive label (of 1).
groundtruth_weights: a float tensor of shape [M] indicating the weight to
assign to all anchors match to a particular groundtruth box. The weights
must be in [0., 1.]. If None, all weights are set to 1.
**params: Additional keyword arguments for specific implementations of
**params: Additional keyword arguments for specific implementations of
the Matcher.
the Matcher.
...
@@ -140,14 +140,21 @@ class TargetAssigner(object):
...
@@ -140,14 +140,21 @@ class TargetAssigner(object):
groundtruth_labels
=
tf
.
ones
(
tf
.
expand_dims
(
groundtruth_boxes
.
num_boxes
(),
groundtruth_labels
=
tf
.
ones
(
tf
.
expand_dims
(
groundtruth_boxes
.
num_boxes
(),
0
))
0
))
groundtruth_labels
=
tf
.
expand_dims
(
groundtruth_labels
,
-
1
)
groundtruth_labels
=
tf
.
expand_dims
(
groundtruth_labels
,
-
1
)
unmatched_shape_assert
=
tf
.
assert_equal
(
unmatched_shape_assert
=
shape_utils
.
assert_shape_equal
(
tf
.
shape
(
groundtruth_labels
)[
1
:],
tf
.
shape
(
self
.
_unmatched_cls_target
),
shape_utils
.
combined_static_and_dynamic_shape
(
groundtruth_labels
)[
1
:],
message
=
'Unmatched class target shape incompatible '
shape_utils
.
combined_static_and_dynamic_shape
(
'with groundtruth labels shape!'
)
self
.
_unmatched_cls_target
))
labels_and_box_shapes_assert
=
tf
.
assert_equal
(
labels_and_box_shapes_assert
=
shape_utils
.
assert_shape_equal
(
tf
.
shape
(
groundtruth_labels
)[
0
],
groundtruth_boxes
.
num_boxes
(),
shape_utils
.
combined_static_and_dynamic_shape
(
message
=
'Groundtruth boxes and labels have incompatible shapes!'
)
groundtruth_labels
)[:
1
],
shape_utils
.
combined_static_and_dynamic_shape
(
groundtruth_boxes
.
get
())[:
1
])
if
groundtruth_weights
is
None
:
num_gt_boxes
=
groundtruth_boxes
.
num_boxes_static
()
if
not
num_gt_boxes
:
num_gt_boxes
=
groundtruth_boxes
.
num_boxes
()
groundtruth_weights
=
tf
.
ones
([
num_gt_boxes
],
dtype
=
tf
.
float32
)
with
tf
.
control_dependencies
(
with
tf
.
control_dependencies
(
[
unmatched_shape_assert
,
labels_and_box_shapes_assert
]):
[
unmatched_shape_assert
,
labels_and_box_shapes_assert
]):
match_quality_matrix
=
self
.
_similarity_calc
.
compare
(
groundtruth_boxes
,
match_quality_matrix
=
self
.
_similarity_calc
.
compare
(
groundtruth_boxes
,
...
@@ -158,16 +165,16 @@ class TargetAssigner(object):
...
@@ -158,16 +165,16 @@ class TargetAssigner(object):
match
)
match
)
cls_targets
=
self
.
_create_classification_targets
(
groundtruth_labels
,
cls_targets
=
self
.
_create_classification_targets
(
groundtruth_labels
,
match
)
match
)
reg_weights
=
self
.
_create_regression_weights
(
match
)
reg_weights
=
self
.
_create_regression_weights
(
match
,
groundtruth_weights
)
cls_weights
=
self
.
_create_classification_weights
(
cls_weights
=
self
.
_create_classification_weights
(
match
,
match
,
self
.
_positive_class_weight
,
self
.
_negative_class
_weight
)
groundtruth
_weight
s
)
num_anchors
=
anchors
.
num_boxes_static
()
num_anchors
=
anchors
.
num_boxes_static
()
if
num_anchors
is
not
None
:
if
num_anchors
is
not
None
:
reg_targets
=
self
.
_reset_target_shape
(
reg_targets
,
num_anchors
)
reg_targets
=
self
.
_reset_target_shape
(
reg_targets
,
num_anchors
)
cls_targets
=
self
.
_reset_target_shape
(
cls_targets
,
num_anchors
)
cls_targets
=
self
.
_reset_target_shape
(
cls_targets
,
num_anchors
)
reg_weights
=
self
.
_reset_target_shape
(
reg_weights
,
num_anchors
)
reg_weights
=
self
.
_reset_target_shape
(
reg_weights
,
num_anchors
)
cls_weights
=
self
.
_reset_target_shape
(
cls_weights
,
num_anchors
)
cls_weights
=
self
.
_reset_target_shape
(
cls_weights
,
num_anchors
)
return
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
match
return
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
match
...
@@ -198,23 +205,31 @@ class TargetAssigner(object):
...
@@ -198,23 +205,31 @@ class TargetAssigner(object):
Returns:
Returns:
reg_targets: a float32 tensor with shape [N, box_code_dimension]
reg_targets: a float32 tensor with shape [N, box_code_dimension]
"""
"""
matched_anchor_indices
=
match
.
matched_column_indices
()
matched_gt_boxes
=
match
.
gather_based_on_match
(
unmatched_ignored_anchor_indices
=
(
match
.
groundtruth_boxes
.
get
(),
unmatched_or_ignored_column_indices
())
unmatched_value
=
tf
.
zeros
(
4
),
matched_gt_indices
=
match
.
matched_row_indices
()
ignored_value
=
tf
.
zeros
(
4
))
matched_anchors
=
box_list_ops
.
gather
(
anchors
,
matched_gt_boxlist
=
box_list
.
BoxList
(
matched_gt_boxes
)
matched_anchor_indices
)
if
groundtruth_boxes
.
has_field
(
fields
.
BoxListFields
.
keypoints
):
matched_gt_boxes
=
box_list_ops
.
gather
(
groundtruth_boxes
,
groundtruth_keypoints
=
groundtruth_boxes
.
get_field
(
matched_gt_indices
)
fields
.
BoxListFields
.
keypoints
)
matched_reg_targets
=
self
.
_box_coder
.
encode
(
matched_gt_boxes
,
matched_keypoints
=
match
.
gather_based_on_match
(
matched_anchors
)
groundtruth_keypoints
,
unmatched_value
=
tf
.
zeros
(
groundtruth_keypoints
.
get_shape
()[
1
:]),
ignored_value
=
tf
.
zeros
(
groundtruth_keypoints
.
get_shape
()[
1
:]))
matched_gt_boxlist
.
add_field
(
fields
.
BoxListFields
.
keypoints
,
matched_keypoints
)
matched_reg_targets
=
self
.
_box_coder
.
encode
(
matched_gt_boxlist
,
anchors
)
match_results_shape
=
shape_utils
.
combined_static_and_dynamic_shape
(
match
.
match_results
)
# Zero out the unmatched and ignored regression targets.
unmatched_ignored_reg_targets
=
tf
.
tile
(
unmatched_ignored_reg_targets
=
tf
.
tile
(
self
.
_default_regression_target
(),
self
.
_default_regression_target
(),
[
match_results_shape
[
0
],
1
])
tf
.
stack
([
tf
.
size
(
unmatched_ignored_anchor_indices
),
1
]))
matched_anchors_mask
=
match
.
matched_column_indicator
()
reg_targets
=
tf
.
dynamic_stitch
(
reg_targets
=
tf
.
where
(
matched_anchors_mask
,
[
matched_anchor_indices
,
unmatched_ignored_anchor_indices
],
matched_reg_targets
,
[
matched_reg_targets
,
unmatched_ignored_reg_targets
])
unmatched_ignored_reg_targets
)
# TODO: summarize the number of matches on average.
return
reg_targets
return
reg_targets
def
_default_regression_target
(
self
):
def
_default_regression_target
(
self
):
...
@@ -245,27 +260,16 @@ class TargetAssigner(object):
...
@@ -245,27 +260,16 @@ class TargetAssigner(object):
and groundtruth boxes.
and groundtruth boxes.
Returns:
Returns:
cls_targets:
a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k],
a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k],
where the
where the
subshape [d_1, ..., d_k] is compatible with groundtruth_labels
subshape [d_1, ..., d_k] is compatible with groundtruth_labels
which has
which has
shape [num_gt_boxes, d_1, d_2, ... d_k].
shape [num_gt_boxes, d_1, d_2, ... d_k].
"""
"""
matched_anchor_indices
=
match
.
matched_column_indices
()
return
match
.
gather_based_on_match
(
unmatched_ignored_anchor_indices
=
(
match
.
groundtruth_labels
,
unmatched_or_ignored_column_indices
())
unmatched_value
=
self
.
_unmatched_cls_target
,
matched_gt_indices
=
match
.
matched_row_indices
()
ignored_value
=
self
.
_unmatched_cls_target
)
matched_cls_targets
=
tf
.
gather
(
groundtruth_labels
,
matched_gt_indices
)
def
_create_regression_weights
(
self
,
match
,
groundtruth_weights
):
ones
=
self
.
_unmatched_cls_target
.
shape
.
ndims
*
[
1
]
unmatched_ignored_cls_targets
=
tf
.
tile
(
tf
.
expand_dims
(
self
.
_unmatched_cls_target
,
0
),
tf
.
stack
([
tf
.
size
(
unmatched_ignored_anchor_indices
)]
+
ones
))
cls_targets
=
tf
.
dynamic_stitch
(
[
matched_anchor_indices
,
unmatched_ignored_anchor_indices
],
[
matched_cls_targets
,
unmatched_ignored_cls_targets
])
return
cls_targets
def
_create_regression_weights
(
self
,
match
):
"""Set regression weight for each anchor.
"""Set regression weight for each anchor.
Only positive anchors are set to contribute to the regression loss, so this
Only positive anchors are set to contribute to the regression loss, so this
...
@@ -275,18 +279,18 @@ class TargetAssigner(object):
...
@@ -275,18 +279,18 @@ class TargetAssigner(object):
Args:
Args:
match: a matcher.Match object that provides a matching between anchors
match: a matcher.Match object that provides a matching between anchors
and groundtruth boxes.
and groundtruth boxes.
groundtruth_weights: a float tensor of shape [M] indicating the weight to
assign to all anchors match to a particular groundtruth box.
Returns:
Returns:
reg_weights: a float32 tensor with shape [num_anchors] representing
a float32 tensor with shape [num_anchors] representing regression weights.
regression weights
"""
"""
re
g_weights
=
tf
.
cast
(
match
.
m
at
c
he
d_column_indicator
(),
tf
.
float32
)
re
turn
match
.
g
athe
r_based_on_match
(
return
reg_weights
groundtruth_weights
,
ignored_value
=
0.
,
unmatched_value
=
0.
)
def
_create_classification_weights
(
self
,
def
_create_classification_weights
(
self
,
match
,
match
,
positive_class_weight
=
1.0
,
groundtruth_weights
):
negative_class_weight
=
1.0
):
"""Create classification weights for each anchor.
"""Create classification weights for each anchor.
Positive (matched) anchors are associated with a weight of
Positive (matched) anchors are associated with a weight of
...
@@ -299,25 +303,23 @@ class TargetAssigner(object):
...
@@ -299,25 +303,23 @@ class TargetAssigner(object):
Args:
Args:
match: a matcher.Match object that provides a matching between anchors
match: a matcher.Match object that provides a matching between anchors
and groundtruth boxes.
and groundtruth boxes.
positive_class
_weight:
weight to be associated to positive anchors
groundtruth
_weight
s
:
a float tensor of shape [M] indicating the weight to
negative_class_weight: weight to be associated to negative anchors
assign to all anchors match to a particular groundtruth box.
Returns:
Returns:
cls_weights:
a float32 tensor with shape [num_anchors] representing
a float32 tensor with shape [num_anchors] representing
classification
classification
weights.
weights.
"""
"""
matched_indicator
=
tf
.
cast
(
match
.
matched_column_indicator
(),
tf
.
float32
)
return
match
.
gather_based_on_match
(
ignore_indicator
=
tf
.
cast
(
match
.
ignored_column_indicator
(),
tf
.
float32
)
groundtruth_weights
,
unmatched_indicator
=
1.0
-
matched_indicator
-
ignore_indicator
ignored_value
=
0.
,
cls_weights
=
(
positive_class_weight
*
matched_indicator
unmatched_value
=
self
.
_negative_class_weight
)
+
negative_class_weight
*
unmatched_indicator
)
return
cls_weights
def
get_box_coder
(
self
):
def
get_box_coder
(
self
):
"""Get BoxCoder of this TargetAssigner.
"""Get BoxCoder of this TargetAssigner.
Returns:
Returns:
BoxCoder:
BoxCoder object.
BoxCoder object.
"""
"""
return
self
.
_box_coder
return
self
.
_box_coder
...
@@ -325,7 +327,6 @@ class TargetAssigner(object):
...
@@ -325,7 +327,6 @@ class TargetAssigner(object):
# TODO: This method pulls in all the implementation dependencies into
# TODO: This method pulls in all the implementation dependencies into
# core. Therefore its best to have this factory method outside of core.
# core. Therefore its best to have this factory method outside of core.
def
create_target_assigner
(
reference
,
stage
=
None
,
def
create_target_assigner
(
reference
,
stage
=
None
,
positive_class_weight
=
1.0
,
negative_class_weight
=
1.0
,
negative_class_weight
=
1.0
,
unmatched_cls_target
=
None
):
unmatched_cls_target
=
None
):
"""Factory function for creating standard target assigners.
"""Factory function for creating standard target assigners.
...
@@ -333,8 +334,6 @@ def create_target_assigner(reference, stage=None,
...
@@ -333,8 +334,6 @@ def create_target_assigner(reference, stage=None,
Args:
Args:
reference: string referencing the type of TargetAssigner.
reference: string referencing the type of TargetAssigner.
stage: string denoting stage: {proposal, detection}.
stage: string denoting stage: {proposal, detection}.
positive_class_weight: classification weight to be associated to positive
anchors (default: 1.0)
negative_class_weight: classification weight to be associated to negative
negative_class_weight: classification weight to be associated to negative
anchors (default: 1.0)
anchors (default: 1.0)
unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k]
unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k]
...
@@ -383,7 +382,6 @@ def create_target_assigner(reference, stage=None,
...
@@ -383,7 +382,6 @@ def create_target_assigner(reference, stage=None,
raise
ValueError
(
'No valid combination of reference and stage.'
)
raise
ValueError
(
'No valid combination of reference and stage.'
)
return
TargetAssigner
(
similarity_calc
,
matcher
,
box_coder
,
return
TargetAssigner
(
similarity_calc
,
matcher
,
box_coder
,
positive_class_weight
=
positive_class_weight
,
negative_class_weight
=
negative_class_weight
,
negative_class_weight
=
negative_class_weight
,
unmatched_cls_target
=
unmatched_cls_target
)
unmatched_cls_target
=
unmatched_cls_target
)
...
@@ -391,7 +389,8 @@ def create_target_assigner(reference, stage=None,
...
@@ -391,7 +389,8 @@ def create_target_assigner(reference, stage=None,
def
batch_assign_targets
(
target_assigner
,
def
batch_assign_targets
(
target_assigner
,
anchors_batch
,
anchors_batch
,
gt_box_batch
,
gt_box_batch
,
gt_class_targets_batch
):
gt_class_targets_batch
,
gt_weights_batch
=
None
):
"""Batched assignment of classification and regression targets.
"""Batched assignment of classification and regression targets.
Args:
Args:
...
@@ -404,6 +403,8 @@ def batch_assign_targets(target_assigner,
...
@@ -404,6 +403,8 @@ def batch_assign_targets(target_assigner,
each tensor has shape [num_gt_boxes_i, classification_target_size] and
each tensor has shape [num_gt_boxes_i, classification_target_size] and
num_gt_boxes_i is the number of boxes in the ith boxlist of
num_gt_boxes_i is the number of boxes in the ith boxlist of
gt_box_batch.
gt_box_batch.
gt_weights_batch: A list of 1-D tf.float32 tensors of shape
[num_boxes] containing weights for groundtruth boxes.
Returns:
Returns:
batch_cls_targets: a tensor with shape [batch_size, num_anchors,
batch_cls_targets: a tensor with shape [batch_size, num_anchors,
...
@@ -437,11 +438,13 @@ def batch_assign_targets(target_assigner,
...
@@ -437,11 +438,13 @@ def batch_assign_targets(target_assigner,
reg_targets_list
=
[]
reg_targets_list
=
[]
reg_weights_list
=
[]
reg_weights_list
=
[]
match_list
=
[]
match_list
=
[]
for
anchors
,
gt_boxes
,
gt_class_targets
in
zip
(
if
gt_weights_batch
is
None
:
anchors_batch
,
gt_box_batch
,
gt_class_targets_batch
):
gt_weights_batch
=
[
None
]
*
len
(
gt_class_targets_batch
)
for
anchors
,
gt_boxes
,
gt_class_targets
,
gt_weights
in
zip
(
anchors_batch
,
gt_box_batch
,
gt_class_targets_batch
,
gt_weights_batch
):
(
cls_targets
,
cls_weights
,
reg_targets
,
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
match
)
=
target_assigner
.
assign
(
reg_weights
,
match
)
=
target_assigner
.
assign
(
anchors
,
gt_boxes
,
gt_class_targets
)
anchors
,
gt_boxes
,
gt_class_targets
,
gt_weights
)
cls_targets_list
.
append
(
cls_targets
)
cls_targets_list
.
append
(
cls_targets
)
cls_weights_list
.
append
(
cls_weights
)
cls_weights_list
.
append
(
cls_weights
)
reg_targets_list
.
append
(
reg_targets
)
reg_targets_list
.
append
(
reg_targets
)
...
...
research/object_detection/core/target_assigner_test.py
View file @
fd7b6887
...
@@ -17,135 +17,238 @@
...
@@ -17,135 +17,238 @@
import
numpy
as
np
import
numpy
as
np
import
tensorflow
as
tf
import
tensorflow
as
tf
from
object_detection.box_coders
import
keypoint_box_coder
from
object_detection.box_coders
import
mean_stddev_box_coder
from
object_detection.box_coders
import
mean_stddev_box_coder
from
object_detection.core
import
box_list
from
object_detection.core
import
box_list
from
object_detection.core
import
region_similarity_calculator
from
object_detection.core
import
region_similarity_calculator
from
object_detection.core
import
standard_fields
as
fields
from
object_detection.core
import
target_assigner
as
targetassigner
from
object_detection.core
import
target_assigner
as
targetassigner
from
object_detection.matchers
import
argmax_matcher
from
object_detection.matchers
import
argmax_matcher
from
object_detection.matchers
import
bipartite_matcher
from
object_detection.matchers
import
bipartite_matcher
from
object_detection.utils
import
test_case
class
TargetAssignerTest
(
t
f
.
t
est
.
TestCase
):
class
TargetAssignerTest
(
test
_case
.
TestCase
):
def
test_assign_agnostic
(
self
):
def
test_assign_agnostic
(
self
):
similarity_calc
=
region_similarity_calculator
.
NegSqDistSimilarity
()
def
graph_fn
(
anchor_means
,
anchor_stddevs
,
groundtruth_box_corners
):
matcher
=
bipartite_matcher
.
GreedyBipartiteMatcher
()
similarity_calc
=
region_similarity_calculator
.
IouSimilarity
()
box_coder
=
mean_stddev_box_coder
.
MeanStddevBoxCoder
()
matcher
=
argmax_matcher
.
ArgMaxMatcher
(
matched_threshold
=
0.5
,
target_assigner
=
targetassigner
.
TargetAssigner
(
unmatched_threshold
=
0.5
)
similarity_calc
,
matcher
,
box_coder
,
unmatched_cls_target
=
None
)
box_coder
=
mean_stddev_box_coder
.
MeanStddevBoxCoder
()
target_assigner
=
targetassigner
.
TargetAssigner
(
prior_means
=
tf
.
constant
([[
0.0
,
0.0
,
0.5
,
0.5
],
similarity_calc
,
matcher
,
box_coder
,
unmatched_cls_target
=
None
)
[
0.5
,
0.5
,
1.0
,
0.8
],
anchors_boxlist
=
box_list
.
BoxList
(
anchor_means
)
[
0
,
0.5
,
.
5
,
1.0
]])
anchors_boxlist
.
add_field
(
'stddev'
,
anchor_stddevs
)
prior_stddevs
=
tf
.
constant
(
3
*
[
4
*
[.
1
]])
groundtruth_boxlist
=
box_list
.
BoxList
(
groundtruth_box_corners
)
priors
=
box_list
.
BoxList
(
prior_means
)
result
=
target_assigner
.
assign
(
anchors_boxlist
,
groundtruth_boxlist
)
priors
.
add_field
(
'stddev'
,
prior_stddevs
)
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
_
)
=
result
return
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
)
box_corners
=
[[
0.0
,
0.0
,
0.5
,
0.5
],
[
0.5
,
0.5
,
0.9
,
0.9
]]
boxes
=
box_list
.
BoxList
(
tf
.
constant
(
box_corners
))
anchor_means
=
np
.
array
([[
0.0
,
0.0
,
0.5
,
0.5
],
[
0.5
,
0.5
,
1.0
,
0.8
],
[
0
,
0.5
,
.
5
,
1.0
]],
dtype
=
np
.
float32
)
anchor_stddevs
=
np
.
array
(
3
*
[
4
*
[.
1
]],
dtype
=
np
.
float32
)
groundtruth_box_corners
=
np
.
array
([[
0.0
,
0.0
,
0.5
,
0.5
],
[
0.5
,
0.5
,
0.9
,
0.9
]],
dtype
=
np
.
float32
)
exp_cls_targets
=
[[
1
],
[
1
],
[
0
]]
exp_cls_targets
=
[[
1
],
[
1
],
[
0
]]
exp_cls_weights
=
[
1
,
1
,
1
]
exp_cls_weights
=
[
1
,
1
,
1
]
exp_reg_targets
=
[[
0
,
0
,
0
,
0
],
exp_reg_targets
=
[[
0
,
0
,
0
,
0
],
[
0
,
0
,
-
1
,
1
],
[
0
,
0
,
-
1
,
1
],
[
0
,
0
,
0
,
0
]]
[
0
,
0
,
0
,
0
]]
exp_reg_weights
=
[
1
,
1
,
0
]
exp_reg_weights
=
[
1
,
1
,
0
]
exp_matching_anchors
=
[
0
,
1
]
(
cls_targets_out
,
cls_weights_out
,
reg_targets_out
,
result
=
target_assigner
.
assign
(
priors
,
boxes
,
num_valid_rows
=
2
)
reg_weights_out
)
=
self
.
execute
(
graph_fn
,
[
anchor_means
,
anchor_stddevs
,
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
match
)
=
result
groundtruth_box_corners
])
self
.
assertAllClose
(
cls_targets_out
,
exp_cls_targets
)
with
self
.
test_session
()
as
sess
:
self
.
assertAllClose
(
cls_weights_out
,
exp_cls_weights
)
(
cls_targets_out
,
cls_weights_out
,
self
.
assertAllClose
(
reg_targets_out
,
exp_reg_targets
)
reg_targets_out
,
reg_weights_out
,
matching_anchors_out
)
=
sess
.
run
(
self
.
assertAllClose
(
reg_weights_out
,
exp_reg_weights
)
[
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
self
.
assertEquals
(
cls_targets_out
.
dtype
,
np
.
float32
)
match
.
matched_column_indices
()])
self
.
assertEquals
(
cls_weights_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
reg_targets_out
.
dtype
,
np
.
float32
)
self
.
assertAllClose
(
cls_targets_out
,
exp_cls_targets
)
self
.
assertEquals
(
reg_weights_out
.
dtype
,
np
.
float32
)
self
.
assertAllClose
(
cls_weights_out
,
exp_cls_weights
)
self
.
assertAllClose
(
reg_targets_out
,
exp_reg_targets
)
def
test_assign_class_agnostic_with_ignored_matches
(
self
):
self
.
assertAllClose
(
reg_weights_out
,
exp_reg_weights
)
self
.
assertAllClose
(
matching_anchors_out
,
exp_matching_anchors
)
self
.
assertEquals
(
cls_targets_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
cls_weights_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
reg_targets_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
reg_weights_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
matching_anchors_out
.
dtype
,
np
.
int32
)
def
test_assign_with_ignored_matches
(
self
):
# Note: test is very similar to above. The third box matched with an IOU
# Note: test is very similar to above. The third box matched with an IOU
# of 0.35, which is between the matched and unmatched threshold. This means
# of 0.35, which is between the matched and unmatched threshold. This means
# That like above the expected classification targets are [1, 1, 0].
# That like above the expected classification targets are [1, 1, 0].
# Unlike above, the third target is ignored and therefore expected
# Unlike above, the third target is ignored and therefore expected
# classification weights are [1, 1, 0].
# classification weights are [1, 1, 0].
similarity_calc
=
region_similarity_calculator
.
IouSimilarity
()
def
graph_fn
(
anchor_means
,
anchor_stddevs
,
groundtruth_box_corners
):
matcher
=
argmax_matcher
.
ArgMaxMatcher
(
matched_threshold
=
0.5
,
similarity_calc
=
region_similarity_calculator
.
IouSimilarity
()
unmatched_threshold
=
0.3
)
matcher
=
argmax_matcher
.
ArgMaxMatcher
(
matched_threshold
=
0.5
,
box_coder
=
mean_stddev_box_coder
.
MeanStddevBoxCoder
()
unmatched_threshold
=
0.3
)
target_assigner
=
targetassigner
.
TargetAssigner
(
box_coder
=
mean_stddev_box_coder
.
MeanStddevBoxCoder
()
similarity_calc
,
matcher
,
box_coder
)
target_assigner
=
targetassigner
.
TargetAssigner
(
similarity_calc
,
matcher
,
box_coder
,
unmatched_cls_target
=
None
)
prior_means
=
tf
.
constant
([[
0.0
,
0.0
,
0.5
,
0.5
],
anchors_boxlist
=
box_list
.
BoxList
(
anchor_means
)
[
0.5
,
0.5
,
1.0
,
0.8
],
anchors_boxlist
.
add_field
(
'stddev'
,
anchor_stddevs
)
[
0.0
,
0.5
,
.
9
,
1.0
]])
groundtruth_boxlist
=
box_list
.
BoxList
(
groundtruth_box_corners
)
prior_stddevs
=
tf
.
constant
(
3
*
[
4
*
[.
1
]])
result
=
target_assigner
.
assign
(
anchors_boxlist
,
groundtruth_boxlist
)
priors
=
box_list
.
BoxList
(
prior_means
)
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
_
)
=
result
priors
.
add_field
(
'stddev'
,
prior_stddevs
)
return
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
)
box_corners
=
[[
0.0
,
0.0
,
0.5
,
0.5
],
anchor_means
=
np
.
array
([[
0.0
,
0.0
,
0.5
,
0.5
],
[
0.5
,
0.5
,
0.9
,
0.9
]]
[
0.5
,
0.5
,
1.0
,
0.8
],
boxes
=
box_list
.
BoxList
(
tf
.
constant
(
box_corners
))
[
0.0
,
0.5
,
.
9
,
1.0
]],
dtype
=
np
.
float32
)
anchor_stddevs
=
np
.
array
(
3
*
[
4
*
[.
1
]],
dtype
=
np
.
float32
)
groundtruth_box_corners
=
np
.
array
([[
0.0
,
0.0
,
0.5
,
0.5
],
[
0.5
,
0.5
,
0.9
,
0.9
]],
dtype
=
np
.
float32
)
exp_cls_targets
=
[[
1
],
[
1
],
[
0
]]
exp_cls_targets
=
[[
1
],
[
1
],
[
0
]]
exp_cls_weights
=
[
1
,
1
,
0
]
exp_cls_weights
=
[
1
,
1
,
0
]
exp_reg_targets
=
[[
0
,
0
,
0
,
0
],
exp_reg_targets
=
[[
0
,
0
,
0
,
0
],
[
0
,
0
,
-
1
,
1
],
[
0
,
0
,
-
1
,
1
],
[
0
,
0
,
0
,
0
]]
[
0
,
0
,
0
,
0
]]
exp_reg_weights
=
[
1
,
1
,
0
]
exp_reg_weights
=
[
1
,
1
,
0
]
exp_matching_anchors
=
[
0
,
1
]
(
cls_targets_out
,
cls_weights_out
,
reg_targets_out
,
reg_weights_out
)
=
self
.
execute
(
graph_fn
,
[
anchor_means
,
anchor_stddevs
,
result
=
target_assigner
.
assign
(
priors
,
boxes
)
groundtruth_box_corners
])
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
match
)
=
result
self
.
assertAllClose
(
cls_targets_out
,
exp_cls_targets
)
with
self
.
test_session
()
as
sess
:
self
.
assertAllClose
(
cls_weights_out
,
exp_cls_weights
)
(
cls_targets_out
,
cls_weights_out
,
self
.
assertAllClose
(
reg_targets_out
,
exp_reg_targets
)
reg_targets_out
,
reg_weights_out
,
matching_anchors_out
)
=
sess
.
run
(
self
.
assertAllClose
(
reg_weights_out
,
exp_reg_weights
)
[
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
self
.
assertEquals
(
cls_targets_out
.
dtype
,
np
.
float32
)
match
.
matched_column_indices
()])
self
.
assertEquals
(
cls_weights_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
reg_targets_out
.
dtype
,
np
.
float32
)
self
.
assertAllClose
(
cls_targets_out
,
exp_cls_targets
)
self
.
assertEquals
(
reg_weights_out
.
dtype
,
np
.
float32
)
self
.
assertAllClose
(
cls_weights_out
,
exp_cls_weights
)
self
.
assertAllClose
(
reg_targets_out
,
exp_reg_targets
)
def
test_assign_agnostic_with_keypoints
(
self
):
self
.
assertAllClose
(
reg_weights_out
,
exp_reg_weights
)
def
graph_fn
(
anchor_means
,
groundtruth_box_corners
,
self
.
assertAllClose
(
matching_anchors_out
,
exp_matching_anchors
)
groundtruth_keypoints
):
self
.
assertEquals
(
cls_targets_out
.
dtype
,
np
.
float32
)
similarity_calc
=
region_similarity_calculator
.
IouSimilarity
()
self
.
assertEquals
(
cls_weights_out
.
dtype
,
np
.
float32
)
matcher
=
argmax_matcher
.
ArgMaxMatcher
(
matched_threshold
=
0.5
,
self
.
assertEquals
(
reg_targets_out
.
dtype
,
np
.
float32
)
unmatched_threshold
=
0.5
)
self
.
assertEquals
(
reg_weights_out
.
dtype
,
np
.
float32
)
box_coder
=
keypoint_box_coder
.
KeypointBoxCoder
(
self
.
assertEquals
(
matching_anchors_out
.
dtype
,
np
.
int32
)
num_keypoints
=
6
,
scale_factors
=
[
10.0
,
10.0
,
5.0
,
5.0
])
target_assigner
=
targetassigner
.
TargetAssigner
(
similarity_calc
,
matcher
,
box_coder
,
unmatched_cls_target
=
None
)
anchors_boxlist
=
box_list
.
BoxList
(
anchor_means
)
groundtruth_boxlist
=
box_list
.
BoxList
(
groundtruth_box_corners
)
groundtruth_boxlist
.
add_field
(
fields
.
BoxListFields
.
keypoints
,
groundtruth_keypoints
)
result
=
target_assigner
.
assign
(
anchors_boxlist
,
groundtruth_boxlist
)
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
_
)
=
result
return
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
)
anchor_means
=
np
.
array
([[
0.0
,
0.0
,
0.5
,
0.5
],
[
0.5
,
0.5
,
1.0
,
1.0
],
[
0.0
,
0.5
,
.
9
,
1.0
]],
dtype
=
np
.
float32
)
groundtruth_box_corners
=
np
.
array
([[
0.0
,
0.0
,
0.5
,
0.5
],
[
0.45
,
0.45
,
0.95
,
0.95
]],
dtype
=
np
.
float32
)
groundtruth_keypoints
=
np
.
array
(
[[[
0.1
,
0.2
],
[
0.1
,
0.3
],
[
0.2
,
0.2
],
[
0.2
,
0.2
],
[
0.1
,
0.1
],
[
0.9
,
0
]],
[[
0
,
0.3
],
[
0.2
,
0.4
],
[
0.5
,
0.6
],
[
0
,
0.6
],
[
0.8
,
0.2
],
[
0.2
,
0.4
]]],
dtype
=
np
.
float32
)
exp_cls_targets
=
[[
1
],
[
1
],
[
0
]]
exp_cls_weights
=
[
1
,
1
,
1
]
exp_reg_targets
=
[[
0
,
0
,
0
,
0
,
-
3
,
-
1
,
-
3
,
1
,
-
1
,
-
1
,
-
1
,
-
1
,
-
3
,
-
3
,
13
,
-
5
],
[
-
1
,
-
1
,
0
,
0
,
-
15
,
-
9
,
-
11
,
-
7
,
-
5
,
-
3
,
-
15
,
-
3
,
1
,
-
11
,
-
11
,
-
7
],
[
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
]]
exp_reg_weights
=
[
1
,
1
,
0
]
(
cls_targets_out
,
cls_weights_out
,
reg_targets_out
,
reg_weights_out
)
=
self
.
execute
(
graph_fn
,
[
anchor_means
,
groundtruth_box_corners
,
groundtruth_keypoints
])
self
.
assertAllClose
(
cls_targets_out
,
exp_cls_targets
)
self
.
assertAllClose
(
cls_weights_out
,
exp_cls_weights
)
self
.
assertAllClose
(
reg_targets_out
,
exp_reg_targets
)
self
.
assertAllClose
(
reg_weights_out
,
exp_reg_weights
)
self
.
assertEquals
(
cls_targets_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
cls_weights_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
reg_targets_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
reg_weights_out
.
dtype
,
np
.
float32
)
def
test_assign_class_agnostic_with_keypoints_and_ignored_matches
(
self
):
# Note: test is very similar to above. The third box matched with an IOU
# of 0.35, which is between the matched and unmatched threshold. This means
# That like above the expected classification targets are [1, 1, 0].
# Unlike above, the third target is ignored and therefore expected
# classification weights are [1, 1, 0].
def
graph_fn
(
anchor_means
,
groundtruth_box_corners
,
groundtruth_keypoints
):
similarity_calc
=
region_similarity_calculator
.
IouSimilarity
()
matcher
=
argmax_matcher
.
ArgMaxMatcher
(
matched_threshold
=
0.5
,
unmatched_threshold
=
0.5
)
box_coder
=
keypoint_box_coder
.
KeypointBoxCoder
(
num_keypoints
=
6
,
scale_factors
=
[
10.0
,
10.0
,
5.0
,
5.0
])
target_assigner
=
targetassigner
.
TargetAssigner
(
similarity_calc
,
matcher
,
box_coder
,
unmatched_cls_target
=
None
)
anchors_boxlist
=
box_list
.
BoxList
(
anchor_means
)
groundtruth_boxlist
=
box_list
.
BoxList
(
groundtruth_box_corners
)
groundtruth_boxlist
.
add_field
(
fields
.
BoxListFields
.
keypoints
,
groundtruth_keypoints
)
result
=
target_assigner
.
assign
(
anchors_boxlist
,
groundtruth_boxlist
)
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
_
)
=
result
return
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
)
anchor_means
=
np
.
array
([[
0.0
,
0.0
,
0.5
,
0.5
],
[
0.5
,
0.5
,
1.0
,
1.0
],
[
0.0
,
0.5
,
.
9
,
1.0
]],
dtype
=
np
.
float32
)
groundtruth_box_corners
=
np
.
array
([[
0.0
,
0.0
,
0.5
,
0.5
],
[
0.45
,
0.45
,
0.95
,
0.95
]],
dtype
=
np
.
float32
)
groundtruth_keypoints
=
np
.
array
(
[[[
0.1
,
0.2
],
[
0.1
,
0.3
],
[
0.2
,
0.2
],
[
0.2
,
0.2
],
[
0.1
,
0.1
],
[
0.9
,
0
]],
[[
0
,
0.3
],
[
0.2
,
0.4
],
[
0.5
,
0.6
],
[
0
,
0.6
],
[
0.8
,
0.2
],
[
0.2
,
0.4
]]],
dtype
=
np
.
float32
)
exp_cls_targets
=
[[
1
],
[
1
],
[
0
]]
exp_cls_weights
=
[
1
,
1
,
1
]
exp_reg_targets
=
[[
0
,
0
,
0
,
0
,
-
3
,
-
1
,
-
3
,
1
,
-
1
,
-
1
,
-
1
,
-
1
,
-
3
,
-
3
,
13
,
-
5
],
[
-
1
,
-
1
,
0
,
0
,
-
15
,
-
9
,
-
11
,
-
7
,
-
5
,
-
3
,
-
15
,
-
3
,
1
,
-
11
,
-
11
,
-
7
],
[
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
]]
exp_reg_weights
=
[
1
,
1
,
0
]
(
cls_targets_out
,
cls_weights_out
,
reg_targets_out
,
reg_weights_out
)
=
self
.
execute
(
graph_fn
,
[
anchor_means
,
groundtruth_box_corners
,
groundtruth_keypoints
])
self
.
assertAllClose
(
cls_targets_out
,
exp_cls_targets
)
self
.
assertAllClose
(
cls_weights_out
,
exp_cls_weights
)
self
.
assertAllClose
(
reg_targets_out
,
exp_reg_targets
)
self
.
assertAllClose
(
reg_weights_out
,
exp_reg_weights
)
self
.
assertEquals
(
cls_targets_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
cls_weights_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
reg_targets_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
reg_weights_out
.
dtype
,
np
.
float32
)
def
test_assign_multiclass
(
self
):
def
test_assign_multiclass
(
self
):
similarity_calc
=
region_similarity_calculator
.
NegSqDistSimilarity
()
def
graph_fn
(
anchor_means
,
anchor_stddevs
,
groundtruth_box_corners
,
matcher
=
bipartite_matcher
.
GreedyBipartiteMatcher
()
groundtruth_labels
):
box_coder
=
mean_stddev_box_coder
.
MeanStddevBoxCoder
()
similarity_calc
=
region_similarity_calculator
.
IouSimilarity
()
unmatched_cls_target
=
tf
.
constant
([
1
,
0
,
0
,
0
,
0
,
0
,
0
],
tf
.
float32
)
matcher
=
argmax_matcher
.
ArgMaxMatcher
(
matched_threshold
=
0.5
,
target_assigner
=
targetassigner
.
TargetAssigner
(
unmatched_threshold
=
0.5
)
similarity_calc
,
matcher
,
box_coder
,
box_coder
=
mean_stddev_box_coder
.
MeanStddevBoxCoder
()
unmatched_cls_target
=
unmatched_cls_target
)
unmatched_cls_target
=
tf
.
constant
([
1
,
0
,
0
,
0
,
0
,
0
,
0
],
tf
.
float32
)
target_assigner
=
targetassigner
.
TargetAssigner
(
prior_means
=
tf
.
constant
([[
0.0
,
0.0
,
0.5
,
0.5
],
similarity_calc
,
matcher
,
box_coder
,
[
0.5
,
0.5
,
1.0
,
0.8
],
unmatched_cls_target
=
unmatched_cls_target
)
[
0
,
0.5
,
.
5
,
1.0
],
[.
75
,
0
,
1.0
,
.
25
]])
anchors_boxlist
=
box_list
.
BoxList
(
anchor_means
)
prior_stddevs
=
tf
.
constant
(
4
*
[
4
*
[.
1
]])
anchors_boxlist
.
add_field
(
'stddev'
,
anchor_stddevs
)
priors
=
box_list
.
BoxList
(
prior_means
)
groundtruth_boxlist
=
box_list
.
BoxList
(
groundtruth_box_corners
)
priors
.
add_field
(
'stddev'
,
prior_stddevs
)
result
=
target_assigner
.
assign
(
anchors_boxlist
,
groundtruth_boxlist
,
groundtruth_labels
)
box_corners
=
[[
0.0
,
0.0
,
0.5
,
0.5
],
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
_
)
=
result
[
0.5
,
0.5
,
0.9
,
0.9
],
return
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
)
[.
75
,
0
,
.
95
,
.
27
]]
boxes
=
box_list
.
BoxList
(
tf
.
constant
(
box_corners
))
anchor_means
=
np
.
array
([[
0.0
,
0.0
,
0.5
,
0.5
],
[
0.5
,
0.5
,
1.0
,
0.8
],
groundtruth_labels
=
tf
.
constant
([[
0
,
1
,
0
,
0
,
0
,
0
,
0
],
[
0
,
0.5
,
.
5
,
1.0
],
[
0
,
0
,
0
,
0
,
0
,
1
,
0
],
[.
75
,
0
,
1.0
,
.
25
]],
dtype
=
np
.
float32
)
[
0
,
0
,
0
,
1
,
0
,
0
,
0
]],
tf
.
float32
)
anchor_stddevs
=
np
.
array
(
4
*
[
4
*
[.
1
]],
dtype
=
np
.
float32
)
groundtruth_box_corners
=
np
.
array
([[
0.0
,
0.0
,
0.5
,
0.5
],
[
0.5
,
0.5
,
0.9
,
0.9
],
[.
75
,
0
,
.
95
,
.
27
]],
dtype
=
np
.
float32
)
groundtruth_labels
=
np
.
array
([[
0
,
1
,
0
,
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
,
0
,
1
,
0
],
[
0
,
0
,
0
,
1
,
0
,
0
,
0
]],
dtype
=
np
.
float32
)
exp_cls_targets
=
[[
0
,
1
,
0
,
0
,
0
,
0
,
0
],
exp_cls_targets
=
[[
0
,
1
,
0
,
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
,
0
,
1
,
0
],
[
0
,
0
,
0
,
0
,
0
,
1
,
0
],
...
@@ -157,88 +260,98 @@ class TargetAssignerTest(tf.test.TestCase):
...
@@ -157,88 +260,98 @@ class TargetAssignerTest(tf.test.TestCase):
[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
],
[
0
,
0
,
-
.
5
,
.
2
]]
[
0
,
0
,
-
.
5
,
.
2
]]
exp_reg_weights
=
[
1
,
1
,
0
,
1
]
exp_reg_weights
=
[
1
,
1
,
0
,
1
]
exp_matching_anchors
=
[
0
,
1
,
3
]
result
=
target_assigner
.
assign
(
priors
,
boxes
,
groundtruth_labels
,
num_valid_rows
=
3
)
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
match
)
=
result
with
self
.
test_session
()
as
sess
:
(
cls_targets_out
,
cls_weights_out
,
reg_targets_out
,
reg_weights_out
,
matching_anchors_out
)
=
sess
.
run
(
[
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
match
.
matched_column_indices
()])
self
.
assertAllClose
(
cls_targets_out
,
exp_cls_targets
)
self
.
assertAllClose
(
cls_weights_out
,
exp_cls_weights
)
self
.
assertAllClose
(
reg_targets_out
,
exp_reg_targets
)
self
.
assertAllClose
(
reg_weights_out
,
exp_reg_weights
)
self
.
assertAllClose
(
matching_anchors_out
,
exp_matching_anchors
)
self
.
assertEquals
(
cls_targets_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
cls_weights_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
reg_targets_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
reg_weights_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
matching_anchors_out
.
dtype
,
np
.
int32
)
def
test_assign_multiclass_unequal_class_weights
(
self
):
similarity_calc
=
region_similarity_calculator
.
NegSqDistSimilarity
()
matcher
=
bipartite_matcher
.
GreedyBipartiteMatcher
()
box_coder
=
mean_stddev_box_coder
.
MeanStddevBoxCoder
()
unmatched_cls_target
=
tf
.
constant
([
1
,
0
,
0
,
0
,
0
,
0
,
0
],
tf
.
float32
)
target_assigner
=
targetassigner
.
TargetAssigner
(
similarity_calc
,
matcher
,
box_coder
,
positive_class_weight
=
1.0
,
negative_class_weight
=
0.5
,
unmatched_cls_target
=
unmatched_cls_target
)
prior_means
=
tf
.
constant
([[
0.0
,
0.0
,
0.5
,
0.5
],
(
cls_targets_out
,
cls_weights_out
,
reg_targets_out
,
[
0.5
,
0.5
,
1.0
,
0.8
],
reg_weights_out
)
=
self
.
execute
(
graph_fn
,
[
anchor_means
,
anchor_stddevs
,
[
0
,
0.5
,
.
5
,
1.0
],
groundtruth_box_corners
,
[.
75
,
0
,
1.0
,
.
25
]])
groundtruth_labels
])
prior_stddevs
=
tf
.
constant
(
4
*
[
4
*
[.
1
]])
self
.
assertAllClose
(
cls_targets_out
,
exp_cls_targets
)
priors
=
box_list
.
BoxList
(
prior_means
)
self
.
assertAllClose
(
cls_weights_out
,
exp_cls_weights
)
priors
.
add_field
(
'stddev'
,
prior_stddevs
)
self
.
assertAllClose
(
reg_targets_out
,
exp_reg_targets
)
self
.
assertAllClose
(
reg_weights_out
,
exp_reg_weights
)
box_corners
=
[[
0.0
,
0.0
,
0.5
,
0.5
],
self
.
assertEquals
(
cls_targets_out
.
dtype
,
np
.
float32
)
[
0.5
,
0.5
,
0.9
,
0.9
],
self
.
assertEquals
(
cls_weights_out
.
dtype
,
np
.
float32
)
[.
75
,
0
,
.
95
,
.
27
]]
self
.
assertEquals
(
reg_targets_out
.
dtype
,
np
.
float32
)
boxes
=
box_list
.
BoxList
(
tf
.
constant
(
box_corners
))
self
.
assertEquals
(
reg_weights_out
.
dtype
,
np
.
float32
)
groundtruth_labels
=
tf
.
constant
([[
0
,
1
,
0
,
0
,
0
,
0
,
0
],
def
test_assign_multiclass_with_groundtruth_weights
(
self
):
[
0
,
0
,
0
,
0
,
0
,
1
,
0
],
def
graph_fn
(
anchor_means
,
anchor_stddevs
,
groundtruth_box_corners
,
[
0
,
0
,
0
,
1
,
0
,
0
,
0
]],
tf
.
float32
)
groundtruth_labels
,
groundtruth_weights
):
similarity_calc
=
region_similarity_calculator
.
IouSimilarity
()
exp_cls_weights
=
[
1
,
1
,
.
5
,
1
]
matcher
=
argmax_matcher
.
ArgMaxMatcher
(
matched_threshold
=
0.5
,
result
=
target_assigner
.
assign
(
priors
,
boxes
,
groundtruth_labels
,
unmatched_threshold
=
0.5
)
num_valid_rows
=
3
)
box_coder
=
mean_stddev_box_coder
.
MeanStddevBoxCoder
()
(
_
,
cls_weights
,
_
,
_
,
_
)
=
result
unmatched_cls_target
=
tf
.
constant
([
1
,
0
,
0
,
0
,
0
,
0
,
0
],
tf
.
float32
)
with
self
.
test_session
()
as
sess
:
target_assigner
=
targetassigner
.
TargetAssigner
(
cls_weights_out
=
sess
.
run
(
cls_weights
)
similarity_calc
,
matcher
,
box_coder
,
self
.
assertAllClose
(
cls_weights_out
,
exp_cls_weights
)
unmatched_cls_target
=
unmatched_cls_target
)
anchors_boxlist
=
box_list
.
BoxList
(
anchor_means
)
anchors_boxlist
.
add_field
(
'stddev'
,
anchor_stddevs
)
groundtruth_boxlist
=
box_list
.
BoxList
(
groundtruth_box_corners
)
result
=
target_assigner
.
assign
(
anchors_boxlist
,
groundtruth_boxlist
,
groundtruth_labels
,
groundtruth_weights
)
(
_
,
cls_weights
,
_
,
reg_weights
,
_
)
=
result
return
(
cls_weights
,
reg_weights
)
anchor_means
=
np
.
array
([[
0.0
,
0.0
,
0.5
,
0.5
],
[
0.5
,
0.5
,
1.0
,
0.8
],
[
0
,
0.5
,
.
5
,
1.0
],
[.
75
,
0
,
1.0
,
.
25
]],
dtype
=
np
.
float32
)
anchor_stddevs
=
np
.
array
(
4
*
[
4
*
[.
1
]],
dtype
=
np
.
float32
)
groundtruth_box_corners
=
np
.
array
([[
0.0
,
0.0
,
0.5
,
0.5
],
[
0.5
,
0.5
,
0.9
,
0.9
],
[.
75
,
0
,
.
95
,
.
27
]],
dtype
=
np
.
float32
)
groundtruth_labels
=
np
.
array
([[
0
,
1
,
0
,
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
,
0
,
1
,
0
],
[
0
,
0
,
0
,
1
,
0
,
0
,
0
]],
dtype
=
np
.
float32
)
groundtruth_weights
=
np
.
array
([
0.3
,
0.
,
0.5
],
dtype
=
np
.
float32
)
exp_cls_weights
=
[
0.3
,
0.
,
1
,
0.5
]
# background class gets weight of 1.
exp_reg_weights
=
[
0.3
,
0.
,
0.
,
0.5
]
# background class gets weight of 0.
(
cls_weights_out
,
reg_weights_out
)
=
self
.
execute
(
graph_fn
,
[
anchor_means
,
anchor_stddevs
,
groundtruth_box_corners
,
groundtruth_labels
,
groundtruth_weights
])
self
.
assertAllClose
(
cls_weights_out
,
exp_cls_weights
)
self
.
assertAllClose
(
reg_weights_out
,
exp_reg_weights
)
def
test_assign_multidimensional_class_targets
(
self
):
def
test_assign_multidimensional_class_targets
(
self
):
similarity_calc
=
region_similarity_calculator
.
NegSqDistSimilarity
()
def
graph_fn
(
anchor_means
,
anchor_stddevs
,
groundtruth_box_corners
,
matcher
=
bipartite_matcher
.
GreedyBipartiteMatcher
()
groundtruth_labels
):
box_coder
=
mean_stddev_box_coder
.
MeanStddevBoxCoder
()
similarity_calc
=
region_similarity_calculator
.
IouSimilarity
()
unmatched_cls_target
=
tf
.
constant
([[
0
,
0
],
[
0
,
0
]],
tf
.
float32
)
matcher
=
argmax_matcher
.
ArgMaxMatcher
(
matched_threshold
=
0.5
,
target_assigner
=
targetassigner
.
TargetAssigner
(
unmatched_threshold
=
0.5
)
similarity_calc
,
matcher
,
box_coder
,
box_coder
=
mean_stddev_box_coder
.
MeanStddevBoxCoder
()
unmatched_cls_target
=
unmatched_cls_target
)
unmatched_cls_target
=
tf
.
constant
([[
0
,
0
],
[
0
,
0
]],
tf
.
float32
)
prior_means
=
tf
.
constant
([[
0.0
,
0.0
,
0.5
,
0.5
],
target_assigner
=
targetassigner
.
TargetAssigner
(
[
0.5
,
0.5
,
1.0
,
0.8
],
similarity_calc
,
matcher
,
box_coder
,
[
0
,
0.5
,
.
5
,
1.0
],
unmatched_cls_target
=
unmatched_cls_target
)
[.
75
,
0
,
1.0
,
.
25
]])
prior_stddevs
=
tf
.
constant
(
4
*
[
4
*
[.
1
]])
anchors_boxlist
=
box_list
.
BoxList
(
anchor_means
)
priors
=
box_list
.
BoxList
(
prior_means
)
anchors_boxlist
.
add_field
(
'stddev'
,
anchor_stddevs
)
priors
.
add_field
(
'stddev'
,
prior_stddevs
)
groundtruth_boxlist
=
box_list
.
BoxList
(
groundtruth_box_corners
)
result
=
target_assigner
.
assign
(
anchors_boxlist
,
groundtruth_boxlist
,
box_corners
=
[[
0.0
,
0.0
,
0.5
,
0.5
],
groundtruth_labels
)
[
0.5
,
0.5
,
0.9
,
0.9
],
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
_
)
=
result
[.
75
,
0
,
.
95
,
.
27
]]
return
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
)
boxes
=
box_list
.
BoxList
(
tf
.
constant
(
box_corners
))
anchor_means
=
np
.
array
([[
0.0
,
0.0
,
0.5
,
0.5
],
groundtruth_labels
=
tf
.
constant
([[[
0
,
1
],
[
1
,
0
]],
[
0.5
,
0.5
,
1.0
,
0.8
],
[[
1
,
0
],
[
0
,
1
]],
[
0
,
0.5
,
.
5
,
1.0
],
[[
0
,
1
],
[
1
,
.
5
]]],
tf
.
float32
)
[.
75
,
0
,
1.0
,
.
25
]],
dtype
=
np
.
float32
)
anchor_stddevs
=
np
.
array
(
4
*
[
4
*
[.
1
]],
dtype
=
np
.
float32
)
groundtruth_box_corners
=
np
.
array
([[
0.0
,
0.0
,
0.5
,
0.5
],
[
0.5
,
0.5
,
0.9
,
0.9
],
[.
75
,
0
,
.
95
,
.
27
]],
dtype
=
np
.
float32
)
groundtruth_labels
=
np
.
array
([[[
0
,
1
],
[
1
,
0
]],
[[
1
,
0
],
[
0
,
1
]],
[[
0
,
1
],
[
1
,
.
5
]]],
np
.
float32
)
exp_cls_targets
=
[[[
0
,
1
],
[
1
,
0
]],
exp_cls_targets
=
[[[
0
,
1
],
[
1
,
0
]],
[[
1
,
0
],
[
0
,
1
]],
[[
1
,
0
],
[
0
,
1
]],
...
@@ -250,52 +363,46 @@ class TargetAssignerTest(tf.test.TestCase):
...
@@ -250,52 +363,46 @@ class TargetAssignerTest(tf.test.TestCase):
[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
],
[
0
,
0
,
-
.
5
,
.
2
]]
[
0
,
0
,
-
.
5
,
.
2
]]
exp_reg_weights
=
[
1
,
1
,
0
,
1
]
exp_reg_weights
=
[
1
,
1
,
0
,
1
]
exp_matching_anchors
=
[
0
,
1
,
3
]
(
cls_targets_out
,
cls_weights_out
,
reg_targets_out
,
reg_weights_out
)
=
self
.
execute
(
graph_fn
,
[
anchor_means
,
anchor_stddevs
,
result
=
target_assigner
.
assign
(
priors
,
boxes
,
groundtruth_labels
,
groundtruth_box_corners
,
num_valid_rows
=
3
)
groundtruth_labels
])
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
match
)
=
result
self
.
assertAllClose
(
cls_targets_out
,
exp_cls_targets
)
with
self
.
test_session
()
as
sess
:
self
.
assertAllClose
(
cls_weights_out
,
exp_cls_weights
)
(
cls_targets_out
,
cls_weights_out
,
self
.
assertAllClose
(
reg_targets_out
,
exp_reg_targets
)
reg_targets_out
,
reg_weights_out
,
matching_anchors_out
)
=
sess
.
run
(
self
.
assertAllClose
(
reg_weights_out
,
exp_reg_weights
)
[
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
self
.
assertEquals
(
cls_targets_out
.
dtype
,
np
.
float32
)
match
.
matched_column_indices
()])
self
.
assertEquals
(
cls_weights_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
reg_targets_out
.
dtype
,
np
.
float32
)
self
.
assertAllClose
(
cls_targets_out
,
exp_cls_targets
)
self
.
assertEquals
(
reg_weights_out
.
dtype
,
np
.
float32
)
self
.
assertAllClose
(
cls_weights_out
,
exp_cls_weights
)
self
.
assertAllClose
(
reg_targets_out
,
exp_reg_targets
)
self
.
assertAllClose
(
reg_weights_out
,
exp_reg_weights
)
self
.
assertAllClose
(
matching_anchors_out
,
exp_matching_anchors
)
self
.
assertEquals
(
cls_targets_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
cls_weights_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
reg_targets_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
reg_weights_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
matching_anchors_out
.
dtype
,
np
.
int32
)
def
test_assign_empty_groundtruth
(
self
):
def
test_assign_empty_groundtruth
(
self
):
similarity_calc
=
region_similarity_calculator
.
NegSqDistSimilarity
()
def
graph_fn
(
anchor_means
,
anchor_stddevs
,
groundtruth_box_corners
,
matcher
=
bipartite_matcher
.
GreedyBipartiteMatcher
()
groundtruth_labels
):
box_coder
=
mean_stddev_box_coder
.
MeanStddevBoxCoder
()
similarity_calc
=
region_similarity_calculator
.
IouSimilarity
()
unmatched_cls_target
=
tf
.
constant
([
0
,
0
,
0
],
tf
.
float32
)
matcher
=
argmax_matcher
.
ArgMaxMatcher
(
matched_threshold
=
0.5
,
target_assigner
=
targetassigner
.
TargetAssigner
(
unmatched_threshold
=
0.5
)
similarity_calc
,
matcher
,
box_coder
,
box_coder
=
mean_stddev_box_coder
.
MeanStddevBoxCoder
()
unmatched_cls_target
=
unmatched_cls_target
)
unmatched_cls_target
=
tf
.
constant
([
0
,
0
,
0
],
tf
.
float32
)
anchors_boxlist
=
box_list
.
BoxList
(
anchor_means
)
prior_means
=
tf
.
constant
([[
0.0
,
0.0
,
0.5
,
0.5
],
anchors_boxlist
.
add_field
(
'stddev'
,
anchor_stddevs
)
[
0.5
,
0.5
,
1.0
,
0.8
],
groundtruth_boxlist
=
box_list
.
BoxList
(
groundtruth_box_corners
)
[
0
,
0.5
,
.
5
,
1.0
],
target_assigner
=
targetassigner
.
TargetAssigner
(
[.
75
,
0
,
1.0
,
.
25
]])
similarity_calc
,
matcher
,
box_coder
,
prior_stddevs
=
tf
.
constant
(
4
*
[
4
*
[.
1
]])
unmatched_cls_target
=
unmatched_cls_target
)
priors
=
box_list
.
BoxList
(
prior_means
)
result
=
target_assigner
.
assign
(
anchors_boxlist
,
groundtruth_boxlist
,
priors
.
add_field
(
'stddev'
,
prior_stddevs
)
groundtruth_labels
)
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
_
)
=
result
box_corners_expanded
=
tf
.
constant
([[
0.0
,
0.0
,
0.0
,
0.0
]])
return
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
)
box_corners
=
tf
.
slice
(
box_corners_expanded
,
[
0
,
0
],
[
0
,
4
])
boxes
=
box_list
.
BoxList
(
box_corners
)
groundtruth_box_corners
=
np
.
zeros
((
0
,
4
),
dtype
=
np
.
float32
)
groundtruth_labels
=
np
.
zeros
((
0
,
3
),
dtype
=
np
.
float32
)
groundtruth_labels_expanded
=
tf
.
constant
([[
0
,
0
,
0
]],
tf
.
float32
)
anchor_means
=
np
.
array
([[
0.0
,
0.0
,
0.5
,
0.5
],
groundtruth_labels
=
tf
.
slice
(
groundtruth_labels_expanded
,
[
0
,
0
],
[
0
,
3
])
[
0.5
,
0.5
,
1.0
,
0.8
],
[
0
,
0.5
,
.
5
,
1.0
],
[.
75
,
0
,
1.0
,
.
25
]],
dtype
=
np
.
float32
)
anchor_stddevs
=
np
.
array
(
4
*
[
4
*
[.
1
]],
dtype
=
np
.
float32
)
exp_cls_targets
=
[[
0
,
0
,
0
],
exp_cls_targets
=
[[
0
,
0
,
0
],
[
0
,
0
,
0
],
[
0
,
0
,
0
],
[
0
,
0
,
0
],
[
0
,
0
,
0
],
...
@@ -306,26 +413,18 @@ class TargetAssignerTest(tf.test.TestCase):
...
@@ -306,26 +413,18 @@ class TargetAssignerTest(tf.test.TestCase):
[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
]]
[
0
,
0
,
0
,
0
]]
exp_reg_weights
=
[
0
,
0
,
0
,
0
]
exp_reg_weights
=
[
0
,
0
,
0
,
0
]
exp_matching_anchors
=
[]
(
cls_targets_out
,
cls_weights_out
,
reg_targets_out
,
reg_weights_out
)
=
self
.
execute
(
graph_fn
,
[
anchor_means
,
anchor_stddevs
,
result
=
target_assigner
.
assign
(
priors
,
boxes
,
groundtruth_labels
)
groundtruth_box_corners
,
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
match
)
=
result
groundtruth_labels
])
with
self
.
test_session
()
as
sess
:
self
.
assertAllClose
(
cls_targets_out
,
exp_cls_targets
)
(
cls_targets_out
,
cls_weights_out
,
self
.
assertAllClose
(
cls_weights_out
,
exp_cls_weights
)
reg_targets_out
,
reg_weights_out
,
matching_anchors_out
)
=
sess
.
run
(
self
.
assertAllClose
(
reg_targets_out
,
exp_reg_targets
)
[
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
self
.
assertAllClose
(
reg_weights_out
,
exp_reg_weights
)
match
.
matched_column_indices
()])
self
.
assertEquals
(
cls_targets_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
cls_weights_out
.
dtype
,
np
.
float32
)
self
.
assertAllClose
(
cls_targets_out
,
exp_cls_targets
)
self
.
assertEquals
(
reg_targets_out
.
dtype
,
np
.
float32
)
self
.
assertAllClose
(
cls_weights_out
,
exp_cls_weights
)
self
.
assertEquals
(
reg_weights_out
.
dtype
,
np
.
float32
)
self
.
assertAllClose
(
reg_targets_out
,
exp_reg_targets
)
self
.
assertAllClose
(
reg_weights_out
,
exp_reg_weights
)
self
.
assertAllClose
(
matching_anchors_out
,
exp_matching_anchors
)
self
.
assertEquals
(
cls_targets_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
cls_weights_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
reg_targets_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
reg_weights_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
matching_anchors_out
.
dtype
,
np
.
int32
)
def
test_raises_error_on_incompatible_groundtruth_boxes_and_labels
(
self
):
def
test_raises_error_on_incompatible_groundtruth_boxes_and_labels
(
self
):
similarity_calc
=
region_similarity_calculator
.
NegSqDistSimilarity
()
similarity_calc
=
region_similarity_calculator
.
NegSqDistSimilarity
()
...
@@ -353,14 +452,9 @@ class TargetAssignerTest(tf.test.TestCase):
...
@@ -353,14 +452,9 @@ class TargetAssignerTest(tf.test.TestCase):
groundtruth_labels
=
tf
.
constant
([[
0
,
1
,
0
,
0
,
0
,
0
,
0
],
groundtruth_labels
=
tf
.
constant
([[
0
,
1
,
0
,
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
,
0
,
1
,
0
],
[
0
,
0
,
0
,
0
,
0
,
1
,
0
],
[
0
,
0
,
0
,
1
,
0
,
0
,
0
]],
tf
.
float32
)
[
0
,
0
,
0
,
1
,
0
,
0
,
0
]],
tf
.
float32
)
result
=
target_assigner
.
assign
(
priors
,
boxes
,
groundtruth_labels
,
with
self
.
assertRaisesRegexp
(
ValueError
,
'Unequal shapes'
):
num_valid_rows
=
3
)
target_assigner
.
assign
(
priors
,
boxes
,
groundtruth_labels
,
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
_
)
=
result
num_valid_rows
=
3
)
with
self
.
test_session
()
as
sess
:
with
self
.
assertRaisesWithPredicateMatch
(
tf
.
errors
.
InvalidArgumentError
,
'Groundtruth boxes and labels have incompatible shapes!'
):
sess
.
run
([
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
])
def
test_raises_error_on_invalid_groundtruth_labels
(
self
):
def
test_raises_error_on_invalid_groundtruth_labels
(
self
):
similarity_calc
=
region_similarity_calculator
.
NegSqDistSimilarity
()
similarity_calc
=
region_similarity_calculator
.
NegSqDistSimilarity
()
...
@@ -380,7 +474,6 @@ class TargetAssignerTest(tf.test.TestCase):
...
@@ -380,7 +474,6 @@ class TargetAssignerTest(tf.test.TestCase):
[
0.5
,
0.5
,
0.9
,
0.9
],
[
0.5
,
0.5
,
0.9
,
0.9
],
[.
75
,
0
,
.
95
,
.
27
]]
[.
75
,
0
,
.
95
,
.
27
]]
boxes
=
box_list
.
BoxList
(
tf
.
constant
(
box_corners
))
boxes
=
box_list
.
BoxList
(
tf
.
constant
(
box_corners
))
groundtruth_labels
=
tf
.
constant
([[[
0
,
1
],
[
1
,
0
]]],
tf
.
float32
)
groundtruth_labels
=
tf
.
constant
([[[
0
,
1
],
[
1
,
0
]]],
tf
.
float32
)
with
self
.
assertRaises
(
ValueError
):
with
self
.
assertRaises
(
ValueError
):
...
@@ -388,61 +481,66 @@ class TargetAssignerTest(tf.test.TestCase):
...
@@ -388,61 +481,66 @@ class TargetAssignerTest(tf.test.TestCase):
num_valid_rows
=
3
)
num_valid_rows
=
3
)
class
BatchTargetAssignerTest
(
t
f
.
t
est
.
TestCase
):
class
BatchTargetAssignerTest
(
test
_case
.
TestCase
):
def
_get_agnostic_target_assigner
(
self
):
def
_get_agnostic_target_assigner
(
self
):
similarity_calc
=
region_similarity_calculator
.
NegSqDistSimilarity
()
similarity_calc
=
region_similarity_calculator
.
IouSimilarity
()
matcher
=
bipartite_matcher
.
GreedyBipartiteMatcher
()
matcher
=
argmax_matcher
.
ArgMaxMatcher
(
matched_threshold
=
0.5
,
unmatched_threshold
=
0.5
)
box_coder
=
mean_stddev_box_coder
.
MeanStddevBoxCoder
()
box_coder
=
mean_stddev_box_coder
.
MeanStddevBoxCoder
()
return
targetassigner
.
TargetAssigner
(
return
targetassigner
.
TargetAssigner
(
similarity_calc
,
matcher
,
box_coder
,
similarity_calc
,
matcher
,
box_coder
,
positive_class_weight
=
1.0
,
negative_class_weight
=
1.0
,
unmatched_cls_target
=
None
)
unmatched_cls_target
=
None
)
def
_get_multi_class_target_assigner
(
self
,
num_classes
):
def
_get_multi_class_target_assigner
(
self
,
num_classes
):
similarity_calc
=
region_similarity_calculator
.
NegSqDistSimilarity
()
similarity_calc
=
region_similarity_calculator
.
IouSimilarity
()
matcher
=
bipartite_matcher
.
GreedyBipartiteMatcher
()
matcher
=
argmax_matcher
.
ArgMaxMatcher
(
matched_threshold
=
0.5
,
unmatched_threshold
=
0.5
)
box_coder
=
mean_stddev_box_coder
.
MeanStddevBoxCoder
()
box_coder
=
mean_stddev_box_coder
.
MeanStddevBoxCoder
()
unmatched_cls_target
=
tf
.
constant
([
1
]
+
num_classes
*
[
0
],
tf
.
float32
)
unmatched_cls_target
=
tf
.
constant
([
1
]
+
num_classes
*
[
0
],
tf
.
float32
)
return
targetassigner
.
TargetAssigner
(
return
targetassigner
.
TargetAssigner
(
similarity_calc
,
matcher
,
box_coder
,
similarity_calc
,
matcher
,
box_coder
,
positive_class_weight
=
1.0
,
negative_class_weight
=
1.0
,
unmatched_cls_target
=
unmatched_cls_target
)
unmatched_cls_target
=
unmatched_cls_target
)
def
_get_multi_dimensional_target_assigner
(
self
,
target_dimensions
):
def
_get_multi_dimensional_target_assigner
(
self
,
target_dimensions
):
similarity_calc
=
region_similarity_calculator
.
NegSqDistSimilarity
()
similarity_calc
=
region_similarity_calculator
.
IouSimilarity
()
matcher
=
bipartite_matcher
.
GreedyBipartiteMatcher
()
matcher
=
argmax_matcher
.
ArgMaxMatcher
(
matched_threshold
=
0.5
,
unmatched_threshold
=
0.5
)
box_coder
=
mean_stddev_box_coder
.
MeanStddevBoxCoder
()
box_coder
=
mean_stddev_box_coder
.
MeanStddevBoxCoder
()
unmatched_cls_target
=
tf
.
constant
(
np
.
zeros
(
target_dimensions
),
unmatched_cls_target
=
tf
.
constant
(
np
.
zeros
(
target_dimensions
),
tf
.
float32
)
tf
.
float32
)
return
targetassigner
.
TargetAssigner
(
return
targetassigner
.
TargetAssigner
(
similarity_calc
,
matcher
,
box_coder
,
similarity_calc
,
matcher
,
box_coder
,
positive_class_weight
=
1.0
,
negative_class_weight
=
1.0
,
unmatched_cls_target
=
unmatched_cls_target
)
unmatched_cls_target
=
unmatched_cls_target
)
def
test_batch_assign_targets
(
self
):
def
test_batch_assign_targets
(
self
):
box_list1
=
box_list
.
BoxList
(
tf
.
constant
([[
0.
,
0.
,
0.2
,
0.2
]]))
def
graph_fn
(
anchor_means
,
anchor_stddevs
,
groundtruth_boxlist1
,
box_list2
=
box_list
.
BoxList
(
tf
.
constant
(
groundtruth_boxlist2
):
[[
0
,
0.25123152
,
1
,
1
],
box_list1
=
box_list
.
BoxList
(
groundtruth_boxlist1
)
[
0.015789
,
0.0985
,
0.55789
,
0.3842
]]
box_list2
=
box_list
.
BoxList
(
groundtruth_boxlist2
)
))
gt_box_batch
=
[
box_list1
,
box_list2
]
gt_class_targets
=
[
None
,
None
]
gt_box_batch
=
[
box_list1
,
box_list2
]
anchors_boxlist
=
box_list
.
BoxList
(
anchor_means
)
gt_class_targets
=
[
None
,
None
]
anchors_boxlist
.
add_field
(
'stddev'
,
anchor_stddevs
)
agnostic_target_assigner
=
self
.
_get_agnostic_target_assigner
()
prior_means
=
tf
.
constant
([[
0
,
0
,
.
25
,
.
25
],
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
[
0
,
.
25
,
1
,
1
],
_
)
=
targetassigner
.
batch_assign_targets
(
[
0
,
.
1
,
.
5
,
.
5
],
agnostic_target_assigner
,
anchors_boxlist
,
gt_box_batch
,
[.
75
,
.
75
,
1
,
1
]])
gt_class_targets
)
prior_stddevs
=
tf
.
constant
([[.
1
,
.
1
,
.
1
,
.
1
],
return
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
)
[.
1
,
.
1
,
.
1
,
.
1
],
[.
1
,
.
1
,
.
1
,
.
1
],
groundtruth_boxlist1
=
np
.
array
([[
0.
,
0.
,
0.2
,
0.2
]],
dtype
=
np
.
float32
)
[.
1
,
.
1
,
.
1
,
.
1
]])
groundtruth_boxlist2
=
np
.
array
([[
0
,
0.25123152
,
1
,
1
],
priors
=
box_list
.
BoxList
(
prior_means
)
[
0.015789
,
0.0985
,
0.55789
,
0.3842
]],
priors
.
add_field
(
'stddev'
,
prior_stddevs
)
dtype
=
np
.
float32
)
anchor_means
=
np
.
array
([[
0
,
0
,
.
25
,
.
25
],
[
0
,
.
25
,
1
,
1
],
[
0
,
.
1
,
.
5
,
.
5
],
[.
75
,
.
75
,
1
,
1
]],
dtype
=
np
.
float32
)
anchor_stddevs
=
np
.
array
([[.
1
,
.
1
,
.
1
,
.
1
],
[.
1
,
.
1
,
.
1
,
.
1
],
[.
1
,
.
1
,
.
1
,
.
1
],
[.
1
,
.
1
,
.
1
,
.
1
]],
dtype
=
np
.
float32
)
exp_reg_targets
=
[[[
0
,
0
,
-
0.5
,
-
0.5
],
exp_reg_targets
=
[[[
0
,
0
,
-
0.5
,
-
0.5
],
[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
],
...
@@ -458,58 +556,128 @@ class BatchTargetAssignerTest(tf.test.TestCase):
...
@@ -458,58 +556,128 @@ class BatchTargetAssignerTest(tf.test.TestCase):
[[
0
],
[
1
],
[
1
],
[
0
]]]
[[
0
],
[
1
],
[
1
],
[
0
]]]
exp_reg_weights
=
[[
1
,
0
,
0
,
0
],
exp_reg_weights
=
[[
1
,
0
,
0
,
0
],
[
0
,
1
,
1
,
0
]]
[
0
,
1
,
1
,
0
]]
exp_match_0
=
[
0
]
exp_match_1
=
[
1
,
2
]
agnostic_target_assigner
=
self
.
_get_agnostic_target_assigner
()
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
match_list
)
=
targetassigner
.
batch_assign_targets
(
agnostic_target_assigner
,
priors
,
gt_box_batch
,
gt_class_targets
)
self
.
assertTrue
(
isinstance
(
match_list
,
list
)
and
len
(
match_list
)
==
2
)
with
self
.
test_session
()
as
sess
:
(
cls_targets_out
,
cls_weights_out
,
reg_targets_out
,
reg_weights_out
,
match_out_0
,
match_out_1
)
=
sess
.
run
([
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
]
+
[
match
.
matched_column_indices
()
for
match
in
match_list
])
self
.
assertAllClose
(
cls_targets_out
,
exp_cls_targets
)
self
.
assertAllClose
(
cls_weights_out
,
exp_cls_weights
)
self
.
assertAllClose
(
reg_targets_out
,
exp_reg_targets
)
self
.
assertAllClose
(
reg_weights_out
,
exp_reg_weights
)
self
.
assertAllClose
(
match_out_0
,
exp_match_0
)
self
.
assertAllClose
(
match_out_1
,
exp_match_1
)
def
test_batch_assign_multiclass_targets
(
self
):
box_list1
=
box_list
.
BoxList
(
tf
.
constant
([[
0.
,
0.
,
0.2
,
0.2
]]))
box_list2
=
box_list
.
BoxList
(
tf
.
constant
(
[[
0
,
0.25123152
,
1
,
1
],
[
0.015789
,
0.0985
,
0.55789
,
0.3842
]]
))
gt_box_batch
=
[
box_list1
,
box_list2
]
(
cls_targets_out
,
cls_weights_out
,
reg_targets_out
,
reg_weights_out
)
=
self
.
execute
(
graph_fn
,
[
anchor_means
,
anchor_stddevs
,
groundtruth_boxlist1
,
groundtruth_boxlist2
])
self
.
assertAllClose
(
cls_targets_out
,
exp_cls_targets
)
self
.
assertAllClose
(
cls_weights_out
,
exp_cls_weights
)
self
.
assertAllClose
(
reg_targets_out
,
exp_reg_targets
)
self
.
assertAllClose
(
reg_weights_out
,
exp_reg_weights
)
class_targets1
=
tf
.
constant
([[
0
,
1
,
0
,
0
]],
tf
.
float32
)
def
test_batch_assign_multiclass_targets
(
self
):
class_targets2
=
tf
.
constant
([[
0
,
0
,
0
,
1
],
def
graph_fn
(
anchor_means
,
anchor_stddevs
,
groundtruth_boxlist1
,
[
0
,
0
,
1
,
0
]],
tf
.
float32
)
groundtruth_boxlist2
,
class_targets1
,
class_targets2
):
box_list1
=
box_list
.
BoxList
(
groundtruth_boxlist1
)
box_list2
=
box_list
.
BoxList
(
groundtruth_boxlist2
)
gt_box_batch
=
[
box_list1
,
box_list2
]
gt_class_targets
=
[
class_targets1
,
class_targets2
]
anchors_boxlist
=
box_list
.
BoxList
(
anchor_means
)
anchors_boxlist
.
add_field
(
'stddev'
,
anchor_stddevs
)
multiclass_target_assigner
=
self
.
_get_multi_class_target_assigner
(
num_classes
=
3
)
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
_
)
=
targetassigner
.
batch_assign_targets
(
multiclass_target_assigner
,
anchors_boxlist
,
gt_box_batch
,
gt_class_targets
)
return
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
)
groundtruth_boxlist1
=
np
.
array
([[
0.
,
0.
,
0.2
,
0.2
]],
dtype
=
np
.
float32
)
groundtruth_boxlist2
=
np
.
array
([[
0
,
0.25123152
,
1
,
1
],
[
0.015789
,
0.0985
,
0.55789
,
0.3842
]],
dtype
=
np
.
float32
)
class_targets1
=
np
.
array
([[
0
,
1
,
0
,
0
]],
dtype
=
np
.
float32
)
class_targets2
=
np
.
array
([[
0
,
0
,
0
,
1
],
[
0
,
0
,
1
,
0
]],
dtype
=
np
.
float32
)
anchor_means
=
np
.
array
([[
0
,
0
,
.
25
,
.
25
],
[
0
,
.
25
,
1
,
1
],
[
0
,
.
1
,
.
5
,
.
5
],
[.
75
,
.
75
,
1
,
1
]],
dtype
=
np
.
float32
)
anchor_stddevs
=
np
.
array
([[.
1
,
.
1
,
.
1
,
.
1
],
[.
1
,
.
1
,
.
1
,
.
1
],
[.
1
,
.
1
,
.
1
,
.
1
],
[.
1
,
.
1
,
.
1
,
.
1
]],
dtype
=
np
.
float32
)
gt_class_targets
=
[
class_targets1
,
class_targets2
]
exp_reg_targets
=
[[[
0
,
0
,
-
0.5
,
-
0.5
],
[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
,],
[
0
,
0
,
0
,
0
,],],
[[
0
,
0
,
0
,
0
,],
[
0
,
0.01231521
,
0
,
0
],
[
0.15789001
,
-
0.01500003
,
0.57889998
,
-
1.15799987
],
[
0
,
0
,
0
,
0
]]]
exp_cls_weights
=
[[
1
,
1
,
1
,
1
],
[
1
,
1
,
1
,
1
]]
exp_cls_targets
=
[[[
0
,
1
,
0
,
0
],
[
1
,
0
,
0
,
0
],
[
1
,
0
,
0
,
0
],
[
1
,
0
,
0
,
0
]],
[[
1
,
0
,
0
,
0
],
[
0
,
0
,
0
,
1
],
[
0
,
0
,
1
,
0
],
[
1
,
0
,
0
,
0
]]]
exp_reg_weights
=
[[
1
,
0
,
0
,
0
],
[
0
,
1
,
1
,
0
]]
prior_means
=
tf
.
constant
([[
0
,
0
,
.
25
,
.
25
],
(
cls_targets_out
,
cls_weights_out
,
reg_targets_out
,
[
0
,
.
25
,
1
,
1
],
reg_weights_out
)
=
self
.
execute
(
graph_fn
,
[
anchor_means
,
anchor_stddevs
,
[
0
,
.
1
,
.
5
,
.
5
],
groundtruth_boxlist1
,
[.
75
,
.
75
,
1
,
1
]])
groundtruth_boxlist2
,
prior_stddevs
=
tf
.
constant
([[.
1
,
.
1
,
.
1
,
.
1
],
class_targets1
,
[.
1
,
.
1
,
.
1
,
.
1
],
class_targets2
])
[.
1
,
.
1
,
.
1
,
.
1
],
self
.
assertAllClose
(
cls_targets_out
,
exp_cls_targets
)
[.
1
,
.
1
,
.
1
,
.
1
]])
self
.
assertAllClose
(
cls_weights_out
,
exp_cls_weights
)
priors
=
box_list
.
BoxList
(
prior_means
)
self
.
assertAllClose
(
reg_targets_out
,
exp_reg_targets
)
priors
.
add_field
(
'stddev'
,
prior_stddevs
)
self
.
assertAllClose
(
reg_weights_out
,
exp_reg_weights
)
def
test_batch_assign_multiclass_targets_with_padded_groundtruth
(
self
):
def
graph_fn
(
anchor_means
,
anchor_stddevs
,
groundtruth_boxlist1
,
groundtruth_boxlist2
,
class_targets1
,
class_targets2
,
groundtruth_weights1
,
groundtruth_weights2
):
box_list1
=
box_list
.
BoxList
(
groundtruth_boxlist1
)
box_list2
=
box_list
.
BoxList
(
groundtruth_boxlist2
)
gt_box_batch
=
[
box_list1
,
box_list2
]
gt_class_targets
=
[
class_targets1
,
class_targets2
]
gt_weights
=
[
groundtruth_weights1
,
groundtruth_weights2
]
anchors_boxlist
=
box_list
.
BoxList
(
anchor_means
)
anchors_boxlist
.
add_field
(
'stddev'
,
anchor_stddevs
)
multiclass_target_assigner
=
self
.
_get_multi_class_target_assigner
(
num_classes
=
3
)
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
_
)
=
targetassigner
.
batch_assign_targets
(
multiclass_target_assigner
,
anchors_boxlist
,
gt_box_batch
,
gt_class_targets
,
gt_weights
)
return
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
)
groundtruth_boxlist1
=
np
.
array
([[
0.
,
0.
,
0.2
,
0.2
],
[
0.
,
0.
,
0.
,
0.
]],
dtype
=
np
.
float32
)
groundtruth_weights1
=
np
.
array
([
1
,
0
],
dtype
=
np
.
float32
)
groundtruth_boxlist2
=
np
.
array
([[
0
,
0.25123152
,
1
,
1
],
[
0.015789
,
0.0985
,
0.55789
,
0.3842
],
[
0
,
0
,
0
,
0
]],
dtype
=
np
.
float32
)
groundtruth_weights2
=
np
.
array
([
1
,
1
,
0
],
dtype
=
np
.
float32
)
class_targets1
=
np
.
array
([[
0
,
1
,
0
,
0
],
[
0
,
0
,
0
,
0
]],
dtype
=
np
.
float32
)
class_targets2
=
np
.
array
([[
0
,
0
,
0
,
1
],
[
0
,
0
,
1
,
0
],
[
0
,
0
,
0
,
0
]],
dtype
=
np
.
float32
)
anchor_means
=
np
.
array
([[
0
,
0
,
.
25
,
.
25
],
[
0
,
.
25
,
1
,
1
],
[
0
,
.
1
,
.
5
,
.
5
],
[.
75
,
.
75
,
1
,
1
]],
dtype
=
np
.
float32
)
anchor_stddevs
=
np
.
array
([[.
1
,
.
1
,
.
1
,
.
1
],
[.
1
,
.
1
,
.
1
,
.
1
],
[.
1
,
.
1
,
.
1
,
.
1
],
[.
1
,
.
1
,
.
1
,
.
1
]],
dtype
=
np
.
float32
)
exp_reg_targets
=
[[[
0
,
0
,
-
0.5
,
-
0.5
],
exp_reg_targets
=
[[[
0
,
0
,
-
0.5
,
-
0.5
],
[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
,
],
[
0
,
0
,
0
,
0
]
],
[
0
,
0
,
0
,
0
,],
],
[[
0
,
0
,
0
,
0
],
[[
0
,
0
,
0
,
0
,
],
[
0
,
0.01231521
,
0
,
0
],
[
0
,
0.01231521
,
0
,
0
],
[
0.15789001
,
-
0.01500003
,
0.57889998
,
-
1.15799987
],
[
0.15789001
,
-
0.01500003
,
0.57889998
,
-
1.15799987
],
[
0
,
0
,
0
,
0
]]]
[
0
,
0
,
0
,
0
]]]
...
@@ -525,68 +693,70 @@ class BatchTargetAssignerTest(tf.test.TestCase):
...
@@ -525,68 +693,70 @@ class BatchTargetAssignerTest(tf.test.TestCase):
[
1
,
0
,
0
,
0
]]]
[
1
,
0
,
0
,
0
]]]
exp_reg_weights
=
[[
1
,
0
,
0
,
0
],
exp_reg_weights
=
[[
1
,
0
,
0
,
0
],
[
0
,
1
,
1
,
0
]]
[
0
,
1
,
1
,
0
]]
exp_match_0
=
[
0
]
exp_match_1
=
[
1
,
2
]
(
cls_targets_out
,
cls_weights_out
,
reg_targets_out
,
reg_weights_out
)
=
self
.
execute
(
graph_fn
,
[
anchor_means
,
anchor_stddevs
,
multiclass_target_assigner
=
self
.
_get_multi_class_target_assigner
(
groundtruth_boxlist1
,
num_classes
=
3
)
groundtruth_boxlist2
,
class_targets1
,
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
class_targets2
,
match_list
)
=
targetassigner
.
batch_assign_targets
(
groundtruth_weights1
,
multiclass_target_assigner
,
priors
,
gt_box_batch
,
gt_class_targets
)
groundtruth_weights2
])
self
.
assertTrue
(
isinstance
(
match_list
,
list
)
and
len
(
match_list
)
==
2
)
self
.
assertAllClose
(
cls_targets_out
,
exp_cls_targets
)
with
self
.
test_session
()
as
sess
:
self
.
assertAllClose
(
cls_weights_out
,
exp_cls_weights
)
(
cls_targets_out
,
cls_weights_out
,
reg_targets_out
,
reg_weights_out
,
self
.
assertAllClose
(
reg_targets_out
,
exp_reg_targets
)
match_out_0
,
match_out_1
)
=
sess
.
run
([
self
.
assertAllClose
(
reg_weights_out
,
exp_reg_weights
)
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
]
+
[
match
.
matched_column_indices
()
for
match
in
match_list
])
self
.
assertAllClose
(
cls_targets_out
,
exp_cls_targets
)
self
.
assertAllClose
(
cls_weights_out
,
exp_cls_weights
)
self
.
assertAllClose
(
reg_targets_out
,
exp_reg_targets
)
self
.
assertAllClose
(
reg_weights_out
,
exp_reg_weights
)
self
.
assertAllClose
(
match_out_0
,
exp_match_0
)
self
.
assertAllClose
(
match_out_1
,
exp_match_1
)
def
test_batch_assign_multidimensional_targets
(
self
):
def
test_batch_assign_multidimensional_targets
(
self
):
box_list1
=
box_list
.
BoxList
(
tf
.
constant
([[
0.
,
0.
,
0.2
,
0.2
]]))
def
graph_fn
(
anchor_means
,
anchor_stddevs
,
groundtruth_boxlist1
,
groundtruth_boxlist2
,
class_targets1
,
class_targets2
):
box_list2
=
box_list
.
BoxList
(
tf
.
constant
(
box_list1
=
box_list
.
BoxList
(
groundtruth_boxlist1
)
[[
0
,
0.25123152
,
1
,
1
],
box_list2
=
box_list
.
BoxList
(
groundtruth_boxlist2
)
[
0.015789
,
0.0985
,
0.55789
,
0.3842
]]
gt_box_batch
=
[
box_list1
,
box_list2
]
))
gt_class_targets
=
[
class_targets1
,
class_targets2
]
anchors_boxlist
=
box_list
.
BoxList
(
anchor_means
)
gt_box_batch
=
[
box_list1
,
box_list2
]
anchors_boxlist
.
add_field
(
'stddev'
,
anchor_stddevs
)
class_targets1
=
tf
.
constant
([[[
0
,
1
,
1
],
multiclass_target_assigner
=
self
.
_get_multi_dimensional_target_assigner
(
[
1
,
1
,
0
]]],
tf
.
float32
)
target_dimensions
=
(
2
,
3
))
class_targets2
=
tf
.
constant
([[[
0
,
1
,
1
],
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
[
1
,
1
,
0
]],
_
)
=
targetassigner
.
batch_assign_targets
(
[[
0
,
0
,
1
],
multiclass_target_assigner
,
anchors_boxlist
,
gt_box_batch
,
[
0
,
0
,
1
]]],
tf
.
float32
)
gt_class_targets
)
return
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
)
gt_class_targets
=
[
class_targets1
,
class_targets2
]
groundtruth_boxlist1
=
np
.
array
([[
0.
,
0.
,
0.2
,
0.2
]],
dtype
=
np
.
float32
)
prior_means
=
tf
.
constant
([[
0
,
0
,
.
25
,
.
25
],
groundtruth_boxlist2
=
np
.
array
([[
0
,
0.25123152
,
1
,
1
],
[
0
,
.
25
,
1
,
1
],
[
0.015789
,
0.0985
,
0.55789
,
0.3842
]],
[
0
,
.
1
,
.
5
,
.
5
],
dtype
=
np
.
float32
)
[.
75
,
.
75
,
1
,
1
]])
class_targets1
=
np
.
array
([[
0
,
1
,
0
,
0
]],
dtype
=
np
.
float32
)
prior_stddevs
=
tf
.
constant
([[.
1
,
.
1
,
.
1
,
.
1
],
class_targets2
=
np
.
array
([[
0
,
0
,
0
,
1
],
[.
1
,
.
1
,
.
1
,
.
1
],
[
0
,
0
,
1
,
0
]],
dtype
=
np
.
float32
)
[.
1
,
.
1
,
.
1
,
.
1
],
class_targets1
=
np
.
array
([[[
0
,
1
,
1
],
[.
1
,
.
1
,
.
1
,
.
1
]])
[
1
,
1
,
0
]]],
dtype
=
np
.
float32
)
priors
=
box_list
.
BoxList
(
prior_means
)
class_targets2
=
np
.
array
([[[
0
,
1
,
1
],
priors
.
add_field
(
'stddev'
,
prior_stddevs
)
[
1
,
1
,
0
]],
[[
0
,
0
,
1
],
[
0
,
0
,
1
]]],
dtype
=
np
.
float32
)
anchor_means
=
np
.
array
([[
0
,
0
,
.
25
,
.
25
],
[
0
,
.
25
,
1
,
1
],
[
0
,
.
1
,
.
5
,
.
5
],
[.
75
,
.
75
,
1
,
1
]],
dtype
=
np
.
float32
)
anchor_stddevs
=
np
.
array
([[.
1
,
.
1
,
.
1
,
.
1
],
[.
1
,
.
1
,
.
1
,
.
1
],
[.
1
,
.
1
,
.
1
,
.
1
],
[.
1
,
.
1
,
.
1
,
.
1
]],
dtype
=
np
.
float32
)
exp_reg_targets
=
[[[
0
,
0
,
-
0.5
,
-
0.5
],
exp_reg_targets
=
[[[
0
,
0
,
-
0.5
,
-
0.5
],
[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
,
],
[
0
,
0
,
0
,
0
]
],
[
0
,
0
,
0
,
0
,],
],
[[
0
,
0
,
0
,
0
],
[[
0
,
0
,
0
,
0
,
],
[
0
,
0.01231521
,
0
,
0
],
[
0
,
0.01231521
,
0
,
0
],
[
0.15789001
,
-
0.01500003
,
0.57889998
,
-
1.15799987
],
[
0.15789001
,
-
0.01500003
,
0.57889998
,
-
1.15799987
],
[
0
,
0
,
0
,
0
]]]
[
0
,
0
,
0
,
0
]]]
exp_cls_weights
=
[[
1
,
1
,
1
,
1
],
exp_cls_weights
=
[[
1
,
1
,
1
,
1
],
[
1
,
1
,
1
,
1
]]
[
1
,
1
,
1
,
1
]]
exp_cls_targets
=
[[[[
0.
,
1.
,
1.
],
exp_cls_targets
=
[[[[
0.
,
1.
,
1.
],
[
1.
,
1.
,
0.
]],
[
1.
,
1.
,
0.
]],
[[
0.
,
0.
,
0.
],
[[
0.
,
0.
,
0.
],
...
@@ -605,72 +775,60 @@ class BatchTargetAssignerTest(tf.test.TestCase):
...
@@ -605,72 +775,60 @@ class BatchTargetAssignerTest(tf.test.TestCase):
[
0.
,
0.
,
0.
]]]]
[
0.
,
0.
,
0.
]]]]
exp_reg_weights
=
[[
1
,
0
,
0
,
0
],
exp_reg_weights
=
[[
1
,
0
,
0
,
0
],
[
0
,
1
,
1
,
0
]]
[
0
,
1
,
1
,
0
]]
exp_match_0
=
[
0
]
exp_match_1
=
[
1
,
2
]
(
cls_targets_out
,
cls_weights_out
,
reg_targets_out
,
reg_weights_out
)
=
self
.
execute
(
graph_fn
,
[
anchor_means
,
anchor_stddevs
,
multiclass_target_assigner
=
self
.
_get_multi_dimensional_target_assigner
(
groundtruth_boxlist1
,
target_dimensions
=
(
2
,
3
))
groundtruth_boxlist2
,
class_targets1
,
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
class_targets2
])
match_list
)
=
targetassigner
.
batch_assign_targets
(
self
.
assertAllClose
(
cls_targets_out
,
exp_cls_targets
)
multiclass_target_assigner
,
priors
,
gt_box_batch
,
gt_class_targets
)
self
.
assertAllClose
(
cls_weights_out
,
exp_cls_weights
)
self
.
assertTrue
(
isinstance
(
match_list
,
list
)
and
len
(
match_list
)
==
2
)
self
.
assertAllClose
(
reg_targets_out
,
exp_reg_targets
)
with
self
.
test_session
()
as
sess
:
self
.
assertAllClose
(
reg_weights_out
,
exp_reg_weights
)
(
cls_targets_out
,
cls_weights_out
,
reg_targets_out
,
reg_weights_out
,
match_out_0
,
match_out_1
)
=
sess
.
run
([
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
]
+
[
match
.
matched_column_indices
()
for
match
in
match_list
])
self
.
assertAllClose
(
cls_targets_out
,
exp_cls_targets
)
self
.
assertAllClose
(
cls_weights_out
,
exp_cls_weights
)
self
.
assertAllClose
(
reg_targets_out
,
exp_reg_targets
)
self
.
assertAllClose
(
reg_weights_out
,
exp_reg_weights
)
self
.
assertAllClose
(
match_out_0
,
exp_match_0
)
self
.
assertAllClose
(
match_out_1
,
exp_match_1
)
def
test_batch_assign_empty_groundtruth
(
self
):
def
test_batch_assign_empty_groundtruth
(
self
):
box_coords_expanded
=
tf
.
zeros
((
1
,
4
),
tf
.
float32
)
box_coords
=
tf
.
slice
(
box_coords_expanded
,
[
0
,
0
],
[
0
,
4
])
box_list1
=
box_list
.
BoxList
(
box_coords
)
gt_box_batch
=
[
box_list1
]
prior_means
=
tf
.
constant
([[
0
,
0
,
.
25
,
.
25
],
[
0
,
.
25
,
1
,
1
]])
prior_stddevs
=
tf
.
constant
([[.
1
,
.
1
,
.
1
,
.
1
],
[.
1
,
.
1
,
.
1
,
.
1
]])
priors
=
box_list
.
BoxList
(
prior_means
)
priors
.
add_field
(
'stddev'
,
prior_stddevs
)
def
graph_fn
(
anchor_means
,
anchor_stddevs
,
groundtruth_box_corners
,
gt_class_targets
):
groundtruth_boxlist
=
box_list
.
BoxList
(
groundtruth_box_corners
)
gt_box_batch
=
[
groundtruth_boxlist
]
gt_class_targets_batch
=
[
gt_class_targets
]
anchors_boxlist
=
box_list
.
BoxList
(
anchor_means
)
anchors_boxlist
.
add_field
(
'stddev'
,
anchor_stddevs
)
multiclass_target_assigner
=
self
.
_get_multi_class_target_assigner
(
num_classes
=
3
)
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
_
)
=
targetassigner
.
batch_assign_targets
(
multiclass_target_assigner
,
anchors_boxlist
,
gt_box_batch
,
gt_class_targets_batch
)
return
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
)
groundtruth_box_corners
=
np
.
zeros
((
0
,
4
),
dtype
=
np
.
float32
)
anchor_means
=
np
.
array
([[
0
,
0
,
.
25
,
.
25
],
[
0
,
.
25
,
1
,
1
]],
dtype
=
np
.
float32
)
anchor_stddevs
=
np
.
array
([[.
1
,
.
1
,
.
1
,
.
1
],
[.
1
,
.
1
,
.
1
,
.
1
]],
dtype
=
np
.
float32
)
exp_reg_targets
=
[[[
0
,
0
,
0
,
0
],
exp_reg_targets
=
[[[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
]]]
[
0
,
0
,
0
,
0
]]]
exp_cls_weights
=
[[
1
,
1
]]
exp_cls_weights
=
[[
1
,
1
]]
exp_cls_targets
=
[[[
1
,
0
,
0
,
0
],
exp_cls_targets
=
[[[
1
,
0
,
0
,
0
],
[
1
,
0
,
0
,
0
]]]
[
1
,
0
,
0
,
0
]]]
exp_reg_weights
=
[[
0
,
0
]]
exp_reg_weights
=
[[
0
,
0
]]
exp_match_0
=
[]
num_classes
=
3
num_classes
=
3
pad
=
1
pad
=
1
gt_class_targets
=
tf
.
zeros
((
0
,
num_classes
+
pad
))
gt_class_targets
=
np
.
zeros
((
0
,
num_classes
+
pad
),
dtype
=
np
.
float32
)
gt_class_targets_batch
=
[
gt_class_targets
]
(
cls_targets_out
,
cls_weights_out
,
reg_targets_out
,
multiclass_target_assigner
=
self
.
_get_multi_class_target_assigner
(
reg_weights_out
)
=
self
.
execute
(
num_classes
=
3
)
graph_fn
,
[
anchor_means
,
anchor_stddevs
,
groundtruth_box_corners
,
gt_class_targets
])
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
self
.
assertAllClose
(
cls_targets_out
,
exp_cls_targets
)
match_list
)
=
targetassigner
.
batch_assign_targets
(
self
.
assertAllClose
(
cls_weights_out
,
exp_cls_weights
)
multiclass_target_assigner
,
priors
,
self
.
assertAllClose
(
reg_targets_out
,
exp_reg_targets
)
gt_box_batch
,
gt_class_targets_batch
)
self
.
assertAllClose
(
reg_weights_out
,
exp_reg_weights
)
self
.
assertTrue
(
isinstance
(
match_list
,
list
)
and
len
(
match_list
)
==
1
)
with
self
.
test_session
()
as
sess
:
(
cls_targets_out
,
cls_weights_out
,
reg_targets_out
,
reg_weights_out
,
match_out_0
)
=
sess
.
run
([
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
]
+
[
match
.
matched_column_indices
()
for
match
in
match_list
])
self
.
assertAllClose
(
cls_targets_out
,
exp_cls_targets
)
self
.
assertAllClose
(
cls_weights_out
,
exp_cls_weights
)
self
.
assertAllClose
(
reg_targets_out
,
exp_reg_targets
)
self
.
assertAllClose
(
reg_weights_out
,
exp_reg_weights
)
self
.
assertAllClose
(
match_out_0
,
exp_match_0
)
class
CreateTargetAssignerTest
(
tf
.
test
.
TestCase
):
class
CreateTargetAssignerTest
(
tf
.
test
.
TestCase
):
...
...
research/object_detection/data/BUILD
0 → 100644
View file @
fd7b6887
package
(
default_visibility
=
[
"//visibility:public"
],
)
licenses
([
"notice"
])
exports_files
([
"pet_label_map.pbtxt"
,
])
research/object_detection/data_decoders/BUILD
View file @
fd7b6887
...
@@ -12,9 +12,10 @@ py_library(
...
@@ -12,9 +12,10 @@ py_library(
srcs
=
[
"tf_example_decoder.py"
],
srcs
=
[
"tf_example_decoder.py"
],
deps
=
[
deps
=
[
"//tensorflow"
,
"//tensorflow"
,
"//tensorflow_models/object_detection/core:data_decoder"
,
"//tensorflow/models/research/object_detection/core:data_decoder"
,
"//tensorflow_models/object_detection/core:standard_fields"
,
"//tensorflow/models/research/object_detection/core:standard_fields"
,
"//tensorflow_models/object_detection/utils:label_map_util"
,
"//tensorflow/models/research/object_detection/protos:input_reader_py_pb2"
,
"//tensorflow/models/research/object_detection/utils:label_map_util"
,
],
],
)
)
...
@@ -24,6 +25,7 @@ py_test(
...
@@ -24,6 +25,7 @@ py_test(
deps
=
[
deps
=
[
":tf_example_decoder"
,
":tf_example_decoder"
,
"//tensorflow"
,
"//tensorflow"
,
"//tensorflow_models/object_detection/core:standard_fields"
,
"//tensorflow/models/research/object_detection/core:standard_fields"
,
"//tensorflow/models/research/object_detection/protos:input_reader_py_pb2"
,
],
],
)
)
research/object_detection/data_decoders/tf_example_decoder.py
View file @
fd7b6887
...
@@ -22,6 +22,7 @@ import tensorflow as tf
...
@@ -22,6 +22,7 @@ import tensorflow as tf
from
object_detection.core
import
data_decoder
from
object_detection.core
import
data_decoder
from
object_detection.core
import
standard_fields
as
fields
from
object_detection.core
import
standard_fields
as
fields
from
object_detection.protos
import
input_reader_pb2
from
object_detection.utils
import
label_map_util
from
object_detection.utils
import
label_map_util
slim_example_decoder
=
tf
.
contrib
.
slim
.
tfexample_decoder
slim_example_decoder
=
tf
.
contrib
.
slim
.
tfexample_decoder
...
@@ -32,12 +33,15 @@ class TfExampleDecoder(data_decoder.DataDecoder):
...
@@ -32,12 +33,15 @@ class TfExampleDecoder(data_decoder.DataDecoder):
def
__init__
(
self
,
def
__init__
(
self
,
load_instance_masks
=
False
,
load_instance_masks
=
False
,
instance_mask_type
=
input_reader_pb2
.
NUMERICAL_MASKS
,
label_map_proto_file
=
None
,
label_map_proto_file
=
None
,
use_display_name
=
False
):
use_display_name
=
False
):
"""Constructor sets keys_to_features and items_to_handlers.
"""Constructor sets keys_to_features and items_to_handlers.
Args:
Args:
load_instance_masks: whether or not to load and handle instance masks.
load_instance_masks: whether or not to load and handle instance masks.
instance_mask_type: type of instance masks. Options are provided in
input_reader.proto. This is only used if `load_instance_masks` is True.
label_map_proto_file: a file path to a
label_map_proto_file: a file path to a
object_detection.protos.StringIntLabelMap proto. If provided, then the
object_detection.protos.StringIntLabelMap proto. If provided, then the
mapped IDs of 'image/object/class/text' will take precedence over the
mapped IDs of 'image/object/class/text' will take precedence over the
...
@@ -46,6 +50,11 @@ class TfExampleDecoder(data_decoder.DataDecoder):
...
@@ -46,6 +50,11 @@ class TfExampleDecoder(data_decoder.DataDecoder):
use_display_name: whether or not to use the `display_name` for label
use_display_name: whether or not to use the `display_name` for label
mapping (instead of `name`). Only used if label_map_proto_file is
mapping (instead of `name`). Only used if label_map_proto_file is
provided.
provided.
Raises:
ValueError: If `instance_mask_type` option is not one of
input_reader_pb2.DEFAULT, input_reader_pb2.NUMERICAL, or
input_reader_pb2.PNG_MASKS.
"""
"""
self
.
keys_to_features
=
{
self
.
keys_to_features
=
{
'image/encoded'
:
'image/encoded'
:
...
@@ -83,6 +92,8 @@ class TfExampleDecoder(data_decoder.DataDecoder):
...
@@ -83,6 +92,8 @@ class TfExampleDecoder(data_decoder.DataDecoder):
tf
.
VarLenFeature
(
tf
.
int64
),
tf
.
VarLenFeature
(
tf
.
int64
),
'image/object/group_of'
:
'image/object/group_of'
:
tf
.
VarLenFeature
(
tf
.
int64
),
tf
.
VarLenFeature
(
tf
.
int64
),
'image/object/weight'
:
tf
.
VarLenFeature
(
tf
.
float32
),
}
}
self
.
items_to_handlers
=
{
self
.
items_to_handlers
=
{
fields
.
InputDataFields
.
image
:
slim_example_decoder
.
Image
(
fields
.
InputDataFields
.
image
:
slim_example_decoder
.
Image
(
...
@@ -104,19 +115,47 @@ class TfExampleDecoder(data_decoder.DataDecoder):
...
@@ -104,19 +115,47 @@ class TfExampleDecoder(data_decoder.DataDecoder):
fields
.
InputDataFields
.
groundtruth_difficult
:
(
fields
.
InputDataFields
.
groundtruth_difficult
:
(
slim_example_decoder
.
Tensor
(
'image/object/difficult'
)),
slim_example_decoder
.
Tensor
(
'image/object/difficult'
)),
fields
.
InputDataFields
.
groundtruth_group_of
:
(
fields
.
InputDataFields
.
groundtruth_group_of
:
(
slim_example_decoder
.
Tensor
(
'image/object/group_of'
))
slim_example_decoder
.
Tensor
(
'image/object/group_of'
)),
fields
.
InputDataFields
.
groundtruth_weights
:
(
slim_example_decoder
.
Tensor
(
'image/object/weight'
)),
}
}
if
load_instance_masks
:
if
load_instance_masks
:
self
.
keys_to_features
[
'image/object/mask'
]
=
tf
.
VarLenFeature
(
tf
.
float32
)
if
instance_mask_type
in
(
input_reader_pb2
.
DEFAULT
,
self
.
items_to_handlers
[
input_reader_pb2
.
NUMERICAL_MASKS
):
fields
.
InputDataFields
.
groundtruth_instance_masks
]
=
(
self
.
keys_to_features
[
'image/object/mask'
]
=
(
slim_example_decoder
.
ItemHandlerCallback
(
tf
.
VarLenFeature
(
tf
.
float32
))
[
'image/object/mask'
,
'image/height'
,
'image/width'
],
self
.
items_to_handlers
[
self
.
_reshape_instance_masks
))
fields
.
InputDataFields
.
groundtruth_instance_masks
]
=
(
# TODO: Add label_handler that decodes from 'image/object/class/text'
slim_example_decoder
.
ItemHandlerCallback
(
# primarily after the recent tf.contrib.slim changes make into a release
[
'image/object/mask'
,
'image/height'
,
'image/width'
],
# supported by cloudml.
self
.
_reshape_instance_masks
))
label_handler
=
slim_example_decoder
.
Tensor
(
'image/object/class/label'
)
elif
instance_mask_type
==
input_reader_pb2
.
PNG_MASKS
:
self
.
keys_to_features
[
'image/object/mask'
]
=
tf
.
VarLenFeature
(
tf
.
string
)
self
.
items_to_handlers
[
fields
.
InputDataFields
.
groundtruth_instance_masks
]
=
(
slim_example_decoder
.
ItemHandlerCallback
(
[
'image/object/mask'
,
'image/height'
,
'image/width'
],
self
.
_decode_png_instance_masks
))
else
:
raise
ValueError
(
'Did not recognize the `instance_mask_type` option.'
)
if
label_map_proto_file
:
label_map
=
label_map_util
.
get_label_map_dict
(
label_map_proto_file
,
use_display_name
)
# We use a default_value of -1, but we expect all labels to be contained
# in the label map.
table
=
tf
.
contrib
.
lookup
.
HashTable
(
initializer
=
tf
.
contrib
.
lookup
.
KeyValueTensorInitializer
(
keys
=
tf
.
constant
(
list
(
label_map
.
keys
())),
values
=
tf
.
constant
(
list
(
label_map
.
values
()),
dtype
=
tf
.
int64
)),
default_value
=-
1
)
# If the label_map_proto is provided, try to use it in conjunction with
# the class text, and fall back to a materialized ID.
label_handler
=
slim_example_decoder
.
BackupHandler
(
slim_example_decoder
.
LookupTensor
(
'image/object/class/text'
,
table
,
default_value
=
''
),
slim_example_decoder
.
Tensor
(
'image/object/class/label'
))
else
:
label_handler
=
slim_example_decoder
.
Tensor
(
'image/object/class/label'
)
self
.
items_to_handlers
[
self
.
items_to_handlers
[
fields
.
InputDataFields
.
groundtruth_classes
]
=
label_handler
fields
.
InputDataFields
.
groundtruth_classes
]
=
label_handler
...
@@ -140,16 +179,21 @@ class TfExampleDecoder(data_decoder.DataDecoder):
...
@@ -140,16 +179,21 @@ class TfExampleDecoder(data_decoder.DataDecoder):
[None, 4] containing box corners.
[None, 4] containing box corners.
fields.InputDataFields.groundtruth_classes - 1D int64 tensor of shape
fields.InputDataFields.groundtruth_classes - 1D int64 tensor of shape
[None] containing classes for the boxes.
[None] containing classes for the boxes.
fields.InputDataFields.groundtruth_weights - 1D float32 tensor of
shape [None] indicating the weights of groundtruth boxes.
fields.InputDataFields.num_groundtruth_boxes - int32 scalar indicating
the number of groundtruth_boxes.
fields.InputDataFields.groundtruth_area - 1D float32 tensor of shape
fields.InputDataFields.groundtruth_area - 1D float32 tensor of shape
[None] containing containing object mask area in pixel squared.
[None] containing containing object mask area in pixel squared.
fields.InputDataFields.groundtruth_is_crowd - 1D bool tensor of shape
fields.InputDataFields.groundtruth_is_crowd - 1D bool tensor of shape
[None] indicating if the boxes enclose a crowd.
[None] indicating if the boxes enclose a crowd.
Optional:
Optional:
fields.InputDataFields.groundtruth_difficult - 1D bool tensor of shape
fields.InputDataFields.groundtruth_difficult - 1D bool tensor of shape
[None] indicating if the boxes represent `difficult` instances.
[None] indicating if the boxes represent `difficult` instances.
fields.InputDataFields.groundtruth_group_of - 1D bool tensor of shape
fields.InputDataFields.groundtruth_group_of - 1D bool tensor of shape
[None] indicating if the boxes represent `group_of` instances.
[None] indicating if the boxes represent `group_of` instances.
fields.InputDataFields.groundtruth_instance_masks - 3D
int64
tensor of
fields.InputDataFields.groundtruth_instance_masks - 3D
float32
tensor of
shape [None, None, None] containing instance masks.
shape [None, None, None] containing instance masks.
"""
"""
serialized_example
=
tf
.
reshape
(
tf_example_string_tensor
,
shape
=
[])
serialized_example
=
tf
.
reshape
(
tf_example_string_tensor
,
shape
=
[])
...
@@ -161,13 +205,27 @@ class TfExampleDecoder(data_decoder.DataDecoder):
...
@@ -161,13 +205,27 @@ class TfExampleDecoder(data_decoder.DataDecoder):
is_crowd
=
fields
.
InputDataFields
.
groundtruth_is_crowd
is_crowd
=
fields
.
InputDataFields
.
groundtruth_is_crowd
tensor_dict
[
is_crowd
]
=
tf
.
cast
(
tensor_dict
[
is_crowd
],
dtype
=
tf
.
bool
)
tensor_dict
[
is_crowd
]
=
tf
.
cast
(
tensor_dict
[
is_crowd
],
dtype
=
tf
.
bool
)
tensor_dict
[
fields
.
InputDataFields
.
image
].
set_shape
([
None
,
None
,
3
])
tensor_dict
[
fields
.
InputDataFields
.
image
].
set_shape
([
None
,
None
,
3
])
tensor_dict
[
fields
.
InputDataFields
.
num_groundtruth_boxes
]
=
tf
.
shape
(
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_boxes
])[
0
]
def
default_groundtruth_weights
():
return
tf
.
ones
(
[
tf
.
shape
(
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_boxes
])[
0
]],
dtype
=
tf
.
float32
)
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_weights
]
=
tf
.
cond
(
tf
.
greater
(
tf
.
shape
(
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_weights
])[
0
],
0
),
lambda
:
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_weights
],
default_groundtruth_weights
)
return
tensor_dict
return
tensor_dict
def
_reshape_instance_masks
(
self
,
keys_to_tensors
):
def
_reshape_instance_masks
(
self
,
keys_to_tensors
):
"""Reshape instance segmentation masks.
"""Reshape instance segmentation masks.
The instance segmentation masks are reshaped to [num_instances, height,
The instance segmentation masks are reshaped to [num_instances, height,
width]
and cast to boolean type to save memory
.
width].
Args:
Args:
keys_to_tensors: a dictionary from keys to tensors.
keys_to_tensors: a dictionary from keys to tensors.
...
@@ -184,3 +242,34 @@ class TfExampleDecoder(data_decoder.DataDecoder):
...
@@ -184,3 +242,34 @@ class TfExampleDecoder(data_decoder.DataDecoder):
masks
=
tf
.
sparse_tensor_to_dense
(
masks
)
masks
=
tf
.
sparse_tensor_to_dense
(
masks
)
masks
=
tf
.
reshape
(
tf
.
to_float
(
tf
.
greater
(
masks
,
0.0
)),
to_shape
)
masks
=
tf
.
reshape
(
tf
.
to_float
(
tf
.
greater
(
masks
,
0.0
)),
to_shape
)
return
tf
.
cast
(
masks
,
tf
.
float32
)
return
tf
.
cast
(
masks
,
tf
.
float32
)
def
_decode_png_instance_masks
(
self
,
keys_to_tensors
):
"""Decode PNG instance segmentation masks and stack into dense tensor.
The instance segmentation masks are reshaped to [num_instances, height,
width].
Args:
keys_to_tensors: a dictionary from keys to tensors.
Returns:
A 3-D float tensor of shape [num_instances, height, width] with values
in {0, 1}.
"""
def
decode_png_mask
(
image_buffer
):
image
=
tf
.
squeeze
(
tf
.
image
.
decode_image
(
image_buffer
,
channels
=
1
),
axis
=
2
)
image
.
set_shape
([
None
,
None
])
image
=
tf
.
to_float
(
tf
.
greater
(
image
,
0
))
return
image
png_masks
=
keys_to_tensors
[
'image/object/mask'
]
height
=
keys_to_tensors
[
'image/height'
]
width
=
keys_to_tensors
[
'image/width'
]
if
isinstance
(
png_masks
,
tf
.
SparseTensor
):
png_masks
=
tf
.
sparse_tensor_to_dense
(
png_masks
,
default_value
=
''
)
return
tf
.
cond
(
tf
.
greater
(
tf
.
size
(
png_masks
),
0
),
lambda
:
tf
.
map_fn
(
decode_png_mask
,
png_masks
,
dtype
=
tf
.
float32
),
lambda
:
tf
.
zeros
(
tf
.
to_int32
(
tf
.
stack
([
0
,
height
,
width
]))))
research/object_detection/data_decoders/tf_example_decoder_test.py
View file @
fd7b6887
...
@@ -21,6 +21,7 @@ import tensorflow as tf
...
@@ -21,6 +21,7 @@ import tensorflow as tf
from
object_detection.core
import
standard_fields
as
fields
from
object_detection.core
import
standard_fields
as
fields
from
object_detection.data_decoders
import
tf_example_decoder
from
object_detection.data_decoders
import
tf_example_decoder
from
object_detection.protos
import
input_reader_pb2
class
TfExampleDecoderTest
(
tf
.
test
.
TestCase
):
class
TfExampleDecoderTest
(
tf
.
test
.
TestCase
):
...
@@ -57,7 +58,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
...
@@ -57,7 +58,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
return
tf
.
train
.
Feature
(
bytes_list
=
tf
.
train
.
BytesList
(
value
=
[
value
]))
return
tf
.
train
.
Feature
(
bytes_list
=
tf
.
train
.
BytesList
(
value
=
[
value
]))
def
testDecodeJpegImage
(
self
):
def
testDecodeJpegImage
(
self
):
image_tensor
=
np
.
random
.
randint
(
25
5
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
image_tensor
=
np
.
random
.
randint
(
25
6
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
encoded_jpeg
=
self
.
_EncodeImage
(
image_tensor
)
encoded_jpeg
=
self
.
_EncodeImage
(
image_tensor
)
decoded_jpeg
=
self
.
_DecodeImage
(
encoded_jpeg
)
decoded_jpeg
=
self
.
_DecodeImage
(
encoded_jpeg
)
example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
{
example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
{
...
@@ -78,7 +79,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
...
@@ -78,7 +79,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
self
.
assertEqual
(
'image_id'
,
tensor_dict
[
fields
.
InputDataFields
.
source_id
])
self
.
assertEqual
(
'image_id'
,
tensor_dict
[
fields
.
InputDataFields
.
source_id
])
def
testDecodeImageKeyAndFilename
(
self
):
def
testDecodeImageKeyAndFilename
(
self
):
image_tensor
=
np
.
random
.
randint
(
25
5
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
image_tensor
=
np
.
random
.
randint
(
25
6
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
encoded_jpeg
=
self
.
_EncodeImage
(
image_tensor
)
encoded_jpeg
=
self
.
_EncodeImage
(
image_tensor
)
example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
{
example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
{
'image/encoded'
:
self
.
_BytesFeature
(
encoded_jpeg
),
'image/encoded'
:
self
.
_BytesFeature
(
encoded_jpeg
),
...
@@ -96,7 +97,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
...
@@ -96,7 +97,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
self
.
assertEqual
(
'filename'
,
tensor_dict
[
fields
.
InputDataFields
.
filename
])
self
.
assertEqual
(
'filename'
,
tensor_dict
[
fields
.
InputDataFields
.
filename
])
def
testDecodePngImage
(
self
):
def
testDecodePngImage
(
self
):
image_tensor
=
np
.
random
.
randint
(
25
5
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
image_tensor
=
np
.
random
.
randint
(
25
6
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
encoded_png
=
self
.
_EncodeImage
(
image_tensor
,
encoding_type
=
'png'
)
encoded_png
=
self
.
_EncodeImage
(
image_tensor
,
encoding_type
=
'png'
)
decoded_png
=
self
.
_DecodeImage
(
encoded_png
,
encoding_type
=
'png'
)
decoded_png
=
self
.
_DecodeImage
(
encoded_png
,
encoding_type
=
'png'
)
example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
{
example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
{
...
@@ -116,8 +117,62 @@ class TfExampleDecoderTest(tf.test.TestCase):
...
@@ -116,8 +117,62 @@ class TfExampleDecoderTest(tf.test.TestCase):
self
.
assertAllEqual
(
decoded_png
,
tensor_dict
[
fields
.
InputDataFields
.
image
])
self
.
assertAllEqual
(
decoded_png
,
tensor_dict
[
fields
.
InputDataFields
.
image
])
self
.
assertEqual
(
'image_id'
,
tensor_dict
[
fields
.
InputDataFields
.
source_id
])
self
.
assertEqual
(
'image_id'
,
tensor_dict
[
fields
.
InputDataFields
.
source_id
])
def
testDecodePngInstanceMasks
(
self
):
image_tensor
=
np
.
random
.
randint
(
256
,
size
=
(
10
,
10
,
3
)).
astype
(
np
.
uint8
)
encoded_jpeg
=
self
.
_EncodeImage
(
image_tensor
)
mask_1
=
np
.
random
.
randint
(
0
,
2
,
size
=
(
10
,
10
,
1
)).
astype
(
np
.
uint8
)
mask_2
=
np
.
random
.
randint
(
0
,
2
,
size
=
(
10
,
10
,
1
)).
astype
(
np
.
uint8
)
encoded_png_1
=
self
.
_EncodeImage
(
mask_1
,
encoding_type
=
'png'
)
decoded_png_1
=
np
.
squeeze
(
mask_1
.
astype
(
np
.
float32
))
encoded_png_2
=
self
.
_EncodeImage
(
mask_2
,
encoding_type
=
'png'
)
decoded_png_2
=
np
.
squeeze
(
mask_2
.
astype
(
np
.
float32
))
encoded_masks
=
[
encoded_png_1
,
encoded_png_2
]
decoded_masks
=
np
.
stack
([
decoded_png_1
,
decoded_png_2
])
example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
{
'image/encoded'
:
self
.
_BytesFeature
(
encoded_jpeg
),
'image/format'
:
self
.
_BytesFeature
(
'jpeg'
),
'image/object/mask'
:
self
.
_BytesFeature
(
encoded_masks
)
})).
SerializeToString
()
example_decoder
=
tf_example_decoder
.
TfExampleDecoder
(
load_instance_masks
=
True
,
instance_mask_type
=
input_reader_pb2
.
PNG_MASKS
)
tensor_dict
=
example_decoder
.
decode
(
tf
.
convert_to_tensor
(
example
))
with
self
.
test_session
()
as
sess
:
tensor_dict
=
sess
.
run
(
tensor_dict
)
self
.
assertAllEqual
(
decoded_masks
,
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_instance_masks
])
def
testDecodeEmptyPngInstanceMasks
(
self
):
image_tensor
=
np
.
random
.
randint
(
256
,
size
=
(
10
,
10
,
3
)).
astype
(
np
.
uint8
)
encoded_jpeg
=
self
.
_EncodeImage
(
image_tensor
)
encoded_masks
=
[]
example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
{
'image/encoded'
:
self
.
_BytesFeature
(
encoded_jpeg
),
'image/format'
:
self
.
_BytesFeature
(
'jpeg'
),
'image/object/mask'
:
self
.
_BytesFeature
(
encoded_masks
),
'image/height'
:
self
.
_Int64Feature
([
10
]),
'image/width'
:
self
.
_Int64Feature
([
10
]),
})).
SerializeToString
()
example_decoder
=
tf_example_decoder
.
TfExampleDecoder
(
load_instance_masks
=
True
,
instance_mask_type
=
input_reader_pb2
.
PNG_MASKS
)
tensor_dict
=
example_decoder
.
decode
(
tf
.
convert_to_tensor
(
example
))
with
self
.
test_session
()
as
sess
:
tensor_dict
=
sess
.
run
(
tensor_dict
)
self
.
assertAllEqual
(
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_instance_masks
].
shape
,
[
0
,
10
,
10
])
def
testDecodeBoundingBox
(
self
):
def
testDecodeBoundingBox
(
self
):
image_tensor
=
np
.
random
.
randint
(
25
5
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
image_tensor
=
np
.
random
.
randint
(
25
6
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
encoded_jpeg
=
self
.
_EncodeImage
(
image_tensor
)
encoded_jpeg
=
self
.
_EncodeImage
(
image_tensor
)
bbox_ymins
=
[
0.0
,
4.0
]
bbox_ymins
=
[
0.0
,
4.0
]
bbox_xmins
=
[
1.0
,
5.0
]
bbox_xmins
=
[
1.0
,
5.0
]
...
@@ -144,9 +199,39 @@ class TfExampleDecoderTest(tf.test.TestCase):
...
@@ -144,9 +199,39 @@ class TfExampleDecoderTest(tf.test.TestCase):
bbox_ymaxs
,
bbox_xmaxs
]).
transpose
()
bbox_ymaxs
,
bbox_xmaxs
]).
transpose
()
self
.
assertAllEqual
(
expected_boxes
,
self
.
assertAllEqual
(
expected_boxes
,
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_boxes
])
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_boxes
])
self
.
assertAllEqual
(
2
,
tensor_dict
[
fields
.
InputDataFields
.
num_groundtruth_boxes
])
def
testDecodeDefaultGroundtruthWeights
(
self
):
image_tensor
=
np
.
random
.
randint
(
256
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
encoded_jpeg
=
self
.
_EncodeImage
(
image_tensor
)
bbox_ymins
=
[
0.0
,
4.0
]
bbox_xmins
=
[
1.0
,
5.0
]
bbox_ymaxs
=
[
2.0
,
6.0
]
bbox_xmaxs
=
[
3.0
,
7.0
]
example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
{
'image/encoded'
:
self
.
_BytesFeature
(
encoded_jpeg
),
'image/format'
:
self
.
_BytesFeature
(
'jpeg'
),
'image/object/bbox/ymin'
:
self
.
_FloatFeature
(
bbox_ymins
),
'image/object/bbox/xmin'
:
self
.
_FloatFeature
(
bbox_xmins
),
'image/object/bbox/ymax'
:
self
.
_FloatFeature
(
bbox_ymaxs
),
'image/object/bbox/xmax'
:
self
.
_FloatFeature
(
bbox_xmaxs
),
})).
SerializeToString
()
example_decoder
=
tf_example_decoder
.
TfExampleDecoder
()
tensor_dict
=
example_decoder
.
decode
(
tf
.
convert_to_tensor
(
example
))
self
.
assertAllEqual
((
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_boxes
].
get_shape
().
as_list
()),
[
None
,
4
])
with
self
.
test_session
()
as
sess
:
tensor_dict
=
sess
.
run
(
tensor_dict
)
self
.
assertAllClose
(
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_weights
],
np
.
ones
(
2
,
dtype
=
np
.
float32
))
def
testDecodeObjectLabel
(
self
):
def
testDecodeObjectLabel
(
self
):
image_tensor
=
np
.
random
.
randint
(
25
5
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
image_tensor
=
np
.
random
.
randint
(
25
6
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
encoded_jpeg
=
self
.
_EncodeImage
(
image_tensor
)
encoded_jpeg
=
self
.
_EncodeImage
(
image_tensor
)
bbox_classes
=
[
0
,
1
]
bbox_classes
=
[
0
,
1
]
example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
{
example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
{
...
@@ -168,8 +253,131 @@ class TfExampleDecoderTest(tf.test.TestCase):
...
@@ -168,8 +253,131 @@ class TfExampleDecoderTest(tf.test.TestCase):
self
.
assertAllEqual
(
bbox_classes
,
self
.
assertAllEqual
(
bbox_classes
,
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_classes
])
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_classes
])
def
testDecodeObjectLabelNoText
(
self
):
image_tensor
=
np
.
random
.
randint
(
256
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
encoded_jpeg
=
self
.
_EncodeImage
(
image_tensor
)
bbox_classes
=
[
1
,
2
]
example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
{
'image/encoded'
:
self
.
_BytesFeature
(
encoded_jpeg
),
'image/format'
:
self
.
_BytesFeature
(
'jpeg'
),
'image/object/class/label'
:
self
.
_Int64Feature
(
bbox_classes
),
})).
SerializeToString
()
label_map_string
=
"""
item {
id:1
name:'cat'
}
item {
id:2
name:'dog'
}
"""
label_map_path
=
os
.
path
.
join
(
self
.
get_temp_dir
(),
'label_map.pbtxt'
)
with
tf
.
gfile
.
Open
(
label_map_path
,
'wb'
)
as
f
:
f
.
write
(
label_map_string
)
example_decoder
=
tf_example_decoder
.
TfExampleDecoder
(
label_map_proto_file
=
label_map_path
)
tensor_dict
=
example_decoder
.
decode
(
tf
.
convert_to_tensor
(
example
))
self
.
assertAllEqual
((
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_classes
].
get_shape
().
as_list
()),
[
None
])
init
=
tf
.
tables_initializer
()
with
self
.
test_session
()
as
sess
:
sess
.
run
(
init
)
tensor_dict
=
sess
.
run
(
tensor_dict
)
self
.
assertAllEqual
(
bbox_classes
,
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_classes
])
def
testDecodeObjectLabelUnrecognizedName
(
self
):
image_tensor
=
np
.
random
.
randint
(
256
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
encoded_jpeg
=
self
.
_EncodeImage
(
image_tensor
)
bbox_classes_text
=
[
'cat'
,
'cheetah'
]
example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
{
'image/encoded'
:
self
.
_BytesFeature
(
encoded_jpeg
),
'image/format'
:
self
.
_BytesFeature
(
'jpeg'
),
'image/object/class/text'
:
self
.
_BytesFeature
(
bbox_classes_text
),
})).
SerializeToString
()
label_map_string
=
"""
item {
id:2
name:'cat'
}
item {
id:1
name:'dog'
}
"""
label_map_path
=
os
.
path
.
join
(
self
.
get_temp_dir
(),
'label_map.pbtxt'
)
with
tf
.
gfile
.
Open
(
label_map_path
,
'wb'
)
as
f
:
f
.
write
(
label_map_string
)
example_decoder
=
tf_example_decoder
.
TfExampleDecoder
(
label_map_proto_file
=
label_map_path
)
tensor_dict
=
example_decoder
.
decode
(
tf
.
convert_to_tensor
(
example
))
self
.
assertAllEqual
((
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_classes
]
.
get_shape
().
as_list
()),
[
None
])
with
self
.
test_session
()
as
sess
:
sess
.
run
(
tf
.
tables_initializer
())
tensor_dict
=
sess
.
run
(
tensor_dict
)
self
.
assertAllEqual
([
2
,
-
1
],
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_classes
])
def
testDecodeObjectLabelWithMapping
(
self
):
image_tensor
=
np
.
random
.
randint
(
256
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
encoded_jpeg
=
self
.
_EncodeImage
(
image_tensor
)
bbox_classes_text
=
[
'cat'
,
'dog'
]
example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
{
'image/encoded'
:
self
.
_BytesFeature
(
encoded_jpeg
),
'image/format'
:
self
.
_BytesFeature
(
'jpeg'
),
'image/object/class/text'
:
self
.
_BytesFeature
(
bbox_classes_text
),
})).
SerializeToString
()
label_map_string
=
"""
item {
id:3
name:'cat'
}
item {
id:1
name:'dog'
}
"""
label_map_path
=
os
.
path
.
join
(
self
.
get_temp_dir
(),
'label_map.pbtxt'
)
with
tf
.
gfile
.
Open
(
label_map_path
,
'wb'
)
as
f
:
f
.
write
(
label_map_string
)
example_decoder
=
tf_example_decoder
.
TfExampleDecoder
(
label_map_proto_file
=
label_map_path
)
tensor_dict
=
example_decoder
.
decode
(
tf
.
convert_to_tensor
(
example
))
self
.
assertAllEqual
((
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_classes
]
.
get_shape
().
as_list
()),
[
None
])
with
self
.
test_session
()
as
sess
:
sess
.
run
(
tf
.
tables_initializer
())
tensor_dict
=
sess
.
run
(
tensor_dict
)
self
.
assertAllEqual
([
3
,
1
],
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_classes
])
def
testDecodeObjectArea
(
self
):
def
testDecodeObjectArea
(
self
):
image_tensor
=
np
.
random
.
randint
(
25
5
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
image_tensor
=
np
.
random
.
randint
(
25
6
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
encoded_jpeg
=
self
.
_EncodeImage
(
image_tensor
)
encoded_jpeg
=
self
.
_EncodeImage
(
image_tensor
)
object_area
=
[
100.
,
174.
]
object_area
=
[
100.
,
174.
]
example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
{
example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
{
...
@@ -190,7 +398,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
...
@@ -190,7 +398,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_area
])
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_area
])
def
testDecodeObjectIsCrowd
(
self
):
def
testDecodeObjectIsCrowd
(
self
):
image_tensor
=
np
.
random
.
randint
(
25
5
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
image_tensor
=
np
.
random
.
randint
(
25
6
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
encoded_jpeg
=
self
.
_EncodeImage
(
image_tensor
)
encoded_jpeg
=
self
.
_EncodeImage
(
image_tensor
)
object_is_crowd
=
[
0
,
1
]
object_is_crowd
=
[
0
,
1
]
example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
{
example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
{
...
@@ -213,7 +421,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
...
@@ -213,7 +421,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
fields
.
InputDataFields
.
groundtruth_is_crowd
])
fields
.
InputDataFields
.
groundtruth_is_crowd
])
def
testDecodeObjectDifficult
(
self
):
def
testDecodeObjectDifficult
(
self
):
image_tensor
=
np
.
random
.
randint
(
25
5
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
image_tensor
=
np
.
random
.
randint
(
25
6
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
encoded_jpeg
=
self
.
_EncodeImage
(
image_tensor
)
encoded_jpeg
=
self
.
_EncodeImage
(
image_tensor
)
object_difficult
=
[
0
,
1
]
object_difficult
=
[
0
,
1
]
example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
{
example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
{
...
@@ -236,7 +444,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
...
@@ -236,7 +444,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
fields
.
InputDataFields
.
groundtruth_difficult
])
fields
.
InputDataFields
.
groundtruth_difficult
])
def
testDecodeObjectGroupOf
(
self
):
def
testDecodeObjectGroupOf
(
self
):
image_tensor
=
np
.
random
.
randint
(
25
5
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
image_tensor
=
np
.
random
.
randint
(
25
6
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
encoded_jpeg
=
self
.
_EncodeImage
(
image_tensor
)
encoded_jpeg
=
self
.
_EncodeImage
(
image_tensor
)
object_group_of
=
[
0
,
1
]
object_group_of
=
[
0
,
1
]
example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
...
@@ -259,13 +467,37 @@ class TfExampleDecoderTest(tf.test.TestCase):
...
@@ -259,13 +467,37 @@ class TfExampleDecoderTest(tf.test.TestCase):
[
bool
(
item
)
for
item
in
object_group_of
],
[
bool
(
item
)
for
item
in
object_group_of
],
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_group_of
])
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_group_of
])
def
testDecodeObjectWeight
(
self
):
image_tensor
=
np
.
random
.
randint
(
256
,
size
=
(
4
,
5
,
3
)).
astype
(
np
.
uint8
)
encoded_jpeg
=
self
.
_EncodeImage
(
image_tensor
)
object_weights
=
[
0.75
,
1.0
]
example
=
tf
.
train
.
Example
(
features
=
tf
.
train
.
Features
(
feature
=
{
'image/encoded'
:
self
.
_BytesFeature
(
encoded_jpeg
),
'image/format'
:
self
.
_BytesFeature
(
'jpeg'
),
'image/object/weight'
:
self
.
_FloatFeature
(
object_weights
),
})).
SerializeToString
()
example_decoder
=
tf_example_decoder
.
TfExampleDecoder
()
tensor_dict
=
example_decoder
.
decode
(
tf
.
convert_to_tensor
(
example
))
self
.
assertAllEqual
((
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_weights
].
get_shape
().
as_list
()),
[
None
])
with
self
.
test_session
()
as
sess
:
tensor_dict
=
sess
.
run
(
tensor_dict
)
self
.
assertAllEqual
(
object_weights
,
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_weights
])
def
testDecodeInstanceSegmentation
(
self
):
def
testDecodeInstanceSegmentation
(
self
):
num_instances
=
4
num_instances
=
4
image_height
=
5
image_height
=
5
image_width
=
3
image_width
=
3
# Randomly generate image.
# Randomly generate image.
image_tensor
=
np
.
random
.
randint
(
25
5
,
size
=
(
image_height
,
image_tensor
=
np
.
random
.
randint
(
25
6
,
size
=
(
image_height
,
image_width
,
image_width
,
3
)).
astype
(
np
.
uint8
)
3
)).
astype
(
np
.
uint8
)
encoded_jpeg
=
self
.
_EncodeImage
(
image_tensor
)
encoded_jpeg
=
self
.
_EncodeImage
(
image_tensor
)
...
@@ -316,7 +548,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
...
@@ -316,7 +548,7 @@ class TfExampleDecoderTest(tf.test.TestCase):
image_height
=
5
image_height
=
5
image_width
=
3
image_width
=
3
# Randomly generate image.
# Randomly generate image.
image_tensor
=
np
.
random
.
randint
(
25
5
,
size
=
(
image_height
,
image_tensor
=
np
.
random
.
randint
(
25
6
,
size
=
(
image_height
,
image_width
,
image_width
,
3
)).
astype
(
np
.
uint8
)
3
)).
astype
(
np
.
uint8
)
encoded_jpeg
=
self
.
_EncodeImage
(
image_tensor
)
encoded_jpeg
=
self
.
_EncodeImage
(
image_tensor
)
...
...
research/object_detection/dataset_tools/BUILD
View file @
fd7b6887
# Tensorflow Object Detection API:
main runnable
s.
# Tensorflow Object Detection API:
dataset tool
s.
package
(
package
(
default_visibility
=
[
"//visibility:public"
],
default_visibility
=
[
"//visibility:public"
],
...
@@ -8,18 +8,43 @@ licenses(["notice"])
...
@@ -8,18 +8,43 @@ licenses(["notice"])
# Apache 2.0
# Apache 2.0
py_binary
(
name
=
"create_coco_tf_record"
,
srcs
=
[
"create_coco_tf_record.py"
,
],
deps
=
[
"//PIL:pil"
,
"//pycocotools"
,
"//tensorflow"
,
"//tensorflow/models/research/object_detection/utils:dataset_util"
,
"//tensorflow/models/research/object_detection/utils:label_map_util"
,
],
)
py_test
(
name
=
"create_coco_tf_record_test"
,
srcs
=
[
"create_coco_tf_record_test.py"
,
],
deps
=
[
":create_coco_tf_record"
,
"//tensorflow"
,
],
)
py_binary
(
py_binary
(
name
=
"create_kitti_tf_record"
,
name
=
"create_kitti_tf_record"
,
srcs
=
[
srcs
=
[
"create_kitti_tf_record.py"
,
"create_kitti_tf_record.py"
,
],
],
deps
=
[
deps
=
[
"//
third_party/py/
PIL:pil"
,
"//PIL:pil"
,
"//
third_party/py/
lxml"
,
"//lxml"
,
"//tensorflow"
,
"//tensorflow"
,
"//tensorflow
_
models/object_detection/utils:dataset_util"
,
"//tensorflow
/
models/
research/
object_detection/utils:dataset_util"
,
"//tensorflow
_
models/object_detection/utils:label_map_util"
,
"//tensorflow
/
models/
research/
object_detection/utils:label_map_util"
,
"//tensorflow
_
models/object_detection/utils:np_box_ops"
,
"//tensorflow
/
models/
research/
object_detection/utils:np_box_ops"
,
],
],
)
)
...
@@ -40,11 +65,11 @@ py_binary(
...
@@ -40,11 +65,11 @@ py_binary(
"create_pascal_tf_record.py"
,
"create_pascal_tf_record.py"
,
],
],
deps
=
[
deps
=
[
"//
third_party/py/
PIL:pil"
,
"//PIL:pil"
,
"//
third_party/py/
lxml"
,
"//lxml"
,
"//tensorflow"
,
"//tensorflow"
,
"//tensorflow
_
models/object_detection/utils:dataset_util"
,
"//tensorflow
/
models/
research/
object_detection/utils:dataset_util"
,
"//tensorflow
_
models/object_detection/utils:label_map_util"
,
"//tensorflow
/
models/
research/
object_detection/utils:label_map_util"
,
],
],
)
)
...
@@ -65,11 +90,11 @@ py_binary(
...
@@ -65,11 +90,11 @@ py_binary(
"create_pet_tf_record.py"
,
"create_pet_tf_record.py"
,
],
],
deps
=
[
deps
=
[
"//
third_party/py/
PIL:pil"
,
"//PIL:pil"
,
"//
third_party/py/
lxml"
,
"//lxml"
,
"//tensorflow"
,
"//tensorflow"
,
"//tensorflow
_
models/object_detection/utils:dataset_util"
,
"//tensorflow
/
models/
research/
object_detection/utils:dataset_util"
,
"//tensorflow
_
models/object_detection/utils:label_map_util"
,
"//tensorflow
/
models/
research/
object_detection/utils:label_map_util"
,
],
],
)
)
...
@@ -78,8 +103,8 @@ py_library(
...
@@ -78,8 +103,8 @@ py_library(
srcs
=
[
"oid_tfrecord_creation.py"
],
srcs
=
[
"oid_tfrecord_creation.py"
],
deps
=
[
deps
=
[
"//tensorflow"
,
"//tensorflow"
,
"//tensorflow
_
models/object_detection/core:standard_fields"
,
"//tensorflow
/
models/
research/
object_detection/core:standard_fields"
,
"//tensorflow
_
models/object_detection/utils:dataset_util"
,
"//tensorflow
/
models/
research/
object_detection/utils:dataset_util"
,
],
],
)
)
...
@@ -88,9 +113,9 @@ py_test(
...
@@ -88,9 +113,9 @@ py_test(
srcs
=
[
"oid_tfrecord_creation_test.py"
],
srcs
=
[
"oid_tfrecord_creation_test.py"
],
deps
=
[
deps
=
[
":oid_tfrecord_creation"
,
":oid_tfrecord_creation"
,
"//
third_party/py/
contextlib2"
,
"//contextlib2"
,
"//
third_party/py/
pandas"
,
"//pandas"
,
"//
third_party/py/
tensorflow"
,
"//tensorflow"
,
],
],
)
)
...
@@ -99,9 +124,9 @@ py_binary(
...
@@ -99,9 +124,9 @@ py_binary(
srcs
=
[
"create_oid_tf_record.py"
],
srcs
=
[
"create_oid_tf_record.py"
],
deps
=
[
deps
=
[
":oid_tfrecord_creation"
,
":oid_tfrecord_creation"
,
"//
third_party/py/
contextlib2"
,
"//contextlib2"
,
"//
third_party/py/
pandas"
,
"//pandas"
,
"//tensorflow"
,
"//tensorflow"
,
"//tensorflow
_
models/object_detection/utils:label_map_util"
,
"//tensorflow
/
models/
research/
object_detection/utils:label_map_util"
,
],
],
)
)
research/object_detection/dataset_tools/__init__.py
View file @
fd7b6887
research/object_detection/dataset_tools/create_coco_tf_record.py
0 → 100644
View file @
fd7b6887
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r
"""Convert raw COCO dataset to TFRecord for object_detection.
Example usage:
python create_coco_tf_record.py --logtostderr \
--train_image_dir="${TRAIN_IMAGE_DIR}" \
--val_image_dir="${VAL_IMAGE_DIR}" \
--test_image_dir="${TEST_IMAGE_DIR}" \
--train_annotations_file="${TRAIN_ANNOTATIONS_FILE}" \
--val_annotations_file="${VAL_ANNOTATIONS_FILE}" \
--testdev_annotations_file="${TESTDEV_ANNOTATIONS_FILE}" \
--output_dir="${OUTPUT_DIR}"
"""
from
__future__
import
absolute_import
from
__future__
import
division
from
__future__
import
print_function
import
hashlib
import
io
import
json
import
os
import
numpy
as
np
import
PIL.Image
from
pycocotools
import
mask
import
tensorflow
as
tf
from
object_detection.utils
import
dataset_util
from
object_detection.utils
import
label_map_util
# Command-line flags. Note: `tf.app.flags` and `tf.flags` are aliases of the
# same module, so defining via `tf.flags.*` and reading via `flags.FLAGS`
# below refer to the same registry.
flags = tf.app.flags
tf.flags.DEFINE_boolean('include_masks', False,
                        'Whether to include instance segmentations masks '
                        '(PNG encoded) in the result. default: False.')
tf.flags.DEFINE_string('train_image_dir', '',
                       'Training image directory.')
tf.flags.DEFINE_string('val_image_dir', '',
                       'Validation image directory.')
tf.flags.DEFINE_string('test_image_dir', '',
                       'Test image directory.')
tf.flags.DEFINE_string('train_annotations_file', '',
                       'Training annotations JSON file.')
tf.flags.DEFINE_string('val_annotations_file', '',
                       'Validation annotations JSON file.')
tf.flags.DEFINE_string('testdev_annotations_file', '',
                       'Test-dev annotations JSON file.')
tf.flags.DEFINE_string('output_dir', '/tmp/', 'Output data directory.')

FLAGS = flags.FLAGS

# Surface the per-100-image progress logging emitted during conversion.
tf.logging.set_verbosity(tf.logging.INFO)
def create_tf_example(image,
                      annotations_list,
                      image_dir,
                      category_index,
                      include_masks=False):
  """Converts image and annotations to a tf.Example proto.

  Args:
    image: dict with keys:
      [u'license', u'file_name', u'coco_url', u'height', u'width',
      u'date_captured', u'flickr_url', u'id']
    annotations_list: list of dicts with keys:
      [u'segmentation', u'area', u'iscrowd', u'image_id',
      u'bbox', u'category_id', u'id']
      Notice that bounding box coordinates in the official COCO dataset are
      given as [x, y, width, height] tuples using absolute coordinates where
      x, y represent the top-left (0-indexed) corner. This function converts
      to the format expected by the Tensorflow Object Detection API (which is
      [ymin, xmin, ymax, xmax] with coordinates normalized relative to image
      size).
    image_dir: directory containing the image files.
    category_index: a dict containing COCO category information keyed
      by the 'id' field of each category.  See the
      label_map_util.create_category_index function.
    include_masks: Whether to include instance segmentations masks
      (PNG encoded) in the result. default: False.

  Returns:
    key: SHA256 hex digest of the encoded JPEG bytes.
    example: The converted tf.Example
    num_annotations_skipped: Number of (invalid) annotations that were ignored.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
  image_height = image['height']
  image_width = image['width']
  filename = image['file_name']
  image_id = image['id']

  full_path = os.path.join(image_dir, filename)
  with tf.gfile.GFile(full_path, 'rb') as fid:
    encoded_jpg = fid.read()
  # Open only to verify the bytes parse as an image (raises otherwise).
  # Fix: the original assigned the result to `image`, clobbering the metadata
  # dict argument; the decoded PIL image itself was never used.
  PIL.Image.open(io.BytesIO(encoded_jpg))
  key = hashlib.sha256(encoded_jpg).hexdigest()

  xmin = []
  xmax = []
  ymin = []
  ymax = []
  is_crowd = []
  category_names = []
  category_ids = []
  area = []
  encoded_mask_png = []
  num_annotations_skipped = 0
  for object_annotations in annotations_list:
    (x, y, width, height) = tuple(object_annotations['bbox'])
    # Skip degenerate boxes and boxes extending beyond the image frame.
    if width <= 0 or height <= 0:
      num_annotations_skipped += 1
      continue
    if x + width > image_width or y + height > image_height:
      num_annotations_skipped += 1
      continue
    xmin.append(float(x) / image_width)
    xmax.append(float(x + width) / image_width)
    ymin.append(float(y) / image_height)
    ymax.append(float(y + height) / image_height)
    is_crowd.append(object_annotations['iscrowd'])
    category_id = int(object_annotations['category_id'])
    category_ids.append(category_id)
    category_names.append(category_index[category_id]['name'].encode('utf8'))
    area.append(object_annotations['area'])

    if include_masks:
      run_len_encoding = mask.frPyObjects(object_annotations['segmentation'],
                                          image_height, image_width)
      binary_mask = mask.decode(run_len_encoding)
      # For polygon (non-crowd) annotations, frPyObjects returns one RLE per
      # polygon; collapse them into a single 2-D mask.
      if not object_annotations['iscrowd']:
        binary_mask = np.amax(binary_mask, axis=2)
      pil_image = PIL.Image.fromarray(binary_mask)
      output_io = io.BytesIO()
      pil_image.save(output_io, format='PNG')
      encoded_mask_png.append(output_io.getvalue())
  feature_dict = {
      'image/height':
          dataset_util.int64_feature(image_height),
      'image/width':
          dataset_util.int64_feature(image_width),
      'image/filename':
          dataset_util.bytes_feature(filename.encode('utf8')),
      'image/source_id':
          dataset_util.bytes_feature(str(image_id).encode('utf8')),
      'image/key/sha256':
          dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded':
          dataset_util.bytes_feature(encoded_jpg),
      'image/format':
          dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin':
          dataset_util.float_list_feature(xmin),
      'image/object/bbox/xmax':
          dataset_util.float_list_feature(xmax),
      'image/object/bbox/ymin':
          dataset_util.float_list_feature(ymin),
      'image/object/bbox/ymax':
          dataset_util.float_list_feature(ymax),
      'image/object/class/label':
          dataset_util.int64_list_feature(category_ids),
      'image/object/is_crowd':
          dataset_util.int64_list_feature(is_crowd),
      'image/object/area':
          dataset_util.float_list_feature(area),
  }
  if include_masks:
    feature_dict['image/object/mask'] = (
        dataset_util.bytes_list_feature(encoded_mask_png))
  example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
  return key, example, num_annotations_skipped
def _create_tf_record_from_coco_annotations(
    annotations_file, image_dir, output_path, include_masks):
  """Loads COCO annotation json files and converts to tf.Record format.

  Args:
    annotations_file: JSON file containing bounding box annotations.
    image_dir: Directory containing the image files.
    output_path: Path to output tf.Record file.
    include_masks: Whether to include instance segmentations masks
      (PNG encoded) in the result. default: False.
  """
  with tf.gfile.GFile(annotations_file, 'r') as fid:
    groundtruth_data = json.load(fid)
  images = groundtruth_data['images']
  category_index = label_map_util.create_category_index(
      groundtruth_data['categories'])

  # Index annotations by image id so each image's annotation list can be
  # looked up directly in the writing loop below.
  annotations_index = {}
  if 'annotations' in groundtruth_data:
    tf.logging.info(
        'Found groundtruth annotations. Building annotations index.')
    for annotation in groundtruth_data['annotations']:
      image_id = annotation['image_id']
      if image_id not in annotations_index:
        annotations_index[image_id] = []
      annotations_index[image_id].append(annotation)
  missing_annotation_count = 0
  for image in images:
    image_id = image['id']
    if image_id not in annotations_index:
      missing_annotation_count += 1
      annotations_index[image_id] = []
  tf.logging.info('%d images are missing annotations.',
                  missing_annotation_count)

  tf.logging.info('writing to output path: %s', output_path)
  total_num_annotations_skipped = 0
  # Use the writer as a context manager so the record file is flushed and
  # closed even if converting an example raises (the original leaked the
  # writer on exceptions).
  with tf.python_io.TFRecordWriter(output_path) as writer:
    for idx, image in enumerate(images):
      if idx % 100 == 0:
        tf.logging.info('On image %d of %d', idx, len(images))
      annotations_list = annotations_index[image['id']]
      _, tf_example, num_annotations_skipped = create_tf_example(
          image, annotations_list, image_dir, category_index, include_masks)
      total_num_annotations_skipped += num_annotations_skipped
      writer.write(tf_example.SerializeToString())
  tf.logging.info('Finished writing, skipped %d annotations.',
                  total_num_annotations_skipped)
def main(_):
  """Converts the COCO train/val/test-dev splits to TFRecord files.

  Reads all input locations from FLAGS and writes coco_train.record,
  coco_val.record and coco_testdev.record under FLAGS.output_dir.

  Raises:
    ValueError: if any required input flag is unset.
  """
  # Validate flags with explicit raises rather than `assert`, which is
  # stripped when Python runs with -O and would silently skip validation.
  required_flags = [
      'train_image_dir',
      'val_image_dir',
      'test_image_dir',
      'train_annotations_file',
      'val_annotations_file',
      'testdev_annotations_file',
  ]
  for flag_name in required_flags:
    if not getattr(FLAGS, flag_name):
      raise ValueError('`%s` missing.' % flag_name)

  if not tf.gfile.IsDirectory(FLAGS.output_dir):
    tf.gfile.MakeDirs(FLAGS.output_dir)
  train_output_path = os.path.join(FLAGS.output_dir, 'coco_train.record')
  val_output_path = os.path.join(FLAGS.output_dir, 'coco_val.record')
  testdev_output_path = os.path.join(FLAGS.output_dir, 'coco_testdev.record')

  _create_tf_record_from_coco_annotations(
      FLAGS.train_annotations_file,
      FLAGS.train_image_dir,
      train_output_path,
      FLAGS.include_masks)
  _create_tf_record_from_coco_annotations(
      FLAGS.val_annotations_file,
      FLAGS.val_image_dir,
      val_output_path,
      FLAGS.include_masks)
  _create_tf_record_from_coco_annotations(
      FLAGS.testdev_annotations_file,
      FLAGS.test_image_dir,
      testdev_output_path,
      FLAGS.include_masks)
if __name__ == '__main__':
  # tf.app.run parses the flags defined above, then invokes main(argv).
  tf.app.run()
research/object_detection/dataset_tools/create_coco_tf_record_test.py
0 → 100644
View file @
fd7b6887
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Test for create_coco_tf_record.py."""
import
io
import
os
import
numpy
as
np
import
PIL.Image
import
tensorflow
as
tf
from
object_detection.dataset_tools
import
create_coco_tf_record
class CreateCocoTFRecordTest(tf.test.TestCase):
  """Tests for create_coco_tf_record.create_tf_example."""

  def _assertProtoEqual(self, proto_field, expectation):
    """Helper function to assert if a proto field equals some value.

    Args:
      proto_field: The protobuf field to compare.
      expectation: The expected value of the protobuf field.
    """
    proto_list = [p for p in proto_field]
    self.assertListEqual(proto_list, expectation)

  def test_create_tf_example(self):
    image_file_name = 'tmp_image.jpg'
    # Fix: use uint8 pixel data. The original fixture used np.random.rand
    # (float64), which PIL.Image.fromarray(..., 'RGB') misinterprets/rejects,
    # so the saved JPEG fixture was invalid on modern Pillow. The assertions
    # below do not depend on pixel values, so this change is safe.
    image_data = np.random.randint(
        256, size=(256, 256, 3)).astype(np.uint8)
    tmp_dir = self.get_temp_dir()
    save_path = os.path.join(tmp_dir, image_file_name)
    image = PIL.Image.fromarray(image_data, 'RGB')
    image.save(save_path)

    image = {
        'file_name': image_file_name,
        'height': 256,
        'width': 256,
        'id': 11,
    }

    annotations_list = [{
        'area': .5,
        'iscrowd': False,
        'image_id': 11,
        'bbox': [64, 64, 128, 128],
        'category_id': 2,
        'id': 1000,
    }]

    image_dir = tmp_dir
    category_index = {
        1: {
            'name': 'dog',
            'id': 1
        },
        2: {
            'name': 'cat',
            'id': 2
        },
        3: {
            'name': 'human',
            'id': 3
        }
    }

    (_, example,
     num_annotations_skipped) = create_coco_tf_record.create_tf_example(
         image, annotations_list, image_dir, category_index)

    self.assertEqual(num_annotations_skipped, 0)
    self._assertProtoEqual(
        example.features.feature['image/height'].int64_list.value, [256])
    self._assertProtoEqual(
        example.features.feature['image/width'].int64_list.value, [256])
    self._assertProtoEqual(
        example.features.feature['image/filename'].bytes_list.value,
        [image_file_name])
    self._assertProtoEqual(
        example.features.feature['image/source_id'].bytes_list.value,
        [str(image['id'])])
    self._assertProtoEqual(
        example.features.feature['image/format'].bytes_list.value, ['jpeg'])
    # The [64, 64, 128, 128] (x, y, w, h) box normalized by the 256px image.
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/xmin'].float_list.value,
        [0.25])
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/ymin'].float_list.value,
        [0.25])
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/xmax'].float_list.value,
        [0.75])
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/ymax'].float_list.value,
        [0.75])

  def test_create_tf_example_with_instance_masks(self):
    image_file_name = 'tmp_image.jpg'
    # Fix: uint8 pixels instead of np.random.rand floats (see above).
    image_data = np.random.randint(256, size=(8, 8, 3)).astype(np.uint8)
    tmp_dir = self.get_temp_dir()
    save_path = os.path.join(tmp_dir, image_file_name)
    image = PIL.Image.fromarray(image_data, 'RGB')
    image.save(save_path)

    image = {
        'file_name': image_file_name,
        'height': 8,
        'width': 8,
        'id': 11,
    }

    # Two triangular polygons: upper-left and lower-right corners of the 8x8
    # image; the expected rasterized mask is asserted below.
    annotations_list = [{
        'area': .5,
        'iscrowd': False,
        'image_id': 11,
        'bbox': [0, 0, 8, 8],
        'segmentation': [[4, 0, 0, 0, 0, 4], [8, 4, 4, 8, 8, 8]],
        'category_id': 1,
        'id': 1000,
    }]

    image_dir = tmp_dir
    category_index = {
        1: {
            'name': 'dog',
            'id': 1
        },
    }

    (_, example,
     num_annotations_skipped) = create_coco_tf_record.create_tf_example(
         image, annotations_list, image_dir, category_index,
         include_masks=True)

    self.assertEqual(num_annotations_skipped, 0)
    self._assertProtoEqual(
        example.features.feature['image/height'].int64_list.value, [8])
    self._assertProtoEqual(
        example.features.feature['image/width'].int64_list.value, [8])
    self._assertProtoEqual(
        example.features.feature['image/filename'].bytes_list.value,
        [image_file_name])
    self._assertProtoEqual(
        example.features.feature['image/source_id'].bytes_list.value,
        [str(image['id'])])
    self._assertProtoEqual(
        example.features.feature['image/format'].bytes_list.value, ['jpeg'])
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/xmin'].float_list.value,
        [0])
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/ymin'].float_list.value,
        [0])
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/xmax'].float_list.value,
        [1])
    self._assertProtoEqual(
        example.features.feature['image/object/bbox/ymax'].float_list.value,
        [1])
    encoded_mask_pngs = [
        io.BytesIO(encoded_masks) for encoded_masks in example.features.feature[
            'image/object/mask'].bytes_list.value
    ]
    pil_masks = [
        np.array(PIL.Image.open(encoded_mask_png))
        for encoded_mask_png in encoded_mask_pngs
    ]
    self.assertEqual(len(pil_masks), 1)
    self.assertAllEqual(pil_masks[0],
                        [[1, 1, 1, 0, 0, 0, 0, 0],
                         [1, 1, 0, 0, 0, 0, 0, 0],
                         [1, 0, 0, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 0],
                         [0, 0, 0, 0, 0, 0, 0, 1],
                         [0, 0, 0, 0, 0, 0, 1, 1],
                         [0, 0, 0, 0, 0, 1, 1, 1],
                         [0, 0, 0, 0, 1, 1, 1, 1]])
if __name__ == '__main__':
  # Runs all test methods in this file via the TensorFlow test runner.
  tf.test.main()
research/object_detection/dataset_tools/create_kitti_tf_record.py
View file @
fd7b6887
...
@@ -120,7 +120,7 @@ def convert_kitti_to_tfrecords(data_dir, output_path, classes_to_use,
...
@@ -120,7 +120,7 @@ def convert_kitti_to_tfrecords(data_dir, output_path, classes_to_use,
# Filter all bounding boxes of this frame that are of a legal class, and
# Filter all bounding boxes of this frame that are of a legal class, and
# don't overlap with a dontcare region.
# don't overlap with a dontcare region.
# TODO
(talremez)
filter out targets that are truncated or heavily occluded.
# TODO filter out targets that are truncated or heavily occluded.
annotation_for_image
=
filter_annotations
(
img_anno
,
classes_to_use
)
annotation_for_image
=
filter_annotations
(
img_anno
,
classes_to_use
)
example
=
prepare_example
(
image_path
,
annotation_for_image
,
label_map_dict
)
example
=
prepare_example
(
image_path
,
annotation_for_image
,
label_map_dict
)
...
...
research/object_detection/dataset_tools/create_kitti_tf_record_test.py
View file @
fd7b6887
...
@@ -24,7 +24,7 @@ import tensorflow as tf
...
@@ -24,7 +24,7 @@ import tensorflow as tf
from
object_detection.dataset_tools
import
create_kitti_tf_record
from
object_detection.dataset_tools
import
create_kitti_tf_record
class
DictToTFExample
Test
(
tf
.
test
.
TestCase
):
class
CreateKittiTFRecord
Test
(
tf
.
test
.
TestCase
):
def
_assertProtoEqual
(
self
,
proto_field
,
expectation
):
def
_assertProtoEqual
(
self
,
proto_field
,
expectation
):
"""Helper function to assert if a proto field equals some value.
"""Helper function to assert if a proto field equals some value.
...
...
Prev
1
2
3
4
5
6
7
…
10
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment