Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
e7de233b
Commit
e7de233b
authored
Oct 27, 2017
by
Vivek Rathod
Browse files
updates changes in object_detecion/cores directory.
parent
edcd29f2
Changes
14
Show whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
2135 additions
and
327 deletions
+2135
-327
research/object_detection/core/BUILD
research/object_detection/core/BUILD
+5
-0
research/object_detection/core/box_list_ops.py
research/object_detection/core/box_list_ops.py
+20
-11
research/object_detection/core/box_predictor.py
research/object_detection/core/box_predictor.py
+31
-12
research/object_detection/core/data_parser.py
research/object_detection/core/data_parser.py
+41
-0
research/object_detection/core/keypoint_ops.py
research/object_detection/core/keypoint_ops.py
+51
-0
research/object_detection/core/keypoint_ops_test.py
research/object_detection/core/keypoint_ops_test.py
+32
-0
research/object_detection/core/losses.py
research/object_detection/core/losses.py
+72
-2
research/object_detection/core/losses_test.py
research/object_detection/core/losses_test.py
+313
-0
research/object_detection/core/model.py
research/object_detection/core/model.py
+20
-6
research/object_detection/core/preprocessor.py
research/object_detection/core/preprocessor.py
+763
-156
research/object_detection/core/preprocessor_test.py
research/object_detection/core/preprocessor_test.py
+680
-129
research/object_detection/core/standard_fields.py
research/object_detection/core/standard_fields.py
+58
-3
research/object_detection/core/target_assigner.py
research/object_detection/core/target_assigner.py
+14
-8
research/object_detection/core/target_assigner_test.py
research/object_detection/core/target_assigner_test.py
+35
-0
No files found.
research/object_detection/core/BUILD
View file @
e7de233b
...
@@ -264,6 +264,11 @@ py_library(
...
@@ -264,6 +264,11 @@ py_library(
srcs
=
[
"data_decoder.py"
],
srcs
=
[
"data_decoder.py"
],
)
)
py_library
(
name
=
"data_parser"
,
srcs
=
[
"data_parser.py"
],
)
py_library
(
py_library
(
name
=
"box_predictor"
,
name
=
"box_predictor"
,
srcs
=
[
"box_predictor.py"
],
srcs
=
[
"box_predictor.py"
],
...
...
research/object_detection/core/box_list_ops.py
View file @
e7de233b
...
@@ -584,7 +584,8 @@ def sort_by_field(boxlist, field, order=SortOrder.descend, scope=None):
...
@@ -584,7 +584,8 @@ def sort_by_field(boxlist, field, order=SortOrder.descend, scope=None):
[
'Incorrect field size: actual vs expected.'
,
num_entries
,
num_boxes
])
[
'Incorrect field size: actual vs expected.'
,
num_entries
,
num_boxes
])
with
tf
.
control_dependencies
([
length_assert
]):
with
tf
.
control_dependencies
([
length_assert
]):
# TODO: Remove with tf.device when top_k operation runs correctly on GPU.
# TODO: Remove with tf.device when top_k operation runs
# correctly on GPU.
with
tf
.
device
(
'/cpu:0'
):
with
tf
.
device
(
'/cpu:0'
):
_
,
sorted_indices
=
tf
.
nn
.
top_k
(
field_to_sort
,
num_boxes
,
sorted
=
True
)
_
,
sorted_indices
=
tf
.
nn
.
top_k
(
field_to_sort
,
num_boxes
,
sorted
=
True
)
...
@@ -655,7 +656,7 @@ def filter_greater_than(boxlist, thresh, scope=None):
...
@@ -655,7 +656,7 @@ def filter_greater_than(boxlist, thresh, scope=None):
This op keeps the collection of boxes whose corresponding scores are
This op keeps the collection of boxes whose corresponding scores are
greater than the input threshold.
greater than the input threshold.
TODO: Change function name to
F
ilter
S
cores
G
reater
T
han
TODO: Change function name to
f
ilter
_s
cores
_g
reater
_t
han
Args:
Args:
boxlist: BoxList holding N boxes. Must contain a 'scores' field
boxlist: BoxList holding N boxes. Must contain a 'scores' field
...
@@ -772,18 +773,25 @@ def to_normalized_coordinates(boxlist, height, width,
...
@@ -772,18 +773,25 @@ def to_normalized_coordinates(boxlist, height, width,
return
scale
(
boxlist
,
1
/
height
,
1
/
width
)
return
scale
(
boxlist
,
1
/
height
,
1
/
width
)
def
to_absolute_coordinates
(
boxlist
,
height
,
width
,
def
to_absolute_coordinates
(
boxlist
,
check_range
=
True
,
scope
=
None
):
height
,
width
,
check_range
=
True
,
maximum_normalized_coordinate
=
1.01
,
scope
=
None
):
"""Converts normalized box coordinates to absolute pixel coordinates.
"""Converts normalized box coordinates to absolute pixel coordinates.
This function raises an assertion failed error when the maximum box coordinate
This function raises an assertion failed error when the maximum box coordinate
value is larger than 1.01 (in which case coordinates are already absolute).
value is larger than maximum_normalized_coordinate (in which case coordinates
are already absolute).
Args:
Args:
boxlist: BoxList with coordinates in range [0, 1].
boxlist: BoxList with coordinates in range [0, 1].
height: Maximum value for height of absolute box coordinates.
height: Maximum value for height of absolute box coordinates.
width: Maximum value for width of absolute box coordinates.
width: Maximum value for width of absolute box coordinates.
check_range: If True, checks if the coordinates are normalized or not.
check_range: If True, checks if the coordinates are normalized or not.
maximum_normalized_coordinate: Maximum coordinate value to be considered
as normalized, default to 1.01.
scope: name scope.
scope: name scope.
Returns:
Returns:
...
@@ -797,9 +805,10 @@ def to_absolute_coordinates(boxlist, height, width,
...
@@ -797,9 +805,10 @@ def to_absolute_coordinates(boxlist, height, width,
# Ensure range of input boxes is correct.
# Ensure range of input boxes is correct.
if
check_range
:
if
check_range
:
box_maximum
=
tf
.
reduce_max
(
boxlist
.
get
())
box_maximum
=
tf
.
reduce_max
(
boxlist
.
get
())
max_assert
=
tf
.
Assert
(
tf
.
greater_equal
(
1.01
,
box_maximum
),
max_assert
=
tf
.
Assert
(
tf
.
greater_equal
(
maximum_normalized_coordinate
,
box_maximum
),
[
'maximum box coordinate value is larger '
[
'maximum box coordinate value is larger '
'than 1.01: '
,
box_maximum
])
'than %f: '
%
maximum_normalized_coordinate
,
box_maximum
])
with
tf
.
control_dependencies
([
max_assert
]):
with
tf
.
control_dependencies
([
max_assert
]):
width
=
tf
.
identity
(
width
)
width
=
tf
.
identity
(
width
)
...
@@ -927,9 +936,9 @@ def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5):
...
@@ -927,9 +936,9 @@ def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5):
iou_
=
iou
(
selected_boxes
,
pool_boxes
)
iou_
=
iou
(
selected_boxes
,
pool_boxes
)
match_indicator
=
tf
.
to_float
(
tf
.
greater
(
iou_
,
iou_thresh
))
match_indicator
=
tf
.
to_float
(
tf
.
greater
(
iou_
,
iou_thresh
))
num_matches
=
tf
.
reduce_sum
(
match_indicator
,
1
)
num_matches
=
tf
.
reduce_sum
(
match_indicator
,
1
)
# TODO: Handle the case where some boxes in selected_boxes do not
match to any
# TODO: Handle the case where some boxes in selected_boxes do not
# boxes in pool_boxes. For such boxes without any matches, we
should return
#
match to any
boxes in pool_boxes. For such boxes without any matches, we
# the original boxes without voting.
#
should return
the original boxes without voting.
match_assert
=
tf
.
Assert
(
match_assert
=
tf
.
Assert
(
tf
.
reduce_all
(
tf
.
greater
(
num_matches
,
0
)),
tf
.
reduce_all
(
tf
.
greater
(
num_matches
,
0
)),
[
'Each box in selected_boxes must match with at least one box '
[
'Each box in selected_boxes must match with at least one box '
...
...
research/object_detection/core/box_predictor.py
View file @
e7de233b
...
@@ -278,6 +278,8 @@ class MaskRCNNBoxPredictor(BoxPredictor):
...
@@ -278,6 +278,8 @@ class MaskRCNNBoxPredictor(BoxPredictor):
box_code_size
,
box_code_size
,
conv_hyperparams
=
None
,
conv_hyperparams
=
None
,
predict_instance_masks
=
False
,
predict_instance_masks
=
False
,
mask_height
=
14
,
mask_width
=
14
,
mask_prediction_conv_depth
=
256
,
mask_prediction_conv_depth
=
256
,
predict_keypoints
=
False
):
predict_keypoints
=
False
):
"""Constructor.
"""Constructor.
...
@@ -300,6 +302,8 @@ class MaskRCNNBoxPredictor(BoxPredictor):
...
@@ -300,6 +302,8 @@ class MaskRCNNBoxPredictor(BoxPredictor):
ops.
ops.
predict_instance_masks: Whether to predict object masks inside detection
predict_instance_masks: Whether to predict object masks inside detection
boxes.
boxes.
mask_height: Desired output mask height. The default value is 14.
mask_width: Desired output mask width. The default value is 14.
mask_prediction_conv_depth: The depth for the first conv2d_transpose op
mask_prediction_conv_depth: The depth for the first conv2d_transpose op
applied to the image_features in the mask prediciton branch.
applied to the image_features in the mask prediciton branch.
predict_keypoints: Whether to predict keypoints insde detection boxes.
predict_keypoints: Whether to predict keypoints insde detection boxes.
...
@@ -315,10 +319,10 @@ class MaskRCNNBoxPredictor(BoxPredictor):
...
@@ -315,10 +319,10 @@ class MaskRCNNBoxPredictor(BoxPredictor):
self
.
_dropout_keep_prob
=
dropout_keep_prob
self
.
_dropout_keep_prob
=
dropout_keep_prob
self
.
_conv_hyperparams
=
conv_hyperparams
self
.
_conv_hyperparams
=
conv_hyperparams
self
.
_predict_instance_masks
=
predict_instance_masks
self
.
_predict_instance_masks
=
predict_instance_masks
self
.
_mask_height
=
mask_height
self
.
_mask_width
=
mask_width
self
.
_mask_prediction_conv_depth
=
mask_prediction_conv_depth
self
.
_mask_prediction_conv_depth
=
mask_prediction_conv_depth
self
.
_predict_keypoints
=
predict_keypoints
self
.
_predict_keypoints
=
predict_keypoints
if
self
.
_predict_instance_masks
:
raise
ValueError
(
'Mask prediction is unimplemented.'
)
if
self
.
_predict_keypoints
:
if
self
.
_predict_keypoints
:
raise
ValueError
(
'Keypoint prediction is unimplemented.'
)
raise
ValueError
(
'Keypoint prediction is unimplemented.'
)
if
((
self
.
_predict_instance_masks
or
self
.
_predict_keypoints
)
and
if
((
self
.
_predict_instance_masks
or
self
.
_predict_keypoints
)
and
...
@@ -339,6 +343,11 @@ class MaskRCNNBoxPredictor(BoxPredictor):
...
@@ -339,6 +343,11 @@ class MaskRCNNBoxPredictor(BoxPredictor):
have been folded into the batch dimension. Thus we output 1 for the
have been folded into the batch dimension. Thus we output 1 for the
anchors dimension.
anchors dimension.
Also optionally predicts instance masks.
The mask prediction head is based on the Mask RCNN paper with the following
modifications: We replace the deconvolution layer with a bilinear resize
and a convolution.
Args:
Args:
image_features: A float tensor of shape [batch_size, height, width,
image_features: A float tensor of shape [batch_size, height, width,
channels] containing features for a batch of images.
channels] containing features for a batch of images.
...
@@ -397,15 +406,18 @@ class MaskRCNNBoxPredictor(BoxPredictor):
...
@@ -397,15 +406,18 @@ class MaskRCNNBoxPredictor(BoxPredictor):
if
self
.
_predict_instance_masks
:
if
self
.
_predict_instance_masks
:
with
slim
.
arg_scope
(
self
.
_conv_hyperparams
):
with
slim
.
arg_scope
(
self
.
_conv_hyperparams
):
upsampled_features
=
slim
.
conv2d_transpose
(
upsampled_features
=
tf
.
image
.
resize_bilinear
(
image_features
,
image_features
,
[
self
.
_mask_height
,
self
.
_mask_width
],
align_corners
=
True
)
upsampled_features
=
slim
.
conv2d
(
upsampled_features
,
num_outputs
=
self
.
_mask_prediction_conv_depth
,
num_outputs
=
self
.
_mask_prediction_conv_depth
,
kernel_size
=
[
2
,
2
],
kernel_size
=
[
2
,
2
])
stride
=
2
)
mask_predictions
=
slim
.
conv2d
(
upsampled_features
,
mask_predictions
=
slim
.
conv2d
(
upsampled_features
,
num_outputs
=
self
.
num_classes
,
num_outputs
=
self
.
num_classes
,
activation_fn
=
None
,
activation_fn
=
None
,
kernel_size
=
[
1
,
1
])
kernel_size
=
[
3
,
3
])
instance_masks
=
tf
.
expand_dims
(
tf
.
transpose
(
mask_predictions
,
instance_masks
=
tf
.
expand_dims
(
tf
.
transpose
(
mask_predictions
,
perm
=
[
0
,
3
,
1
,
2
]),
perm
=
[
0
,
3
,
1
,
2
]),
axis
=
1
,
axis
=
1
,
...
@@ -437,7 +449,8 @@ class ConvolutionalBoxPredictor(BoxPredictor):
...
@@ -437,7 +449,8 @@ class ConvolutionalBoxPredictor(BoxPredictor):
dropout_keep_prob
,
dropout_keep_prob
,
kernel_size
,
kernel_size
,
box_code_size
,
box_code_size
,
apply_sigmoid_to_scores
=
False
):
apply_sigmoid_to_scores
=
False
,
class_prediction_bias_init
=
0.0
):
"""Constructor.
"""Constructor.
Args:
Args:
...
@@ -464,6 +477,8 @@ class ConvolutionalBoxPredictor(BoxPredictor):
...
@@ -464,6 +477,8 @@ class ConvolutionalBoxPredictor(BoxPredictor):
box_code_size: Size of encoding for each box.
box_code_size: Size of encoding for each box.
apply_sigmoid_to_scores: if True, apply the sigmoid on the output
apply_sigmoid_to_scores: if True, apply the sigmoid on the output
class_predictions.
class_predictions.
class_prediction_bias_init: constant value to initialize bias of the last
conv2d layer before class prediction.
Raises:
Raises:
ValueError: if min_depth > max_depth.
ValueError: if min_depth > max_depth.
...
@@ -480,6 +495,7 @@ class ConvolutionalBoxPredictor(BoxPredictor):
...
@@ -480,6 +495,7 @@ class ConvolutionalBoxPredictor(BoxPredictor):
self
.
_box_code_size
=
box_code_size
self
.
_box_code_size
=
box_code_size
self
.
_dropout_keep_prob
=
dropout_keep_prob
self
.
_dropout_keep_prob
=
dropout_keep_prob
self
.
_apply_sigmoid_to_scores
=
apply_sigmoid_to_scores
self
.
_apply_sigmoid_to_scores
=
apply_sigmoid_to_scores
self
.
_class_prediction_bias_init
=
class_prediction_bias_init
def
_predict
(
self
,
image_features
,
num_predictions_per_location
):
def
_predict
(
self
,
image_features
,
num_predictions_per_location
):
"""Computes encoded object locations and corresponding confidences.
"""Computes encoded object locations and corresponding confidences.
...
@@ -499,15 +515,16 @@ class ConvolutionalBoxPredictor(BoxPredictor):
...
@@ -499,15 +515,16 @@ class ConvolutionalBoxPredictor(BoxPredictor):
[batch_size, num_anchors, num_classes + 1] representing the class
[batch_size, num_anchors, num_classes + 1] representing the class
predictions for the proposals.
predictions for the proposals.
"""
"""
features_depth
=
static_shape
.
get_depth
(
image_features
.
get_shape
())
depth
=
max
(
min
(
features_depth
,
self
.
_max_depth
),
self
.
_min_depth
)
# Add a slot for the background class.
# Add a slot for the background class.
num_class_slots
=
self
.
num_classes
+
1
num_class_slots
=
self
.
num_classes
+
1
net
=
image_features
net
=
image_features
with
slim
.
arg_scope
(
self
.
_conv_hyperparams
),
\
with
slim
.
arg_scope
(
self
.
_conv_hyperparams
),
\
slim
.
arg_scope
([
slim
.
dropout
],
is_training
=
self
.
_is_training
):
slim
.
arg_scope
([
slim
.
dropout
],
is_training
=
self
.
_is_training
):
# Add additional conv layers before the predictor.
# Add additional conv layers before the class predictor.
features_depth
=
static_shape
.
get_depth
(
image_features
.
get_shape
())
depth
=
max
(
min
(
features_depth
,
self
.
_max_depth
),
self
.
_min_depth
)
tf
.
logging
.
info
(
'depth of additional conv before box predictor: {}'
.
format
(
depth
))
if
depth
>
0
and
self
.
_num_layers_before_predictor
>
0
:
if
depth
>
0
and
self
.
_num_layers_before_predictor
>
0
:
for
i
in
range
(
self
.
_num_layers_before_predictor
):
for
i
in
range
(
self
.
_num_layers_before_predictor
):
net
=
slim
.
conv2d
(
net
=
slim
.
conv2d
(
...
@@ -522,7 +539,9 @@ class ConvolutionalBoxPredictor(BoxPredictor):
...
@@ -522,7 +539,9 @@ class ConvolutionalBoxPredictor(BoxPredictor):
net
=
slim
.
dropout
(
net
,
keep_prob
=
self
.
_dropout_keep_prob
)
net
=
slim
.
dropout
(
net
,
keep_prob
=
self
.
_dropout_keep_prob
)
class_predictions_with_background
=
slim
.
conv2d
(
class_predictions_with_background
=
slim
.
conv2d
(
net
,
num_predictions_per_location
*
num_class_slots
,
net
,
num_predictions_per_location
*
num_class_slots
,
[
self
.
_kernel_size
,
self
.
_kernel_size
],
scope
=
'ClassPredictor'
)
[
self
.
_kernel_size
,
self
.
_kernel_size
],
scope
=
'ClassPredictor'
,
biases_initializer
=
tf
.
constant_initializer
(
self
.
_class_prediction_bias_init
))
if
self
.
_apply_sigmoid_to_scores
:
if
self
.
_apply_sigmoid_to_scores
:
class_predictions_with_background
=
tf
.
sigmoid
(
class_predictions_with_background
=
tf
.
sigmoid
(
class_predictions_with_background
)
class_predictions_with_background
)
...
...
research/object_detection/core/data_parser.py
0 → 100644
View file @
e7de233b
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Interface for data parsers.
Data parser parses input data and returns a dictionary of numpy arrays
keyed by the entries in standard_fields.py. Since the parser parses records
to numpy arrays (materialized tensors) directly, it is used to read data for
evaluation/visualization; to parse the data during training, DataDecoder should
be used.
"""
from
abc
import
ABCMeta
from
abc
import
abstractmethod
class DataToNumpyParser(ABCMeta('_DataToNumpyParserBase', (object,), {})):
  """Abstract interface for parsers that turn input data into numpy data.

  The base class is created by calling ABCMeta directly so that the metaclass
  takes effect on both Python 2 and Python 3. A class-body
  `__metaclass__ = ABCMeta` assignment alone is ignored by Python 3, which
  leaves @abstractmethod unenforced and lets this interface be instantiated.
  """

  # Kept so the attribute stays visible to any code that inspects it; the
  # metaclass itself is supplied through the base class above.
  __metaclass__ = ABCMeta

  @abstractmethod
  def parse(self, input_data):
    """Parses input and returns a numpy array or a dictionary of numpy arrays.

    Args:
      input_data: an input data

    Returns:
      A numpy array or a dictionary of numpy arrays or None, if input
      cannot be parsed.
    """
    pass
research/object_detection/core/keypoint_ops.py
View file @
e7de233b
...
@@ -229,3 +229,54 @@ def flip_horizontal(keypoints, flip_point, flip_permutation, scope=None):
...
@@ -229,3 +229,54 @@ def flip_horizontal(keypoints, flip_point, flip_permutation, scope=None):
new_keypoints
=
tf
.
concat
([
v
,
u
],
2
)
new_keypoints
=
tf
.
concat
([
v
,
u
],
2
)
new_keypoints
=
tf
.
transpose
(
new_keypoints
,
[
1
,
0
,
2
])
new_keypoints
=
tf
.
transpose
(
new_keypoints
,
[
1
,
0
,
2
])
return
new_keypoints
return
new_keypoints
def flip_vertical(keypoints, flip_point, flip_permutation, scope=None):
  """Mirrors keypoints vertically about flip_point and reorders them.

  The first coordinate of every keypoint (the y coordinate) is reflected
  around flip_point, i.e. y becomes 2 * flip_point - y, while the second
  coordinate is left untouched. The keypoints of every instance are also
  reordered according to flip_permutation.

  Args:
    keypoints: a tensor of shape [num_instances, num_keypoints, 2]
    flip_point: (float) scalar tensor representing the y coordinate to flip the
      keypoints around.
    flip_permutation: rank 1 int32 tensor containing the keypoint flip
      permutation. This specifies the mapping from original keypoint indices
      to the flipped keypoint indices. This is used primarily for keypoints
      that are not reflection invariant. E.g. Suppose there are 3 keypoints
      representing ['head', 'right_eye', 'left_eye'], then a flip_permutation
      of [0, 2, 1] swaps the 'right_eye' and 'left_eye' entries after the
      flip.
    scope: name scope.

  Returns:
    new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
  """
  with tf.name_scope(scope, 'FlipVertical'):
    # Move the keypoint axis to the front so that tf.gather (which indexes the
    # first axis) applies the permutation to keypoints, not to instances.
    keypoint_major = tf.transpose(keypoints, [1, 0, 2])
    permuted = tf.gather(keypoint_major, flip_permutation)
    y_coords, x_coords = tf.split(
        value=permuted, num_or_size_splits=2, axis=2)
    # Reflect y around flip_point; x is carried through unchanged.
    mirrored_y = flip_point * 2.0 - y_coords
    flipped = tf.concat([mirrored_y, x_coords], 2)
    # Restore the original [num_instances, num_keypoints, 2] layout.
    return tf.transpose(flipped, [1, 0, 2])
def rot90(keypoints, scope=None):
  """Rotates the keypoints counter-clockwise by 90 degrees.

  The two coordinates of every keypoint are swapped, and the new first
  coordinate c is then replaced by 1.0 - c.
  NOTE(review): the `1.0 - c` step presumably assumes coordinates normalized
  to [0, 1] -- confirm against callers.

  Args:
    keypoints: a tensor of shape [num_instances, num_keypoints, 2]
    scope: name scope.

  Returns:
    new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
  """
  with tf.name_scope(scope, 'Rot90'):
    # Every op below acts only on the last (coordinate) axis, so the result
    # does not depend on the order of the instance/keypoint axes. The pair of
    # [1, 0, 2] transposes that used to wrap this computation cancelled each
    # other out and has been dropped.
    v, u = tf.split(
        value=keypoints[:, :, ::-1], num_or_size_splits=2, axis=2)
    v = 1.0 - v
    new_keypoints = tf.concat([v, u], 2)
    return new_keypoints
research/object_detection/core/keypoint_ops_test.py
View file @
e7de233b
...
@@ -163,6 +163,38 @@ class KeypointOpsTest(tf.test.TestCase):
...
@@ -163,6 +163,38 @@ class KeypointOpsTest(tf.test.TestCase):
output_
,
expected_keypoints_
=
sess
.
run
([
output
,
expected_keypoints
])
output_
,
expected_keypoints_
=
sess
.
run
([
output
,
expected_keypoints
])
self
.
assertAllClose
(
output_
,
expected_keypoints_
)
self
.
assertAllClose
(
output_
,
expected_keypoints_
)
def test_flip_vertical(self):
  # Two instances with three keypoints each.
  input_keypoints = tf.constant([
      [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
      [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]]
  ])
  # Keep keypoint 0 in place and swap keypoints 1 and 2.
  permutation = [0, 2, 1]
  # Flipping around 0.5 maps the first coordinate c to 1 - c; the second
  # coordinate is unchanged and the last two keypoints trade places.
  expected = tf.constant([
      [[0.9, 0.1], [0.7, 0.3], [0.8, 0.2]],
      [[0.6, 0.4], [0.4, 0.6], [0.5, 0.5]],
  ])
  flipped = keypoint_ops.flip_vertical(input_keypoints, 0.5, permutation)

  with self.test_session() as sess:
    flipped_value, expected_value = sess.run([flipped, expected])
    self.assertAllClose(flipped_value, expected_value)
def test_rot90(self):
  # Two instances with three keypoints each.
  input_keypoints = tf.constant([
      [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
      [[0.4, 0.6], [0.5, 0.6], [0.6, 0.7]]
  ])
  # rot90 swaps the two coordinates and then maps the new first coordinate c
  # to 1 - c, e.g. [0.4, 0.6] -> [1 - 0.6, 0.4] = [0.4, 0.4].
  expected = tf.constant([
      [[0.9, 0.1], [0.8, 0.2], [0.7, 0.3]],
      [[0.4, 0.4], [0.4, 0.5], [0.3, 0.6]],
  ])
  rotated = keypoint_ops.rot90(input_keypoints)

  with self.test_session() as sess:
    rotated_value, expected_value = sess.run([rotated, expected])
    self.assertAllClose(rotated_value, expected_value)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
tf
.
test
.
main
()
tf
.
test
.
main
()
research/object_detection/core/losses.py
View file @
e7de233b
...
@@ -72,7 +72,7 @@ class Loss(object):
...
@@ -72,7 +72,7 @@ class Loss(object):
@
abstractmethod
@
abstractmethod
def
_compute_loss
(
self
,
prediction_tensor
,
target_tensor
,
**
params
):
def
_compute_loss
(
self
,
prediction_tensor
,
target_tensor
,
**
params
):
"""Method to be overriden by implementations.
"""Method to be overrid
d
en by implementations.
Args:
Args:
prediction_tensor: a tensor representing predicted quantities
prediction_tensor: a tensor representing predicted quantities
...
@@ -238,17 +238,85 @@ class WeightedSigmoidClassificationLoss(Loss):
...
@@ -238,17 +238,85 @@ class WeightedSigmoidClassificationLoss(Loss):
return
tf
.
reduce_sum
(
per_entry_cross_ent
*
weights
)
return
tf
.
reduce_sum
(
per_entry_cross_ent
*
weights
)
class SigmoidFocalClassificationLoss(Loss):
  """Focal variant of the sigmoid cross entropy classification loss.

  Every per-entry sigmoid cross entropy term is scaled by a modulating factor
  (1 - p_t) ^ gamma and, optionally, by an alpha balancing weight, so entries
  that are already classified well contribute less than hard ones. See
  https://arxiv.org/pdf/1708.02002.pdf for the loss definition.
  """

  def __init__(self, anchorwise_output=False, gamma=2.0, alpha=0.25):
    """Constructor.

    Args:
      anchorwise_output: Outputs loss per anchor. (default False)
      gamma: exponent of the modulating factor (1 - p_t) ^ gamma. A falsy
        value (e.g. 0.0 or None) disables the modulating factor.
      alpha: optional alpha weighting factor to balance positives vs
        negatives; positives are weighted by alpha and negatives by
        1 - alpha. None disables the weighting.
    """
    self._gamma = gamma
    self._alpha = alpha
    self._anchorwise_output = anchorwise_output

  def _compute_loss(self, prediction_tensor, target_tensor, weights,
                    class_indices=None):
    """Compute loss function.

    Args:
      prediction_tensor: A float tensor of shape [batch_size, num_anchors,
        num_classes] representing the predicted logits for each class
      target_tensor: A float tensor of shape [batch_size, num_anchors,
        num_classes] representing one-hot encoded classification targets
      weights: a float tensor of shape [batch_size, num_anchors]
      class_indices: (Optional) A 1-D integer tensor of class indices.
        If provided, computes loss only for the specified class indices.

    Returns:
      loss: a (scalar) tensor representing the value of the loss function
            or a float tensor of shape [batch_size, num_anchors]
    """
    # Add a trailing axis so the per-anchor weights broadcast over classes.
    weights = tf.expand_dims(weights, 2)
    if class_indices is not None:
      num_classes = tf.shape(prediction_tensor)[2]
      # Presumably a 0/1 vector marking the selected classes -- verify
      # against ops.indices_to_dense_vector.
      class_mask = ops.indices_to_dense_vector(class_indices, num_classes)
      weights = weights * tf.reshape(class_mask, [1, 1, -1])

    cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=target_tensor, logits=prediction_tensor)
    probabilities = tf.sigmoid(prediction_tensor)
    # p_t: the predicted probability where the target is 1 and one minus the
    # predicted probability where the target is 0.
    positive_part = target_tensor * probabilities
    negative_part = (1 - target_tensor) * (1 - probabilities)
    p_t = positive_part + negative_part

    if self._gamma:
      modulating_factor = tf.pow(1.0 - p_t, self._gamma)
    else:
      modulating_factor = 1.0

    if self._alpha is None:
      alpha_weight_factor = 1.0
    else:
      alpha_weight_factor = (target_tensor * self._alpha +
                             (1 - target_tensor) * (1 - self._alpha))

    focal_loss = modulating_factor * alpha_weight_factor * cross_entropy
    weighted_focal_loss = focal_loss * weights
    if not self._anchorwise_output:
      return tf.reduce_sum(weighted_focal_loss)
    # Sum over the class axis only, keeping one value per anchor.
    return tf.reduce_sum(weighted_focal_loss, 2)
class
WeightedSoftmaxClassificationLoss
(
Loss
):
class
WeightedSoftmaxClassificationLoss
(
Loss
):
"""Softmax loss function."""
"""Softmax loss function."""
def
__init__
(
self
,
anchorwise_output
=
False
):
def
__init__
(
self
,
anchorwise_output
=
False
,
logit_scale
=
1.0
):
"""Constructor.
"""Constructor.
Args:
Args:
anchorwise_output: Whether to output loss per anchor (default False)
anchorwise_output: Whether to output loss per anchor (default False)
logit_scale: When this value is high, the prediction is "diffused" and
when this value is low, the prediction is made peakier.
(default 1.0)
"""
"""
self
.
_anchorwise_output
=
anchorwise_output
self
.
_anchorwise_output
=
anchorwise_output
self
.
_logit_scale
=
logit_scale
def
_compute_loss
(
self
,
prediction_tensor
,
target_tensor
,
weights
):
def
_compute_loss
(
self
,
prediction_tensor
,
target_tensor
,
weights
):
"""Compute loss function.
"""Compute loss function.
...
@@ -264,6 +332,8 @@ class WeightedSoftmaxClassificationLoss(Loss):
...
@@ -264,6 +332,8 @@ class WeightedSoftmaxClassificationLoss(Loss):
loss: a (scalar) tensor representing the value of the loss function
loss: a (scalar) tensor representing the value of the loss function
"""
"""
num_classes
=
prediction_tensor
.
get_shape
().
as_list
()[
-
1
]
num_classes
=
prediction_tensor
.
get_shape
().
as_list
()[
-
1
]
prediction_tensor
=
tf
.
divide
(
prediction_tensor
,
self
.
_logit_scale
,
name
=
'scale_logit'
)
per_row_cross_ent
=
(
tf
.
nn
.
softmax_cross_entropy_with_logits
(
per_row_cross_ent
=
(
tf
.
nn
.
softmax_cross_entropy_with_logits
(
labels
=
tf
.
reshape
(
target_tensor
,
[
-
1
,
num_classes
]),
labels
=
tf
.
reshape
(
target_tensor
,
[
-
1
,
num_classes
]),
logits
=
tf
.
reshape
(
prediction_tensor
,
[
-
1
,
num_classes
])))
logits
=
tf
.
reshape
(
prediction_tensor
,
[
-
1
,
num_classes
])))
...
...
research/object_detection/core/losses_test.py
View file @
e7de233b
...
@@ -225,6 +225,286 @@ class WeightedSigmoidClassificationLossTest(tf.test.TestCase):
...
@@ -225,6 +225,286 @@ class WeightedSigmoidClassificationLossTest(tf.test.TestCase):
self
.
assertAllClose
(
loss_output
,
exp_loss
)
self
.
assertAllClose
(
loss_output
,
exp_loss
)
def _logit(probability):
  """Returns the log-odds of `probability`, i.e. log(p / (1 - p)).

  Args:
    probability: a number strictly between 0 and 1; at the endpoints the
      division or the logarithm is undefined and raises.

  Returns:
    The natural logarithm of probability / (1 - probability).
  """
  odds = probability / (1. - probability)
  return math.log(odds)
class
SigmoidFocalClassificationLossTest
(
tf
.
test
.
TestCase
):
def testEasyExamplesProduceSmallLossComparedToSigmoidXEntropy(self):
  # Confident predictions that agree with their targets: three positives
  # followed by three negatives, one class per anchor.
  easy_probabilities = (0.97, 0.90, 0.73, 0.27, 0.09, 0.03)
  prediction_tensor = tf.constant(
      [[[_logit(p)] for p in easy_probabilities]], tf.float32)
  target_tensor = tf.constant([[[1], [1], [1], [0], [0], [0]]], tf.float32)
  weights = tf.constant([[1, 1, 1, 1, 1, 1]], tf.float32)

  focal_loss_op = losses.SigmoidFocalClassificationLoss(
      anchorwise_output=True, gamma=2.0, alpha=None)
  sigmoid_loss_op = losses.WeightedSigmoidClassificationLoss(
      anchorwise_output=True)
  focal_loss_tensor = focal_loss_op(
      prediction_tensor, target_tensor, weights=weights)
  sigmoid_loss_tensor = sigmoid_loss_op(
      prediction_tensor, target_tensor, weights=weights)

  with self.test_session() as sess:
    sigmoid_loss, focal_loss = sess.run(
        [sigmoid_loss_tensor, focal_loss_tensor])
    # Round the sigmoid/focal ratio down to its power of ten.
    magnitude = np.floor(np.log10(sigmoid_loss / focal_loss))
    order_of_ratio = np.power(10, magnitude)
    self.assertAllClose(order_of_ratio, [[1000, 100, 10, 10, 100, 1000]])
def testHardExamplesProduceLossComparableToSigmoidXEntropy(self):
  # Predictions close to 0.5: three positives followed by two negatives.
  hard_probabilities = (0.55, 0.52, 0.50, 0.48, 0.45)
  prediction_tensor = tf.constant(
      [[[_logit(p)] for p in hard_probabilities]], tf.float32)
  target_tensor = tf.constant([[[1], [1], [1], [0], [0]]], tf.float32)
  weights = tf.constant([[1, 1, 1, 1, 1]], tf.float32)

  focal_loss_op = losses.SigmoidFocalClassificationLoss(
      anchorwise_output=True, gamma=2.0, alpha=None)
  sigmoid_loss_op = losses.WeightedSigmoidClassificationLoss(
      anchorwise_output=True)
  focal_loss_tensor = focal_loss_op(
      prediction_tensor, target_tensor, weights=weights)
  sigmoid_loss_tensor = sigmoid_loss_op(
      prediction_tensor, target_tensor, weights=weights)

  with self.test_session() as sess:
    sigmoid_loss, focal_loss = sess.run(
        [sigmoid_loss_tensor, focal_loss_tensor])
    # Round the sigmoid/focal ratio down to its power of ten.
    magnitude = np.floor(np.log10(sigmoid_loss / focal_loss))
    order_of_ratio = np.power(10, magnitude)
    self.assertAllClose(order_of_ratio, [[1., 1., 1., 1., 1.]])
def testNonAnchorWiseOutputComparableToSigmoidXEntropy(self):
  # Predictions close to 0.5: three positives followed by two negatives.
  hard_probabilities = (0.55, 0.52, 0.50, 0.48, 0.45)
  prediction_tensor = tf.constant(
      [[[_logit(p)] for p in hard_probabilities]], tf.float32)
  target_tensor = tf.constant([[[1], [1], [1], [0], [0]]], tf.float32)
  weights = tf.constant([[1, 1, 1, 1, 1]], tf.float32)

  # Both losses are built with anchorwise_output=False here.
  focal_loss_op = losses.SigmoidFocalClassificationLoss(
      anchorwise_output=False, gamma=2.0, alpha=None)
  sigmoid_loss_op = losses.WeightedSigmoidClassificationLoss(
      anchorwise_output=False)
  focal_loss_tensor = focal_loss_op(
      prediction_tensor, target_tensor, weights=weights)
  sigmoid_loss_tensor = sigmoid_loss_op(
      prediction_tensor, target_tensor, weights=weights)

  with self.test_session() as sess:
    sigmoid_loss, focal_loss = sess.run(
        [sigmoid_loss_tensor, focal_loss_tensor])
    # Round the sigmoid/focal ratio down to its power of ten.
    magnitude = np.floor(np.log10(sigmoid_loss / focal_loss))
    order_of_ratio = np.power(10, magnitude)
    self.assertAlmostEqual(order_of_ratio, 1.)
def testIgnoreNegativeExampleLossViaAlphaMultiplier(self):
  # Predictions close to 0.5: three positives followed by two negatives.
  hard_probabilities = (0.55, 0.52, 0.50, 0.48, 0.45)
  prediction_tensor = tf.constant(
      [[[_logit(p)] for p in hard_probabilities]], tf.float32)
  target_tensor = tf.constant([[[1], [1], [1], [0], [0]]], tf.float32)
  weights = tf.constant([[1, 1, 1, 1, 1]], tf.float32)

  # alpha=1.0 gives negatives a weight of 1 - alpha = 0.
  focal_loss_op = losses.SigmoidFocalClassificationLoss(
      anchorwise_output=True, gamma=2.0, alpha=1.0)
  sigmoid_loss_op = losses.WeightedSigmoidClassificationLoss(
      anchorwise_output=True)
  focal_loss_tensor = focal_loss_op(
      prediction_tensor, target_tensor, weights=weights)
  sigmoid_loss_tensor = sigmoid_loss_op(
      prediction_tensor, target_tensor, weights=weights)

  with self.test_session() as sess:
    sigmoid_loss, focal_loss = sess.run(
        [sigmoid_loss_tensor, focal_loss_tensor])
    # The two negative anchors must contribute nothing.
    self.assertAllClose(focal_loss[0][3:], [0., 0.])
    # For the three positives, round the sigmoid/focal ratio down to its
    # power of ten.
    positive_ratio = sigmoid_loss[0][:3] / focal_loss[0][:3]
    order_of_ratio = np.power(10, np.floor(np.log10(positive_ratio)))
    self.assertAllClose(order_of_ratio, [1., 1., 1.])
def testIgnorePositiveExampleLossViaAlphaMultiplier(self):
  # Predictions close to 0.5: three positives followed by two negatives.
  hard_probabilities = (0.55, 0.52, 0.50, 0.48, 0.45)
  prediction_tensor = tf.constant(
      [[[_logit(p)] for p in hard_probabilities]], tf.float32)
  target_tensor = tf.constant([[[1], [1], [1], [0], [0]]], tf.float32)
  weights = tf.constant([[1, 1, 1, 1, 1]], tf.float32)

  # alpha=0.0 gives positives a weight of alpha = 0.
  focal_loss_op = losses.SigmoidFocalClassificationLoss(
      anchorwise_output=True, gamma=2.0, alpha=0.0)
  sigmoid_loss_op = losses.WeightedSigmoidClassificationLoss(
      anchorwise_output=True)
  focal_loss_tensor = focal_loss_op(
      prediction_tensor, target_tensor, weights=weights)
  sigmoid_loss_tensor = sigmoid_loss_op(
      prediction_tensor, target_tensor, weights=weights)

  with self.test_session() as sess:
    sigmoid_loss, focal_loss = sess.run(
        [sigmoid_loss_tensor, focal_loss_tensor])
    # The three positive anchors must contribute nothing.
    self.assertAllClose(focal_loss[0][:3], [0., 0., 0.])
    # For the two negatives, round the sigmoid/focal ratio down to its
    # power of ten.
    negative_ratio = sigmoid_loss[0][3:] / focal_loss[0][3:]
    order_of_ratio = np.power(10, np.floor(np.log10(negative_ratio)))
    self.assertAllClose(order_of_ratio, [1., 1.])
def testSimilarToSigmoidXEntropyWithHalfAlphaAndZeroGammaUpToAScale(self):
  """Checks focal loss with alpha=0.5, gamma=0 is half the sigmoid loss."""
  logits = [[[-100, 100, -100],
             [100, -100, -100],
             [100, 0, -100],
             [-100, -100, 100]],
            [[-100, 0, 100],
             [-100, 100, -100],
             [100, 100, 100],
             [0, 0, -1]]]
  labels = [[[0, 1, 0],
             [1, 0, 0],
             [1, 0, 0],
             [0, 0, 1]],
            [[0, 0, 1],
             [0, 1, 0],
             [1, 1, 1],
             [1, 0, 0]]]
  prediction_tensor = tf.constant(logits, tf.float32)
  target_tensor = tf.constant(labels, tf.float32)
  # The last anchor of the second image carries zero weight.
  weights = tf.constant([[1, 1, 1, 1],
                         [1, 1, 1, 0]], tf.float32)

  focal_loss_op = losses.SigmoidFocalClassificationLoss(
      anchorwise_output=True, alpha=0.5, gamma=0.0)
  sigmoid_loss_op = losses.WeightedSigmoidClassificationLoss(
      anchorwise_output=True)
  focal_loss_tensor = focal_loss_op(
      prediction_tensor, target_tensor, weights=weights)
  sigmoid_loss_tensor = sigmoid_loss_op(
      prediction_tensor, target_tensor, weights=weights)

  with self.test_session() as sess:
    sigmoid_loss, focal_loss = sess.run(
        [sigmoid_loss_tensor, focal_loss_tensor])
    self.assertAllClose(sigmoid_loss, focal_loss * 2)
def testSameAsSigmoidXEntropyWithNoAlphaAndZeroGamma(self):
  """Checks focal loss with alpha=None, gamma=0 equals the sigmoid loss."""
  logits = [[[-100, 100, -100],
             [100, -100, -100],
             [100, 0, -100],
             [-100, -100, 100]],
            [[-100, 0, 100],
             [-100, 100, -100],
             [100, 100, 100],
             [0, 0, -1]]]
  labels = [[[0, 1, 0],
             [1, 0, 0],
             [1, 0, 0],
             [0, 0, 1]],
            [[0, 0, 1],
             [0, 1, 0],
             [1, 1, 1],
             [1, 0, 0]]]
  prediction_tensor = tf.constant(logits, tf.float32)
  target_tensor = tf.constant(labels, tf.float32)
  # The last anchor of the second image carries zero weight.
  weights = tf.constant([[1, 1, 1, 1],
                         [1, 1, 1, 0]], tf.float32)

  focal_loss_op = losses.SigmoidFocalClassificationLoss(
      anchorwise_output=True, alpha=None, gamma=0.0)
  sigmoid_loss_op = losses.WeightedSigmoidClassificationLoss(
      anchorwise_output=True)
  focal_loss_tensor = focal_loss_op(
      prediction_tensor, target_tensor, weights=weights)
  sigmoid_loss_tensor = sigmoid_loss_op(
      prediction_tensor, target_tensor, weights=weights)

  with self.test_session() as sess:
    sigmoid_loss, focal_loss = sess.run(
        [sigmoid_loss_tensor, focal_loss_tensor])
    self.assertAllClose(sigmoid_loss, focal_loss)
def testExpectedLossWithAlphaOneAndZeroGamma(self):
  """Checks the summed focal loss for alpha=1.0, gamma=0 on 0.5 predictions.

  Each of the 8 anchors (2 images x 4 anchors) has exactly one positive
  class, and the expected total counts only those 8 positive entries.
  """
  # All zeros correspond to 0.5 probability.
  prediction_tensor = tf.constant([[[0, 0, 0]] * 4] * 2, tf.float32)
  target_tensor = tf.constant([[[0, 1, 0],
                                [1, 0, 0],
                                [1, 0, 0],
                                [0, 0, 1]],
                               [[0, 0, 1],
                                [0, 1, 0],
                                [1, 0, 0],
                                [1, 0, 0]]], tf.float32)
  weights = tf.constant([[1, 1, 1, 1],
                         [1, 1, 1, 1]], tf.float32)

  focal_loss_op = losses.SigmoidFocalClassificationLoss(
      anchorwise_output=False, alpha=1.0, gamma=0.0)
  focal_loss_tensor = focal_loss_op(
      prediction_tensor, target_tensor, weights=weights)

  expected_loss = (-math.log(.5) *  # x-entropy per class per anchor
                   1.0 *  # alpha
                   8)  # positives from 8 anchors
  with self.test_session() as sess:
    focal_loss = sess.run(focal_loss_tensor)
    self.assertAllClose(expected_loss, focal_loss)
def testExpectedLossWithAlpha75AndZeroGamma(self):
  """Checks the summed focal loss for alpha=0.75, gamma=0 on 0.5 predictions.

  Each of the 8 anchors (2 images x 4 anchors) has one positive and two
  negative classes; positives are scaled by 0.75 and negatives by 0.25.
  """
  # All zeros correspond to 0.5 probability.
  prediction_tensor = tf.constant([[[0, 0, 0]] * 4] * 2, tf.float32)
  target_tensor = tf.constant([[[0, 1, 0],
                                [1, 0, 0],
                                [1, 0, 0],
                                [0, 0, 1]],
                               [[0, 0, 1],
                                [0, 1, 0],
                                [1, 0, 0],
                                [1, 0, 0]]], tf.float32)
  weights = tf.constant([[1, 1, 1, 1],
                         [1, 1, 1, 1]], tf.float32)

  focal_loss_op = losses.SigmoidFocalClassificationLoss(
      anchorwise_output=False, alpha=0.75, gamma=0.0)
  focal_loss_tensor = focal_loss_op(
      prediction_tensor, target_tensor, weights=weights)

  expected_loss = (
      -math.log(.5) *  # x-entropy per class per anchor.
      ((0.75 *  # alpha for positives.
        8) +  # positives from 8 anchors.
       (0.25 *  # alpha for negatives.
        8 * 2)))  # negatives from 8 anchors for two classes.
  with self.test_session() as sess:
    focal_loss = sess.run(focal_loss_tensor)
    self.assertAllClose(expected_loss, focal_loss)
class
WeightedSoftmaxClassificationLossTest
(
tf
.
test
.
TestCase
):
class
WeightedSoftmaxClassificationLossTest
(
tf
.
test
.
TestCase
):
def
testReturnsCorrectLoss
(
self
):
def
testReturnsCorrectLoss
(
self
):
...
@@ -282,6 +562,39 @@ class WeightedSoftmaxClassificationLossTest(tf.test.TestCase):
...
@@ -282,6 +562,39 @@ class WeightedSoftmaxClassificationLossTest(tf.test.TestCase):
loss_output
=
sess
.
run
(
loss
)
loss_output
=
sess
.
run
(
loss
)
self
.
assertAllClose
(
loss_output
,
exp_loss
)
self
.
assertAllClose
(
loss_output
,
exp_loss
)
def testReturnsCorrectAnchorWiseLossWithHighLogitScaleSetting(self):
  """At very high logit_scale, all predictions will be ~0.33.

  The anchorwise softmax loss is therefore expected to equal -log(1/3) for
  every one of the 2 x 4 anchors, whatever the logits are.
  """
  # TODO(yonib): Also test logit_scale with anchorwise=False.
  logit_scale = 10e16
  logits = [[[-100, 100, -100],
             [100, -100, -100],
             [0, 0, -100],
             [-100, -100, 100]],
            [[-100, 0, 0],
             [-100, 100, -100],
             [-100, 100, -100],
             [100, -100, -100]]]
  labels = [[[0, 1, 0],
             [1, 0, 0],
             [1, 0, 0],
             [0, 0, 1]],
            [[0, 0, 1],
             [0, 1, 0],
             [0, 1, 0],
             [1, 0, 0]]]
  prediction_tensor = tf.constant(logits, tf.float32)
  target_tensor = tf.constant(labels, tf.float32)
  weights = tf.constant([[1, 1, 1, 1],
                         [1, 1, 1, 1]], tf.float32)

  loss_op = losses.WeightedSoftmaxClassificationLoss(
      anchorwise_output=True, logit_scale=logit_scale)
  loss = loss_op(prediction_tensor, target_tensor, weights=weights)

  uniform_distribution_loss = -math.log(.33333333333)
  per_image_row = [uniform_distribution_loss] * 4
  exp_loss = np.matrix([per_image_row, per_image_row])
  with self.test_session() as sess:
    loss_output = sess.run(loss)
    self.assertAllClose(loss_output, exp_loss)
class
BootstrappedSigmoidClassificationLossTest
(
tf
.
test
.
TestCase
):
class
BootstrappedSigmoidClassificationLossTest
(
tf
.
test
.
TestCase
):
...
...
research/object_detection/core/model.py
View file @
e7de233b
...
@@ -12,7 +12,6 @@
...
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
# ==============================================================================
# ==============================================================================
"""Abstract detection model.
"""Abstract detection model.
This file defines a generic base class for detection models. Programs that are
This file defines a generic base class for detection models. Programs that are
...
@@ -87,6 +86,18 @@ class DetectionModel(object):
...
@@ -87,6 +86,18 @@ class DetectionModel(object):
raise
RuntimeError
(
'Groundtruth tensor %s has not been provided'
,
field
)
raise
RuntimeError
(
'Groundtruth tensor %s has not been provided'
,
field
)
return
self
.
_groundtruth_lists
[
field
]
return
self
.
_groundtruth_lists
[
field
]
def groundtruth_has_field(self, field):
  """Reports whether a groundtruth entry is stored under `field`.

  Args:
    field: a string key, options are
      fields.BoxListFields.{boxes,classes,masks,keypoints}

  Returns:
    True if `field` is a key of the stored groundtruth, False otherwise.
  """
  is_present = field in self._groundtruth_lists
  return is_present
@
abstractmethod
@
abstractmethod
def
preprocess
(
self
,
inputs
):
def
preprocess
(
self
,
inputs
):
"""Input preprocessing.
"""Input preprocessing.
...
@@ -148,7 +159,8 @@ class DetectionModel(object):
...
@@ -148,7 +159,8 @@ class DetectionModel(object):
Outputs adhere to the following conventions:
Outputs adhere to the following conventions:
* Classes are integers in [0, num_classes); background classes are removed
* Classes are integers in [0, num_classes); background classes are removed
and the first non-background class is mapped to 0.
and the first non-background class is mapped to 0. If the model produces
class-agnostic detections, then no output is produced for classes.
* Boxes are to be interpreted as being in [y_min, x_min, y_max, x_max]
* Boxes are to be interpreted as being in [y_min, x_min, y_max, x_max]
format and normalized relative to the image window.
format and normalized relative to the image window.
* `num_detections` is provided for settings where detections are padded to a
* `num_detections` is provided for settings where detections are padded to a
...
@@ -168,6 +180,8 @@ class DetectionModel(object):
...
@@ -168,6 +180,8 @@ class DetectionModel(object):
detection_boxes: [batch, max_detections, 4]
detection_boxes: [batch, max_detections, 4]
detection_scores: [batch, max_detections]
detection_scores: [batch, max_detections]
detection_classes: [batch, max_detections]
detection_classes: [batch, max_detections]
(If a model is producing class-agnostic detections, this field may be
missing)
instance_masks: [batch, max_detections, image_height, image_width]
instance_masks: [batch, max_detections, image_height, image_width]
(optional)
(optional)
keypoints: [batch, max_detections, num_keypoints, 2] (optional)
keypoints: [batch, max_detections, num_keypoints, 2] (optional)
...
@@ -207,13 +221,13 @@ class DetectionModel(object):
...
@@ -207,13 +221,13 @@ class DetectionModel(object):
groundtruth_classes_list: a list of 2-D tf.float32 one-hot (or k-hot)
groundtruth_classes_list: a list of 2-D tf.float32 one-hot (or k-hot)
tensors of shape [num_boxes, num_classes] containing the class targets
tensors of shape [num_boxes, num_classes] containing the class targets
with the 0th index assumed to map to the first non-background class.
with the 0th index assumed to map to the first non-background class.
groundtruth_masks_list: a list of
2
-D tf.float32 tensors of
groundtruth_masks_list: a list of
3
-D tf.float32 tensors of
shape [
max_detection
s, height_in, width_in] containing instance
shape [
num_boxe
s, height_in, width_in] containing instance
masks with values in {0, 1}. If None, no masks are provided.
masks with values in {0, 1}. If None, no masks are provided.
Mask resolution `height_in`x`width_in` must agree with the resolution
Mask resolution `height_in`x`width_in` must agree with the resolution
of the input image tensor provided to the `preprocess` function.
of the input image tensor provided to the `preprocess` function.
groundtruth_keypoints_list: a list of
2
-D tf.float32 tensors of
groundtruth_keypoints_list: a list of
3
-D tf.float32 tensors of
shape [
batch, max_detection
s, num_keypoints, 2] containing keypoints.
shape [
num_boxe
s, num_keypoints, 2] containing keypoints.
Keypoints are assumed to be provided in normalized coordinates and
Keypoints are assumed to be provided in normalized coordinates and
missing keypoints should be encoded as NaN.
missing keypoints should be encoded as NaN.
"""
"""
...
...
research/object_detection/core/preprocessor.py
View file @
e7de233b
...
@@ -12,7 +12,6 @@
...
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
# ==============================================================================
# ==============================================================================
"""Preprocess images and bounding boxes for detection.
"""Preprocess images and bounding boxes for detection.
We perform two sets of operations in preprocessing stage:
We perform two sets of operations in preprocessing stage:
...
@@ -147,28 +146,12 @@ def normalize_image(image, original_minval, original_maxval, target_minval,
...
@@ -147,28 +146,12 @@ def normalize_image(image, original_minval, original_maxval, target_minval,
return
image
return
image
def
flip_boxes
(
boxes
):
def
retain_boxes_above_threshold
(
boxes
,
"""Left-right flip the boxes.
labels
,
label_scores
,
Args:
masks
=
None
,
boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
keypoints
=
None
,
Boxes are in normalized form meaning their coordinates vary
threshold
=
0.0
):
between [0, 1].
Each row is in the form of [ymin, xmin, ymax, xmax].
Returns:
Flipped boxes.
"""
# Flip boxes.
ymin
,
xmin
,
ymax
,
xmax
=
tf
.
split
(
value
=
boxes
,
num_or_size_splits
=
4
,
axis
=
1
)
flipped_xmin
=
tf
.
subtract
(
1.0
,
xmax
)
flipped_xmax
=
tf
.
subtract
(
1.0
,
xmin
)
flipped_boxes
=
tf
.
concat
([
ymin
,
flipped_xmin
,
ymax
,
flipped_xmax
],
1
)
return
flipped_boxes
def
retain_boxes_above_threshold
(
boxes
,
labels
,
label_scores
,
masks
=
None
,
keypoints
=
None
,
threshold
=
0.0
):
"""Retains boxes whose label score is above a given threshold.
"""Retains boxes whose label score is above a given threshold.
If the label score for a box is missing (represented by NaN), the box is
If the label score for a box is missing (represented by NaN), the box is
...
@@ -221,8 +204,68 @@ def retain_boxes_above_threshold(
...
@@ -221,8 +204,68 @@ def retain_boxes_above_threshold(
return
result
return
result
def
_flip_masks
(
masks
):
def
_flip_boxes_left_right
(
boxes
):
"""Left-right flips masks.
"""Left-right flip the boxes.
Args:
boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
Boxes are in normalized form meaning their coordinates vary
between [0, 1].
Each row is in the form of [ymin, xmin, ymax, xmax].
Returns:
Flipped boxes.
"""
ymin
,
xmin
,
ymax
,
xmax
=
tf
.
split
(
value
=
boxes
,
num_or_size_splits
=
4
,
axis
=
1
)
flipped_xmin
=
tf
.
subtract
(
1.0
,
xmax
)
flipped_xmax
=
tf
.
subtract
(
1.0
,
xmin
)
flipped_boxes
=
tf
.
concat
([
ymin
,
flipped_xmin
,
ymax
,
flipped_xmax
],
1
)
return
flipped_boxes
def _flip_boxes_up_down(boxes):
  """Mirrors normalized boxes vertically.

  Args:
    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
           Boxes are in normalized form meaning their coordinates vary
           between [0, 1].
           Each row is in the form of [ymin, xmin, ymax, xmax].

  Returns:
    Flipped boxes, with the same [ymin, xmin, ymax, xmax] column layout.
  """
  top, left, bottom, right = tf.split(
      value=boxes, num_or_size_splits=4, axis=1)
  # The old bottom edge becomes the new top edge and vice versa, so ymin
  # stays <= ymax after mirroring; the x columns are unchanged.
  new_top = tf.subtract(1.0, bottom)
  new_bottom = tf.subtract(1.0, top)
  return tf.concat([new_top, left, new_bottom, right], 1)
def _rot90_boxes(boxes):
  """Rotates normalized boxes by 90 degrees counter-clockwise.

  Args:
    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
           Boxes are in normalized form meaning their coordinates vary
           between [0, 1].
           Each row is in the form of [ymin, xmin, ymax, xmax].

  Returns:
    Rotated boxes, with the same [ymin, xmin, ymax, xmax] column layout.
  """
  top, left, bottom, right = tf.split(
      value=boxes, num_or_size_splits=4, axis=1)
  # New y coordinates are the mirrored old x coordinates (swapped so that
  # ymin stays <= ymax); new x coordinates are the old y coordinates.
  new_top = tf.subtract(1.0, right)
  new_bottom = tf.subtract(1.0, left)
  return tf.concat([new_top, top, new_bottom, bottom], 1)
def
_flip_masks_left_right
(
masks
):
"""Left-right flip masks.
Args:
Args:
masks: rank 3 float32 tensor with shape
masks: rank 3 float32 tensor with shape
...
@@ -235,14 +278,42 @@ def _flip_masks(masks):
...
@@ -235,14 +278,42 @@ def _flip_masks(masks):
return
masks
[:,
:,
::
-
1
]
return
masks
[:,
:,
::
-
1
]
def
random_horizontal_flip
(
def
_flip_masks_up_down
(
masks
):
image
,
"""Up-down flip masks.
Args:
masks: rank 3 float32 tensor with shape
[num_instances, height, width] representing instance masks.
Returns:
flipped masks: rank 3 float32 tensor with shape
[num_instances, height, width] representing instance masks.
"""
return
masks
[:,
::
-
1
,
:]
def _rot90_masks(masks):
  """Rotates instance masks by 90 degrees counter-clockwise.

  Args:
    masks: rank 3 float32 tensor with shape
      [num_instances, height, width] representing instance masks.

  Returns:
    rotated masks: rank 3 float32 tensor representing instance masks. The
      last two axes are transposed, so the shape is
      [num_instances, width, height] relative to the input.
  """
  # Swap the two spatial axes, then reverse the new row axis.
  transposed = tf.transpose(masks, [0, 2, 1])
  return transposed[:, ::-1, :]
def
random_horizontal_flip
(
image
,
boxes
=
None
,
boxes
=
None
,
masks
=
None
,
masks
=
None
,
keypoints
=
None
,
keypoints
=
None
,
keypoint_flip_permutation
=
None
,
keypoint_flip_permutation
=
None
,
seed
=
None
):
seed
=
None
):
"""Randomly
decides whether to mirror
the image and detections or
not
.
"""Randomly
flips
the image and detections
h
or
izontally
.
The probability of flipping the image is 50%.
The probability of flipping the image is 50%.
...
@@ -259,14 +330,14 @@ def random_horizontal_flip(
...
@@ -259,14 +330,14 @@ def random_horizontal_flip(
keypoints: (optional) rank 3 float32 tensor with shape
keypoints: (optional) rank 3 float32 tensor with shape
[num_instances, num_keypoints, 2]. The keypoints are in y-x
[num_instances, num_keypoints, 2]. The keypoints are in y-x
normalized coordinates.
normalized coordinates.
keypoint_flip_permutation: rank 1 int32 tensor containing keypoint flip
keypoint_flip_permutation: rank 1 int32 tensor containing
the
keypoint flip
permutation.
permutation.
seed: random seed
seed: random seed
Returns:
Returns:
image: image which is the same shape as input image.
image: image which is the same shape as input image.
If boxes, masks, keypoints, and keypoint_flip_permutation
is
not None,
If boxes, masks, keypoints, and keypoint_flip_permutation
are
not None,
the function also returns the following tensors.
the function also returns the following tensors.
boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
...
@@ -280,6 +351,7 @@ def random_horizontal_flip(
...
@@ -280,6 +351,7 @@ def random_horizontal_flip(
Raises:
Raises:
ValueError: if keypoints are provided but keypoint_flip_permutation is not.
ValueError: if keypoints are provided but keypoint_flip_permutation is not.
"""
"""
def
_flip_image
(
image
):
def
_flip_image
(
image
):
# flip image
# flip image
image_flipped
=
tf
.
image
.
flip_left_right
(
image
)
image_flipped
=
tf
.
image
.
flip_left_right
(
image
)
...
@@ -292,10 +364,7 @@ def random_horizontal_flip(
...
@@ -292,10 +364,7 @@ def random_horizontal_flip(
with
tf
.
name_scope
(
'RandomHorizontalFlip'
,
values
=
[
image
,
boxes
]):
with
tf
.
name_scope
(
'RandomHorizontalFlip'
,
values
=
[
image
,
boxes
]):
result
=
[]
result
=
[]
# random variable defining whether to do flip or not
# random variable defining whether to do flip or not
do_a_flip_random
=
tf
.
random_uniform
([],
seed
=
seed
)
do_a_flip_random
=
tf
.
greater
(
tf
.
random_uniform
([],
seed
=
seed
),
0.5
)
# flip only if there are bounding boxes in image!
do_a_flip_random
=
tf
.
logical_and
(
tf
.
greater
(
tf
.
size
(
boxes
),
0
),
tf
.
greater
(
do_a_flip_random
,
0.5
))
# flip image
# flip image
image
=
tf
.
cond
(
do_a_flip_random
,
lambda
:
_flip_image
(
image
),
lambda
:
image
)
image
=
tf
.
cond
(
do_a_flip_random
,
lambda
:
_flip_image
(
image
),
lambda
:
image
)
...
@@ -303,14 +372,14 @@ def random_horizontal_flip(
...
@@ -303,14 +372,14 @@ def random_horizontal_flip(
# flip boxes
# flip boxes
if
boxes
is
not
None
:
if
boxes
is
not
None
:
boxes
=
tf
.
cond
(
boxes
=
tf
.
cond
(
do_a_flip_random
,
lambda
:
_flip_boxes_left_right
(
boxes
),
do_a_flip_random
,
lambda
:
flip_boxes
(
boxes
),
lambda
:
boxes
)
lambda
:
boxes
)
result
.
append
(
boxes
)
result
.
append
(
boxes
)
# flip masks
# flip masks
if
masks
is
not
None
:
if
masks
is
not
None
:
masks
=
tf
.
cond
(
masks
=
tf
.
cond
(
do_a_flip_random
,
lambda
:
_flip_masks_left_right
(
masks
),
do_a_flip_random
,
lambda
:
_flip_masks
(
masks
),
lambda
:
masks
)
lambda
:
masks
)
result
.
append
(
masks
)
result
.
append
(
masks
)
# flip keypoints
# flip keypoints
...
@@ -325,6 +394,174 @@ def random_horizontal_flip(
...
@@ -325,6 +394,174 @@ def random_horizontal_flip(
return
tuple
(
result
)
return
tuple
(
result
)
def random_vertical_flip(image,
                         boxes=None,
                         masks=None,
                         keypoints=None,
                         keypoint_flip_permutation=None,
                         seed=None):
  """Flips the image and its annotations upside down with probability 50%.

  One random draw decides the outcome for the image and for every annotation
  that is passed in, so they are always flipped (or left alone) together.

  Args:
    image: rank 3 float32 tensor with shape [height, width, channels].
    boxes: (optional) rank 2 float32 tensor with shape [N, 4]
           containing the bounding boxes.
           Boxes are in normalized form meaning their coordinates vary
           between [0, 1].
           Each row is in the form of [ymin, xmin, ymax, xmax].
    masks: (optional) rank 3 float32 tensor with shape
           [num_instances, height, width] containing instance masks. The masks
           are of the same height, width as the input `image`.
    keypoints: (optional) rank 3 float32 tensor with shape
               [num_instances, num_keypoints, 2]. The keypoints are in y-x
               normalized coordinates.
    keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip
                               permutation. Required whenever `keypoints` is
                               given.
    seed: random seed

  Returns:
    A tuple whose first element is the image, with the same shape as the
    input image. For each of boxes, masks and keypoints that is not None, the
    corresponding tensor follows, in that order:

    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
           Boxes are in normalized form meaning their coordinates vary
           between [0, 1].
    masks: rank 3 float32 tensor with shape [num_instances, height, width]
           containing instance masks.
    keypoints: rank 3 float32 tensor with shape
               [num_instances, num_keypoints, 2]

  Raises:
    ValueError: if keypoints are provided but keypoint_flip_permutation is not.
  """
  if keypoints is not None and keypoint_flip_permutation is None:
    raise ValueError(
        'keypoints are provided but keypoints_flip_permutation is not provided')

  with tf.name_scope('RandomVerticalFlip', values=[image, boxes]):
    # Single coin toss shared by the image and all of its annotations.
    should_flip = tf.greater(tf.random_uniform([], seed=seed), 0.5)

    def _flip_if_selected(flip_fn, tensor):
      """Returns flip_fn(tensor) when the coin toss says flip, else tensor."""
      return tf.cond(should_flip, lambda: flip_fn(tensor), lambda: tensor)

    def _flip_keypoints(points):
      # NOTE(review): 0.5 is presumably the normalized y position of the
      # mirror axis -- confirm against keypoint_ops.flip_vertical.
      return keypoint_ops.flip_vertical(points, 0.5,
                                        keypoint_flip_permutation)

    outputs = [_flip_if_selected(tf.image.flip_up_down, image)]
    if boxes is not None:
      outputs.append(_flip_if_selected(_flip_boxes_up_down, boxes))
    if masks is not None:
      outputs.append(_flip_if_selected(_flip_masks_up_down, masks))
    # The guard above ensures the permutation exists whenever keypoints do.
    if keypoints is not None:
      outputs.append(_flip_if_selected(_flip_keypoints, keypoints))

    return tuple(outputs)
def random_rotation90(image,
                      boxes=None,
                      masks=None,
                      keypoints=None,
                      seed=None):
  """Rotates the image and annotations 90 degrees CCW with probability 50%.

  One random draw decides the outcome for the image and for every annotation
  that is passed in. This can be combined with random_horizontal_flip and
  random_vertical_flip to produce an output with a uniform distribution of
  the eight possible 90 degree rotation / reflection combinations.

  Args:
    image: rank 3 float32 tensor with shape [height, width, channels].
    boxes: (optional) rank 2 float32 tensor with shape [N, 4]
           containing the bounding boxes.
           Boxes are in normalized form meaning their coordinates vary
           between [0, 1].
           Each row is in the form of [ymin, xmin, ymax, xmax].
    masks: (optional) rank 3 float32 tensor with shape
           [num_instances, height, width] containing instance masks. The masks
           are of the same height, width as the input `image`.
    keypoints: (optional) rank 3 float32 tensor with shape
               [num_instances, num_keypoints, 2]. The keypoints are in y-x
               normalized coordinates.
    seed: random seed

  Returns:
    A tuple whose first element is the (possibly rotated) image. For each of
    boxes, masks and keypoints that is not None, the corresponding tensor
    follows, in that order:

    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
           Boxes are in normalized form meaning their coordinates vary
           between [0, 1].
    masks: rank 3 float32 tensor containing instance masks.
    keypoints: rank 3 float32 tensor with shape
               [num_instances, num_keypoints, 2]
  """
  with tf.name_scope('RandomRotation90', values=[image, boxes]):
    # Single coin toss shared by the image and all of its annotations.
    should_rotate = tf.greater(tf.random_uniform([], seed=seed), 0.5)

    def _rotate_if_selected(rotate_fn, tensor):
      """Returns rotate_fn(tensor) when selected, else tensor unchanged."""
      return tf.cond(should_rotate, lambda: rotate_fn(tensor), lambda: tensor)

    def _rot90_image(unrotated):
      return tf.image.rot90(unrotated)

    outputs = [_rotate_if_selected(_rot90_image, image)]
    if boxes is not None:
      outputs.append(_rotate_if_selected(_rot90_boxes, boxes))
    if masks is not None:
      outputs.append(_rotate_if_selected(_rot90_masks, masks))
    if keypoints is not None:
      outputs.append(_rotate_if_selected(keypoint_ops.rot90, keypoints))

    return tuple(outputs)
def
random_pixel_value_scale
(
image
,
minval
=
0.9
,
maxval
=
1.1
,
seed
=
None
):
def
random_pixel_value_scale
(
image
,
minval
=
0.9
,
maxval
=
1.1
,
seed
=
None
):
"""Scales each value in the pixels of the image.
"""Scales each value in the pixels of the image.
...
@@ -602,6 +839,7 @@ def random_jitter_boxes(boxes, ratio=0.05, seed=None):
...
@@ -602,6 +839,7 @@ def random_jitter_boxes(boxes, ratio=0.05, seed=None):
def
_strict_random_crop_image
(
image
,
def
_strict_random_crop_image
(
image
,
boxes
,
boxes
,
labels
,
labels
,
label_scores
=
None
,
masks
=
None
,
masks
=
None
,
keypoints
=
None
,
keypoints
=
None
,
min_object_covered
=
1.0
,
min_object_covered
=
1.0
,
...
@@ -625,6 +863,8 @@ def _strict_random_crop_image(image,
...
@@ -625,6 +863,8 @@ def _strict_random_crop_image(image,
between [0, 1].
between [0, 1].
Each row is in the form of [ymin, xmin, ymax, xmax].
Each row is in the form of [ymin, xmin, ymax, xmax].
labels: rank 1 int32 tensor containing the object classes.
labels: rank 1 int32 tensor containing the object classes.
label_scores: (optional) float32 tensor of shape [num_instances]
representing the score for each box.
masks: (optional) rank 3 float32 tensor with shape
masks: (optional) rank 3 float32 tensor with shape
[num_instances, height, width] containing instance masks. The masks
[num_instances, height, width] containing instance masks. The masks
are of the same height, width as the input `image`.
are of the same height, width as the input `image`.
...
@@ -645,8 +885,8 @@ def _strict_random_crop_image(image,
...
@@ -645,8 +885,8 @@ def _strict_random_crop_image(image,
Boxes are in normalized form.
Boxes are in normalized form.
labels: new labels.
labels: new labels.
If masks, or keypoints is not None, the function also returns:
If
label_scores,
masks, or keypoints is not None, the function also returns:
label_scores: rank 1 float32 tensor with shape [num_instances].
masks: rank 3 float32 tensor with shape [num_instances, height, width]
masks: rank 3 float32 tensor with shape [num_instances, height, width]
containing instance masks.
containing instance masks.
keypoints: rank 3 float32 tensor with shape
keypoints: rank 3 float32 tensor with shape
...
@@ -682,6 +922,9 @@ def _strict_random_crop_image(image,
...
@@ -682,6 +922,9 @@ def _strict_random_crop_image(image,
boxlist
=
box_list
.
BoxList
(
boxes
)
boxlist
=
box_list
.
BoxList
(
boxes
)
boxlist
.
add_field
(
'labels'
,
labels
)
boxlist
.
add_field
(
'labels'
,
labels
)
if
label_scores
is
not
None
:
boxlist
.
add_field
(
'label_scores'
,
label_scores
)
im_boxlist
=
box_list
.
BoxList
(
im_box_rank2
)
im_boxlist
=
box_list
.
BoxList
(
im_box_rank2
)
# remove boxes that are outside cropped image
# remove boxes that are outside cropped image
...
@@ -702,6 +945,10 @@ def _strict_random_crop_image(image,
...
@@ -702,6 +945,10 @@ def _strict_random_crop_image(image,
result
=
[
new_image
,
new_boxes
,
new_labels
]
result
=
[
new_image
,
new_boxes
,
new_labels
]
if
label_scores
is
not
None
:
new_label_scores
=
overlapping_boxlist
.
get_field
(
'label_scores'
)
result
.
append
(
new_label_scores
)
if
masks
is
not
None
:
if
masks
is
not
None
:
masks_of_boxes_inside_window
=
tf
.
gather
(
masks
,
inside_window_ids
)
masks_of_boxes_inside_window
=
tf
.
gather
(
masks
,
inside_window_ids
)
masks_of_boxes_completely_inside_window
=
tf
.
gather
(
masks_of_boxes_completely_inside_window
=
tf
.
gather
(
...
@@ -729,6 +976,7 @@ def _strict_random_crop_image(image,
...
@@ -729,6 +976,7 @@ def _strict_random_crop_image(image,
def
random_crop_image
(
image
,
def
random_crop_image
(
image
,
boxes
,
boxes
,
labels
,
labels
,
label_scores
=
None
,
masks
=
None
,
masks
=
None
,
keypoints
=
None
,
keypoints
=
None
,
min_object_covered
=
1.0
,
min_object_covered
=
1.0
,
...
@@ -761,6 +1009,8 @@ def random_crop_image(image,
...
@@ -761,6 +1009,8 @@ def random_crop_image(image,
between [0, 1].
between [0, 1].
Each row is in the form of [ymin, xmin, ymax, xmax].
Each row is in the form of [ymin, xmin, ymax, xmax].
labels: rank 1 int32 tensor containing the object classes.
labels: rank 1 int32 tensor containing the object classes.
label_scores: (optional) float32 tensor of shape [num_instances].
representing the score for each box.
masks: (optional) rank 3 float32 tensor with shape
masks: (optional) rank 3 float32 tensor with shape
[num_instances, height, width] containing instance masks. The masks
[num_instances, height, width] containing instance masks. The masks
are of the same height, width as the input `image`.
are of the same height, width as the input `image`.
...
@@ -786,8 +1036,9 @@ def random_crop_image(image,
...
@@ -786,8 +1036,9 @@ def random_crop_image(image,
form.
form.
labels: new labels.
labels: new labels.
If masks, or keypoints are not None, the function also returns:
If label_scores, masks, or keypoints are not None, the function also
returns:
label_scores: new scores.
masks: rank 3 float32 tensor with shape [num_instances, height, width]
masks: rank 3 float32 tensor with shape [num_instances, height, width]
containing instance masks.
containing instance masks.
keypoints: rank 3 float32 tensor with shape
keypoints: rank 3 float32 tensor with shape
...
@@ -799,6 +1050,7 @@ def random_crop_image(image,
...
@@ -799,6 +1050,7 @@ def random_crop_image(image,
image
,
image
,
boxes
,
boxes
,
labels
,
labels
,
label_scores
=
label_scores
,
masks
=
masks
,
masks
=
masks
,
keypoints
=
keypoints
,
keypoints
=
keypoints
,
min_object_covered
=
min_object_covered
,
min_object_covered
=
min_object_covered
,
...
@@ -814,13 +1066,15 @@ def random_crop_image(image,
...
@@ -814,13 +1066,15 @@ def random_crop_image(image,
do_a_crop_random
=
tf
.
greater
(
do_a_crop_random
,
random_coef
)
do_a_crop_random
=
tf
.
greater
(
do_a_crop_random
,
random_coef
)
outputs
=
[
image
,
boxes
,
labels
]
outputs
=
[
image
,
boxes
,
labels
]
if
label_scores
is
not
None
:
outputs
.
append
(
label_scores
)
if
masks
is
not
None
:
if
masks
is
not
None
:
outputs
.
append
(
masks
)
outputs
.
append
(
masks
)
if
keypoints
is
not
None
:
if
keypoints
is
not
None
:
outputs
.
append
(
keypoints
)
outputs
.
append
(
keypoints
)
result
=
tf
.
cond
(
do_a_crop_random
,
result
=
tf
.
cond
(
do_a_crop_random
,
strict_random_crop_image_fn
,
strict_random_crop_image_fn
,
lambda
:
tuple
(
outputs
))
lambda
:
tuple
(
outputs
))
return
result
return
result
...
@@ -865,7 +1119,7 @@ def random_pad_image(image,
...
@@ -865,7 +1119,7 @@ def random_pad_image(image,
form.
form.
"""
"""
if
pad_color
is
None
:
if
pad_color
is
None
:
pad_color
=
tf
.
reduce_mean
(
image
,
reduction_indice
s
=
[
0
,
1
])
pad_color
=
tf
.
reduce_mean
(
image
,
axi
s
=
[
0
,
1
])
image_shape
=
tf
.
shape
(
image
)
image_shape
=
tf
.
shape
(
image
)
image_height
=
image_shape
[
0
]
image_height
=
image_shape
[
0
]
...
@@ -902,16 +1156,22 @@ def random_pad_image(image,
...
@@ -902,16 +1156,22 @@ def random_pad_image(image,
lambda
:
tf
.
constant
(
0
,
dtype
=
tf
.
int32
))
lambda
:
tf
.
constant
(
0
,
dtype
=
tf
.
int32
))
new_image
=
tf
.
image
.
pad_to_bounding_box
(
new_image
=
tf
.
image
.
pad_to_bounding_box
(
image
,
offset_height
=
offset_height
,
offset_width
=
offset_width
,
image
,
target_height
=
target_height
,
target_width
=
target_width
)
offset_height
=
offset_height
,
offset_width
=
offset_width
,
target_height
=
target_height
,
target_width
=
target_width
)
# Setting color of the padded pixels
# Setting color of the padded pixels
image_ones
=
tf
.
ones_like
(
image
)
image_ones
=
tf
.
ones_like
(
image
)
image_ones_padded
=
tf
.
image
.
pad_to_bounding_box
(
image_ones_padded
=
tf
.
image
.
pad_to_bounding_box
(
image_ones
,
offset_height
=
offset_height
,
offset_width
=
offset_width
,
image_ones
,
target_height
=
target_height
,
target_width
=
target_width
)
offset_height
=
offset_height
,
image_color_paded
=
(
1.0
-
image_ones_padded
)
*
pad_color
offset_width
=
offset_width
,
new_image
+=
image_color_paded
target_height
=
target_height
,
target_width
=
target_width
)
image_color_padded
=
(
1.0
-
image_ones_padded
)
*
pad_color
new_image
+=
image_color_padded
# setting boxes
# setting boxes
new_window
=
tf
.
to_float
(
new_window
=
tf
.
to_float
(
...
@@ -931,13 +1191,14 @@ def random_pad_image(image,
...
@@ -931,13 +1191,14 @@ def random_pad_image(image,
def
random_crop_pad_image
(
image
,
def
random_crop_pad_image
(
image
,
boxes
,
boxes
,
labels
,
labels
,
label_scores
=
None
,
min_object_covered
=
1.0
,
min_object_covered
=
1.0
,
aspect_ratio_range
=
(
0.75
,
1.33
),
aspect_ratio_range
=
(
0.75
,
1.33
),
area_range
=
(
0.1
,
1.0
),
area_range
=
(
0.1
,
1.0
),
overlap_thresh
=
0.3
,
overlap_thresh
=
0.3
,
random_coef
=
0.0
,
random_coef
=
0.0
,
min_padded_size_ratio
=
None
,
min_padded_size_ratio
=
(
1.0
,
1.0
)
,
max_padded_size_ratio
=
None
,
max_padded_size_ratio
=
(
2.0
,
2.0
)
,
pad_color
=
None
,
pad_color
=
None
,
seed
=
None
):
seed
=
None
):
"""Randomly crops and pads the image.
"""Randomly crops and pads the image.
...
@@ -960,6 +1221,7 @@ def random_crop_pad_image(image,
...
@@ -960,6 +1221,7 @@ def random_crop_pad_image(image,
between [0, 1].
between [0, 1].
Each row is in the form of [ymin, xmin, ymax, xmax].
Each row is in the form of [ymin, xmin, ymax, xmax].
labels: rank 1 int32 tensor containing the object classes.
labels: rank 1 int32 tensor containing the object classes.
label_scores: rank 1 float32 containing the label scores.
min_object_covered: the cropped image must cover at least this fraction of
min_object_covered: the cropped image must cover at least this fraction of
at least one of the input bounding boxes.
at least one of the input bounding boxes.
aspect_ratio_range: allowed range for aspect ratio of cropped image.
aspect_ratio_range: allowed range for aspect ratio of cropped image.
...
@@ -972,11 +1234,9 @@ def random_crop_pad_image(image,
...
@@ -972,11 +1234,9 @@ def random_crop_pad_image(image,
cropped image, and if it is 1.0, we will always get the
cropped image, and if it is 1.0, we will always get the
original image.
original image.
min_padded_size_ratio: min ratio of padded image height and width to the
min_padded_size_ratio: min ratio of padded image height and width to the
input image's height and width. If None, it will
input image's height and width.
be set to [0.0, 0.0].
max_padded_size_ratio: max ratio of padded image height and width to the
max_padded_size_ratio: max ratio of padded image height and width to the
input image's height and width. If None, it will
input image's height and width.
be set to [2.0, 2.0].
pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32.
pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32.
if set as None, it will be set to average color of the randomly
if set as None, it will be set to average color of the randomly
cropped image.
cropped image.
...
@@ -987,18 +1247,17 @@ def random_crop_pad_image(image,
...
@@ -987,18 +1247,17 @@ def random_crop_pad_image(image,
padded_boxes: boxes which is the same rank as input boxes. Boxes are in
padded_boxes: boxes which is the same rank as input boxes. Boxes are in
normalized form.
normalized form.
cropped_labels: cropped labels.
cropped_labels: cropped labels.
if label_scores is not None also returns:
cropped_label_scores: cropped label scores.
"""
"""
image_size
=
tf
.
shape
(
image
)
image_size
=
tf
.
shape
(
image
)
image_height
=
image_size
[
0
]
image_height
=
image_size
[
0
]
image_width
=
image_size
[
1
]
image_width
=
image_size
[
1
]
if
min_padded_size_ratio
is
None
:
result
=
random_crop_image
(
min_padded_size_ratio
=
tf
.
constant
([
0.0
,
0.0
],
tf
.
float32
)
if
max_padded_size_ratio
is
None
:
max_padded_size_ratio
=
tf
.
constant
([
2.0
,
2.0
],
tf
.
float32
)
cropped_image
,
cropped_boxes
,
cropped_labels
=
random_crop_image
(
image
=
image
,
image
=
image
,
boxes
=
boxes
,
boxes
=
boxes
,
labels
=
labels
,
labels
=
labels
,
label_scores
=
label_scores
,
min_object_covered
=
min_object_covered
,
min_object_covered
=
min_object_covered
,
aspect_ratio_range
=
aspect_ratio_range
,
aspect_ratio_range
=
aspect_ratio_range
,
area_range
=
area_range
,
area_range
=
area_range
,
...
@@ -1006,6 +1265,8 @@ def random_crop_pad_image(image,
...
@@ -1006,6 +1265,8 @@ def random_crop_pad_image(image,
random_coef
=
random_coef
,
random_coef
=
random_coef
,
seed
=
seed
)
seed
=
seed
)
cropped_image
,
cropped_boxes
,
cropped_labels
=
result
[:
3
]
min_image_size
=
tf
.
to_int32
(
min_image_size
=
tf
.
to_int32
(
tf
.
to_float
(
tf
.
stack
([
image_height
,
image_width
]))
*
tf
.
to_float
(
tf
.
stack
([
image_height
,
image_width
]))
*
min_padded_size_ratio
)
min_padded_size_ratio
)
...
@@ -1021,12 +1282,19 @@ def random_crop_pad_image(image,
...
@@ -1021,12 +1282,19 @@ def random_crop_pad_image(image,
pad_color
=
pad_color
,
pad_color
=
pad_color
,
seed
=
seed
)
seed
=
seed
)
return
padded_image
,
padded_boxes
,
cropped_labels
cropped_padded_output
=
(
padded_image
,
padded_boxes
,
cropped_labels
)
if
label_scores
is
not
None
:
cropped_label_scores
=
result
[
3
]
cropped_padded_output
+=
(
cropped_label_scores
,)
return
cropped_padded_output
def
random_crop_to_aspect_ratio
(
image
,
def
random_crop_to_aspect_ratio
(
image
,
boxes
,
boxes
,
labels
,
labels
,
label_scores
=
None
,
masks
=
None
,
masks
=
None
,
keypoints
=
None
,
keypoints
=
None
,
aspect_ratio
=
1.0
,
aspect_ratio
=
1.0
,
...
@@ -1051,6 +1319,8 @@ def random_crop_to_aspect_ratio(image,
...
@@ -1051,6 +1319,8 @@ def random_crop_to_aspect_ratio(image,
between [0, 1].
between [0, 1].
Each row is in the form of [ymin, xmin, ymax, xmax].
Each row is in the form of [ymin, xmin, ymax, xmax].
labels: rank 1 int32 tensor containing the object classes.
labels: rank 1 int32 tensor containing the object classes.
label_scores: (optional) float32 tensor of shape [num_instances]
representing the score for each box.
masks: (optional) rank 3 float32 tensor with shape
masks: (optional) rank 3 float32 tensor with shape
[num_instances, height, width] containing instance masks. The masks
[num_instances, height, width] containing instance masks. The masks
are of the same height, width as the input `image`.
are of the same height, width as the input `image`.
...
@@ -1068,8 +1338,8 @@ def random_crop_to_aspect_ratio(image,
...
@@ -1068,8 +1338,8 @@ def random_crop_to_aspect_ratio(image,
Boxes are in normalized form.
Boxes are in normalized form.
labels: new labels.
labels: new labels.
If masks, or keypoints is not None, the function also returns:
If
label_scores,
masks, or keypoints is not None, the function also returns:
label_scores: new label scores.
masks: rank 3 float32 tensor with shape [num_instances, height, width]
masks: rank 3 float32 tensor with shape [num_instances, height, width]
containing instance masks.
containing instance masks.
keypoints: rank 3 float32 tensor with shape
keypoints: rank 3 float32 tensor with shape
...
@@ -1088,21 +1358,16 @@ def random_crop_to_aspect_ratio(image,
...
@@ -1088,21 +1358,16 @@ def random_crop_to_aspect_ratio(image,
orig_aspect_ratio
=
tf
.
to_float
(
orig_width
)
/
tf
.
to_float
(
orig_height
)
orig_aspect_ratio
=
tf
.
to_float
(
orig_width
)
/
tf
.
to_float
(
orig_height
)
new_aspect_ratio
=
tf
.
constant
(
aspect_ratio
,
dtype
=
tf
.
float32
)
new_aspect_ratio
=
tf
.
constant
(
aspect_ratio
,
dtype
=
tf
.
float32
)
def
target_height_fn
():
def
target_height_fn
():
return
tf
.
to_int32
(
return
tf
.
to_int32
(
tf
.
round
(
tf
.
to_float
(
orig_width
)
/
new_aspect_ratio
))
tf
.
round
(
tf
.
to_float
(
orig_height
)
*
orig_aspect_ratio
/
new_aspect_ratio
))
target_height
=
tf
.
cond
(
orig_aspect_ratio
>=
new_aspect_ratio
,
target_height
=
tf
.
cond
(
lambda
:
orig_height
,
target_height_fn
)
orig_aspect_ratio
>=
new_aspect_ratio
,
lambda
:
orig_height
,
target_height_fn
)
def
target_width_fn
():
def
target_width_fn
():
return
tf
.
to_int32
(
return
tf
.
to_int32
(
tf
.
round
(
tf
.
to_float
(
orig_height
)
*
new_aspect_ratio
))
tf
.
round
(
tf
.
to_float
(
orig_width
)
*
new_aspect_ratio
/
orig_aspect_ratio
))
target_width
=
tf
.
cond
(
orig_aspect_ratio
<=
new_aspect_ratio
,
target_width
=
tf
.
cond
(
lambda
:
orig_width
,
target_width_fn
)
orig_aspect_ratio
<=
new_aspect_ratio
,
lambda
:
orig_width
,
target_width_fn
)
# either offset_height = 0 and offset_width is randomly chosen from
# either offset_height = 0 and offset_width is randomly chosen from
# [0, offset_width - target_width), or else offset_width = 0 and
# [0, offset_width - target_width), or else offset_width = 0 and
...
@@ -1122,6 +1387,9 @@ def random_crop_to_aspect_ratio(image,
...
@@ -1122,6 +1387,9 @@ def random_crop_to_aspect_ratio(image,
boxlist
=
box_list
.
BoxList
(
boxes
)
boxlist
=
box_list
.
BoxList
(
boxes
)
boxlist
.
add_field
(
'labels'
,
labels
)
boxlist
.
add_field
(
'labels'
,
labels
)
if
label_scores
is
not
None
:
boxlist
.
add_field
(
'label_scores'
,
label_scores
)
im_boxlist
=
box_list
.
BoxList
(
tf
.
expand_dims
(
im_box
,
0
))
im_boxlist
=
box_list
.
BoxList
(
tf
.
expand_dims
(
im_box
,
0
))
# remove boxes whose overlap with the image is less than overlap_thresh
# remove boxes whose overlap with the image is less than overlap_thresh
...
@@ -1133,13 +1401,16 @@ def random_crop_to_aspect_ratio(image,
...
@@ -1133,13 +1401,16 @@ def random_crop_to_aspect_ratio(image,
new_boxlist
=
box_list_ops
.
change_coordinate_frame
(
overlapping_boxlist
,
new_boxlist
=
box_list_ops
.
change_coordinate_frame
(
overlapping_boxlist
,
im_box
)
im_box
)
new_boxlist
=
box_list_ops
.
clip_to_window
(
new_boxlist
,
new_boxlist
=
box_list_ops
.
clip_to_window
(
new_boxlist
,
tf
.
constant
(
tf
.
constant
([
0.0
,
0.0
,
1.0
,
1.0
],
[
0.0
,
0.0
,
1.0
,
1.0
],
tf
.
float32
))
tf
.
float32
))
new_boxes
=
new_boxlist
.
get
()
new_boxes
=
new_boxlist
.
get
()
result
=
[
new_image
,
new_boxes
,
new_labels
]
result
=
[
new_image
,
new_boxes
,
new_labels
]
if
label_scores
is
not
None
:
new_label_scores
=
overlapping_boxlist
.
get_field
(
'label_scores'
)
result
.
append
(
new_label_scores
)
if
masks
is
not
None
:
if
masks
is
not
None
:
masks_inside_window
=
tf
.
gather
(
masks
,
keep_ids
)
masks_inside_window
=
tf
.
gather
(
masks
,
keep_ids
)
masks_box_begin
=
tf
.
stack
([
0
,
offset_height
,
offset_width
])
masks_box_begin
=
tf
.
stack
([
0
,
offset_height
,
offset_width
])
...
@@ -1158,6 +1429,122 @@ def random_crop_to_aspect_ratio(image,
...
@@ -1158,6 +1429,122 @@ def random_crop_to_aspect_ratio(image,
return
tuple
(
result
)
return
tuple
(
result
)
def random_pad_to_aspect_ratio(image,
                               boxes,
                               masks=None,
                               keypoints=None,
                               aspect_ratio=1.0,
                               min_padded_size_ratio=(1.0, 1.0),
                               max_padded_size_ratio=(2.0, 2.0),
                               seed=None):
  """Randomly zero pads an image to the specified aspect ratio.

  Pads the image so that the resulting image will have the specified aspect
  ratio without scaling less than the min_padded_size_ratio or more than the
  max_padded_size_ratio. If the min_padded_size_ratio or max_padded_size_ratio
  is lower than what is possible to maintain the aspect ratio, then this method
  will use the least padding to achieve the specified aspect ratio.

  The original image is kept at offset (0, 0) of the padded canvas, so all
  padding is added below and to the right of it.

  Args:
    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
           with pixel values varying between [0, 1].
    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
           Boxes are in normalized form meaning their coordinates vary
           between [0, 1].
           Each row is in the form of [ymin, xmin, ymax, xmax].
    masks: (optional) rank 3 float32 tensor with shape
           [num_instances, height, width] containing instance masks. The masks
           are of the same height, width as the input `image`.
    keypoints: (optional) rank 3 float32 tensor with shape
               [num_instances, num_keypoints, 2]. The keypoints are in y-x
               normalized coordinates.
    aspect_ratio: aspect ratio (width / height) of the final image.
    min_padded_size_ratio: min ratio of padded image height and width to the
                           input image's height and width.
    max_padded_size_ratio: max ratio of padded image height and width to the
                           input image's height and width.
    seed: random seed.

  Returns:
    A tuple containing, in this order:
    image: padded image which is the same rank as input image.
    boxes: boxes which is the same rank as input boxes, re-expressed in the
           coordinate frame of the padded image.

    If masks, or keypoints is not None, the tuple also contains:
    masks: rank 3 float32 tensor with shape [num_instances, height, width]
           containing instance masks, padded like the image.
    keypoints: rank 3 float32 tensor with shape
               [num_instances, num_keypoints, 2], re-expressed in the
               coordinate frame of the padded image.

  Raises:
    ValueError: If image is not a 3D tensor.
  """
  if len(image.get_shape()) != 3:
    raise ValueError('Image should be 3D tensor')

  with tf.name_scope('RandomPadToAspectRatio', values=[image]):
    image_shape = tf.shape(image)
    image_height = tf.to_float(image_shape[0])
    image_width = tf.to_float(image_shape[1])
    image_aspect_ratio = image_width / image_height
    new_aspect_ratio = tf.constant(aspect_ratio, dtype=tf.float32)

    # Smallest canvas with the requested aspect ratio that still contains the
    # whole image: only the dimension that is "too short" is grown.
    target_height = tf.cond(
        image_aspect_ratio <= new_aspect_ratio,
        lambda: image_height,
        lambda: image_width / new_aspect_ratio)
    target_width = tf.cond(
        image_aspect_ratio >= new_aspect_ratio,
        lambda: image_width,
        lambda: image_height * new_aspect_ratio)

    # Requested size bounds, never smaller than the minimal canvas above.
    min_height = tf.maximum(
        min_padded_size_ratio[0] * image_height, target_height)
    min_width = tf.maximum(
        min_padded_size_ratio[1] * image_width, target_width)
    max_height = tf.maximum(
        max_padded_size_ratio[0] * image_height, target_height)
    max_width = tf.maximum(
        max_padded_size_ratio[1] * image_width, target_width)

    # A single scale factor is applied to both dimensions so that the target
    # aspect ratio is preserved.
    # NOTE(review): for some ratio combinations min_scale can exceed
    # max_scale (e.g. a 100x200 image, aspect_ratio=1.0 and
    # min_padded_size_ratio=(1.5, 1.5)); confirm the sampled range is the
    # intended one in that case.
    min_scale = tf.maximum(min_height / target_height, min_width / target_width)
    max_scale = tf.minimum(max_height / target_height, max_width / target_width)
    scale = tf.random_uniform([], min_scale, max_scale, seed=seed)

    target_height = scale * target_height
    target_width = scale * target_width

    new_image = tf.image.pad_to_bounding_box(
        image, 0, 0, tf.to_int32(target_height), tf.to_int32(target_width))

    # The padded canvas expressed in the normalized coordinates of the
    # original image; used to re-normalize boxes and keypoints.
    im_box = tf.stack([
        0.0,
        0.0,
        target_height / image_height,
        target_width / image_width
    ])
    boxlist = box_list.BoxList(boxes)
    new_boxlist = box_list_ops.change_coordinate_frame(boxlist, im_box)
    new_boxes = new_boxlist.get()

    result = [new_image, new_boxes]

    if masks is not None:
      # Add a trailing channel axis so the masks can be padded with the same
      # image op, then drop it again.
      new_masks = tf.expand_dims(masks, -1)
      new_masks = tf.image.pad_to_bounding_box(new_masks, 0, 0,
                                               tf.to_int32(target_height),
                                               tf.to_int32(target_width))
      new_masks = tf.squeeze(new_masks, [-1])
      result.append(new_masks)

    if keypoints is not None:
      new_keypoints = keypoint_ops.change_coordinate_frame(keypoints, im_box)
      result.append(new_keypoints)

    return tuple(result)
def
random_black_patches
(
image
,
def
random_black_patches
(
image
,
max_black_patches
=
10
,
max_black_patches
=
10
,
probability
=
0.5
,
probability
=
0.5
,
...
@@ -1213,8 +1600,8 @@ def random_black_patches(image,
...
@@ -1213,8 +1600,8 @@ def random_black_patches(image,
with
tf
.
name_scope
(
'RandomBlackPatchInImage'
,
values
=
[
image
]):
with
tf
.
name_scope
(
'RandomBlackPatchInImage'
,
values
=
[
image
]):
for
_
in
range
(
max_black_patches
):
for
_
in
range
(
max_black_patches
):
random_prob
=
tf
.
random_uniform
(
[],
minval
=
0.0
,
maxval
=
1.0
,
random_prob
=
tf
.
random_uniform
(
dtype
=
tf
.
float32
,
seed
=
random_seed
)
[],
minval
=
0.0
,
maxval
=
1.0
,
dtype
=
tf
.
float32
,
seed
=
random_seed
)
image
=
tf
.
cond
(
image
=
tf
.
cond
(
tf
.
greater
(
random_prob
,
probability
),
lambda
:
image
,
tf
.
greater
(
random_prob
,
probability
),
lambda
:
image
,
lambda
:
add_black_patch_to_image
(
image
))
lambda
:
add_black_patch_to_image
(
image
))
...
@@ -1255,9 +1642,7 @@ def random_resize_method(image, target_size):
...
@@ -1255,9 +1642,7 @@ def random_resize_method(image, target_size):
return
resized_image
return
resized_image
def
_compute_new_static_size
(
image
,
def
_compute_new_static_size
(
image
,
min_dimension
,
max_dimension
):
min_dimension
,
max_dimension
):
"""Compute new static shape for resize_to_range method."""
"""Compute new static shape for resize_to_range method."""
image_shape
=
image
.
get_shape
().
as_list
()
image_shape
=
image
.
get_shape
().
as_list
()
orig_height
=
image_shape
[
0
]
orig_height
=
image_shape
[
0
]
...
@@ -1292,9 +1677,7 @@ def _compute_new_static_size(image,
...
@@ -1292,9 +1677,7 @@ def _compute_new_static_size(image,
return
tf
.
constant
(
new_size
)
return
tf
.
constant
(
new_size
)
def
_compute_new_dynamic_size
(
image
,
def
_compute_new_dynamic_size
(
image
,
min_dimension
,
max_dimension
):
min_dimension
,
max_dimension
):
"""Compute new dynamic shape for resize_to_range method."""
"""Compute new dynamic shape for resize_to_range method."""
image_shape
=
tf
.
shape
(
image
)
image_shape
=
tf
.
shape
(
image
)
orig_height
=
tf
.
to_float
(
image_shape
[
0
])
orig_height
=
tf
.
to_float
(
image_shape
[
0
])
...
@@ -1335,6 +1718,7 @@ def resize_to_range(image,
...
@@ -1335,6 +1718,7 @@ def resize_to_range(image,
masks
=
None
,
masks
=
None
,
min_dimension
=
None
,
min_dimension
=
None
,
max_dimension
=
None
,
max_dimension
=
None
,
method
=
tf
.
image
.
ResizeMethod
.
BILINEAR
,
align_corners
=
False
):
align_corners
=
False
):
"""Resizes an image so its dimensions are within the provided value.
"""Resizes an image so its dimensions are within the provided value.
...
@@ -1352,6 +1736,8 @@ def resize_to_range(image,
...
@@ -1352,6 +1736,8 @@ def resize_to_range(image,
dimension.
dimension.
max_dimension: (optional) (scalar) maximum allowed size
max_dimension: (optional) (scalar) maximum allowed size
of the larger image dimension.
of the larger image dimension.
method: (optional) interpolation method used in resizing. Defaults to
BILINEAR.
align_corners: bool. If true, exactly align all 4 corners of the input
align_corners: bool. If true, exactly align all 4 corners of the input
and output. Defaults to False.
and output. Defaults to False.
...
@@ -1372,25 +1758,71 @@ def resize_to_range(image,
...
@@ -1372,25 +1758,71 @@ def resize_to_range(image,
with
tf
.
name_scope
(
'ResizeToRange'
,
values
=
[
image
,
min_dimension
]):
with
tf
.
name_scope
(
'ResizeToRange'
,
values
=
[
image
,
min_dimension
]):
if
image
.
get_shape
().
is_fully_defined
():
if
image
.
get_shape
().
is_fully_defined
():
new_size
=
_compute_new_static_size
(
image
,
min_dimension
,
new_size
=
_compute_new_static_size
(
image
,
min_dimension
,
max_dimension
)
max_dimension
)
else
:
else
:
new_size
=
_compute_new_dynamic_size
(
image
,
min_dimension
,
new_size
=
_compute_new_dynamic_size
(
image
,
min_dimension
,
max_dimension
)
max_dimension
)
new_image
=
tf
.
image
.
resize_images
(
new_image
=
tf
.
image
.
resize_images
(
image
,
new_size
,
image
,
new_size
,
method
=
method
,
align_corners
=
align_corners
)
align_corners
=
align_corners
)
result
=
new_image
result
=
new_image
if
masks
is
not
None
:
if
masks
is
not
None
:
new_masks
=
tf
.
expand_dims
(
masks
,
3
)
new_masks
=
tf
.
expand_dims
(
masks
,
3
)
new_masks
=
tf
.
image
.
resize_nearest_neighbor
(
new_masks
,
new_size
,
new_masks
=
tf
.
image
.
resize_nearest_neighbor
(
align_corners
=
align_corners
)
new_masks
,
new_size
,
align_corners
=
align_corners
)
new_masks
=
tf
.
squeeze
(
new_masks
,
3
)
new_masks
=
tf
.
squeeze
(
new_masks
,
3
)
result
=
[
new_image
,
new_masks
]
result
=
[
new_image
,
new_masks
]
return
result
return
result
# TODO: Make sure the static shapes are preserved.
def resize_to_min_dimension(image, masks=None, min_dimension=600):
  """Resizes image and masks given the min size maintaining the aspect ratio.

  If one of the image dimensions is smaller than min_dimension, it will scale
  the image such that its smallest dimension is equal to min_dimension.
  Otherwise, will keep the image size as is.

  Args:
    image: a tensor of size [height, width, channels].
    masks: (optional) a tensor of size [num_instances, height, width].
    min_dimension: minimum image dimension.

  Returns:
    If masks is None, the resized image: a tensor of size
      [new_height, new_width, channels].
    Otherwise a tuple containing the following:
      Resized image. A tensor of size [new_height, new_width, channels].
      Resized masks. A tensor of size [num_instances, new_height, new_width].

  Raises:
    ValueError: if the image is not a 3D tensor.
  """
  if len(image.get_shape()) != 3:
    raise ValueError('Image should be 3D tensor')

  with tf.name_scope('ResizeGivenMinDimension', values=[image, min_dimension]):
    image_height = tf.shape(image)[0]
    image_width = tf.shape(image)[1]
    min_image_dimension = tf.minimum(image_height, image_width)
    # Never shrink: the target for the smaller side is at least its current
    # size, so target_ratio is >= 1.
    min_target_dimension = tf.maximum(min_image_dimension, min_dimension)
    target_ratio = (
        tf.to_float(min_target_dimension) / tf.to_float(min_image_dimension))
    target_height = tf.to_int32(tf.to_float(image_height) * target_ratio)
    target_width = tf.to_int32(tf.to_float(image_width) * target_ratio)
    # resize_bilinear works on batches, so add and then remove a batch axis.
    image = tf.image.resize_bilinear(
        tf.expand_dims(image, axis=0),
        size=[target_height, target_width],
        align_corners=True)
    result = tf.squeeze(image, axis=0)
    if masks is not None:
      # Masks are resized with nearest neighbor; a trailing channel axis is
      # added for the image op and removed afterwards.
      masks = tf.image.resize_nearest_neighbor(
          tf.expand_dims(masks, axis=3),
          size=[target_height, target_width],
          align_corners=True)
      result = (result, tf.squeeze(masks, axis=3))
    return result
def
scale_boxes_to_pixel_coordinates
(
image
,
boxes
,
keypoints
=
None
):
def
scale_boxes_to_pixel_coordinates
(
image
,
boxes
,
keypoints
=
None
):
"""Scales boxes from normalized to pixel coordinates.
"""Scales boxes from normalized to pixel coordinates.
...
@@ -1433,7 +1865,8 @@ def resize_image(image,
...
@@ -1433,7 +1865,8 @@ def resize_image(image,
with
tf
.
name_scope
(
with
tf
.
name_scope
(
'ResizeImage'
,
'ResizeImage'
,
values
=
[
image
,
new_height
,
new_width
,
method
,
align_corners
]):
values
=
[
image
,
new_height
,
new_width
,
method
,
align_corners
]):
new_image
=
tf
.
image
.
resize_images
(
image
,
[
new_height
,
new_width
],
new_image
=
tf
.
image
.
resize_images
(
image
,
[
new_height
,
new_width
],
method
=
method
,
method
=
method
,
align_corners
=
align_corners
)
align_corners
=
align_corners
)
result
=
new_image
result
=
new_image
...
@@ -1451,8 +1884,7 @@ def resize_image(image,
...
@@ -1451,8 +1884,7 @@ def resize_image(image,
new_masks
=
tf
.
reshape
(
masks
,
[
0
,
new_size
[
0
],
new_size
[
1
]])
new_masks
=
tf
.
reshape
(
masks
,
[
0
,
new_size
[
0
],
new_size
[
1
]])
return
new_masks
return
new_masks
masks
=
tf
.
cond
(
num_instances
>
0
,
masks
=
tf
.
cond
(
num_instances
>
0
,
resize_masks_branch
,
resize_masks_branch
,
reshape_masks_branch
)
reshape_masks_branch
)
result
=
[
new_image
,
masks
]
result
=
[
new_image
,
masks
]
...
@@ -1520,6 +1952,7 @@ def rgb_to_gray(image):
...
@@ -1520,6 +1952,7 @@ def rgb_to_gray(image):
def
ssd_random_crop
(
image
,
def
ssd_random_crop
(
image
,
boxes
,
boxes
,
labels
,
labels
,
label_scores
=
None
,
masks
=
None
,
masks
=
None
,
keypoints
=
None
,
keypoints
=
None
,
min_object_covered
=
(
0.0
,
0.1
,
0.3
,
0.5
,
0.7
,
0.9
,
1.0
),
min_object_covered
=
(
0.0
,
0.1
,
0.3
,
0.5
,
0.7
,
0.9
,
1.0
),
...
@@ -1542,6 +1975,7 @@ def ssd_random_crop(image,
...
@@ -1542,6 +1975,7 @@ def ssd_random_crop(image,
between [0, 1].
between [0, 1].
Each row is in the form of [ymin, xmin, ymax, xmax].
Each row is in the form of [ymin, xmin, ymax, xmax].
labels: rank 1 int32 tensor containing the object classes.
labels: rank 1 int32 tensor containing the object classes.
label_scores: rank 1 float32 tensor containing the scores.
masks: (optional) rank 3 float32 tensor with shape
masks: (optional) rank 3 float32 tensor with shape
[num_instances, height, width] containing instance masks. The masks
[num_instances, height, width] containing instance masks. The masks
are of the same height, width as the input `image`.
are of the same height, width as the input `image`.
...
@@ -1567,13 +2001,14 @@ def ssd_random_crop(image,
...
@@ -1567,13 +2001,14 @@ def ssd_random_crop(image,
Boxes are in normalized form.
Boxes are in normalized form.
labels: new labels.
labels: new labels.
If masks, or keypoints is not None, the function also returns:
If
label_scores,
masks, or keypoints is not None, the function also returns:
label_scores: new label scores.
masks: rank 3 float32 tensor with shape [num_instances, height, width]
masks: rank 3 float32 tensor with shape [num_instances, height, width]
containing instance masks.
containing instance masks.
keypoints: rank 3 float32 tensor with shape
keypoints: rank 3 float32 tensor with shape
[num_instances, num_keypoints, 2]
[num_instances, num_keypoints, 2]
"""
"""
def
random_crop_selector
(
selected_result
,
index
):
def
random_crop_selector
(
selected_result
,
index
):
"""Applies random_crop_image to selected result.
"""Applies random_crop_image to selected result.
...
@@ -1587,8 +2022,12 @@ def ssd_random_crop(image,
...
@@ -1587,8 +2022,12 @@ def ssd_random_crop(image,
"""
"""
i
=
3
i
=
3
image
,
boxes
,
labels
=
selected_result
[:
i
]
image
,
boxes
,
labels
=
selected_result
[:
i
]
selected_label_scores
=
None
selected_masks
=
None
selected_masks
=
None
selected_keypoints
=
None
selected_keypoints
=
None
if
label_scores
is
not
None
:
selected_label_scores
=
selected_result
[
i
]
i
+=
1
if
masks
is
not
None
:
if
masks
is
not
None
:
selected_masks
=
selected_result
[
i
]
selected_masks
=
selected_result
[
i
]
i
+=
1
i
+=
1
...
@@ -1599,6 +2038,7 @@ def ssd_random_crop(image,
...
@@ -1599,6 +2038,7 @@ def ssd_random_crop(image,
image
=
image
,
image
=
image
,
boxes
=
boxes
,
boxes
=
boxes
,
labels
=
labels
,
labels
=
labels
,
label_scores
=
selected_label_scores
,
masks
=
selected_masks
,
masks
=
selected_masks
,
keypoints
=
selected_keypoints
,
keypoints
=
selected_keypoints
,
min_object_covered
=
min_object_covered
[
index
],
min_object_covered
=
min_object_covered
[
index
],
...
@@ -1610,7 +2050,8 @@ def ssd_random_crop(image,
...
@@ -1610,7 +2050,8 @@ def ssd_random_crop(image,
result
=
_apply_with_random_selector_tuples
(
result
=
_apply_with_random_selector_tuples
(
tuple
(
tuple
(
t
for
t
in
(
image
,
boxes
,
labels
,
masks
,
keypoints
)
if
t
is
not
None
),
t
for
t
in
(
image
,
boxes
,
labels
,
label_scores
,
masks
,
keypoints
)
if
t
is
not
None
),
random_crop_selector
,
random_crop_selector
,
num_cases
=
len
(
min_object_covered
))
num_cases
=
len
(
min_object_covered
))
return
result
return
result
...
@@ -1619,13 +2060,14 @@ def ssd_random_crop(image,
...
@@ -1619,13 +2060,14 @@ def ssd_random_crop(image,
def
ssd_random_crop_pad
(
image
,
def
ssd_random_crop_pad
(
image
,
boxes
,
boxes
,
labels
,
labels
,
label_scores
=
None
,
min_object_covered
=
(
0.1
,
0.3
,
0.5
,
0.7
,
0.9
,
1.0
),
min_object_covered
=
(
0.1
,
0.3
,
0.5
,
0.7
,
0.9
,
1.0
),
aspect_ratio_range
=
((
0.5
,
2.0
),)
*
6
,
aspect_ratio_range
=
((
0.5
,
2.0
),)
*
6
,
area_range
=
((
0.1
,
1.0
),)
*
6
,
area_range
=
((
0.1
,
1.0
),)
*
6
,
overlap_thresh
=
(
0.1
,
0.3
,
0.5
,
0.7
,
0.9
,
1.0
),
overlap_thresh
=
(
0.1
,
0.3
,
0.5
,
0.7
,
0.9
,
1.0
),
random_coef
=
(
0.15
,)
*
6
,
random_coef
=
(
0.15
,)
*
6
,
min_padded_size_ratio
=
(
None
,)
*
6
,
min_padded_size_ratio
=
(
(
1.0
,
1.0
)
,)
*
6
,
max_padded_size_ratio
=
(
None
,)
*
6
,
max_padded_size_ratio
=
(
(
2.0
,
2.0
)
,)
*
6
,
pad_color
=
(
None
,)
*
6
,
pad_color
=
(
None
,)
*
6
,
seed
=
None
):
seed
=
None
):
"""Random crop preprocessing with default parameters as in SSD paper.
"""Random crop preprocessing with default parameters as in SSD paper.
...
@@ -1642,6 +2084,8 @@ def ssd_random_crop_pad(image,
...
@@ -1642,6 +2084,8 @@ def ssd_random_crop_pad(image,
between [0, 1].
between [0, 1].
Each row is in the form of [ymin, xmin, ymax, xmax].
Each row is in the form of [ymin, xmin, ymax, xmax].
labels: rank 1 int32 tensor containing the object classes.
labels: rank 1 int32 tensor containing the object classes.
label_scores: float32 tensor of shape [num_instances] representing the
score for each box.
min_object_covered: the cropped image must cover at least this fraction of
min_object_covered: the cropped image must cover at least this fraction of
at least one of the input bounding boxes.
at least one of the input bounding boxes.
aspect_ratio_range: allowed range for aspect ratio of cropped image.
aspect_ratio_range: allowed range for aspect ratio of cropped image.
...
@@ -1654,11 +2098,9 @@ def ssd_random_crop_pad(image,
...
@@ -1654,11 +2098,9 @@ def ssd_random_crop_pad(image,
cropped image, and if it is 1.0, we will always get the
cropped image, and if it is 1.0, we will always get the
original image.
original image.
min_padded_size_ratio: min ratio of padded image height and width to the
min_padded_size_ratio: min ratio of padded image height and width to the
input image's height and width. If None, it will
input image's height and width.
be set to [0.0, 0.0].
max_padded_size_ratio: max ratio of padded image height and width to the
max_padded_size_ratio: max ratio of padded image height and width to the
input image's height and width. If None, it will
input image's height and width.
be set to [2.0, 2.0].
pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32.
pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32.
if set as None, it will be set to average color of the randomly
if set as None, it will be set to average color of the randomly
cropped image.
cropped image.
...
@@ -1669,14 +2111,21 @@ def ssd_random_crop_pad(image,
...
@@ -1669,14 +2111,21 @@ def ssd_random_crop_pad(image,
boxes: boxes which is the same rank as input boxes. Boxes are in normalized
boxes: boxes which is the same rank as input boxes. Boxes are in normalized
form.
form.
new_labels: new labels.
new_labels: new labels.
new_label_scores: new label scores.
"""
"""
def
random_crop_pad_selector
(
image_boxes_labels
,
index
):
def
random_crop_pad_selector
(
image_boxes_labels
,
index
):
image
,
boxes
,
labels
=
image_boxes_labels
i
=
3
image
,
boxes
,
labels
=
image_boxes_labels
[:
i
]
selected_label_scores
=
None
if
label_scores
is
not
None
:
selected_label_scores
=
image_boxes_labels
[
i
]
return
random_crop_pad_image
(
return
random_crop_pad_image
(
image
,
image
,
boxes
,
boxes
,
labels
,
labels
,
selected_label_scores
,
min_object_covered
=
min_object_covered
[
index
],
min_object_covered
=
min_object_covered
[
index
],
aspect_ratio_range
=
aspect_ratio_range
[
index
],
aspect_ratio_range
=
aspect_ratio_range
[
index
],
area_range
=
area_range
[
index
],
area_range
=
area_range
[
index
],
...
@@ -1687,17 +2136,17 @@ def ssd_random_crop_pad(image,
...
@@ -1687,17 +2136,17 @@ def ssd_random_crop_pad(image,
pad_color
=
pad_color
[
index
],
pad_color
=
pad_color
[
index
],
seed
=
seed
)
seed
=
seed
)
new_image
,
new_boxes
,
new_labels
=
_apply_with_random_selector_tuples
(
return
_apply_with_random_selector_tuples
(
(
image
,
boxes
,
labels
),
tuple
(
t
for
t
in
(
image
,
boxes
,
labels
,
label_scores
)
if
t
is
not
None
),
random_crop_pad_selector
,
random_crop_pad_selector
,
num_cases
=
len
(
min_object_covered
))
num_cases
=
len
(
min_object_covered
))
return
new_image
,
new_boxes
,
new_labels
def
ssd_random_crop_fixed_aspect_ratio
(
def
ssd_random_crop_fixed_aspect_ratio
(
image
,
image
,
boxes
,
boxes
,
labels
,
labels
,
label_scores
=
None
,
masks
=
None
,
masks
=
None
,
keypoints
=
None
,
keypoints
=
None
,
min_object_covered
=
(
0.0
,
0.1
,
0.3
,
0.5
,
0.7
,
0.9
,
1.0
),
min_object_covered
=
(
0.0
,
0.1
,
0.3
,
0.5
,
0.7
,
0.9
,
1.0
),
...
@@ -1722,6 +2171,8 @@ def ssd_random_crop_fixed_aspect_ratio(
...
@@ -1722,6 +2171,8 @@ def ssd_random_crop_fixed_aspect_ratio(
between [0, 1].
between [0, 1].
Each row is in the form of [ymin, xmin, ymax, xmax].
Each row is in the form of [ymin, xmin, ymax, xmax].
labels: rank 1 int32 tensor containing the object classes.
labels: rank 1 int32 tensor containing the object classes.
label_scores: (optional) float32 tensor of shape [num_instances]
representing the score for each box.
masks: (optional) rank 3 float32 tensor with shape
masks: (optional) rank 3 float32 tensor with shape
[num_instances, height, width] containing instance masks. The masks
[num_instances, height, width] containing instance masks. The masks
are of the same height, width as the input `image`.
are of the same height, width as the input `image`.
...
@@ -1747,23 +2198,26 @@ def ssd_random_crop_fixed_aspect_ratio(
...
@@ -1747,23 +2198,26 @@ def ssd_random_crop_fixed_aspect_ratio(
Boxes are in normalized form.
Boxes are in normalized form.
labels: new labels.
labels: new labels.
If masks
,
or keypoints is not None, the function also returns:
If masks or keypoints is not None, the function also returns:
masks: rank 3 float32 tensor with shape [num_instances, height, width]
masks: rank 3 float32 tensor with shape [num_instances, height, width]
containing instance masks.
containing instance masks.
keypoints: rank 3 float32 tensor with shape
keypoints: rank 3 float32 tensor with shape
[num_instances, num_keypoints, 2]
[num_instances, num_keypoints, 2]
"""
"""
aspect_ratio_range
=
((
aspect_ratio
,
aspect_ratio
),)
*
len
(
area_range
)
aspect_ratio_range
=
((
aspect_ratio
,
aspect_ratio
),)
*
len
(
area_range
)
crop_result
=
ssd_random_crop
(
image
,
boxes
,
labels
,
masks
,
keypoints
,
crop_result
=
ssd_random_crop
(
min_object_covered
,
aspect_ratio_range
,
image
,
boxes
,
labels
,
label_scores
,
masks
,
keypoints
,
min_object_covered
,
area_range
,
overlap_thresh
,
random_coef
,
seed
)
aspect_ratio_range
,
area_range
,
overlap_thresh
,
random_coef
,
seed
)
i
=
3
i
=
3
new_image
,
new_boxes
,
new_labels
=
crop_result
[:
i
]
new_image
,
new_boxes
,
new_labels
=
crop_result
[:
i
]
new_label_scores
=
None
new_masks
=
None
new_masks
=
None
new_keypoints
=
None
new_keypoints
=
None
if
label_scores
is
not
None
:
new_label_scores
=
crop_result
[
i
]
i
+=
1
if
masks
is
not
None
:
if
masks
is
not
None
:
new_masks
=
crop_result
[
i
]
new_masks
=
crop_result
[
i
]
i
+=
1
i
+=
1
...
@@ -1773,6 +2227,7 @@ def ssd_random_crop_fixed_aspect_ratio(
...
@@ -1773,6 +2227,7 @@ def ssd_random_crop_fixed_aspect_ratio(
new_image
,
new_image
,
new_boxes
,
new_boxes
,
new_labels
,
new_labels
,
new_label_scores
,
new_masks
,
new_masks
,
new_keypoints
,
new_keypoints
,
aspect_ratio
=
aspect_ratio
,
aspect_ratio
=
aspect_ratio
,
...
@@ -1781,11 +2236,121 @@ def ssd_random_crop_fixed_aspect_ratio(
...
@@ -1781,11 +2236,121 @@ def ssd_random_crop_fixed_aspect_ratio(
return
result
return
result
def
get_default_func_arg_map
(
include_instance_masks
=
False
,
def
ssd_random_crop_pad_fixed_aspect_ratio
(
image
,
boxes
,
labels
,
label_scores
=
None
,
masks
=
None
,
keypoints
=
None
,
min_object_covered
=
(
0.0
,
0.1
,
0.3
,
0.5
,
0.7
,
0.9
,
1.0
),
aspect_ratio
=
1.0
,
aspect_ratio_range
=
((
0.5
,
2.0
),)
*
7
,
area_range
=
((
0.1
,
1.0
),)
*
7
,
overlap_thresh
=
(
0.0
,
0.1
,
0.3
,
0.5
,
0.7
,
0.9
,
1.0
),
random_coef
=
(
0.15
,)
*
7
,
min_padded_size_ratio
=
(
1.0
,
1.0
),
max_padded_size_ratio
=
(
2.0
,
2.0
),
seed
=
None
):
"""Random crop and pad preprocessing with default parameters as in SSD paper.
Liu et al., SSD: Single shot multibox detector.
For further information on random crop preprocessing refer to RandomCrop
function above.
The only difference is that after the initial crop, images are zero-padded
to a fixed aspect ratio instead of being resized to that aspect ratio.
Args:
image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
with pixel values varying between [0, 1].
boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
Boxes are in normalized form meaning their coordinates vary
between [0, 1].
Each row is in the form of [ymin, xmin, ymax, xmax].
labels: rank 1 int32 tensor containing the object classes.
label_scores: (optional) float32 tensor of shape [num_instances]
representing the score for each box.
masks: (optional) rank 3 float32 tensor with shape
[num_instances, height, width] containing instance masks. The masks
are of the same height, width as the input `image`.
keypoints: (optional) rank 3 float32 tensor with shape
[num_instances, num_keypoints, 2]. The keypoints are in y-x
normalized coordinates.
min_object_covered: the cropped image must cover at least this fraction of
at least one of the input bounding boxes.
aspect_ratio: the final aspect ratio to pad to.
aspect_ratio_range: allowed range for aspect ratio of cropped image.
area_range: allowed range for area ratio between cropped image and the
original image.
overlap_thresh: minimum overlap thresh with new cropped
image to keep the box.
random_coef: a random coefficient that defines the chance of getting the
original image. If random_coef is 0, we will always get the
cropped image, and if it is 1.0, we will always get the
original image.
min_padded_size_ratio: min ratio of padded image height and width to the
input image's height and width.
max_padded_size_ratio: max ratio of padded image height and width to the
input image's height and width.
seed: random seed.
Returns:
image: image which is the same rank as input image.
boxes: boxes which is the same rank as input boxes.
Boxes are in normalized form.
labels: new labels.
If masks or keypoints is not None, the function also returns:
masks: rank 3 float32 tensor with shape [num_instances, height, width]
containing instance masks.
keypoints: rank 3 float32 tensor with shape
[num_instances, num_keypoints, 2]
"""
crop_result
=
ssd_random_crop
(
image
,
boxes
,
labels
,
label_scores
,
masks
,
keypoints
,
min_object_covered
,
aspect_ratio_range
,
area_range
,
overlap_thresh
,
random_coef
,
seed
)
i
=
3
new_image
,
new_boxes
,
new_labels
=
crop_result
[:
i
]
new_label_scores
=
None
new_masks
=
None
new_keypoints
=
None
if
label_scores
is
not
None
:
new_label_scores
=
crop_result
[
i
]
i
+=
1
if
masks
is
not
None
:
new_masks
=
crop_result
[
i
]
i
+=
1
if
keypoints
is
not
None
:
new_keypoints
=
crop_result
[
i
]
result
=
random_pad_to_aspect_ratio
(
new_image
,
new_boxes
,
new_masks
,
new_keypoints
,
aspect_ratio
=
aspect_ratio
,
min_padded_size_ratio
=
min_padded_size_ratio
,
max_padded_size_ratio
=
max_padded_size_ratio
,
seed
=
seed
)
result
=
list
(
result
)
if
new_label_scores
is
not
None
:
result
.
insert
(
2
,
new_label_scores
)
result
.
insert
(
2
,
new_labels
)
result
=
tuple
(
result
)
return
result
def
get_default_func_arg_map
(
include_label_scores
=
False
,
include_instance_masks
=
False
,
include_keypoints
=
False
):
include_keypoints
=
False
):
"""Returns the default mapping from a preprocessor function to its args.
"""Returns the default mapping from a preprocessor function to its args.
Args:
Args:
include_label_scores: If True, preprocessing functions will modify the
label scores, too.
include_instance_masks: If True, preprocessing functions will modify the
include_instance_masks: If True, preprocessing functions will modify the
instance masks, too.
instance masks, too.
include_keypoints: If True, preprocessing functions will modify the
include_keypoints: If True, preprocessing functions will modify the
...
@@ -1794,6 +2359,10 @@ def get_default_func_arg_map(include_instance_masks=False,
...
@@ -1794,6 +2359,10 @@ def get_default_func_arg_map(include_instance_masks=False,
Returns:
Returns:
A map from preprocessing functions to the arguments they receive.
A map from preprocessing functions to the arguments they receive.
"""
"""
groundtruth_label_scores
=
None
if
include_label_scores
:
groundtruth_label_scores
=
(
fields
.
InputDataFields
.
groundtruth_label_scores
)
groundtruth_instance_masks
=
None
groundtruth_instance_masks
=
None
if
include_instance_masks
:
if
include_instance_masks
:
groundtruth_instance_masks
=
(
groundtruth_instance_masks
=
(
...
@@ -1805,12 +2374,24 @@ def get_default_func_arg_map(include_instance_masks=False,
...
@@ -1805,12 +2374,24 @@ def get_default_func_arg_map(include_instance_masks=False,
prep_func_arg_map
=
{
prep_func_arg_map
=
{
normalize_image
:
(
fields
.
InputDataFields
.
image
,),
normalize_image
:
(
fields
.
InputDataFields
.
image
,),
random_horizontal_flip
:
(
fields
.
InputDataFields
.
image
,
random_horizontal_flip
:
(
fields
.
InputDataFields
.
image
,
fields
.
InputDataFields
.
groundtruth_boxes
,
groundtruth_instance_masks
,
groundtruth_keypoints
,),
random_vertical_flip
:
(
fields
.
InputDataFields
.
image
,
fields
.
InputDataFields
.
groundtruth_boxes
,
groundtruth_instance_masks
,
groundtruth_keypoints
,),
random_rotation90
:
(
fields
.
InputDataFields
.
image
,
fields
.
InputDataFields
.
groundtruth_boxes
,
fields
.
InputDataFields
.
groundtruth_boxes
,
groundtruth_instance_masks
,
groundtruth_instance_masks
,
groundtruth_keypoints
,),
groundtruth_keypoints
,),
random_pixel_value_scale
:
(
fields
.
InputDataFields
.
image
,),
random_pixel_value_scale
:
(
fields
.
InputDataFields
.
image
,),
random_image_scale
:
(
fields
.
InputDataFields
.
image
,
random_image_scale
:
(
fields
.
InputDataFields
.
image
,
groundtruth_instance_masks
,),
groundtruth_instance_masks
,),
random_rgb_to_gray
:
(
fields
.
InputDataFields
.
image
,),
random_rgb_to_gray
:
(
fields
.
InputDataFields
.
image
,),
random_adjust_brightness
:
(
fields
.
InputDataFields
.
image
,),
random_adjust_brightness
:
(
fields
.
InputDataFields
.
image
,),
...
@@ -1819,54 +2400,79 @@ def get_default_func_arg_map(include_instance_masks=False,
...
@@ -1819,54 +2400,79 @@ def get_default_func_arg_map(include_instance_masks=False,
random_adjust_saturation
:
(
fields
.
InputDataFields
.
image
,),
random_adjust_saturation
:
(
fields
.
InputDataFields
.
image
,),
random_distort_color
:
(
fields
.
InputDataFields
.
image
,),
random_distort_color
:
(
fields
.
InputDataFields
.
image
,),
random_jitter_boxes
:
(
fields
.
InputDataFields
.
groundtruth_boxes
,),
random_jitter_boxes
:
(
fields
.
InputDataFields
.
groundtruth_boxes
,),
random_crop_image
:
(
fields
.
InputDataFields
.
image
,
random_crop_image
:
(
fields
.
InputDataFields
.
image
,
fields
.
InputDataFields
.
groundtruth_boxes
,
fields
.
InputDataFields
.
groundtruth_boxes
,
fields
.
InputDataFields
.
groundtruth_classes
,
fields
.
InputDataFields
.
groundtruth_classes
,
groundtruth_label_scores
,
groundtruth_instance_masks
,
groundtruth_instance_masks
,
groundtruth_keypoints
,),
groundtruth_keypoints
,),
random_pad_image
:
(
fields
.
InputDataFields
.
image
,
random_pad_image
:
(
fields
.
InputDataFields
.
image
,
fields
.
InputDataFields
.
groundtruth_boxes
),
fields
.
InputDataFields
.
groundtruth_boxes
),
random_crop_pad_image
:
(
fields
.
InputDataFields
.
image
,
random_crop_pad_image
:
(
fields
.
InputDataFields
.
image
,
fields
.
InputDataFields
.
groundtruth_boxes
,
fields
.
InputDataFields
.
groundtruth_boxes
,
fields
.
InputDataFields
.
groundtruth_classes
),
fields
.
InputDataFields
.
groundtruth_classes
,
random_crop_to_aspect_ratio
:
(
fields
.
InputDataFields
.
image
,
groundtruth_label_scores
),
random_crop_to_aspect_ratio
:
(
fields
.
InputDataFields
.
image
,
fields
.
InputDataFields
.
groundtruth_boxes
,
fields
.
InputDataFields
.
groundtruth_boxes
,
fields
.
InputDataFields
.
groundtruth_classes
,
fields
.
InputDataFields
.
groundtruth_classes
,
groundtruth_label_scores
,
groundtruth_instance_masks
,
groundtruth_keypoints
,),
random_pad_to_aspect_ratio
:
(
fields
.
InputDataFields
.
image
,
fields
.
InputDataFields
.
groundtruth_boxes
,
groundtruth_instance_masks
,
groundtruth_instance_masks
,
groundtruth_keypoints
,),
groundtruth_keypoints
,),
random_black_patches
:
(
fields
.
InputDataFields
.
image
,),
random_black_patches
:
(
fields
.
InputDataFields
.
image
,),
retain_boxes_above_threshold
:
(
retain_boxes_above_threshold
:
(
fields
.
InputDataFields
.
groundtruth_boxes
,
fields
.
InputDataFields
.
groundtruth_boxes
,
fields
.
InputDataFields
.
groundtruth_classes
,
fields
.
InputDataFields
.
groundtruth_classes
,
fields
.
InputDataFields
.
groundtruth_label_scores
,
groundtruth_label_scores
,
groundtruth_instance_masks
,
groundtruth_instance_masks
,
groundtruth_keypoints
,),
groundtruth_keypoints
,),
image_to_float
:
(
fields
.
InputDataFields
.
image
,),
image_to_float
:
(
fields
.
InputDataFields
.
image
,),
random_resize_method
:
(
fields
.
InputDataFields
.
image
,),
random_resize_method
:
(
fields
.
InputDataFields
.
image
,),
resize_to_range
:
(
fields
.
InputDataFields
.
image
,
resize_to_range
:
(
fields
.
InputDataFields
.
image
,
groundtruth_instance_masks
,),
resize_to_min_dimension
:
(
fields
.
InputDataFields
.
image
,
groundtruth_instance_masks
,),
groundtruth_instance_masks
,),
scale_boxes_to_pixel_coordinates
:
(
scale_boxes_to_pixel_coordinates
:
(
fields
.
InputDataFields
.
image
,
fields
.
InputDataFields
.
image
,
fields
.
InputDataFields
.
groundtruth_boxes
,
fields
.
InputDataFields
.
groundtruth_boxes
,
groundtruth_keypoints
,),
groundtruth_keypoints
,),
flip_boxes
:
(
fields
.
InputDataFields
.
groundtruth_boxes
,),
resize_image
:
(
resize_image
:
(
fields
.
InputDataFields
.
image
,
fields
.
InputDataFields
.
image
,
groundtruth_instance_masks
,),
groundtruth_instance_masks
,),
subtract_channel_mean
:
(
fields
.
InputDataFields
.
image
,),
subtract_channel_mean
:
(
fields
.
InputDataFields
.
image
,),
one_hot_encoding
:
(
fields
.
InputDataFields
.
groundtruth_image_classes
,),
one_hot_encoding
:
(
fields
.
InputDataFields
.
groundtruth_image_classes
,),
rgb_to_gray
:
(
fields
.
InputDataFields
.
image
,),
rgb_to_gray
:
(
fields
.
InputDataFields
.
image
,),
ssd_random_crop
:
(
fields
.
InputDataFields
.
image
,
ssd_random_crop
:
(
fields
.
InputDataFields
.
image
,
fields
.
InputDataFields
.
groundtruth_boxes
,
fields
.
InputDataFields
.
groundtruth_boxes
,
fields
.
InputDataFields
.
groundtruth_classes
,
fields
.
InputDataFields
.
groundtruth_classes
,
groundtruth_label_scores
,
groundtruth_instance_masks
,
groundtruth_instance_masks
,
groundtruth_keypoints
,),
groundtruth_keypoints
,),
ssd_random_crop_pad
:
(
fields
.
InputDataFields
.
image
,
ssd_random_crop_pad
:
(
fields
.
InputDataFields
.
image
,
fields
.
InputDataFields
.
groundtruth_boxes
,
fields
.
InputDataFields
.
groundtruth_boxes
,
fields
.
InputDataFields
.
groundtruth_classes
),
fields
.
InputDataFields
.
groundtruth_classes
,
groundtruth_label_scores
),
ssd_random_crop_fixed_aspect_ratio
:
(
ssd_random_crop_fixed_aspect_ratio
:
(
fields
.
InputDataFields
.
image
,
fields
.
InputDataFields
.
image
,
fields
.
InputDataFields
.
groundtruth_boxes
,
fields
.
InputDataFields
.
groundtruth_boxes
,
fields
.
InputDataFields
.
groundtruth_classes
,
fields
.
InputDataFields
.
groundtruth_classes
,
groundtruth_label_scores
,
groundtruth_instance_masks
,
groundtruth_keypoints
,),
ssd_random_crop_pad_fixed_aspect_ratio
:
(
fields
.
InputDataFields
.
image
,
fields
.
InputDataFields
.
groundtruth_boxes
,
fields
.
InputDataFields
.
groundtruth_classes
,
groundtruth_label_scores
,
groundtruth_instance_masks
,
groundtruth_instance_masks
,
groundtruth_keypoints
,),
groundtruth_keypoints
,),
}
}
...
@@ -1936,6 +2542,7 @@ def preprocess(tensor_dict, preprocess_options, func_arg_map=None):
...
@@ -1936,6 +2542,7 @@ def preprocess(tensor_dict, preprocess_options, func_arg_map=None):
def
get_arg
(
key
):
def
get_arg
(
key
):
return
tensor_dict
[
key
]
if
key
is
not
None
else
None
return
tensor_dict
[
key
]
if
key
is
not
None
else
None
args
=
[
get_arg
(
a
)
for
a
in
arg_names
]
args
=
[
get_arg
(
a
)
for
a
in
arg_names
]
results
=
func
(
*
args
,
**
params
)
results
=
func
(
*
args
,
**
params
)
if
not
isinstance
(
results
,
(
list
,
tuple
)):
if
not
isinstance
(
results
,
(
list
,
tuple
)):
...
...
research/object_detection/core/preprocessor_test.py
View file @
e7de233b
...
@@ -60,6 +60,10 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -60,6 +60,10 @@ class PreprocessorTest(tf.test.TestCase):
images
=
tf
.
concat
([
images_r
,
images_g
,
images_b
],
3
)
images
=
tf
.
concat
([
images_r
,
images_g
,
images_b
],
3
)
return
images
return
images
def
createEmptyTestBoxes
(
self
):
boxes
=
tf
.
constant
([[]],
dtype
=
tf
.
float32
)
return
boxes
def
createTestBoxes
(
self
):
def
createTestBoxes
(
self
):
boxes
=
tf
.
constant
(
boxes
=
tf
.
constant
(
[[
0.0
,
0.25
,
0.75
,
1.0
],
[
0.25
,
0.5
,
0.75
,
1.0
]],
dtype
=
tf
.
float32
)
[[
0.0
,
0.25
,
0.75
,
1.0
],
[
0.25
,
0.5
,
0.75
,
1.0
]],
dtype
=
tf
.
float32
)
...
@@ -162,7 +166,7 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -162,7 +166,7 @@ class PreprocessorTest(tf.test.TestCase):
images
=
tf
.
concat
([
images_r
,
images_g
,
images_b
],
3
)
images
=
tf
.
concat
([
images_r
,
images_g
,
images_b
],
3
)
return
images
return
images
def
expectedImagesAfter
Mirroring
(
self
):
def
expectedImagesAfter
LeftRightFlip
(
self
):
images_r
=
tf
.
constant
([[[
0
,
0
,
0
,
0
],
[
0
,
0
,
-
1
,
-
1
],
images_r
=
tf
.
constant
([[[
0
,
0
,
0
,
0
],
[
0
,
0
,
-
1
,
-
1
],
[
0
,
0
,
0
,
-
1
],
[
0
,
0
,
0.5
,
0.5
]]],
[
0
,
0
,
0
,
-
1
],
[
0
,
0
,
0.5
,
0.5
]]],
dtype
=
tf
.
float32
)
dtype
=
tf
.
float32
)
...
@@ -178,17 +182,54 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -178,17 +182,54 @@ class PreprocessorTest(tf.test.TestCase):
images
=
tf
.
concat
([
images_r
,
images_g
,
images_b
],
3
)
images
=
tf
.
concat
([
images_r
,
images_g
,
images_b
],
3
)
return
images
return
images
def
expectedBoxesAfterMirroring
(
self
):
def
expectedImagesAfterUpDownFlip
(
self
):
images_r
=
tf
.
constant
([[[
0.5
,
0.5
,
0
,
0
],
[
-
1
,
0
,
0
,
0
],
[
-
1
,
-
1
,
0
,
0
],
[
0
,
0
,
0
,
0
]]],
dtype
=
tf
.
float32
)
images_r
=
tf
.
expand_dims
(
images_r
,
3
)
images_g
=
tf
.
constant
([[[
0.5
,
0.5
,
0
,
0.5
],
[
-
1
,
0
,
0.5
,
0.5
],
[
-
1
,
-
1
,
0
,
0
],
[
-
1
,
-
1
,
0
,
0
]]],
dtype
=
tf
.
float32
)
images_g
=
tf
.
expand_dims
(
images_g
,
3
)
images_b
=
tf
.
constant
([[[
0.5
,
0.5
,
0.5
,
0
],
[
-
1
,
0
,
0
,
-
1
],
[
-
1
,
-
1
,
0
,
0.5
],
[
0
,
0
,
0.5
,
-
1
]]],
dtype
=
tf
.
float32
)
images_b
=
tf
.
expand_dims
(
images_b
,
3
)
images
=
tf
.
concat
([
images_r
,
images_g
,
images_b
],
3
)
return
images
def
expectedImagesAfterRot90
(
self
):
images_r
=
tf
.
constant
([[[
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
],
[
0
,
-
1
,
0
,
0.5
],
[
0
,
-
1
,
-
1
,
0.5
]]],
dtype
=
tf
.
float32
)
images_r
=
tf
.
expand_dims
(
images_r
,
3
)
images_g
=
tf
.
constant
([[[
0
,
0
,
0.5
,
0.5
],
[
0
,
0
,
0.5
,
0
],
[
-
1
,
-
1
,
0
,
0.5
],
[
-
1
,
-
1
,
-
1
,
0.5
]]],
dtype
=
tf
.
float32
)
images_g
=
tf
.
expand_dims
(
images_g
,
3
)
images_b
=
tf
.
constant
([[[
-
1
,
0.5
,
-
1
,
0
],
[
0.5
,
0
,
0
,
0.5
],
[
0
,
-
1
,
0
,
0.5
],
[
0
,
-
1
,
-
1
,
0.5
]]],
dtype
=
tf
.
float32
)
images_b
=
tf
.
expand_dims
(
images_b
,
3
)
images
=
tf
.
concat
([
images_r
,
images_g
,
images_b
],
3
)
return
images
def
expectedBoxesAfterLeftRightFlip
(
self
):
boxes
=
tf
.
constant
([[
0.0
,
0.0
,
0.75
,
0.75
],
[
0.25
,
0.0
,
0.75
,
0.5
]],
boxes
=
tf
.
constant
([[
0.0
,
0.0
,
0.75
,
0.75
],
[
0.25
,
0.0
,
0.75
,
0.5
]],
dtype
=
tf
.
float32
)
dtype
=
tf
.
float32
)
return
boxes
return
boxes
def
expectedBoxesAfter
XY
(
self
):
def
expectedBoxesAfter
UpDownFlip
(
self
):
boxes
=
tf
.
constant
([[
0.25
,
0.
0
,
1.0
,
0.75
],
[
0.5
,
0.
25
,
1
,
0.75
]],
boxes
=
tf
.
constant
([[
0.25
,
0.
25
,
1.0
,
1.0
],
[
0.
2
5
,
0.
5
,
0.75
,
1.0
]],
dtype
=
tf
.
float32
)
dtype
=
tf
.
float32
)
return
boxes
return
boxes
def
expectedMasksAfterMirroring
(
self
):
def
expectedBoxesAfterRot90
(
self
):
boxes
=
tf
.
constant
(
[[
0.0
,
0.0
,
0.75
,
0.75
],
[
0.0
,
0.25
,
0.5
,
0.75
]],
dtype
=
tf
.
float32
)
return
boxes
def
expectedMasksAfterLeftRightFlip
(
self
):
mask
=
np
.
array
([
mask
=
np
.
array
([
[[
0.0
,
0.0
,
255.0
],
[[
0.0
,
0.0
,
255.0
],
[
0.0
,
0.0
,
255.0
],
[
0.0
,
0.0
,
255.0
],
...
@@ -198,6 +239,26 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -198,6 +239,26 @@ class PreprocessorTest(tf.test.TestCase):
[
0.0
,
255.0
,
255.0
]]])
[
0.0
,
255.0
,
255.0
]]])
return
tf
.
constant
(
mask
,
dtype
=
tf
.
float32
)
return
tf
.
constant
(
mask
,
dtype
=
tf
.
float32
)
def
expectedMasksAfterUpDownFlip
(
self
):
mask
=
np
.
array
([
[[
255.0
,
0.0
,
0.0
],
[
255.0
,
0.0
,
0.0
],
[
255.0
,
0.0
,
0.0
]],
[[
255.0
,
255.0
,
0.0
],
[
255.0
,
255.0
,
0.0
],
[
255.0
,
255.0
,
0.0
]]])
return
tf
.
constant
(
mask
,
dtype
=
tf
.
float32
)
def
expectedMasksAfterRot90
(
self
):
mask
=
np
.
array
([
[[
0.0
,
0.0
,
0.0
],
[
0.0
,
0.0
,
0.0
],
[
255.0
,
255.0
,
255.0
]],
[[
0.0
,
0.0
,
0.0
],
[
255.0
,
255.0
,
255.0
],
[
255.0
,
255.0
,
255.0
]]])
return
tf
.
constant
(
mask
,
dtype
=
tf
.
float32
)
def
expectedLabelScoresAfterThresholding
(
self
):
def
expectedLabelScoresAfterThresholding
(
self
):
return
tf
.
constant
([
1.0
],
dtype
=
tf
.
float32
)
return
tf
.
constant
([
1.0
],
dtype
=
tf
.
float32
)
...
@@ -326,42 +387,62 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -326,42 +387,62 @@ class PreprocessorTest(tf.test.TestCase):
self
.
assertAllClose
(
self
.
assertAllClose
(
retained_label_scores_
,
expected_retained_label_scores_
)
retained_label_scores_
,
expected_retained_label_scores_
)
def
test
Random
FlipBoxes
(
self
):
def
testFlipBoxes
LeftRight
(
self
):
boxes
=
self
.
createTestBoxes
()
boxes
=
self
.
createTestBoxes
()
flipped_boxes
=
preprocessor
.
_flip_boxes_left_right
(
boxes
)
expected_boxes
=
self
.
expectedBoxesAfterLeftRightFlip
()
with
self
.
test_session
()
as
sess
:
flipped_boxes
,
expected_boxes
=
sess
.
run
([
flipped_boxes
,
expected_boxes
])
self
.
assertAllEqual
(
flipped_boxes
.
flatten
(),
expected_boxes
.
flatten
())
# Case where the boxes are flipped.
def
testFlipBoxesUpDown
(
self
):
boxes_expected1
=
self
.
expectedBoxesAfterMirroring
()
boxes
=
self
.
createTestBoxes
()
flipped_boxes
=
preprocessor
.
_flip_boxes_up_down
(
boxes
)
# Case where the boxes are not flipped.
expected_boxes
=
self
.
expectedBoxesAfterUpDownFlip
()
boxes_expected2
=
boxes
with
self
.
test_session
()
as
sess
:
flipped_boxes
,
expected_boxes
=
sess
.
run
([
flipped_boxes
,
expected_boxes
])
self
.
assertAllEqual
(
flipped_boxes
.
flatten
(),
expected_boxes
.
flatten
())
# After elementwise multiplication, the result should be all-zero since one
def
testRot90Boxes
(
self
):
# of them is all-zero.
boxes
=
self
.
createTestBoxes
()
boxes_diff
=
tf
.
multiply
(
rotated_boxes
=
preprocessor
.
_rot90_boxes
(
boxes
)
tf
.
squared_difference
(
boxes
,
boxes_expected1
),
expected_boxes
=
self
.
expectedBoxesAfterRot90
()
tf
.
squared_difference
(
boxes
,
boxes_expected2
))
with
self
.
test_session
()
as
sess
:
expected_result
=
tf
.
zeros_like
(
boxes_diff
)
rotated_boxes
,
expected_boxes
=
sess
.
run
([
rotated_boxes
,
expected_boxes
])
self
.
assertAllEqual
(
rotated_boxes
.
flatten
(),
expected_boxes
.
flatten
())
def
testFlipMasksLeftRight
(
self
):
test_mask
=
self
.
createTestMasks
()
flipped_mask
=
preprocessor
.
_flip_masks_left_right
(
test_mask
)
expected_mask
=
self
.
expectedMasksAfterLeftRightFlip
()
with
self
.
test_session
()
as
sess
:
with
self
.
test_session
()
as
sess
:
(
boxes_diff
,
expected_
result
)
=
sess
.
run
([
boxes_diff
,
expected_
result
])
flipped_mask
,
expected_
mask
=
sess
.
run
([
flipped_mask
,
expected_
mask
])
self
.
assertAllEqual
(
boxes_diff
,
expected_result
)
self
.
assertAllEqual
(
flipped_mask
.
flatten
(),
expected_mask
.
flatten
()
)
def
testFlipMasks
(
self
):
def
testFlipMasks
UpDown
(
self
):
test_mask
=
self
.
createTestMasks
()
test_mask
=
self
.
createTestMasks
()
flipped_mask
=
preprocessor
.
_flip_masks
(
test_mask
)
flipped_mask
=
preprocessor
.
_flip_masks
_up_down
(
test_mask
)
expected_mask
=
self
.
expectedMasksAfter
Mirroring
()
expected_mask
=
self
.
expectedMasksAfter
UpDownFlip
()
with
self
.
test_session
()
as
sess
:
with
self
.
test_session
()
as
sess
:
flipped_mask
,
expected_mask
=
sess
.
run
([
flipped_mask
,
expected_mask
])
flipped_mask
,
expected_mask
=
sess
.
run
([
flipped_mask
,
expected_mask
])
self
.
assertAllEqual
(
flipped_mask
.
flatten
(),
expected_mask
.
flatten
())
self
.
assertAllEqual
(
flipped_mask
.
flatten
(),
expected_mask
.
flatten
())
def
testRot90Masks
(
self
):
test_mask
=
self
.
createTestMasks
()
rotated_mask
=
preprocessor
.
_rot90_masks
(
test_mask
)
expected_mask
=
self
.
expectedMasksAfterRot90
()
with
self
.
test_session
()
as
sess
:
rotated_mask
,
expected_mask
=
sess
.
run
([
rotated_mask
,
expected_mask
])
self
.
assertAllEqual
(
rotated_mask
.
flatten
(),
expected_mask
.
flatten
())
def
testRandomHorizontalFlip
(
self
):
def
testRandomHorizontalFlip
(
self
):
preprocess_options
=
[(
preprocessor
.
random_horizontal_flip
,
{})]
preprocess_options
=
[(
preprocessor
.
random_horizontal_flip
,
{})]
images
=
self
.
expectedImagesAfterNormalization
()
images
=
self
.
expectedImagesAfterNormalization
()
boxes
=
self
.
createTestBoxes
()
boxes
=
self
.
createTestBoxes
()
tensor_dict
=
{
fields
.
InputDataFields
.
image
:
images
,
tensor_dict
=
{
fields
.
InputDataFields
.
image
:
images
,
fields
.
InputDataFields
.
groundtruth_boxes
:
boxes
}
fields
.
InputDataFields
.
groundtruth_boxes
:
boxes
}
images_expected1
=
self
.
expectedImagesAfter
Mirroring
()
images_expected1
=
self
.
expectedImagesAfter
LeftRightFlip
()
boxes_expected1
=
self
.
expectedBoxesAfter
Mirroring
()
boxes_expected1
=
self
.
expectedBoxesAfter
LeftRightFlip
()
images_expected2
=
images
images_expected2
=
images
boxes_expected2
=
boxes
boxes_expected2
=
boxes
tensor_dict
=
preprocessor
.
preprocess
(
tensor_dict
,
preprocess_options
)
tensor_dict
=
preprocessor
.
preprocess
(
tensor_dict
,
preprocess_options
)
...
@@ -385,6 +466,31 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -385,6 +466,31 @@ class PreprocessorTest(tf.test.TestCase):
self
.
assertAllClose
(
boxes_diff_
,
boxes_diff_expected_
)
self
.
assertAllClose
(
boxes_diff_
,
boxes_diff_expected_
)
self
.
assertAllClose
(
images_diff_
,
images_diff_expected_
)
self
.
assertAllClose
(
images_diff_
,
images_diff_expected_
)
def
testRandomHorizontalFlipWithEmptyBoxes
(
self
):
preprocess_options
=
[(
preprocessor
.
random_horizontal_flip
,
{})]
images
=
self
.
expectedImagesAfterNormalization
()
boxes
=
self
.
createEmptyTestBoxes
()
tensor_dict
=
{
fields
.
InputDataFields
.
image
:
images
,
fields
.
InputDataFields
.
groundtruth_boxes
:
boxes
}
images_expected1
=
self
.
expectedImagesAfterLeftRightFlip
()
boxes_expected
=
self
.
createEmptyTestBoxes
()
images_expected2
=
images
tensor_dict
=
preprocessor
.
preprocess
(
tensor_dict
,
preprocess_options
)
images
=
tensor_dict
[
fields
.
InputDataFields
.
image
]
boxes
=
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_boxes
]
images_diff1
=
tf
.
squared_difference
(
images
,
images_expected1
)
images_diff2
=
tf
.
squared_difference
(
images
,
images_expected2
)
images_diff
=
tf
.
multiply
(
images_diff1
,
images_diff2
)
images_diff_expected
=
tf
.
zeros_like
(
images_diff
)
with
self
.
test_session
()
as
sess
:
(
images_diff_
,
images_diff_expected_
,
boxes_
,
boxes_expected_
)
=
sess
.
run
([
images_diff
,
images_diff_expected
,
boxes
,
boxes_expected
])
self
.
assertAllClose
(
boxes_
,
boxes_expected_
)
self
.
assertAllClose
(
images_diff_
,
images_diff_expected_
)
def
testRunRandomHorizontalFlipWithMaskAndKeypoints
(
self
):
def
testRunRandomHorizontalFlipWithMaskAndKeypoints
(
self
):
preprocess_options
=
[(
preprocessor
.
random_horizontal_flip
,
{})]
preprocess_options
=
[(
preprocessor
.
random_horizontal_flip
,
{})]
image_height
=
3
image_height
=
3
...
@@ -416,6 +522,176 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -416,6 +522,176 @@ class PreprocessorTest(tf.test.TestCase):
self
.
assertTrue
(
masks
is
not
None
)
self
.
assertTrue
(
masks
is
not
None
)
self
.
assertTrue
(
keypoints
is
not
None
)
self
.
assertTrue
(
keypoints
is
not
None
)
def
testRandomVerticalFlip
(
self
):
preprocess_options
=
[(
preprocessor
.
random_vertical_flip
,
{})]
images
=
self
.
expectedImagesAfterNormalization
()
boxes
=
self
.
createTestBoxes
()
tensor_dict
=
{
fields
.
InputDataFields
.
image
:
images
,
fields
.
InputDataFields
.
groundtruth_boxes
:
boxes
}
images_expected1
=
self
.
expectedImagesAfterUpDownFlip
()
boxes_expected1
=
self
.
expectedBoxesAfterUpDownFlip
()
images_expected2
=
images
boxes_expected2
=
boxes
tensor_dict
=
preprocessor
.
preprocess
(
tensor_dict
,
preprocess_options
)
images
=
tensor_dict
[
fields
.
InputDataFields
.
image
]
boxes
=
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_boxes
]
boxes_diff1
=
tf
.
squared_difference
(
boxes
,
boxes_expected1
)
boxes_diff2
=
tf
.
squared_difference
(
boxes
,
boxes_expected2
)
boxes_diff
=
tf
.
multiply
(
boxes_diff1
,
boxes_diff2
)
boxes_diff_expected
=
tf
.
zeros_like
(
boxes_diff
)
images_diff1
=
tf
.
squared_difference
(
images
,
images_expected1
)
images_diff2
=
tf
.
squared_difference
(
images
,
images_expected2
)
images_diff
=
tf
.
multiply
(
images_diff1
,
images_diff2
)
images_diff_expected
=
tf
.
zeros_like
(
images_diff
)
with
self
.
test_session
()
as
sess
:
(
images_diff_
,
images_diff_expected_
,
boxes_diff_
,
boxes_diff_expected_
)
=
sess
.
run
([
images_diff
,
images_diff_expected
,
boxes_diff
,
boxes_diff_expected
])
self
.
assertAllClose
(
boxes_diff_
,
boxes_diff_expected_
)
self
.
assertAllClose
(
images_diff_
,
images_diff_expected_
)
def
testRandomVerticalFlipWithEmptyBoxes
(
self
):
preprocess_options
=
[(
preprocessor
.
random_vertical_flip
,
{})]
images
=
self
.
expectedImagesAfterNormalization
()
boxes
=
self
.
createEmptyTestBoxes
()
tensor_dict
=
{
fields
.
InputDataFields
.
image
:
images
,
fields
.
InputDataFields
.
groundtruth_boxes
:
boxes
}
images_expected1
=
self
.
expectedImagesAfterUpDownFlip
()
boxes_expected
=
self
.
createEmptyTestBoxes
()
images_expected2
=
images
tensor_dict
=
preprocessor
.
preprocess
(
tensor_dict
,
preprocess_options
)
images
=
tensor_dict
[
fields
.
InputDataFields
.
image
]
boxes
=
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_boxes
]
images_diff1
=
tf
.
squared_difference
(
images
,
images_expected1
)
images_diff2
=
tf
.
squared_difference
(
images
,
images_expected2
)
images_diff
=
tf
.
multiply
(
images_diff1
,
images_diff2
)
images_diff_expected
=
tf
.
zeros_like
(
images_diff
)
with
self
.
test_session
()
as
sess
:
(
images_diff_
,
images_diff_expected_
,
boxes_
,
boxes_expected_
)
=
sess
.
run
([
images_diff
,
images_diff_expected
,
boxes
,
boxes_expected
])
self
.
assertAllClose
(
boxes_
,
boxes_expected_
)
self
.
assertAllClose
(
images_diff_
,
images_diff_expected_
)
def
testRunRandomVerticalFlipWithMaskAndKeypoints
(
self
):
preprocess_options
=
[(
preprocessor
.
random_vertical_flip
,
{})]
image_height
=
3
image_width
=
3
images
=
tf
.
random_uniform
([
1
,
image_height
,
image_width
,
3
])
boxes
=
self
.
createTestBoxes
()
masks
=
self
.
createTestMasks
()
keypoints
=
self
.
createTestKeypoints
()
keypoint_flip_permutation
=
self
.
createKeypointFlipPermutation
()
tensor_dict
=
{
fields
.
InputDataFields
.
image
:
images
,
fields
.
InputDataFields
.
groundtruth_boxes
:
boxes
,
fields
.
InputDataFields
.
groundtruth_instance_masks
:
masks
,
fields
.
InputDataFields
.
groundtruth_keypoints
:
keypoints
}
preprocess_options
=
[
(
preprocessor
.
random_vertical_flip
,
{
'keypoint_flip_permutation'
:
keypoint_flip_permutation
})]
preprocessor_arg_map
=
preprocessor
.
get_default_func_arg_map
(
include_instance_masks
=
True
,
include_keypoints
=
True
)
tensor_dict
=
preprocessor
.
preprocess
(
tensor_dict
,
preprocess_options
,
func_arg_map
=
preprocessor_arg_map
)
boxes
=
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_boxes
]
masks
=
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_instance_masks
]
keypoints
=
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_keypoints
]
with
self
.
test_session
()
as
sess
:
boxes
,
masks
,
keypoints
=
sess
.
run
([
boxes
,
masks
,
keypoints
])
self
.
assertTrue
(
boxes
is
not
None
)
self
.
assertTrue
(
masks
is
not
None
)
self
.
assertTrue
(
keypoints
is
not
None
)
def
testRandomRotation90
(
self
):
preprocess_options
=
[(
preprocessor
.
random_rotation90
,
{})]
images
=
self
.
expectedImagesAfterNormalization
()
boxes
=
self
.
createTestBoxes
()
tensor_dict
=
{
fields
.
InputDataFields
.
image
:
images
,
fields
.
InputDataFields
.
groundtruth_boxes
:
boxes
}
images_expected1
=
self
.
expectedImagesAfterRot90
()
boxes_expected1
=
self
.
expectedBoxesAfterRot90
()
images_expected2
=
images
boxes_expected2
=
boxes
tensor_dict
=
preprocessor
.
preprocess
(
tensor_dict
,
preprocess_options
)
images
=
tensor_dict
[
fields
.
InputDataFields
.
image
]
boxes
=
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_boxes
]
boxes_diff1
=
tf
.
squared_difference
(
boxes
,
boxes_expected1
)
boxes_diff2
=
tf
.
squared_difference
(
boxes
,
boxes_expected2
)
boxes_diff
=
tf
.
multiply
(
boxes_diff1
,
boxes_diff2
)
boxes_diff_expected
=
tf
.
zeros_like
(
boxes_diff
)
images_diff1
=
tf
.
squared_difference
(
images
,
images_expected1
)
images_diff2
=
tf
.
squared_difference
(
images
,
images_expected2
)
images_diff
=
tf
.
multiply
(
images_diff1
,
images_diff2
)
images_diff_expected
=
tf
.
zeros_like
(
images_diff
)
with
self
.
test_session
()
as
sess
:
(
images_diff_
,
images_diff_expected_
,
boxes_diff_
,
boxes_diff_expected_
)
=
sess
.
run
([
images_diff
,
images_diff_expected
,
boxes_diff
,
boxes_diff_expected
])
self
.
assertAllClose
(
boxes_diff_
,
boxes_diff_expected_
)
self
.
assertAllClose
(
images_diff_
,
images_diff_expected_
)
def
testRandomRotation90WithEmptyBoxes
(
self
):
preprocess_options
=
[(
preprocessor
.
random_rotation90
,
{})]
images
=
self
.
expectedImagesAfterNormalization
()
boxes
=
self
.
createEmptyTestBoxes
()
tensor_dict
=
{
fields
.
InputDataFields
.
image
:
images
,
fields
.
InputDataFields
.
groundtruth_boxes
:
boxes
}
images_expected1
=
self
.
expectedImagesAfterRot90
()
boxes_expected
=
self
.
createEmptyTestBoxes
()
images_expected2
=
images
tensor_dict
=
preprocessor
.
preprocess
(
tensor_dict
,
preprocess_options
)
images
=
tensor_dict
[
fields
.
InputDataFields
.
image
]
boxes
=
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_boxes
]
images_diff1
=
tf
.
squared_difference
(
images
,
images_expected1
)
images_diff2
=
tf
.
squared_difference
(
images
,
images_expected2
)
images_diff
=
tf
.
multiply
(
images_diff1
,
images_diff2
)
images_diff_expected
=
tf
.
zeros_like
(
images_diff
)
with
self
.
test_session
()
as
sess
:
(
images_diff_
,
images_diff_expected_
,
boxes_
,
boxes_expected_
)
=
sess
.
run
([
images_diff
,
images_diff_expected
,
boxes
,
boxes_expected
])
self
.
assertAllClose
(
boxes_
,
boxes_expected_
)
self
.
assertAllClose
(
images_diff_
,
images_diff_expected_
)
def
testRunRandomRotation90WithMaskAndKeypoints
(
self
):
preprocess_options
=
[(
preprocessor
.
random_rotation90
,
{})]
image_height
=
3
image_width
=
3
images
=
tf
.
random_uniform
([
1
,
image_height
,
image_width
,
3
])
boxes
=
self
.
createTestBoxes
()
masks
=
self
.
createTestMasks
()
keypoints
=
self
.
createTestKeypoints
()
tensor_dict
=
{
fields
.
InputDataFields
.
image
:
images
,
fields
.
InputDataFields
.
groundtruth_boxes
:
boxes
,
fields
.
InputDataFields
.
groundtruth_instance_masks
:
masks
,
fields
.
InputDataFields
.
groundtruth_keypoints
:
keypoints
}
preprocessor_arg_map
=
preprocessor
.
get_default_func_arg_map
(
include_instance_masks
=
True
,
include_keypoints
=
True
)
tensor_dict
=
preprocessor
.
preprocess
(
tensor_dict
,
preprocess_options
,
func_arg_map
=
preprocessor_arg_map
)
boxes
=
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_boxes
]
masks
=
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_instance_masks
]
keypoints
=
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_keypoints
]
with
self
.
test_session
()
as
sess
:
boxes
,
masks
,
keypoints
=
sess
.
run
([
boxes
,
masks
,
keypoints
])
self
.
assertTrue
(
boxes
is
not
None
)
self
.
assertTrue
(
masks
is
not
None
)
self
.
assertTrue
(
keypoints
is
not
None
)
def
testRandomPixelValueScale
(
self
):
def
testRandomPixelValueScale
(
self
):
preprocessing_options
=
[]
preprocessing_options
=
[]
preprocessing_options
.
append
((
preprocessor
.
normalize_image
,
{
preprocessing_options
.
append
((
preprocessor
.
normalize_image
,
{
...
@@ -600,9 +876,11 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -600,9 +876,11 @@ class PreprocessorTest(tf.test.TestCase):
images
=
self
.
createTestImages
()
images
=
self
.
createTestImages
()
boxes
=
self
.
createTestBoxes
()
boxes
=
self
.
createTestBoxes
()
labels
=
self
.
createTestLabels
()
labels
=
self
.
createTestLabels
()
tensor_dict
=
{
fields
.
InputDataFields
.
image
:
images
,
tensor_dict
=
{
fields
.
InputDataFields
.
image
:
images
,
fields
.
InputDataFields
.
groundtruth_boxes
:
boxes
,
fields
.
InputDataFields
.
groundtruth_boxes
:
boxes
,
fields
.
InputDataFields
.
groundtruth_classes
:
labels
}
fields
.
InputDataFields
.
groundtruth_classes
:
labels
,
}
distorted_tensor_dict
=
preprocessor
.
preprocess
(
tensor_dict
,
distorted_tensor_dict
=
preprocessor
.
preprocess
(
tensor_dict
,
preprocessing_options
)
preprocessing_options
)
distorted_images
=
distorted_tensor_dict
[
fields
.
InputDataFields
.
image
]
distorted_images
=
distorted_tensor_dict
[
fields
.
InputDataFields
.
image
]
...
@@ -637,7 +915,7 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -637,7 +915,7 @@ class PreprocessorTest(tf.test.TestCase):
tensor_dict
=
{
tensor_dict
=
{
fields
.
InputDataFields
.
image
:
images
,
fields
.
InputDataFields
.
image
:
images
,
fields
.
InputDataFields
.
groundtruth_boxes
:
boxes
,
fields
.
InputDataFields
.
groundtruth_boxes
:
boxes
,
fields
.
InputDataFields
.
groundtruth_classes
:
labels
fields
.
InputDataFields
.
groundtruth_classes
:
labels
,
}
}
distorted_tensor_dict
=
preprocessor
.
preprocess
(
distorted_tensor_dict
=
preprocessor
.
preprocess
(
tensor_dict
,
preprocessing_options
)
tensor_dict
,
preprocessing_options
)
...
@@ -671,9 +949,11 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -671,9 +949,11 @@ class PreprocessorTest(tf.test.TestCase):
images
=
self
.
createTestImages
()
images
=
self
.
createTestImages
()
boxes
=
self
.
createTestBoxesOutOfImage
()
boxes
=
self
.
createTestBoxesOutOfImage
()
labels
=
self
.
createTestLabels
()
labels
=
self
.
createTestLabels
()
tensor_dict
=
{
fields
.
InputDataFields
.
image
:
images
,
tensor_dict
=
{
fields
.
InputDataFields
.
image
:
images
,
fields
.
InputDataFields
.
groundtruth_boxes
:
boxes
,
fields
.
InputDataFields
.
groundtruth_boxes
:
boxes
,
fields
.
InputDataFields
.
groundtruth_classes
:
labels
}
fields
.
InputDataFields
.
groundtruth_classes
:
labels
,
}
distorted_tensor_dict
=
preprocessor
.
preprocess
(
tensor_dict
,
distorted_tensor_dict
=
preprocessor
.
preprocess
(
tensor_dict
,
preprocessing_options
)
preprocessing_options
)
distorted_images
=
distorted_tensor_dict
[
fields
.
InputDataFields
.
image
]
distorted_images
=
distorted_tensor_dict
[
fields
.
InputDataFields
.
image
]
...
@@ -703,9 +983,13 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -703,9 +983,13 @@ class PreprocessorTest(tf.test.TestCase):
images
=
self
.
createTestImages
()
images
=
self
.
createTestImages
()
boxes
=
self
.
createTestBoxes
()
boxes
=
self
.
createTestBoxes
()
labels
=
self
.
createTestLabels
()
labels
=
self
.
createTestLabels
()
tensor_dict
=
{
fields
.
InputDataFields
.
image
:
images
,
label_scores
=
self
.
createTestLabelScores
()
tensor_dict
=
{
fields
.
InputDataFields
.
image
:
images
,
fields
.
InputDataFields
.
groundtruth_boxes
:
boxes
,
fields
.
InputDataFields
.
groundtruth_boxes
:
boxes
,
fields
.
InputDataFields
.
groundtruth_classes
:
labels
}
fields
.
InputDataFields
.
groundtruth_classes
:
labels
,
fields
.
InputDataFields
.
groundtruth_label_scores
:
label_scores
}
tensor_dict
=
preprocessor
.
preprocess
(
tensor_dict
,
preprocessing_options
)
tensor_dict
=
preprocessor
.
preprocess
(
tensor_dict
,
preprocessing_options
)
images
=
tensor_dict
[
fields
.
InputDataFields
.
image
]
images
=
tensor_dict
[
fields
.
InputDataFields
.
image
]
...
@@ -720,6 +1004,8 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -720,6 +1004,8 @@ class PreprocessorTest(tf.test.TestCase):
fields
.
InputDataFields
.
groundtruth_boxes
]
fields
.
InputDataFields
.
groundtruth_boxes
]
distorted_labels
=
distorted_tensor_dict
[
distorted_labels
=
distorted_tensor_dict
[
fields
.
InputDataFields
.
groundtruth_classes
]
fields
.
InputDataFields
.
groundtruth_classes
]
distorted_label_scores
=
distorted_tensor_dict
[
fields
.
InputDataFields
.
groundtruth_label_scores
]
boxes_shape
=
tf
.
shape
(
boxes
)
boxes_shape
=
tf
.
shape
(
boxes
)
distorted_boxes_shape
=
tf
.
shape
(
distorted_boxes
)
distorted_boxes_shape
=
tf
.
shape
(
distorted_boxes
)
images_shape
=
tf
.
shape
(
images
)
images_shape
=
tf
.
shape
(
images
)
...
@@ -728,15 +1014,18 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -728,15 +1014,18 @@ class PreprocessorTest(tf.test.TestCase):
with
self
.
test_session
()
as
sess
:
with
self
.
test_session
()
as
sess
:
(
boxes_shape_
,
distorted_boxes_shape_
,
images_shape_
,
(
boxes_shape_
,
distorted_boxes_shape_
,
images_shape_
,
distorted_images_shape_
,
images_
,
distorted_images_
,
distorted_images_shape_
,
images_
,
distorted_images_
,
boxes_
,
distorted_boxes_
,
labels_
,
distorted_labels_
)
=
sess
.
run
(
boxes_
,
distorted_boxes_
,
labels_
,
distorted_labels_
,
label_scores_
,
distorted_label_scores_
)
=
sess
.
run
(
[
boxes_shape
,
distorted_boxes_shape
,
images_shape
,
[
boxes_shape
,
distorted_boxes_shape
,
images_shape
,
distorted_images_shape
,
images
,
distorted_images
,
distorted_images_shape
,
images
,
distorted_images
,
boxes
,
distorted_boxes
,
labels
,
distorted_labels
])
boxes
,
distorted_boxes
,
labels
,
distorted_labels
,
label_scores
,
distorted_label_scores
])
self
.
assertAllEqual
(
boxes_shape_
,
distorted_boxes_shape_
)
self
.
assertAllEqual
(
boxes_shape_
,
distorted_boxes_shape_
)
self
.
assertAllEqual
(
images_shape_
,
distorted_images_shape_
)
self
.
assertAllEqual
(
images_shape_
,
distorted_images_shape_
)
self
.
assertAllClose
(
images_
,
distorted_images_
)
self
.
assertAllClose
(
images_
,
distorted_images_
)
self
.
assertAllClose
(
boxes_
,
distorted_boxes_
)
self
.
assertAllClose
(
boxes_
,
distorted_boxes_
)
self
.
assertAllEqual
(
labels_
,
distorted_labels_
)
self
.
assertAllEqual
(
labels_
,
distorted_labels_
)
self
.
assertAllEqual
(
label_scores_
,
distorted_label_scores_
)
def
testRandomCropWithMockSampleDistortedBoundingBox
(
self
):
def
testRandomCropWithMockSampleDistortedBoundingBox
(
self
):
preprocessing_options
=
[(
preprocessor
.
normalize_image
,
{
preprocessing_options
=
[(
preprocessor
.
normalize_image
,
{
...
@@ -751,9 +1040,12 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -751,9 +1040,12 @@ class PreprocessorTest(tf.test.TestCase):
[
0.2
,
0.4
,
0.75
,
0.75
],
[
0.2
,
0.4
,
0.75
,
0.75
],
[
0.3
,
0.1
,
0.4
,
0.7
]],
dtype
=
tf
.
float32
)
[
0.3
,
0.1
,
0.4
,
0.7
]],
dtype
=
tf
.
float32
)
labels
=
tf
.
constant
([
1
,
7
,
11
],
dtype
=
tf
.
int32
)
labels
=
tf
.
constant
([
1
,
7
,
11
],
dtype
=
tf
.
int32
)
tensor_dict
=
{
fields
.
InputDataFields
.
image
:
images
,
tensor_dict
=
{
fields
.
InputDataFields
.
image
:
images
,
fields
.
InputDataFields
.
groundtruth_boxes
:
boxes
,
fields
.
InputDataFields
.
groundtruth_boxes
:
boxes
,
fields
.
InputDataFields
.
groundtruth_classes
:
labels
}
fields
.
InputDataFields
.
groundtruth_classes
:
labels
,
}
tensor_dict
=
preprocessor
.
preprocess
(
tensor_dict
,
preprocessing_options
)
tensor_dict
=
preprocessor
.
preprocess
(
tensor_dict
,
preprocessing_options
)
images
=
tensor_dict
[
fields
.
InputDataFields
.
image
]
images
=
tensor_dict
[
fields
.
InputDataFields
.
image
]
...
@@ -786,6 +1078,36 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -786,6 +1078,36 @@ class PreprocessorTest(tf.test.TestCase):
self
.
assertAllClose
(
distorted_boxes_
,
expected_boxes_
)
self
.
assertAllClose
(
distorted_boxes_
,
expected_boxes_
)
self
.
assertAllEqual
(
distorted_labels_
,
expected_labels_
)
self
.
assertAllEqual
(
distorted_labels_
,
expected_labels_
)
def
testStrictRandomCropImageWithLabelScores
(
self
):
image
=
self
.
createColorfulTestImage
()[
0
]
boxes
=
self
.
createTestBoxes
()
labels
=
self
.
createTestLabels
()
label_scores
=
self
.
createTestLabelScores
()
with
mock
.
patch
.
object
(
tf
.
image
,
'sample_distorted_bounding_box'
)
as
mock_sample_distorted_bounding_box
:
mock_sample_distorted_bounding_box
.
return_value
=
(
tf
.
constant
([
6
,
143
,
0
],
dtype
=
tf
.
int32
),
tf
.
constant
([
190
,
237
,
-
1
],
dtype
=
tf
.
int32
),
tf
.
constant
([[[
0.03
,
0.3575
,
0.98
,
0.95
]]],
dtype
=
tf
.
float32
))
new_image
,
new_boxes
,
new_labels
,
new_label_scores
=
(
preprocessor
.
_strict_random_crop_image
(
image
,
boxes
,
labels
,
label_scores
))
with
self
.
test_session
()
as
sess
:
new_image
,
new_boxes
,
new_labels
,
new_label_scores
=
(
sess
.
run
(
[
new_image
,
new_boxes
,
new_labels
,
new_label_scores
])
)
expected_boxes
=
np
.
array
(
[[
0.0
,
0.0
,
0.75789469
,
1.0
],
[
0.23157893
,
0.24050637
,
0.75789469
,
1.0
]],
dtype
=
np
.
float32
)
self
.
assertAllEqual
(
new_image
.
shape
,
[
190
,
237
,
3
])
self
.
assertAllEqual
(
new_label_scores
,
[
1.0
,
0.5
])
self
.
assertAllClose
(
new_boxes
.
flatten
(),
expected_boxes
.
flatten
())
def
testStrictRandomCropImageWithMasks
(
self
):
def
testStrictRandomCropImageWithMasks
(
self
):
image
=
self
.
createColorfulTestImage
()[
0
]
image
=
self
.
createColorfulTestImage
()[
0
]
boxes
=
self
.
createTestBoxes
()
boxes
=
self
.
createTestBoxes
()
...
@@ -799,17 +1121,15 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -799,17 +1121,15 @@ class PreprocessorTest(tf.test.TestCase):
tf
.
constant
([
6
,
143
,
0
],
dtype
=
tf
.
int32
),
tf
.
constant
([
6
,
143
,
0
],
dtype
=
tf
.
int32
),
tf
.
constant
([
190
,
237
,
-
1
],
dtype
=
tf
.
int32
),
tf
.
constant
([
190
,
237
,
-
1
],
dtype
=
tf
.
int32
),
tf
.
constant
([[[
0.03
,
0.3575
,
0.98
,
0.95
]]],
dtype
=
tf
.
float32
))
tf
.
constant
([[[
0.03
,
0.3575
,
0.98
,
0.95
]]],
dtype
=
tf
.
float32
))
(
new_image
,
new_boxes
,
new_labels
,
new_image
,
new_boxes
,
new_labels
,
new_masks
=
(
new_masks
)
=
preprocessor
.
_strict_random_crop_image
(
preprocessor
.
_strict_random_crop_image
(
image
,
boxes
,
labels
,
masks
=
masks
)
image
,
boxes
,
labels
,
masks
=
masks
)
)
with
self
.
test_session
()
as
sess
:
with
self
.
test_session
()
as
sess
:
new_image
,
new_boxes
,
new_labels
,
new_masks
=
sess
.
run
([
new_image
,
new_boxes
,
new_labels
,
new_masks
=
sess
.
run
(
new_image
,
new_boxes
,
new_labels
,
new_masks
])
[
new_image
,
new_boxes
,
new_labels
,
new_masks
])
expected_boxes
=
np
.
array
(
expected_boxes
=
np
.
array
([
[[
0.0
,
0.0
,
0.75789469
,
1.0
],
[
0.0
,
0.0
,
0.75789469
,
1.0
],
[
0.23157893
,
0.24050637
,
0.75789469
,
1.0
]],
dtype
=
np
.
float32
)
[
0.23157893
,
0.24050637
,
0.75789469
,
1.0
],
],
dtype
=
np
.
float32
)
self
.
assertAllEqual
(
new_image
.
shape
,
[
190
,
237
,
3
])
self
.
assertAllEqual
(
new_image
.
shape
,
[
190
,
237
,
3
])
self
.
assertAllEqual
(
new_masks
.
shape
,
[
2
,
190
,
237
])
self
.
assertAllEqual
(
new_masks
.
shape
,
[
2
,
190
,
237
])
self
.
assertAllClose
(
self
.
assertAllClose
(
...
@@ -828,17 +1148,16 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -828,17 +1148,16 @@ class PreprocessorTest(tf.test.TestCase):
tf
.
constant
([
6
,
143
,
0
],
dtype
=
tf
.
int32
),
tf
.
constant
([
6
,
143
,
0
],
dtype
=
tf
.
int32
),
tf
.
constant
([
190
,
237
,
-
1
],
dtype
=
tf
.
int32
),
tf
.
constant
([
190
,
237
,
-
1
],
dtype
=
tf
.
int32
),
tf
.
constant
([[[
0.03
,
0.3575
,
0.98
,
0.95
]]],
dtype
=
tf
.
float32
))
tf
.
constant
([[[
0.03
,
0.3575
,
0.98
,
0.95
]]],
dtype
=
tf
.
float32
))
(
new_image
,
new_boxes
,
new_labels
,
new_image
,
new_boxes
,
new_labels
,
new_keypoints
=
(
new_keypoints
)
=
preprocessor
.
_strict_random_crop_image
(
preprocessor
.
_strict_random_crop_image
(
image
,
boxes
,
labels
,
keypoints
=
keypoints
)
image
,
boxes
,
labels
,
keypoints
=
keypoints
)
)
with
self
.
test_session
()
as
sess
:
with
self
.
test_session
()
as
sess
:
new_image
,
new_boxes
,
new_labels
,
new_keypoints
=
sess
.
run
(
[
new_image
,
new_boxes
,
new_labels
,
new_keypoints
=
sess
.
run
(
new_image
,
new_boxes
,
new_labels
,
new_keypoints
])
[
new_image
,
new_boxes
,
new_labels
,
new_keypoints
])
expected_boxes
=
np
.
array
([
expected_boxes
=
np
.
array
([
[
0.0
,
0.0
,
0.75789469
,
1.0
],
[
0.0
,
0.0
,
0.75789469
,
1.0
],
[
0.23157893
,
0.24050637
,
0.75789469
,
1.0
],
[
0.23157893
,
0.24050637
,
0.75789469
,
1.0
],],
dtype
=
np
.
float32
)
],
dtype
=
np
.
float32
)
expected_keypoints
=
np
.
array
([
expected_keypoints
=
np
.
array
([
[[
np
.
nan
,
np
.
nan
],
[[
np
.
nan
,
np
.
nan
],
[
np
.
nan
,
np
.
nan
],
[
np
.
nan
,
np
.
nan
],
...
@@ -1038,9 +1357,10 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -1038,9 +1357,10 @@ class PreprocessorTest(tf.test.TestCase):
preprocessing_options
=
[
preprocessing_options
=
[
(
preprocessor
.
retain_boxes_above_threshold
,
{
'threshold'
:
0.6
})
(
preprocessor
.
retain_boxes_above_threshold
,
{
'threshold'
:
0.6
})
]
]
preprocessor_arg_map
=
preprocessor
.
get_default_func_arg_map
(
include_label_scores
=
True
)
retained_tensor_dict
=
preprocessor
.
preprocess
(
retained_tensor_dict
=
preprocessor
.
preprocess
(
tensor_dict
,
preprocessing_options
)
tensor_dict
,
preprocessing_options
,
func_arg_map
=
preprocessor_arg_map
)
retained_boxes
=
retained_tensor_dict
[
retained_boxes
=
retained_tensor_dict
[
fields
.
InputDataFields
.
groundtruth_boxes
]
fields
.
InputDataFields
.
groundtruth_boxes
]
retained_labels
=
retained_tensor_dict
[
retained_labels
=
retained_tensor_dict
[
...
@@ -1076,6 +1396,7 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -1076,6 +1396,7 @@ class PreprocessorTest(tf.test.TestCase):
}
}
preprocessor_arg_map
=
preprocessor
.
get_default_func_arg_map
(
preprocessor_arg_map
=
preprocessor
.
get_default_func_arg_map
(
include_label_scores
=
True
,
include_instance_masks
=
True
)
include_instance_masks
=
True
)
preprocessing_options
=
[
preprocessing_options
=
[
...
@@ -1107,6 +1428,7 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -1107,6 +1428,7 @@ class PreprocessorTest(tf.test.TestCase):
}
}
preprocessor_arg_map
=
preprocessor
.
get_default_func_arg_map
(
preprocessor_arg_map
=
preprocessor
.
get_default_func_arg_map
(
include_label_scores
=
True
,
include_keypoints
=
True
)
include_keypoints
=
True
)
preprocessing_options
=
[
preprocessing_options
=
[
...
@@ -1214,6 +1536,94 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -1214,6 +1536,94 @@ class PreprocessorTest(tf.test.TestCase):
self
.
assertAllClose
(
distorted_keypoints_
.
flatten
(),
self
.
assertAllClose
(
distorted_keypoints_
.
flatten
(),
expected_keypoints
.
flatten
())
expected_keypoints
.
flatten
())
def
testRunRandomPadToAspectRatioWithMasks
(
self
):
image
=
self
.
createColorfulTestImage
()
boxes
=
self
.
createTestBoxes
()
labels
=
self
.
createTestLabels
()
masks
=
tf
.
random_uniform
([
2
,
200
,
400
],
dtype
=
tf
.
float32
)
tensor_dict
=
{
fields
.
InputDataFields
.
image
:
image
,
fields
.
InputDataFields
.
groundtruth_boxes
:
boxes
,
fields
.
InputDataFields
.
groundtruth_classes
:
labels
,
fields
.
InputDataFields
.
groundtruth_instance_masks
:
masks
}
preprocessor_arg_map
=
preprocessor
.
get_default_func_arg_map
(
include_instance_masks
=
True
)
preprocessing_options
=
[(
preprocessor
.
random_pad_to_aspect_ratio
,
{})]
distorted_tensor_dict
=
preprocessor
.
preprocess
(
tensor_dict
,
preprocessing_options
,
func_arg_map
=
preprocessor_arg_map
)
distorted_image
=
distorted_tensor_dict
[
fields
.
InputDataFields
.
image
]
distorted_boxes
=
distorted_tensor_dict
[
fields
.
InputDataFields
.
groundtruth_boxes
]
distorted_labels
=
distorted_tensor_dict
[
fields
.
InputDataFields
.
groundtruth_classes
]
distorted_masks
=
distorted_tensor_dict
[
fields
.
InputDataFields
.
groundtruth_instance_masks
]
with
self
.
test_session
()
as
sess
:
(
distorted_image_
,
distorted_boxes_
,
distorted_labels_
,
distorted_masks_
)
=
sess
.
run
([
distorted_image
,
distorted_boxes
,
distorted_labels
,
distorted_masks
])
expected_boxes
=
np
.
array
(
[[
0.0
,
0.25
,
0.375
,
1.0
],
[
0.125
,
0.5
,
0.375
,
1.0
]],
dtype
=
np
.
float32
)
self
.
assertAllEqual
(
distorted_image_
.
shape
,
[
1
,
400
,
400
,
3
])
self
.
assertAllEqual
(
distorted_labels_
,
[
1
,
2
])
self
.
assertAllClose
(
distorted_boxes_
.
flatten
(),
expected_boxes
.
flatten
())
self
.
assertAllEqual
(
distorted_masks_
.
shape
,
[
2
,
400
,
400
])
def
testRunRandomPadToAspectRatioWithKeypoints
(
self
):
image
=
self
.
createColorfulTestImage
()
boxes
=
self
.
createTestBoxes
()
labels
=
self
.
createTestLabels
()
keypoints
=
self
.
createTestKeypoints
()
tensor_dict
=
{
fields
.
InputDataFields
.
image
:
image
,
fields
.
InputDataFields
.
groundtruth_boxes
:
boxes
,
fields
.
InputDataFields
.
groundtruth_classes
:
labels
,
fields
.
InputDataFields
.
groundtruth_keypoints
:
keypoints
}
preprocessor_arg_map
=
preprocessor
.
get_default_func_arg_map
(
include_keypoints
=
True
)
preprocessing_options
=
[(
preprocessor
.
random_pad_to_aspect_ratio
,
{})]
distorted_tensor_dict
=
preprocessor
.
preprocess
(
tensor_dict
,
preprocessing_options
,
func_arg_map
=
preprocessor_arg_map
)
distorted_image
=
distorted_tensor_dict
[
fields
.
InputDataFields
.
image
]
distorted_boxes
=
distorted_tensor_dict
[
fields
.
InputDataFields
.
groundtruth_boxes
]
distorted_labels
=
distorted_tensor_dict
[
fields
.
InputDataFields
.
groundtruth_classes
]
distorted_keypoints
=
distorted_tensor_dict
[
fields
.
InputDataFields
.
groundtruth_keypoints
]
with
self
.
test_session
()
as
sess
:
(
distorted_image_
,
distorted_boxes_
,
distorted_labels_
,
distorted_keypoints_
)
=
sess
.
run
([
distorted_image
,
distorted_boxes
,
distorted_labels
,
distorted_keypoints
])
expected_boxes
=
np
.
array
(
[[
0.0
,
0.25
,
0.375
,
1.0
],
[
0.125
,
0.5
,
0.375
,
1.0
]],
dtype
=
np
.
float32
)
expected_keypoints
=
np
.
array
([
[[
0.05
,
0.1
],
[
0.1
,
0.2
],
[
0.15
,
0.3
]],
[[
0.2
,
0.4
],
[
0.25
,
0.5
],
[
0.3
,
0.6
]],
],
dtype
=
np
.
float32
)
self
.
assertAllEqual
(
distorted_image_
.
shape
,
[
1
,
400
,
400
,
3
])
self
.
assertAllEqual
(
distorted_labels_
,
[
1
,
2
])
self
.
assertAllClose
(
distorted_boxes_
.
flatten
(),
expected_boxes
.
flatten
())
self
.
assertAllClose
(
distorted_keypoints_
.
flatten
(),
expected_keypoints
.
flatten
())
def
testRandomPadImage
(
self
):
def
testRandomPadImage
(
self
):
preprocessing_options
=
[(
preprocessor
.
normalize_image
,
{
preprocessing_options
=
[(
preprocessor
.
normalize_image
,
{
'original_minval'
:
0
,
'original_minval'
:
0
,
...
@@ -1225,9 +1635,11 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -1225,9 +1635,11 @@ class PreprocessorTest(tf.test.TestCase):
images
=
self
.
createTestImages
()
images
=
self
.
createTestImages
()
boxes
=
self
.
createTestBoxes
()
boxes
=
self
.
createTestBoxes
()
labels
=
self
.
createTestLabels
()
labels
=
self
.
createTestLabels
()
tensor_dict
=
{
fields
.
InputDataFields
.
image
:
images
,
tensor_dict
=
{
fields
.
InputDataFields
.
image
:
images
,
fields
.
InputDataFields
.
groundtruth_boxes
:
boxes
,
fields
.
InputDataFields
.
groundtruth_boxes
:
boxes
,
fields
.
InputDataFields
.
groundtruth_classes
:
labels
}
fields
.
InputDataFields
.
groundtruth_classes
:
labels
,
}
tensor_dict
=
preprocessor
.
preprocess
(
tensor_dict
,
preprocessing_options
)
tensor_dict
=
preprocessor
.
preprocess
(
tensor_dict
,
preprocessing_options
)
images
=
tensor_dict
[
fields
.
InputDataFields
.
image
]
images
=
tensor_dict
[
fields
.
InputDataFields
.
image
]
...
@@ -1269,9 +1681,11 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -1269,9 +1681,11 @@ class PreprocessorTest(tf.test.TestCase):
images
=
self
.
createTestImages
()
images
=
self
.
createTestImages
()
boxes
=
self
.
createTestBoxes
()
boxes
=
self
.
createTestBoxes
()
labels
=
self
.
createTestLabels
()
labels
=
self
.
createTestLabels
()
tensor_dict
=
{
fields
.
InputDataFields
.
image
:
images
,
tensor_dict
=
{
fields
.
InputDataFields
.
image
:
images
,
fields
.
InputDataFields
.
groundtruth_boxes
:
boxes
,
fields
.
InputDataFields
.
groundtruth_boxes
:
boxes
,
fields
.
InputDataFields
.
groundtruth_classes
:
labels
}
fields
.
InputDataFields
.
groundtruth_classes
:
labels
,
}
tensor_dict
=
preprocessor
.
preprocess
(
tensor_dict
,
preprocessing_options
)
tensor_dict
=
preprocessor
.
preprocess
(
tensor_dict
,
preprocessing_options
)
images
=
tensor_dict
[
fields
.
InputDataFields
.
image
]
images
=
tensor_dict
[
fields
.
InputDataFields
.
image
]
...
@@ -1305,22 +1719,15 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -1305,22 +1719,15 @@ class PreprocessorTest(tf.test.TestCase):
padded_boxes_
[:,
3
]
-
padded_boxes_
[:,
1
])))
padded_boxes_
[:,
3
]
-
padded_boxes_
[:,
1
])))
def
testRandomCropToAspectRatio
(
self
):
def
testRandomCropToAspectRatio
(
self
):
preprocessing_options
=
[(
preprocessor
.
normalize_image
,
{
'original_minval'
:
0
,
'original_maxval'
:
255
,
'target_minval'
:
0
,
'target_maxval'
:
1
})]
images
=
self
.
createTestImages
()
images
=
self
.
createTestImages
()
boxes
=
self
.
createTestBoxes
()
boxes
=
self
.
createTestBoxes
()
labels
=
self
.
createTestLabels
()
labels
=
self
.
createTestLabels
()
tensor_dict
=
{
tensor_dict
=
{
fields
.
InputDataFields
.
image
:
images
,
fields
.
InputDataFields
.
image
:
images
,
fields
.
InputDataFields
.
groundtruth_boxes
:
boxes
,
fields
.
InputDataFields
.
groundtruth_boxes
:
boxes
,
fields
.
InputDataFields
.
groundtruth_classes
:
labels
fields
.
InputDataFields
.
groundtruth_classes
:
labels
,
}
}
tensor_dict
=
preprocessor
.
preprocess
(
tensor_dict
,
preprocessing_options
)
tensor_dict
=
preprocessor
.
preprocess
(
tensor_dict
,
[]
)
images
=
tensor_dict
[
fields
.
InputDataFields
.
image
]
images
=
tensor_dict
[
fields
.
InputDataFields
.
image
]
preprocessing_options
=
[(
preprocessor
.
random_crop_to_aspect_ratio
,
{
preprocessing_options
=
[(
preprocessor
.
random_crop_to_aspect_ratio
,
{
...
@@ -1346,6 +1753,41 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -1346,6 +1753,41 @@ class PreprocessorTest(tf.test.TestCase):
self
.
assertEqual
(
images_shape_
[
1
],
cropped_images_shape_
[
1
]
*
2
)
self
.
assertEqual
(
images_shape_
[
1
],
cropped_images_shape_
[
1
]
*
2
)
self
.
assertEqual
(
images_shape_
[
2
],
cropped_images_shape_
[
2
])
self
.
assertEqual
(
images_shape_
[
2
],
cropped_images_shape_
[
2
])
def
testRandomPadToAspectRatio
(
self
):
images
=
self
.
createTestImages
()
boxes
=
self
.
createTestBoxes
()
labels
=
self
.
createTestLabels
()
tensor_dict
=
{
fields
.
InputDataFields
.
image
:
images
,
fields
.
InputDataFields
.
groundtruth_boxes
:
boxes
,
fields
.
InputDataFields
.
groundtruth_classes
:
labels
,
}
tensor_dict
=
preprocessor
.
preprocess
(
tensor_dict
,
[])
images
=
tensor_dict
[
fields
.
InputDataFields
.
image
]
preprocessing_options
=
[(
preprocessor
.
random_pad_to_aspect_ratio
,
{
'aspect_ratio'
:
2.0
})]
padded_tensor_dict
=
preprocessor
.
preprocess
(
tensor_dict
,
preprocessing_options
)
padded_images
=
padded_tensor_dict
[
fields
.
InputDataFields
.
image
]
padded_boxes
=
padded_tensor_dict
[
fields
.
InputDataFields
.
groundtruth_boxes
]
boxes_shape
=
tf
.
shape
(
boxes
)
padded_boxes_shape
=
tf
.
shape
(
padded_boxes
)
images_shape
=
tf
.
shape
(
images
)
padded_images_shape
=
tf
.
shape
(
padded_images
)
with
self
.
test_session
()
as
sess
:
(
boxes_shape_
,
padded_boxes_shape_
,
images_shape_
,
padded_images_shape_
)
=
sess
.
run
([
boxes_shape
,
padded_boxes_shape
,
images_shape
,
padded_images_shape
])
self
.
assertAllEqual
(
boxes_shape_
,
padded_boxes_shape_
)
self
.
assertEqual
(
images_shape_
[
1
],
padded_images_shape_
[
1
])
self
.
assertEqual
(
2
*
images_shape_
[
2
],
padded_images_shape_
[
2
])
def
testRandomBlackPatches
(
self
):
def
testRandomBlackPatches
(
self
):
preprocessing_options
=
[]
preprocessing_options
=
[]
preprocessing_options
.
append
((
preprocessor
.
normalize_image
,
{
preprocessing_options
.
append
((
preprocessor
.
normalize_image
,
{
...
@@ -1395,6 +1837,60 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -1395,6 +1837,60 @@ class PreprocessorTest(tf.test.TestCase):
self
.
assertAllEqual
(
expected_images_shape_
,
self
.
assertAllEqual
(
expected_images_shape_
,
resized_images_shape_
)
resized_images_shape_
)
def
testResizeImageWithMasks
(
self
):
"""Tests image resizing, checking output sizes."""
in_image_shape_list
=
[[
60
,
40
,
3
],
[
15
,
30
,
3
]]
in_masks_shape_list
=
[[
15
,
60
,
40
],
[
10
,
15
,
30
]]
height
=
50
width
=
100
expected_image_shape_list
=
[[
50
,
100
,
3
],
[
50
,
100
,
3
]]
expected_masks_shape_list
=
[[
15
,
50
,
100
],
[
10
,
50
,
100
]]
for
(
in_image_shape
,
expected_image_shape
,
in_masks_shape
,
expected_mask_shape
)
in
zip
(
in_image_shape_list
,
expected_image_shape_list
,
in_masks_shape_list
,
expected_masks_shape_list
):
in_image
=
tf
.
random_uniform
(
in_image_shape
)
in_masks
=
tf
.
random_uniform
(
in_masks_shape
)
out_image
,
out_masks
=
preprocessor
.
resize_image
(
in_image
,
in_masks
,
new_height
=
height
,
new_width
=
width
)
out_image_shape
=
tf
.
shape
(
out_image
)
out_masks_shape
=
tf
.
shape
(
out_masks
)
with
self
.
test_session
()
as
sess
:
out_image_shape
,
out_masks_shape
=
sess
.
run
(
[
out_image_shape
,
out_masks_shape
])
self
.
assertAllEqual
(
out_image_shape
,
expected_image_shape
)
self
.
assertAllEqual
(
out_masks_shape
,
expected_mask_shape
)
def
testResizeImageWithNoInstanceMask
(
self
):
"""Tests image resizing, checking output sizes."""
in_image_shape_list
=
[[
60
,
40
,
3
],
[
15
,
30
,
3
]]
in_masks_shape_list
=
[[
0
,
60
,
40
],
[
0
,
15
,
30
]]
height
=
50
width
=
100
expected_image_shape_list
=
[[
50
,
100
,
3
],
[
50
,
100
,
3
]]
expected_masks_shape_list
=
[[
0
,
50
,
100
],
[
0
,
50
,
100
]]
for
(
in_image_shape
,
expected_image_shape
,
in_masks_shape
,
expected_mask_shape
)
in
zip
(
in_image_shape_list
,
expected_image_shape_list
,
in_masks_shape_list
,
expected_masks_shape_list
):
in_image
=
tf
.
random_uniform
(
in_image_shape
)
in_masks
=
tf
.
random_uniform
(
in_masks_shape
)
out_image
,
out_masks
=
preprocessor
.
resize_image
(
in_image
,
in_masks
,
new_height
=
height
,
new_width
=
width
)
out_image_shape
=
tf
.
shape
(
out_image
)
out_masks_shape
=
tf
.
shape
(
out_masks
)
with
self
.
test_session
()
as
sess
:
out_image_shape
,
out_masks_shape
=
sess
.
run
(
[
out_image_shape
,
out_masks_shape
])
self
.
assertAllEqual
(
out_image_shape
,
expected_image_shape
)
self
.
assertAllEqual
(
out_masks_shape
,
expected_mask_shape
)
def
testResizeToRangePreservesStaticSpatialShape
(
self
):
def
testResizeToRangePreservesStaticSpatialShape
(
self
):
"""Tests image resizing, checking output sizes."""
"""Tests image resizing, checking output sizes."""
in_shape_list
=
[[
60
,
40
,
3
],
[
15
,
30
,
3
],
[
15
,
50
,
3
]]
in_shape_list
=
[[
60
,
40
,
3
],
[
15
,
30
,
3
],
[
15
,
50
,
3
]]
...
@@ -1483,10 +1979,10 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -1483,10 +1979,10 @@ class PreprocessorTest(tf.test.TestCase):
"""Tests image resizing, checking output sizes."""
"""Tests image resizing, checking output sizes."""
in_image_shape_list
=
[[
60
,
40
,
3
],
[
15
,
30
,
3
]]
in_image_shape_list
=
[[
60
,
40
,
3
],
[
15
,
30
,
3
]]
in_masks_shape_list
=
[[
0
,
60
,
40
],
[
0
,
15
,
30
]]
in_masks_shape_list
=
[[
0
,
60
,
40
],
[
0
,
15
,
30
]]
height
=
50
min_dim
=
50
width
=
100
max_dim
=
100
expected_image_shape_list
=
[[
50
,
10
0
,
3
],
[
50
,
100
,
3
]]
expected_image_shape_list
=
[[
75
,
5
0
,
3
],
[
50
,
100
,
3
]]
expected_masks_shape_list
=
[[
0
,
50
,
10
0
],
[
0
,
50
,
100
]]
expected_masks_shape_list
=
[[
0
,
75
,
5
0
],
[
0
,
50
,
100
]]
for
(
in_image_shape
,
expected_image_shape
,
in_masks_shape
,
for
(
in_image_shape
,
expected_image_shape
,
in_masks_shape
,
expected_mask_shape
)
in
zip
(
in_image_shape_list
,
expected_mask_shape
)
in
zip
(
in_image_shape_list
,
...
@@ -1495,8 +1991,8 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -1495,8 +1991,8 @@ class PreprocessorTest(tf.test.TestCase):
expected_masks_shape_list
):
expected_masks_shape_list
):
in_image
=
tf
.
random_uniform
(
in_image_shape
)
in_image
=
tf
.
random_uniform
(
in_image_shape
)
in_masks
=
tf
.
random_uniform
(
in_masks_shape
)
in_masks
=
tf
.
random_uniform
(
in_masks_shape
)
out_image
,
out_masks
=
preprocessor
.
resize_
ima
ge
(
out_image
,
out_masks
=
preprocessor
.
resize_
to_ran
ge
(
in_image
,
in_masks
,
new_height
=
height
,
new_width
=
width
)
in_image
,
in_masks
,
min_dimension
=
min_dim
,
max_dimension
=
max_dim
)
out_image_shape
=
tf
.
shape
(
out_image
)
out_image_shape
=
tf
.
shape
(
out_image
)
out_masks_shape
=
tf
.
shape
(
out_masks
)
out_masks_shape
=
tf
.
shape
(
out_masks
)
...
@@ -1528,6 +2024,67 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -1528,6 +2024,67 @@ class PreprocessorTest(tf.test.TestCase):
out_image_shape
=
sess
.
run
(
out_image_shape
)
out_image_shape
=
sess
.
run
(
out_image_shape
)
self
.
assertAllEqual
(
out_image_shape
,
expected_shape
)
self
.
assertAllEqual
(
out_image_shape
,
expected_shape
)
def
testResizeToMinDimensionTensorShapes
(
self
):
in_image_shape_list
=
[[
60
,
55
,
3
],
[
15
,
30
,
3
]]
in_masks_shape_list
=
[[
15
,
60
,
55
],
[
10
,
15
,
30
]]
min_dim
=
50
expected_image_shape_list
=
[[
60
,
55
,
3
],
[
50
,
100
,
3
]]
expected_masks_shape_list
=
[[
15
,
60
,
55
],
[
10
,
50
,
100
]]
for
(
in_image_shape
,
expected_image_shape
,
in_masks_shape
,
expected_mask_shape
)
in
zip
(
in_image_shape_list
,
expected_image_shape_list
,
in_masks_shape_list
,
expected_masks_shape_list
):
in_image
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
(
None
,
None
,
3
))
in_masks
=
tf
.
placeholder
(
tf
.
float32
,
shape
=
(
None
,
None
,
None
))
in_masks
=
tf
.
random_uniform
(
in_masks_shape
)
out_image
,
out_masks
=
preprocessor
.
resize_to_min_dimension
(
in_image
,
in_masks
,
min_dimension
=
min_dim
)
out_image_shape
=
tf
.
shape
(
out_image
)
out_masks_shape
=
tf
.
shape
(
out_masks
)
with
self
.
test_session
()
as
sess
:
out_image_shape
,
out_masks_shape
=
sess
.
run
(
[
out_image_shape
,
out_masks_shape
],
feed_dict
=
{
in_image
:
np
.
random
.
randn
(
*
in_image_shape
),
in_masks
:
np
.
random
.
randn
(
*
in_masks_shape
)
})
self
.
assertAllEqual
(
out_image_shape
,
expected_image_shape
)
self
.
assertAllEqual
(
out_masks_shape
,
expected_mask_shape
)
def
testResizeToMinDimensionWithInstanceMasksTensorOfSizeZero
(
self
):
"""Tests image resizing, checking output sizes."""
in_image_shape_list
=
[[
60
,
40
,
3
],
[
15
,
30
,
3
]]
in_masks_shape_list
=
[[
0
,
60
,
40
],
[
0
,
15
,
30
]]
min_dim
=
50
expected_image_shape_list
=
[[
75
,
50
,
3
],
[
50
,
100
,
3
]]
expected_masks_shape_list
=
[[
0
,
75
,
50
],
[
0
,
50
,
100
]]
for
(
in_image_shape
,
expected_image_shape
,
in_masks_shape
,
expected_mask_shape
)
in
zip
(
in_image_shape_list
,
expected_image_shape_list
,
in_masks_shape_list
,
expected_masks_shape_list
):
in_image
=
tf
.
random_uniform
(
in_image_shape
)
in_masks
=
tf
.
random_uniform
(
in_masks_shape
)
out_image
,
out_masks
=
preprocessor
.
resize_to_min_dimension
(
in_image
,
in_masks
,
min_dimension
=
min_dim
)
out_image_shape
=
tf
.
shape
(
out_image
)
out_masks_shape
=
tf
.
shape
(
out_masks
)
with
self
.
test_session
()
as
sess
:
out_image_shape
,
out_masks_shape
=
sess
.
run
(
[
out_image_shape
,
out_masks_shape
])
self
.
assertAllEqual
(
out_image_shape
,
expected_image_shape
)
self
.
assertAllEqual
(
out_masks_shape
,
expected_mask_shape
)
def
testResizeToMinDimensionRaisesErrorOn4DImage
(
self
):
image
=
tf
.
random_uniform
([
1
,
200
,
300
,
3
])
with
self
.
assertRaises
(
ValueError
):
preprocessor
.
resize_to_min_dimension
(
image
,
500
)
def
testScaleBoxesToPixelCoordinates
(
self
):
def
testScaleBoxesToPixelCoordinates
(
self
):
"""Tests box scaling, checking scaled values."""
"""Tests box scaling, checking scaled values."""
in_shape
=
[
60
,
40
,
3
]
in_shape
=
[
60
,
40
,
3
]
...
@@ -1599,9 +2156,11 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -1599,9 +2156,11 @@ class PreprocessorTest(tf.test.TestCase):
images
=
self
.
createTestImages
()
images
=
self
.
createTestImages
()
boxes
=
self
.
createTestBoxes
()
boxes
=
self
.
createTestBoxes
()
labels
=
self
.
createTestLabels
()
labels
=
self
.
createTestLabels
()
tensor_dict
=
{
fields
.
InputDataFields
.
image
:
images
,
tensor_dict
=
{
fields
.
InputDataFields
.
image
:
images
,
fields
.
InputDataFields
.
groundtruth_boxes
:
boxes
,
fields
.
InputDataFields
.
groundtruth_boxes
:
boxes
,
fields
.
InputDataFields
.
groundtruth_classes
:
labels
}
fields
.
InputDataFields
.
groundtruth_classes
:
labels
,
}
distorted_tensor_dict
=
preprocessor
.
preprocess
(
tensor_dict
,
distorted_tensor_dict
=
preprocessor
.
preprocess
(
tensor_dict
,
preprocessing_options
)
preprocessing_options
)
distorted_images
=
distorted_tensor_dict
[
fields
.
InputDataFields
.
image
]
distorted_images
=
distorted_tensor_dict
[
fields
.
InputDataFields
.
image
]
...
@@ -1633,9 +2192,11 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -1633,9 +2192,11 @@ class PreprocessorTest(tf.test.TestCase):
'target_maxval'
:
1
'target_maxval'
:
1
}),
}),
(
preprocessor
.
ssd_random_crop_pad
,
{})]
(
preprocessor
.
ssd_random_crop_pad
,
{})]
tensor_dict
=
{
fields
.
InputDataFields
.
image
:
images
,
tensor_dict
=
{
fields
.
InputDataFields
.
image
:
images
,
fields
.
InputDataFields
.
groundtruth_boxes
:
boxes
,
fields
.
InputDataFields
.
groundtruth_boxes
:
boxes
,
fields
.
InputDataFields
.
groundtruth_classes
:
labels
}
fields
.
InputDataFields
.
groundtruth_classes
:
labels
,
}
distorted_tensor_dict
=
preprocessor
.
preprocess
(
tensor_dict
,
distorted_tensor_dict
=
preprocessor
.
preprocess
(
tensor_dict
,
preprocessing_options
)
preprocessing_options
)
distorted_images
=
distorted_tensor_dict
[
fields
.
InputDataFields
.
image
]
distorted_images
=
distorted_tensor_dict
[
fields
.
InputDataFields
.
image
]
...
@@ -1655,7 +2216,10 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -1655,7 +2216,10 @@ class PreprocessorTest(tf.test.TestCase):
self
.
assertAllEqual
(
boxes_rank_
,
distorted_boxes_rank_
)
self
.
assertAllEqual
(
boxes_rank_
,
distorted_boxes_rank_
)
self
.
assertAllEqual
(
images_rank_
,
distorted_images_rank_
)
self
.
assertAllEqual
(
images_rank_
,
distorted_images_rank_
)
def
testSSDRandomCropFixedAspectRatio
(
self
):
def
_testSSDRandomCropFixedAspectRatio
(
self
,
include_label_scores
,
include_instance_masks
,
include_keypoints
):
images
=
self
.
createTestImages
()
images
=
self
.
createTestImages
()
boxes
=
self
.
createTestBoxes
()
boxes
=
self
.
createTestBoxes
()
labels
=
self
.
createTestLabels
()
labels
=
self
.
createTestLabels
()
...
@@ -1672,54 +2236,26 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -1672,54 +2236,26 @@ class PreprocessorTest(tf.test.TestCase):
fields
.
InputDataFields
.
groundtruth_boxes
:
boxes
,
fields
.
InputDataFields
.
groundtruth_boxes
:
boxes
,
fields
.
InputDataFields
.
groundtruth_classes
:
labels
fields
.
InputDataFields
.
groundtruth_classes
:
labels
}
}
distorted_tensor_dict
=
preprocessor
.
preprocess
(
tensor_dict
,
if
include_label_scores
:
preprocessing_options
)
label_scores
=
self
.
createTestLabelScores
()
distorted_images
=
distorted_tensor_dict
[
fields
.
InputDataFields
.
image
]
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_label_scores
]
=
(
distorted_boxes
=
distorted_tensor_dict
[
label_scores
)
fields
.
InputDataFields
.
groundtruth_boxes
]
if
include_instance_masks
:
images_rank
=
tf
.
rank
(
images
)
distorted_images_rank
=
tf
.
rank
(
distorted_images
)
boxes_rank
=
tf
.
rank
(
boxes
)
distorted_boxes_rank
=
tf
.
rank
(
distorted_boxes
)
with
self
.
test_session
()
as
sess
:
(
boxes_rank_
,
distorted_boxes_rank_
,
images_rank_
,
distorted_images_rank_
)
=
sess
.
run
(
[
boxes_rank
,
distorted_boxes_rank
,
images_rank
,
distorted_images_rank
])
self
.
assertAllEqual
(
boxes_rank_
,
distorted_boxes_rank_
)
self
.
assertAllEqual
(
images_rank_
,
distorted_images_rank_
)
def
testSSDRandomCropFixedAspectRatioWithMasksAndKeypoints
(
self
):
images
=
self
.
createTestImages
()
boxes
=
self
.
createTestBoxes
()
labels
=
self
.
createTestLabels
()
masks
=
self
.
createTestMasks
()
masks
=
self
.
createTestMasks
()
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_instance_masks
]
=
masks
if
include_keypoints
:
keypoints
=
self
.
createTestKeypoints
()
keypoints
=
self
.
createTestKeypoints
()
preprocessing_options
=
[
tensor_dict
[
fields
.
InputDataFields
.
groundtruth_keypoints
]
=
keypoints
(
preprocessor
.
normalize_image
,
{
'original_minval'
:
0
,
'original_maxval'
:
255
,
'target_minval'
:
0
,
'target_maxval'
:
1
}),
(
preprocessor
.
ssd_random_crop_fixed_aspect_ratio
,
{})]
tensor_dict
=
{
fields
.
InputDataFields
.
image
:
images
,
fields
.
InputDataFields
.
groundtruth_boxes
:
boxes
,
fields
.
InputDataFields
.
groundtruth_classes
:
labels
,
fields
.
InputDataFields
.
groundtruth_instance_masks
:
masks
,
fields
.
InputDataFields
.
groundtruth_keypoints
:
keypoints
,
}
preprocessor_arg_map
=
preprocessor
.
get_default_func_arg_map
(
preprocessor_arg_map
=
preprocessor
.
get_default_func_arg_map
(
include_instance_masks
=
True
,
include_keypoints
=
True
)
include_label_scores
=
include_label_scores
,
include_instance_masks
=
include_instance_masks
,
include_keypoints
=
include_keypoints
)
distorted_tensor_dict
=
preprocessor
.
preprocess
(
distorted_tensor_dict
=
preprocessor
.
preprocess
(
tensor_dict
,
preprocessing_options
,
func_arg_map
=
preprocessor_arg_map
)
tensor_dict
,
preprocessing_options
,
func_arg_map
=
preprocessor_arg_map
)
distorted_images
=
distorted_tensor_dict
[
fields
.
InputDataFields
.
image
]
distorted_images
=
distorted_tensor_dict
[
fields
.
InputDataFields
.
image
]
distorted_boxes
=
distorted_tensor_dict
[
distorted_boxes
=
distorted_tensor_dict
[
fields
.
InputDataFields
.
groundtruth_boxes
]
fields
.
InputDataFields
.
groundtruth_boxes
]
images_rank
=
tf
.
rank
(
images
)
images_rank
=
tf
.
rank
(
images
)
distorted_images_rank
=
tf
.
rank
(
distorted_images
)
distorted_images_rank
=
tf
.
rank
(
distorted_images
)
boxes_rank
=
tf
.
rank
(
boxes
)
boxes_rank
=
tf
.
rank
(
boxes
)
...
@@ -1733,5 +2269,20 @@ class PreprocessorTest(tf.test.TestCase):
...
@@ -1733,5 +2269,20 @@ class PreprocessorTest(tf.test.TestCase):
self
.
assertAllEqual
(
boxes_rank_
,
distorted_boxes_rank_
)
self
.
assertAllEqual
(
boxes_rank_
,
distorted_boxes_rank_
)
self
.
assertAllEqual
(
images_rank_
,
distorted_images_rank_
)
self
.
assertAllEqual
(
images_rank_
,
distorted_images_rank_
)
def
testSSDRandomCropFixedAspectRatio
(
self
):
self
.
_testSSDRandomCropFixedAspectRatio
(
include_label_scores
=
False
,
include_instance_masks
=
False
,
include_keypoints
=
False
)
def
testSSDRandomCropFixedAspectRatioWithMasksAndKeypoints
(
self
):
self
.
_testSSDRandomCropFixedAspectRatio
(
include_label_scores
=
False
,
include_instance_masks
=
True
,
include_keypoints
=
True
)
def
testSSDRandomCropFixedAspectRatioWithLabelScoresMasksAndKeypoints
(
self
):
self
.
_testSSDRandomCropFixedAspectRatio
(
include_label_scores
=
True
,
include_instance_masks
=
True
,
include_keypoints
=
True
)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
tf
.
test
.
main
()
tf
.
test
.
main
()
research/object_detection/core/standard_fields.py
View file @
e7de233b
...
@@ -18,6 +18,7 @@
...
@@ -18,6 +18,7 @@
Specifies:
Specifies:
InputDataFields: standard fields used by reader/preprocessor/batcher.
InputDataFields: standard fields used by reader/preprocessor/batcher.
DetectionResultFields: standard fields returned by object detector.
BoxListFields: standard field used by BoxList
BoxListFields: standard field used by BoxList
TfExampleFields: standard fields for tf-example data format (go/tf-example).
TfExampleFields: standard fields for tf-example data format (go/tf-example).
"""
"""
...
@@ -41,12 +42,17 @@ class InputDataFields(object):
...
@@ -41,12 +42,17 @@ class InputDataFields(object):
groundtruth_boxes: coordinates of the ground truth boxes in the image.
groundtruth_boxes: coordinates of the ground truth boxes in the image.
groundtruth_classes: box-level class labels.
groundtruth_classes: box-level class labels.
groundtruth_label_types: box-level label types (e.g. explicit negative).
groundtruth_label_types: box-level label types (e.g. explicit negative).
groundtruth_is_crowd: is the groundtruth a single object or a crowd.
groundtruth_is_crowd: [DEPRECATED, use groundtruth_group_of instead]
is the groundtruth a single object or a crowd.
groundtruth_area: area of a groundtruth segment.
groundtruth_area: area of a groundtruth segment.
groundtruth_difficult: is a `difficult` object
groundtruth_difficult: is a `difficult` object
groundtruth_group_of: is a `group_of` objects, e.g. multiple objects of the
same class, forming a connected group, where instances are heavily
occluding each other.
proposal_boxes: coordinates of object proposal boxes.
proposal_boxes: coordinates of object proposal boxes.
proposal_objectness: objectness score of each proposal.
proposal_objectness: objectness score of each proposal.
groundtruth_instance_masks: ground truth instance masks.
groundtruth_instance_masks: ground truth instance masks.
groundtruth_instance_boundaries: ground truth instance boundaries.
groundtruth_instance_classes: instance mask-level class labels.
groundtruth_instance_classes: instance mask-level class labels.
groundtruth_keypoints: ground truth keypoints.
groundtruth_keypoints: ground truth keypoints.
groundtruth_keypoint_visibilities: ground truth keypoint visibilities.
groundtruth_keypoint_visibilities: ground truth keypoint visibilities.
...
@@ -64,15 +70,43 @@ class InputDataFields(object):
...
@@ -64,15 +70,43 @@ class InputDataFields(object):
groundtruth_is_crowd
=
'groundtruth_is_crowd'
groundtruth_is_crowd
=
'groundtruth_is_crowd'
groundtruth_area
=
'groundtruth_area'
groundtruth_area
=
'groundtruth_area'
groundtruth_difficult
=
'groundtruth_difficult'
groundtruth_difficult
=
'groundtruth_difficult'
groundtruth_group_of
=
'groundtruth_group_of'
proposal_boxes
=
'proposal_boxes'
proposal_boxes
=
'proposal_boxes'
proposal_objectness
=
'proposal_objectness'
proposal_objectness
=
'proposal_objectness'
groundtruth_instance_masks
=
'groundtruth_instance_masks'
groundtruth_instance_masks
=
'groundtruth_instance_masks'
groundtruth_instance_boundaries
=
'groundtruth_instance_boundaries'
groundtruth_instance_classes
=
'groundtruth_instance_classes'
groundtruth_instance_classes
=
'groundtruth_instance_classes'
groundtruth_keypoints
=
'groundtruth_keypoints'
groundtruth_keypoints
=
'groundtruth_keypoints'
groundtruth_keypoint_visibilities
=
'groundtruth_keypoint_visibilities'
groundtruth_keypoint_visibilities
=
'groundtruth_keypoint_visibilities'
groundtruth_label_scores
=
'groundtruth_label_scores'
groundtruth_label_scores
=
'groundtruth_label_scores'
class
DetectionResultFields
(
object
):
"""Naming converntions for storing the output of the detector.
Attributes:
source_id: source of the original image.
key: unique key corresponding to image.
detection_boxes: coordinates of the detection boxes in the image.
detection_scores: detection scores for the detection boxes in the image.
detection_classes: detection-level class labels.
detection_masks: contains a segmentation mask for each detection box.
detection_boundaries: contains an object boundary for each detection box.
detection_keypoints: contains detection keypoints for each detection box.
num_detections: number of detections in the batch.
"""
source_id
=
'source_id'
key
=
'key'
detection_boxes
=
'detection_boxes'
detection_scores
=
'detection_scores'
detection_classes
=
'detection_classes'
detection_masks
=
'detection_masks'
detection_boundaries
=
'detection_boundaries'
detection_keypoints
=
'detection_keypoints'
num_detections
=
'num_detections'
class
BoxListFields
(
object
):
class
BoxListFields
(
object
):
"""Naming conventions for BoxLists.
"""Naming conventions for BoxLists.
...
@@ -83,6 +117,7 @@ class BoxListFields(object):
...
@@ -83,6 +117,7 @@ class BoxListFields(object):
weights: sample weights per bounding box.
weights: sample weights per bounding box.
objectness: objectness score per bounding box.
objectness: objectness score per bounding box.
masks: masks per bounding box.
masks: masks per bounding box.
boundaries: boundaries per bounding box.
keypoints: keypoints per bounding box.
keypoints: keypoints per bounding box.
keypoint_heatmaps: keypoint heatmaps per bounding box.
keypoint_heatmaps: keypoint heatmaps per bounding box.
"""
"""
...
@@ -92,6 +127,7 @@ class BoxListFields(object):
...
@@ -92,6 +127,7 @@ class BoxListFields(object):
weights
=
'weights'
weights
=
'weights'
objectness
=
'objectness'
objectness
=
'objectness'
masks
=
'masks'
masks
=
'masks'
boundaries
=
'boundaries'
keypoints
=
'keypoints'
keypoints
=
'keypoints'
keypoint_heatmaps
=
'keypoint_heatmaps'
keypoint_heatmaps
=
'keypoint_heatmaps'
...
@@ -112,7 +148,7 @@ class TfExampleFields(object):
...
@@ -112,7 +148,7 @@ class TfExampleFields(object):
width: width of image in pixels, e.g. 581
width: width of image in pixels, e.g. 581
source_id: original source of the image
source_id: original source of the image
object_class_text: labels in text format, e.g. ["person", "cat"]
object_class_text: labels in text format, e.g. ["person", "cat"]
object_class_
text
: labels in numbers, e.g. [16, 8]
object_class_
label
: labels in numbers, e.g. [16, 8]
object_bbox_xmin: xmin coordinates of groundtruth box, e.g. 10, 30
object_bbox_xmin: xmin coordinates of groundtruth box, e.g. 10, 30
object_bbox_xmax: xmax coordinates of groundtruth box, e.g. 50, 40
object_bbox_xmax: xmax coordinates of groundtruth box, e.g. 50, 40
object_bbox_ymin: ymin coordinates of groundtruth box, e.g. 40, 50
object_bbox_ymin: ymin coordinates of groundtruth box, e.g. 40, 50
...
@@ -121,10 +157,20 @@ class TfExampleFields(object):
...
@@ -121,10 +157,20 @@ class TfExampleFields(object):
object_truncated: is object truncated, e.g. [true, false]
object_truncated: is object truncated, e.g. [true, false]
object_occluded: is object occluded, e.g. [true, false]
object_occluded: is object occluded, e.g. [true, false]
object_difficult: is object difficult, e.g. [true, false]
object_difficult: is object difficult, e.g. [true, false]
object_is_crowd: is the object a single object or a crowd
object_group_of: is object a single object or a group of objects
object_depiction: is object a depiction
object_is_crowd: [DEPRECATED, use object_group_of instead]
is the object a single object or a crowd
object_segment_area: the area of the segment.
object_segment_area: the area of the segment.
instance_masks: instance segmentation masks.
instance_masks: instance segmentation masks.
instance_boundaries: instance boundaries.
instance_classes: Classes for each instance segmentation mask.
instance_classes: Classes for each instance segmentation mask.
detection_class_label: class label in numbers.
detection_bbox_ymin: ymin coordinates of a detection box.
detection_bbox_xmin: xmin coordinates of a detection box.
detection_bbox_ymax: ymax coordinates of a detection box.
detection_bbox_xmax: xmax coordinates of a detection box.
detection_score: detection score for the class label and box.
"""
"""
image_encoded
=
'image/encoded'
image_encoded
=
'image/encoded'
image_format
=
'image/format'
# format is reserved keyword
image_format
=
'image/format'
# format is reserved keyword
...
@@ -144,7 +190,16 @@ class TfExampleFields(object):
...
@@ -144,7 +190,16 @@ class TfExampleFields(object):
object_truncated
=
'image/object/truncated'
object_truncated
=
'image/object/truncated'
object_occluded
=
'image/object/occluded'
object_occluded
=
'image/object/occluded'
object_difficult
=
'image/object/difficult'
object_difficult
=
'image/object/difficult'
object_group_of
=
'image/object/group_of'
object_depiction
=
'image/object/depiction'
object_is_crowd
=
'image/object/is_crowd'
object_is_crowd
=
'image/object/is_crowd'
object_segment_area
=
'image/object/segment/area'
object_segment_area
=
'image/object/segment/area'
instance_masks
=
'image/segmentation/object'
instance_masks
=
'image/segmentation/object'
instance_boundaries
=
'image/boundaries/object'
instance_classes
=
'image/segmentation/object/class'
instance_classes
=
'image/segmentation/object/class'
detection_class_label
=
'image/detection/label'
detection_bbox_ymin
=
'image/detection/bbox/ymin'
detection_bbox_xmin
=
'image/detection/bbox/xmin'
detection_bbox_ymax
=
'image/detection/bbox/ymax'
detection_bbox_xmax
=
'image/detection/bbox/xmax'
detection_score
=
'image/detection/score'
research/object_detection/core/target_assigner.py
View file @
e7de233b
...
@@ -50,7 +50,7 @@ class TargetAssigner(object):
...
@@ -50,7 +50,7 @@ class TargetAssigner(object):
def
__init__
(
self
,
similarity_calc
,
matcher
,
box_coder
,
def
__init__
(
self
,
similarity_calc
,
matcher
,
box_coder
,
positive_class_weight
=
1.0
,
negative_class_weight
=
1.0
,
positive_class_weight
=
1.0
,
negative_class_weight
=
1.0
,
unmatched_cls_target
=
None
):
unmatched_cls_target
=
None
):
"""Construct
Multibox
Target Assigner.
"""Construct
Object Detection
Target Assigner.
Args:
Args:
similarity_calc: a RegionSimilarityCalculator
similarity_calc: a RegionSimilarityCalculator
...
@@ -108,7 +108,7 @@ class TargetAssigner(object):
...
@@ -108,7 +108,7 @@ class TargetAssigner(object):
Args:
Args:
anchors: a BoxList representing N anchors
anchors: a BoxList representing N anchors
groundtruth_boxes: a BoxList representing M groundtruth boxes
groundtruth_boxes: a BoxList representing M groundtruth boxes
groundtruth_labels: a tensor of shape [
num_gt_boxes
, d_1, ... d_k]
groundtruth_labels: a tensor of shape [
M
, d_1, ... d_k]
with labels for each of the ground_truth boxes. The subshape
with labels for each of the ground_truth boxes. The subshape
[d_1, ... d_k] can be empty (corresponding to scalar inputs). When set
[d_1, ... d_k] can be empty (corresponding to scalar inputs). When set
to None, groundtruth_labels assumes a binary problem where all
to None, groundtruth_labels assumes a binary problem where all
...
@@ -140,10 +140,16 @@ class TargetAssigner(object):
...
@@ -140,10 +140,16 @@ class TargetAssigner(object):
groundtruth_labels
=
tf
.
ones
(
tf
.
expand_dims
(
groundtruth_boxes
.
num_boxes
(),
groundtruth_labels
=
tf
.
ones
(
tf
.
expand_dims
(
groundtruth_boxes
.
num_boxes
(),
0
))
0
))
groundtruth_labels
=
tf
.
expand_dims
(
groundtruth_labels
,
-
1
)
groundtruth_labels
=
tf
.
expand_dims
(
groundtruth_labels
,
-
1
)
shape_assert
=
tf
.
assert_equal
(
tf
.
shape
(
groundtruth_labels
)[
1
:],
unmatched_shape_assert
=
tf
.
assert_equal
(
tf
.
shape
(
self
.
_unmatched_cls_target
))
tf
.
shape
(
groundtruth_labels
)[
1
:],
tf
.
shape
(
self
.
_unmatched_cls_target
),
message
=
'Unmatched class target shape incompatible '
with
tf
.
control_dependencies
([
shape_assert
]):
'with groundtruth labels shape!'
)
labels_and_box_shapes_assert
=
tf
.
assert_equal
(
tf
.
shape
(
groundtruth_labels
)[
0
],
groundtruth_boxes
.
num_boxes
(),
message
=
'Groundtruth boxes and labels have incompatible shapes!'
)
with
tf
.
control_dependencies
(
[
unmatched_shape_assert
,
labels_and_box_shapes_assert
]):
match_quality_matrix
=
self
.
_similarity_calc
.
compare
(
groundtruth_boxes
,
match_quality_matrix
=
self
.
_similarity_calc
.
compare
(
groundtruth_boxes
,
anchors
)
anchors
)
match
=
self
.
_matcher
.
match
(
match_quality_matrix
,
**
params
)
match
=
self
.
_matcher
.
match
(
match_quality_matrix
,
**
params
)
...
@@ -316,8 +322,8 @@ class TargetAssigner(object):
...
@@ -316,8 +322,8 @@ class TargetAssigner(object):
return
self
.
_box_coder
return
self
.
_box_coder
# TODO: This method pulls in all the implementation dependencies into
core.
# TODO: This method pulls in all the implementation dependencies into
# Therefore its best to have this factory method outside of core.
#
core.
Therefore its best to have this factory method outside of core.
def
create_target_assigner
(
reference
,
stage
=
None
,
def
create_target_assigner
(
reference
,
stage
=
None
,
positive_class_weight
=
1.0
,
positive_class_weight
=
1.0
,
negative_class_weight
=
1.0
,
negative_class_weight
=
1.0
,
...
...
research/object_detection/core/target_assigner_test.py
View file @
e7de233b
...
@@ -327,6 +327,41 @@ class TargetAssignerTest(tf.test.TestCase):
...
@@ -327,6 +327,41 @@ class TargetAssignerTest(tf.test.TestCase):
self
.
assertEquals
(
reg_weights_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
reg_weights_out
.
dtype
,
np
.
float32
)
self
.
assertEquals
(
matching_anchors_out
.
dtype
,
np
.
int32
)
self
.
assertEquals
(
matching_anchors_out
.
dtype
,
np
.
int32
)
def
test_raises_error_on_incompatible_groundtruth_boxes_and_labels
(
self
):
similarity_calc
=
region_similarity_calculator
.
NegSqDistSimilarity
()
matcher
=
bipartite_matcher
.
GreedyBipartiteMatcher
()
box_coder
=
mean_stddev_box_coder
.
MeanStddevBoxCoder
()
unmatched_cls_target
=
tf
.
constant
([
1
,
0
,
0
,
0
,
0
,
0
,
0
],
tf
.
float32
)
target_assigner
=
targetassigner
.
TargetAssigner
(
similarity_calc
,
matcher
,
box_coder
,
unmatched_cls_target
=
unmatched_cls_target
)
prior_means
=
tf
.
constant
([[
0.0
,
0.0
,
0.5
,
0.5
],
[
0.5
,
0.5
,
1.0
,
0.8
],
[
0
,
0.5
,
.
5
,
1.0
],
[.
75
,
0
,
1.0
,
.
25
]])
prior_stddevs
=
tf
.
constant
(
4
*
[
4
*
[.
1
]])
priors
=
box_list
.
BoxList
(
prior_means
)
priors
.
add_field
(
'stddev'
,
prior_stddevs
)
box_corners
=
[[
0.0
,
0.0
,
0.5
,
0.5
],
[
0.0
,
0.0
,
0.5
,
0.8
],
[
0.5
,
0.5
,
0.9
,
0.9
],
[.
75
,
0
,
.
95
,
.
27
]]
boxes
=
box_list
.
BoxList
(
tf
.
constant
(
box_corners
))
groundtruth_labels
=
tf
.
constant
([[
0
,
1
,
0
,
0
,
0
,
0
,
0
],
[
0
,
0
,
0
,
0
,
0
,
1
,
0
],
[
0
,
0
,
0
,
1
,
0
,
0
,
0
]],
tf
.
float32
)
result
=
target_assigner
.
assign
(
priors
,
boxes
,
groundtruth_labels
,
num_valid_rows
=
3
)
(
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
,
_
)
=
result
with
self
.
test_session
()
as
sess
:
with
self
.
assertRaisesWithPredicateMatch
(
tf
.
errors
.
InvalidArgumentError
,
'Groundtruth boxes and labels have incompatible shapes!'
):
sess
.
run
([
cls_targets
,
cls_weights
,
reg_targets
,
reg_weights
])
def
test_raises_error_on_invalid_groundtruth_labels
(
self
):
def
test_raises_error_on_invalid_groundtruth_labels
(
self
):
similarity_calc
=
region_similarity_calculator
.
NegSqDistSimilarity
()
similarity_calc
=
region_similarity_calculator
.
NegSqDistSimilarity
()
matcher
=
bipartite_matcher
.
GreedyBipartiteMatcher
()
matcher
=
bipartite_matcher
.
GreedyBipartiteMatcher
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment