Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
44fa1d37
Commit
44fa1d37
authored
Jun 29, 2017
by
Alex Lee
Browse files
Merge remote-tracking branch 'upstream/master'
parents
d3628a74
6e367f67
Changes
296
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
8526 additions
and
0 deletions
+8526
-0
object_detection/core/keypoint_ops_test.py
object_detection/core/keypoint_ops_test.py
+168
-0
object_detection/core/losses.py
object_detection/core/losses.py
+551
-0
object_detection/core/losses_test.py
object_detection/core/losses_test.py
+562
-0
object_detection/core/matcher.py
object_detection/core/matcher.py
+213
-0
object_detection/core/matcher_test.py
object_detection/core/matcher_test.py
+150
-0
object_detection/core/minibatch_sampler.py
object_detection/core/minibatch_sampler.py
+90
-0
object_detection/core/minibatch_sampler_test.py
object_detection/core/minibatch_sampler_test.py
+82
-0
object_detection/core/model.py
object_detection/core/model.py
+252
-0
object_detection/core/post_processing.py
object_detection/core/post_processing.py
+298
-0
object_detection/core/post_processing_test.py
object_detection/core/post_processing_test.py
+673
-0
object_detection/core/prefetcher.py
object_detection/core/prefetcher.py
+61
-0
object_detection/core/prefetcher_test.py
object_detection/core/prefetcher_test.py
+101
-0
object_detection/core/preprocessor.py
object_detection/core/preprocessor.py
+1921
-0
object_detection/core/preprocessor_test.py
object_detection/core/preprocessor_test.py
+1751
-0
object_detection/core/region_similarity_calculator.py
object_detection/core/region_similarity_calculator.py
+114
-0
object_detection/core/region_similarity_calculator_test.py
object_detection/core/region_similarity_calculator_test.py
+75
-0
object_detection/core/standard_fields.py
object_detection/core/standard_fields.py
+150
-0
object_detection/core/target_assigner.py
object_detection/core/target_assigner.py
+449
-0
object_detection/core/target_assigner_test.py
object_detection/core/target_assigner_test.py
+682
-0
object_detection/create_pascal_tf_record.py
object_detection/create_pascal_tf_record.py
+183
-0
No files found.
Too many changes to show.
To preserve performance only
296 of 296+
files are displayed.
Plain diff
Email patch
object_detection/core/keypoint_ops_test.py
0 → 100644
View file @
44fa1d37
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.core.keypoint_ops."""
import
numpy
as
np
import
tensorflow
as
tf
from
object_detection.core
import
keypoint_ops
class KeypointOpsTest(tf.test.TestCase):
  """Unit tests for the common keypoint operations in keypoint_ops."""

  def _evaluate_and_compare(self, computed, expected):
    """Runs both tensors in a session and asserts they are numerically close."""
    with self.test_session() as sess:
      computed_np, expected_np = sess.run([computed, expected])
    self.assertAllClose(computed_np, expected_np)

  def test_scale(self):
    """scale should multiply y/x coordinates by the given scale factors."""
    keypoints = tf.constant([
        [[0.0, 0.0], [100.0, 200.0]],
        [[50.0, 120.0], [100.0, 140.0]]
    ])
    y_scale = tf.constant(1.0 / 100)
    x_scale = tf.constant(1.0 / 200)
    expected = tf.constant([
        [[0., 0.], [1.0, 1.0]],
        [[0.5, 0.6], [1.0, 0.7]]
    ])
    scaled = keypoint_ops.scale(keypoints, y_scale, x_scale)
    self._evaluate_and_compare(scaled, expected)

  def test_clip_to_window(self):
    """clip_to_window should clamp keypoints into the window extent."""
    keypoints = tf.constant([
        [[0.25, 0.5], [0.75, 0.75]],
        [[0.5, 0.0], [1.0, 1.0]]
    ])
    window = tf.constant([0.25, 0.25, 0.75, 0.75])
    expected = tf.constant([
        [[0.25, 0.5], [0.75, 0.75]],
        [[0.5, 0.25], [0.75, 0.75]]
    ])
    clipped = keypoint_ops.clip_to_window(keypoints, window)
    self._evaluate_and_compare(clipped, expected)

  def test_prune_outside_window(self):
    """Keypoints of instances falling outside the window become NaN."""
    keypoints = tf.constant([
        [[0.25, 0.5], [0.75, 0.75]],
        [[0.5, 0.0], [1.0, 1.0]]
    ])
    window = tf.constant([0.25, 0.25, 0.75, 0.75])
    expected = tf.constant([[[0.25, 0.5], [0.75, 0.75]],
                            [[np.nan, np.nan], [np.nan, np.nan]]])
    pruned = keypoint_ops.prune_outside_window(keypoints, window)
    self._evaluate_and_compare(pruned, expected)

  def test_change_coordinate_frame(self):
    """Coordinates should be re-expressed relative to the given window."""
    keypoints = tf.constant([
        [[0.25, 0.5], [0.75, 0.75]],
        [[0.5, 0.0], [1.0, 1.0]]
    ])
    window = tf.constant([0.25, 0.25, 0.75, 0.75])
    expected = tf.constant([
        [[0, 0.5], [1.0, 1.0]],
        [[0.5, -0.5], [1.5, 1.5]]
    ])
    transformed = keypoint_ops.change_coordinate_frame(keypoints, window)
    self._evaluate_and_compare(transformed, expected)

  def test_to_normalized_coordinates(self):
    """Absolute coordinates should be scaled down by image height/width."""
    keypoints = tf.constant([
        [[10., 30.], [30., 45.]],
        [[20., 0.], [40., 60.]]
    ])
    normalized = keypoint_ops.to_normalized_coordinates(keypoints, 40, 60)
    expected = tf.constant([
        [[0.25, 0.5], [0.75, 0.75]],
        [[0.5, 0.0], [1.0, 1.0]]
    ])
    self._evaluate_and_compare(normalized, expected)

  def test_to_normalized_coordinates_already_normalized(self):
    """Normalizing already-normalized input should trip the op's assertion."""
    keypoints = tf.constant([
        [[0.25, 0.5], [0.75, 0.75]],
        [[0.5, 0.0], [1.0, 1.0]]
    ])
    output = keypoint_ops.to_normalized_coordinates(keypoints, 40, 60)
    with self.test_session() as sess:
      with self.assertRaisesOpError('assertion failed'):
        sess.run(output)

  def test_to_absolute_coordinates(self):
    """Normalized coordinates should be scaled up by image height/width."""
    keypoints = tf.constant([
        [[0.25, 0.5], [0.75, 0.75]],
        [[0.5, 0.0], [1.0, 1.0]]
    ])
    absolute = keypoint_ops.to_absolute_coordinates(keypoints, 40, 60)
    expected = tf.constant([
        [[10., 30.], [30., 45.]],
        [[20., 0.], [40., 60.]]
    ])
    self._evaluate_and_compare(absolute, expected)

  def test_to_absolute_coordinates_already_absolute(self):
    """Converting already-absolute input should trip the op's assertion."""
    keypoints = tf.constant([
        [[10., 30.], [30., 45.]],
        [[20., 0.], [40., 60.]]
    ])
    output = keypoint_ops.to_absolute_coordinates(keypoints, 40, 60)
    with self.test_session() as sess:
      with self.assertRaisesOpError('assertion failed'):
        sess.run(output)

  def test_flip_horizontal(self):
    """Horizontal flip mirrors x about 0.5 and applies the permutation."""
    keypoints = tf.constant([
        [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
        [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]]
    ])
    flip_permutation = [0, 2, 1]
    expected = tf.constant([
        [[0.1, 0.9], [0.3, 0.7], [0.2, 0.8]],
        [[0.4, 0.6], [0.6, 0.4], [0.5, 0.5]],
    ])
    flipped = keypoint_ops.flip_horizontal(keypoints, 0.5, flip_permutation)
    self._evaluate_and_compare(flipped, expected)
# Run the test suite when this file is executed as a script.
if __name__ == '__main__':
  tf.test.main()
object_detection/core/losses.py
0 → 100644
View file @
44fa1d37
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Classification and regression loss functions for object detection.
Localization losses:
* WeightedL2LocalizationLoss
* WeightedSmoothL1LocalizationLoss
* WeightedIOULocalizationLoss
Classification losses:
* WeightedSigmoidClassificationLoss
* WeightedSoftmaxClassificationLoss
* BootstrappedSigmoidClassificationLoss
"""
from
abc
import
ABCMeta
from
abc
import
abstractmethod
import
tensorflow
as
tf
from
object_detection.core
import
box_list
from
object_detection.core
import
box_list_ops
from
object_detection.utils
import
ops
# Module-level alias for TF-Slim.
# NOTE(review): `slim` is not referenced anywhere in the code visible in this
# module; confirm whether it is used elsewhere before removing.
slim = tf.contrib.slim
class Loss(object):
  """Abstract base class for loss functions.

  Subclasses implement _compute_loss; callers invoke the instance directly
  (via __call__), which wraps the computation in a tf.name_scope and can
  optionally mask out NaN entries in the target tensor.
  """
  # NOTE(review): Python-2 style metaclass declaration. Under Python 3 this
  # attribute has no effect, so @abstractmethod would not be enforced —
  # confirm the intended Python version for this codebase.
  __metaclass__ = ABCMeta

  def __call__(self,
               prediction_tensor,
               target_tensor,
               ignore_nan_targets=False,
               scope=None,
               **params):
    """Call the loss function.

    Args:
      prediction_tensor: a tensor representing predicted quantities.
      target_tensor: a tensor representing regression or classification
        targets.
      ignore_nan_targets: whether to ignore nan targets in the loss
        computation. E.g. can be used if the target tensor is missing
        groundtruth data that shouldn't be factored into the loss.
      scope: Op scope name. Defaults to 'Loss' if None.
      **params: Additional keyword arguments for specific implementations of
        the Loss.

    Returns:
      loss: a tensor representing the value of the loss function.
    """
    with tf.name_scope(scope, 'Loss',
                       [prediction_tensor, target_tensor, params]) as scope:
      if ignore_nan_targets:
        # Substitute the prediction wherever the target is NaN so those
        # entries contribute zero loss.
        target_tensor = tf.where(tf.is_nan(target_tensor),
                                 prediction_tensor,
                                 target_tensor)
      return self._compute_loss(prediction_tensor, target_tensor, **params)

  @abstractmethod
  def _compute_loss(self, prediction_tensor, target_tensor, **params):
    """Method to be overriden by implementations.

    Args:
      prediction_tensor: a tensor representing predicted quantities
      target_tensor: a tensor representing regression or classification
        targets
      **params: Additional keyword arguments for specific implementations of
        the Loss.

    Returns:
      loss: a tensor representing the value of the loss function
    """
    pass
class WeightedL2LocalizationLoss(Loss):
  """L2 localization loss function with anchorwise output support.

  Loss[b,a] = .5 * ||weights[b,a] * (prediction[b,a,:] - target[b,a,:])||^2
  """

  def __init__(self, anchorwise_output=False):
    """Constructor.

    Args:
      anchorwise_output: Outputs loss per anchor. (default False)
    """
    self._anchorwise_output = anchorwise_output

  def _compute_loss(self, prediction_tensor, target_tensor, weights):
    """Compute loss function.

    Args:
      prediction_tensor: A float tensor of shape [batch_size, num_anchors,
        code_size] representing the (encoded) predicted locations of objects.
      target_tensor: A float tensor of shape [batch_size, num_anchors,
        code_size] representing the regression targets
      weights: a float tensor of shape [batch_size, num_anchors]

    Returns:
      loss: a (scalar) tensor representing the value of the loss function
        or a float tensor of shape [batch_size, num_anchors]
    """
    # Broadcast the per-anchor weight across the code dimension before
    # squaring, so the weight enters the loss quadratically.
    residual = prediction_tensor - target_tensor
    weighted_residual = residual * tf.expand_dims(weights, 2)
    per_entry_loss = 0.5 * tf.square(weighted_residual)
    if self._anchorwise_output:
      # Sum only over the code dimension, keeping [batch_size, num_anchors].
      return tf.reduce_sum(per_entry_loss, 2)
    return tf.reduce_sum(per_entry_loss)
class WeightedSmoothL1LocalizationLoss(Loss):
  """Smooth L1 localization loss function.

  The smooth L1_loss is defined elementwise as .5 x^2 if |x|<1 and |x|-.5
  otherwise, where x is the difference between predictions and target.

  See also Equation (3) in the Fast R-CNN paper by Ross Girshick (ICCV 2015)
  """

  def __init__(self, anchorwise_output=False):
    """Constructor.

    Args:
      anchorwise_output: Outputs loss per anchor. (default False)
    """
    self._anchorwise_output = anchorwise_output

  def _compute_loss(self, prediction_tensor, target_tensor, weights):
    """Compute loss function.

    Args:
      prediction_tensor: A float tensor of shape [batch_size, num_anchors,
        code_size] representing the (encoded) predicted locations of objects.
      target_tensor: A float tensor of shape [batch_size, num_anchors,
        code_size] representing the regression targets
      weights: a float tensor of shape [batch_size, num_anchors]

    Returns:
      loss: a (scalar) tensor representing the value of the loss function,
        or a float tensor of shape [batch_size, num_anchors] when
        anchorwise_output is True.
    """
    abs_diff = tf.abs(prediction_tensor - target_tensor)
    in_quadratic_region = tf.less(abs_diff, 1)
    # Quadratic branch for small residuals, linear branch otherwise.
    elementwise_loss = tf.where(in_quadratic_region,
                                0.5 * tf.square(abs_diff),
                                abs_diff - 0.5)
    # Collapse the code dimension, then apply the per-anchor weights.
    anchorwise_smooth_l1norm = weights * tf.reduce_sum(elementwise_loss, 2)
    if self._anchorwise_output:
      return anchorwise_smooth_l1norm
    return tf.reduce_sum(anchorwise_smooth_l1norm)
class WeightedIOULocalizationLoss(Loss):
  """IOU localization loss function.

  Sums the IOU for corresponding pairs of predicted/groundtruth boxes
  and for each pair assign a loss of 1 - IOU.  We then compute a weighted
  sum over all pairs which is returned as the total loss.
  """

  def _compute_loss(self, prediction_tensor, target_tensor, weights):
    """Compute loss function.

    Args:
      prediction_tensor: A float tensor of shape [batch_size, num_anchors, 4]
        representing the decoded predicted boxes
      target_tensor: A float tensor of shape [batch_size, num_anchors, 4]
        representing the decoded target boxes
      weights: a float tensor of shape [batch_size, num_anchors]

    Returns:
      loss: a (scalar) tensor representing the value of the loss function
    """
    # Flatten batch and anchor dimensions so every box pair is matched 1:1.
    flat_predictions = tf.reshape(prediction_tensor, [-1, 4])
    flat_targets = tf.reshape(target_tensor, [-1, 4])
    matched_iou = box_list_ops.matched_iou(box_list.BoxList(flat_predictions),
                                           box_list.BoxList(flat_targets))
    per_anchor_iou_loss = 1.0 - matched_iou
    flat_weights = tf.reshape(weights, [-1])
    return tf.reduce_sum(flat_weights * per_anchor_iou_loss)
class WeightedSigmoidClassificationLoss(Loss):
  """Sigmoid cross entropy classification loss function."""

  def __init__(self, anchorwise_output=False):
    """Constructor.

    Args:
      anchorwise_output: Outputs loss per anchor. (default False)
    """
    self._anchorwise_output = anchorwise_output

  def _compute_loss(self, prediction_tensor, target_tensor, weights,
                    class_indices=None):
    """Compute loss function.

    Args:
      prediction_tensor: A float tensor of shape [batch_size, num_anchors,
        num_classes] representing the predicted logits for each class
      target_tensor: A float tensor of shape [batch_size, num_anchors,
        num_classes] representing one-hot encoded classification targets
      weights: a float tensor of shape [batch_size, num_anchors]
      class_indices: (Optional) A 1-D integer tensor of class indices.
        If provided, computes loss only for the specified class indices.

    Returns:
      loss: a (scalar) tensor representing the value of the loss function
        or a float tensor of shape [batch_size, num_anchors]
    """
    # Give weights a trailing class dimension so they broadcast over classes.
    weights = tf.expand_dims(weights, 2)
    if class_indices is not None:
      # Zero out the weight of every class not listed in class_indices.
      class_mask = ops.indices_to_dense_vector(class_indices,
                                               tf.shape(prediction_tensor)[2])
      weights *= tf.reshape(class_mask, [1, 1, -1])
    per_entry_cross_ent = tf.nn.sigmoid_cross_entropy_with_logits(
        labels=target_tensor, logits=prediction_tensor)
    weighted_cross_ent = per_entry_cross_ent * weights
    if self._anchorwise_output:
      return tf.reduce_sum(weighted_cross_ent, 2)
    return tf.reduce_sum(weighted_cross_ent)
class WeightedSoftmaxClassificationLoss(Loss):
  """Softmax loss function."""

  def __init__(self, anchorwise_output=False):
    """Constructor.

    Args:
      anchorwise_output: Whether to output loss per anchor (default False)
    """
    self._anchorwise_output = anchorwise_output

  def _compute_loss(self, prediction_tensor, target_tensor, weights):
    """Compute loss function.

    Args:
      prediction_tensor: A float tensor of shape [batch_size, num_anchors,
        num_classes] representing the predicted logits for each class
      target_tensor: A float tensor of shape [batch_size, num_anchors,
        num_classes] representing one-hot encoded classification targets
      weights: a float tensor of shape [batch_size, num_anchors]

    Returns:
      loss: a (scalar) tensor representing the value of the loss function,
        or a float tensor of shape [batch_size, num_anchors] when
        anchorwise_output is True.
    """
    num_classes = prediction_tensor.get_shape().as_list()[-1]
    # Fold batch and anchor dimensions together before the softmax call.
    flat_labels = tf.reshape(target_tensor, [-1, num_classes])
    flat_logits = tf.reshape(prediction_tensor, [-1, num_classes])
    per_row_cross_ent = tf.nn.softmax_cross_entropy_with_logits(
        labels=flat_labels, logits=flat_logits)
    if self._anchorwise_output:
      # Restore the [batch_size, num_anchors] shape before weighting.
      return weights * tf.reshape(per_row_cross_ent, tf.shape(weights))
    return tf.reduce_sum(per_row_cross_ent * tf.reshape(weights, [-1]))
class BootstrappedSigmoidClassificationLoss(Loss):
  """Bootstrapped sigmoid cross entropy classification loss function.

  This loss uses a convex combination of training labels and the current model's
  predictions as training targets in the classification loss. The idea is that
  as the model improves over time, its predictions can be trusted more and we
  can use these predictions to mitigate the damage of noisy/incorrect labels,
  because incorrect labels are likely to be eventually highly inconsistent with
  other stimuli predicted to have the same label by the model.

  In "soft" bootstrapping, we use all predicted class probabilities, whereas in
  "hard" bootstrapping, we use the single class favored by the model.

  See also Training Deep Neural Networks On Noisy Labels with Bootstrapping by
  Reed et al. (ICLR 2015).
  """

  def __init__(self, alpha, bootstrap_type='soft', anchorwise_output=False):
    """Constructor.

    Args:
      alpha: a float32 scalar tensor between 0 and 1 representing interpolation
        weight
      bootstrap_type: set to either 'hard' or 'soft' (default)
      anchorwise_output: Outputs loss per anchor. (default False)

    Raises:
      ValueError: if bootstrap_type is not either 'hard' or 'soft'
    """
    if bootstrap_type != 'hard' and bootstrap_type != 'soft':
      raise ValueError('Unrecognized bootstrap_type: must be one of '
                       '\'hard\' or \'soft.\'')
    self._alpha = alpha
    self._bootstrap_type = bootstrap_type
    self._anchorwise_output = anchorwise_output

  def _compute_loss(self, prediction_tensor, target_tensor, weights):
    """Compute loss function.

    Args:
      prediction_tensor: A float tensor of shape [batch_size, num_anchors,
        num_classes] representing the predicted logits for each class
      target_tensor: A float tensor of shape [batch_size, num_anchors,
        num_classes] representing one-hot encoded classification targets
      weights: a float tensor of shape [batch_size, num_anchors]

    Returns:
      loss: a (scalar) tensor representing the value of the loss function
        or a float tensor of shape [batch_size, num_anchors]
    """
    if self._bootstrap_type == 'soft':
      # Soft bootstrapping: blend the labels with the predicted probabilities.
      bootstrap_target_tensor = self._alpha * target_tensor + (
          1.0 - self._alpha) * tf.sigmoid(prediction_tensor)
    else:
      # Hard bootstrapping: blend the labels with a 0/1 thresholded prediction.
      bootstrap_target_tensor = self._alpha * target_tensor + (
          1.0 - self._alpha) * tf.cast(
              tf.sigmoid(prediction_tensor) > 0.5, tf.float32)
    per_entry_cross_ent = (tf.nn.sigmoid_cross_entropy_with_logits(
        labels=bootstrap_target_tensor, logits=prediction_tensor))
    if self._anchorwise_output:
      return tf.reduce_sum(per_entry_cross_ent * tf.expand_dims(weights, 2), 2)
    return tf.reduce_sum(per_entry_cross_ent * tf.expand_dims(weights, 2))
class HardExampleMiner(object):
  """Hard example mining for regions in a list of images.

  Implements hard example mining to select a subset of regions to be
  back-propagated. For each image, selects the regions with highest losses,
  subject to the condition that a newly selected region cannot have
  an IOU > iou_threshold with any of the previously selected regions.
  This can be achieved by re-using a greedy non-maximum suppression algorithm.
  A constraint on the number of negatives mined per positive region can also be
  enforced.

  Reference papers: "Training Region-based Object Detectors with Online
  Hard Example Mining" (CVPR 2016) by Srivastava et al., and
  "SSD: Single Shot MultiBox Detector" (ECCV 2016) by Liu et al.
  """

  def __init__(self,
               num_hard_examples=64,
               iou_threshold=0.7,
               loss_type='both',
               cls_loss_weight=0.05,
               loc_loss_weight=0.06,
               max_negatives_per_positive=None,
               min_negatives_per_image=0):
    """Constructor.

    The hard example mining implemented by this class can replicate the behavior
    in the two aforementioned papers (Srivastava et al., and Liu et al).
    To replicate the A2 paper (Srivastava et al), num_hard_examples is set
    to a fixed parameter (64 by default) and iou_threshold is set to .7 for
    running non-max-suppression the predicted boxes prior to hard mining.
    In order to replicate the SSD paper (Liu et al), num_hard_examples should
    be set to None, max_negatives_per_positive should be 3 and iou_threshold
    should be 1.0 (in order to effectively turn off NMS).

    Args:
      num_hard_examples: maximum number of hard examples to be
        selected per image (prior to enforcing max negative to positive ratio
        constraint). If set to None, all examples obtained after NMS are
        considered.
      iou_threshold: minimum intersection over union for an example
        to be discarded during NMS.
      loss_type: use only classification losses ('cls'), only localization
        losses ('loc') or both losses ('both', default).
        In the last case, cls_loss_weight and loc_loss_weight are used to
        compute weighted sum of the two losses.
      cls_loss_weight: weight for classification loss.
      loc_loss_weight: weight for location loss.
      max_negatives_per_positive: maximum number of negatives to retain for
        each positive anchor. By default, num_negatives_per_positive is None,
        which means that we do not enforce a prespecified negative:positive
        ratio.  Note also that num_negatives_per_positives can be a float
        (and will be converted to be a float even if it is passed in otherwise).
      min_negatives_per_image: minimum number of negative anchors to sample for
        a given image. Setting this to a positive number allows sampling
        negatives in an image without any positive anchors and thus not biased
        towards at least one detection per image.
    """
    self._num_hard_examples = num_hard_examples
    self._iou_threshold = iou_threshold
    self._loss_type = loss_type
    self._cls_loss_weight = cls_loss_weight
    self._loc_loss_weight = loc_loss_weight
    self._max_negatives_per_positive = max_negatives_per_positive
    self._min_negatives_per_image = min_negatives_per_image
    if self._max_negatives_per_positive is not None:
      # Normalize to float so the ratio arithmetic below is well defined.
      self._max_negatives_per_positive = float(self._max_negatives_per_positive)
    # Populated by __call__ when the neg:pos ratio constraint is active;
    # read back by summarize().
    self._num_positives_list = None
    self._num_negatives_list = None

  def __call__(self,
               location_losses,
               cls_losses,
               decoded_boxlist_list,
               match_list=None):
    """Computes localization and classification losses after hard mining.

    Args:
      location_losses: a float tensor of shape [num_images, num_anchors]
        representing anchorwise localization losses.
      cls_losses: a float tensor of shape [num_images, num_anchors]
        representing anchorwise classification losses.
      decoded_boxlist_list: a list of decoded BoxList representing location
        predictions for each image.
      match_list: an optional list of matcher.Match objects encoding the match
        between anchors and groundtruth boxes for each image of the batch,
        with rows of the Match objects corresponding to groundtruth boxes
        and columns corresponding to anchors. Match objects in match_list are
        used to reference which anchors are positive, negative or ignored. If
        self._max_negatives_per_positive exists, these are then used to enforce
        a prespecified negative to positive ratio.

    Returns:
      mined_location_loss: a float scalar with sum of localization losses from
        selected hard examples.
      mined_cls_loss: a float scalar with sum of classification losses from
        selected hard examples.

    Raises:
      ValueError: if location_losses, cls_losses and decoded_boxlist_list do
        not have compatible shapes (i.e., they must correspond to the same
        number of images).
      ValueError: if match_list is specified but its length does not match
        len(decoded_boxlist_list).
    """
    mined_location_losses = []
    mined_cls_losses = []
    # Split the batched loss tensors into one tensor per image.
    location_losses = tf.unstack(location_losses)
    cls_losses = tf.unstack(cls_losses)
    num_images = len(decoded_boxlist_list)
    if not match_list:
      match_list = num_images * [None]
    if not len(location_losses) == len(decoded_boxlist_list) == len(cls_losses):
      raise ValueError('location_losses, cls_losses and decoded_boxlist_list '
                       'do not have compatible shapes.')
    if not isinstance(match_list, list):
      raise ValueError('match_list must be a list.')
    if len(match_list) != len(decoded_boxlist_list):
      raise ValueError('match_list must either be None or have '
                       'length=len(decoded_boxlist_list).')
    num_positives_list = []
    num_negatives_list = []
    for ind, detection_boxlist in enumerate(decoded_boxlist_list):
      box_locations = detection_boxlist.get()
      match = match_list[ind]
      # Choose the per-anchor score used to rank hard examples.
      image_losses = cls_losses[ind]
      if self._loss_type == 'loc':
        image_losses = location_losses[ind]
      elif self._loss_type == 'both':
        image_losses *= self._cls_loss_weight
        image_losses += location_losses[ind] * self._loc_loss_weight
      if self._num_hard_examples is not None:
        num_hard_examples = self._num_hard_examples
      else:
        num_hard_examples = detection_boxlist.num_boxes()
      # NMS keeps the highest-loss boxes while suppressing near-duplicates.
      selected_indices = tf.image.non_max_suppression(
          box_locations, image_losses, num_hard_examples, self._iou_threshold)
      if self._max_negatives_per_positive is not None and match:
        (selected_indices, num_positives,
         num_negatives) = self._subsample_selection_to_desired_neg_pos_ratio(
             selected_indices, match, self._max_negatives_per_positive,
             self._min_negatives_per_image)
        num_positives_list.append(num_positives)
        num_negatives_list.append(num_negatives)
      mined_location_losses.append(
          tf.reduce_sum(tf.gather(location_losses[ind], selected_indices)))
      mined_cls_losses.append(
          tf.reduce_sum(tf.gather(cls_losses[ind], selected_indices)))
    location_loss = tf.reduce_sum(tf.stack(mined_location_losses))
    cls_loss = tf.reduce_sum(tf.stack(mined_cls_losses))
    # NOTE(review): `match` here is the leaked loop variable from the last
    # iteration, so the stats are only stored when the *last* image has a
    # Match object — confirm whether this should test `match_list` instead.
    if match and self._max_negatives_per_positive:
      self._num_positives_list = num_positives_list
      self._num_negatives_list = num_negatives_list
    return (location_loss, cls_loss)

  def summarize(self):
    """Summarize the number of positives and negatives after mining."""
    # No-op unless __call__ ran with the neg:pos ratio constraint enabled.
    if self._num_positives_list and self._num_negatives_list:
      avg_num_positives = tf.reduce_mean(tf.to_float(self._num_positives_list))
      avg_num_negatives = tf.reduce_mean(tf.to_float(self._num_negatives_list))
      tf.summary.scalar('HardExampleMiner/NumPositives', avg_num_positives)
      tf.summary.scalar('HardExampleMiner/NumNegatives', avg_num_negatives)

  def _subsample_selection_to_desired_neg_pos_ratio(self,
                                                    indices,
                                                    match,
                                                    max_negatives_per_positive,
                                                    min_negatives_per_image=0):
    """Subsample a collection of selected indices to a desired neg:pos ratio.

    This function takes a subset of M indices (indexing into a large anchor
    collection of N anchors where M<N) which are labeled as positive/negative
    via a Match object (matched indices are positive, unmatched indices
    are negative).  It returns a subset of the provided indices retaining all
    positives as well as up to the first K negatives, where:
      K=floor(num_negative_per_positive * num_positives).

    For example, if indices=[2, 4, 5, 7, 9, 10] (indexing into 12 anchors),
    with positives=[2, 5] and negatives=[4, 7, 9, 10] and
    num_negatives_per_positive=1, then the returned subset of indices
    is [2, 4, 5, 7].

    Args:
      indices: An integer tensor of shape [M] representing a collection
        of selected anchor indices
      match: A matcher.Match object encoding the match between anchors and
        groundtruth boxes for a given image, with rows of the Match objects
        corresponding to groundtruth boxes and columns corresponding to anchors.
      max_negatives_per_positive: (float) maximum number of negatives for
        each positive anchor.
      min_negatives_per_image: minimum number of negative anchors for a given
        image. Allow sampling negatives in image without any positive anchors.

    Returns:
      selected_indices: An integer tensor of shape [M'] representing a
        collection of selected anchor indices with M' <= M.
      num_positives: An integer tensor representing the number of positive
        examples in selected set of indices.
      num_negatives: An integer tensor representing the number of negative
        examples in selected set of indices.
    """
    positives_indicator = tf.gather(match.matched_column_indicator(), indices)
    negatives_indicator = tf.gather(match.unmatched_column_indicator(), indices)
    num_positives = tf.reduce_sum(tf.to_int32(positives_indicator))
    # Negative budget: the ratio bound, but never below the per-image floor.
    max_negatives = tf.maximum(min_negatives_per_image,
                               tf.to_int32(max_negatives_per_positive *
                                           tf.to_float(num_positives)))
    # Keep negatives in order until the cumulative count exceeds the budget.
    topk_negatives_indicator = tf.less_equal(
        tf.cumsum(tf.to_int32(negatives_indicator)), max_negatives)
    subsampled_selection_indices = tf.where(
        tf.logical_or(positives_indicator, topk_negatives_indicator))
    num_negatives = tf.size(subsampled_selection_indices) - num_positives
    return (tf.reshape(tf.gather(indices, subsampled_selection_indices), [-1]),
            num_positives, num_negatives)
object_detection/core/losses_test.py
0 → 100644
View file @
44fa1d37
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for google3.research.vale.object_detection.losses."""
import
math
import
numpy
as
np
import
tensorflow
as
tf
from
object_detection.core
import
box_list
from
object_detection.core
import
losses
from
object_detection.core
import
matcher
class WeightedL2LocalizationLossTest(tf.test.TestCase):
  """Tests for losses.WeightedL2LocalizationLoss."""

  def testReturnsCorrectLoss(self):
    """Only weighted anchors contribute; each code entry adds .5 * 1^2."""
    batch_size = 3
    num_anchors = 10
    code_size = 4
    prediction_tensor = tf.ones([batch_size, num_anchors, code_size])
    target_tensor = tf.zeros([batch_size, num_anchors, code_size])
    weights = tf.constant([[1, 1, 1, 1, 1, 0, 0, 0, 0, 0],
                           [1, 1, 1, 1, 1, 0, 0, 0, 0, 0],
                           [1, 1, 1, 1, 1, 0, 0, 0, 0, 0]], tf.float32)
    loss_op = losses.WeightedL2LocalizationLoss()
    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
    # 3 images x 5 weighted anchors x 4 code entries, each contributing .5.
    expected_loss = (3 * 5 * 4) / 2.0
    with self.test_session() as sess:
      loss_output = sess.run(loss)
    self.assertAllClose(loss_output, expected_loss)

  def testReturnsCorrectAnchorwiseLoss(self):
    """Anchorwise mode returns a [batch, anchors] map of per-anchor sums."""
    batch_size = 3
    num_anchors = 16
    code_size = 4
    prediction_tensor = tf.ones([batch_size, num_anchors, code_size])
    target_tensor = tf.zeros([batch_size, num_anchors, code_size])
    weights = tf.ones([batch_size, num_anchors])
    loss_op = losses.WeightedL2LocalizationLoss(anchorwise_output=True)
    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
    # Each anchor sums 4 entries of .5 => 2 per anchor.
    expected_loss = np.ones((batch_size, num_anchors)) * 2
    with self.test_session() as sess:
      loss_output = sess.run(loss)
    self.assertAllClose(loss_output, expected_loss)

  def testReturnsCorrectLossSum(self):
    """With unit weights the loss matches tf.nn.l2_loss on the residual."""
    batch_size = 3
    num_anchors = 16
    code_size = 4
    prediction_tensor = tf.ones([batch_size, num_anchors, code_size])
    target_tensor = tf.zeros([batch_size, num_anchors, code_size])
    weights = tf.ones([batch_size, num_anchors])
    loss_op = losses.WeightedL2LocalizationLoss(anchorwise_output=False)
    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
    expected_loss = tf.nn.l2_loss(prediction_tensor - target_tensor)
    with self.test_session() as sess:
      loss_output = sess.run(loss)
      expected_loss_output = sess.run(expected_loss)
    self.assertAllClose(loss_output, expected_loss_output)

  def testReturnsCorrectNanLoss(self):
    """NaN halves of the target are masked out via ignore_nan_targets."""
    batch_size = 3
    num_anchors = 10
    code_size = 4
    prediction_tensor = tf.ones([batch_size, num_anchors, code_size])
    # Second half of the code dimension is NaN and must be ignored.
    target_tensor = tf.concat([
        tf.zeros([batch_size, num_anchors, code_size / 2]),
        tf.ones([batch_size, num_anchors, code_size / 2]) * np.nan
    ], axis=2)
    weights = tf.ones([batch_size, num_anchors])
    loss_op = losses.WeightedL2LocalizationLoss()
    loss = loss_op(prediction_tensor, target_tensor, weights=weights,
                   ignore_nan_targets=True)
    expected_loss = (3 * 5 * 4) / 2.0
    with self.test_session() as sess:
      loss_output = sess.run(loss)
    self.assertAllClose(loss_output, expected_loss)
class WeightedSmoothL1LocalizationLossTest(tf.test.TestCase):
  """Tests for losses.WeightedSmoothL1LocalizationLoss."""

  def testReturnsCorrectLoss(self):
    """Checks the smooth-L1 (Huber) value on a small hand-built batch."""
    batch_size = 2
    num_anchors = 3
    code_size = 4
    prediction_tensor = tf.constant([[[2.5, 0, .4, 0],
                                      [0, 0, 0, 0],
                                      [0, 2.5, 0, .4]],
                                     [[3.5, 0, 0, 0],
                                      [0, .4, 0, .9],
                                      [0, 0, 1.5, 0]]], tf.float32)
    target_tensor = tf.zeros([batch_size, num_anchors, code_size])
    # Anchor weights; second image only counts its middle anchor (weight 3).
    weights = tf.constant([[2, 1, 1],
                           [0, 3, 0]], tf.float32)
    loss_op = losses.WeightedSmoothL1LocalizationLoss()
    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
    # Hand-computed sum of per-coordinate Huber losses times anchor weights.
    exp_loss = 7.695
    with self.test_session() as sess:
      loss_output = sess.run(loss)
      self.assertAllClose(loss_output, exp_loss)
class WeightedIOULocalizationLossTest(tf.test.TestCase):
  """Tests for losses.WeightedIOULocalizationLoss."""

  def testReturnsCorrectLoss(self):
    """Per-anchor IOU loss: 1 - IOU, scaled by the anchor weight."""
    # First two boxes match their targets exactly (loss 0); the third has
    # no overlap with its far-away target (loss 1, weight 2).
    prediction_tensor = tf.constant([[[1.5, 0, 2.4, 1],
                                      [0, 0, 1, 1],
                                      [0, 0, .5, .25]]])
    target_tensor = tf.constant([[[1.5, 0, 2.4, 1],
                                  [0, 0, 1, 1],
                                  [50, 50, 500.5, 100.25]]])
    weights = [[1.0, .5, 2.0]]
    loss_op = losses.WeightedIOULocalizationLoss()
    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
    exp_loss = 2.0
    with self.test_session() as sess:
      loss_output = sess.run(loss)
      self.assertAllClose(loss_output, exp_loss)
class WeightedSigmoidClassificationLossTest(tf.test.TestCase):
  """Tests for losses.WeightedSigmoidClassificationLoss.

  Logits of +/-100 give sigmoid cross-entropy of ~0; a logit of 0 against
  a hard target gives -log(0.5) per entry.
  """

  def testReturnsCorrectLoss(self):
    prediction_tensor = tf.constant([[[-100, 100, -100],
                                      [100, -100, -100],
                                      [100, 0, -100],
                                      [-100, -100, 100]],
                                     [[-100, 0, 100],
                                      [-100, 100, -100],
                                      [100, 100, 100],
                                      [0, 0, -1]]], tf.float32)
    target_tensor = tf.constant([[[0, 1, 0],
                                  [1, 0, 0],
                                  [1, 0, 0],
                                  [0, 0, 1]],
                                 [[0, 0, 1],
                                  [0, 1, 0],
                                  [1, 1, 1],
                                  [1, 0, 0]]], tf.float32)
    weights = tf.constant([[1, 1, 1, 1],
                           [1, 1, 1, 0]], tf.float32)
    loss_op = losses.WeightedSigmoidClassificationLoss()
    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
    # Two zero-logit entries each contribute -log(.5); everything else ~0.
    exp_loss = -2 * math.log(.5)
    with self.test_session() as sess:
      loss_output = sess.run(loss)
      self.assertAllClose(loss_output, exp_loss)

  def testReturnsCorrectAnchorWiseLoss(self):
    prediction_tensor = tf.constant([[[-100, 100, -100],
                                      [100, -100, -100],
                                      [100, 0, -100],
                                      [-100, -100, 100]],
                                     [[-100, 0, 100],
                                      [-100, 100, -100],
                                      [100, 100, 100],
                                      [0, 0, -1]]], tf.float32)
    target_tensor = tf.constant([[[0, 1, 0],
                                  [1, 0, 0],
                                  [1, 0, 0],
                                  [0, 0, 1]],
                                 [[0, 0, 1],
                                  [0, 1, 0],
                                  [1, 1, 1],
                                  [1, 0, 0]]], tf.float32)
    weights = tf.constant([[1, 1, 1, 1],
                           [1, 1, 1, 0]], tf.float32)
    loss_op = losses.WeightedSigmoidClassificationLoss(True)
    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
    # np.array instead of the deprecated np.matrix.
    exp_loss = np.array([[0, 0, -math.log(.5), 0],
                         [-math.log(.5), 0, 0, 0]])
    with self.test_session() as sess:
      loss_output = sess.run(loss)
      self.assertAllClose(loss_output, exp_loss)

  def testReturnsCorrectLossWithClassIndices(self):
    prediction_tensor = tf.constant([[[-100, 100, -100, 100],
                                      [100, -100, -100, -100],
                                      [100, 0, -100, 100],
                                      [-100, -100, 100, -100]],
                                     [[-100, 0, 100, 100],
                                      [-100, 100, -100, 100],
                                      [100, 100, 100, 100],
                                      [0, 0, -1, 100]]], tf.float32)
    target_tensor = tf.constant([[[0, 1, 0, 0],
                                  [1, 0, 0, 1],
                                  [1, 0, 0, 0],
                                  [0, 0, 1, 1]],
                                 [[0, 0, 1, 0],
                                  [0, 1, 0, 0],
                                  [1, 1, 1, 0],
                                  [1, 0, 0, 0]]], tf.float32)
    weights = tf.constant([[1, 1, 1, 1],
                           [1, 1, 1, 0]], tf.float32)
    # Ignores the last class.
    class_indices = tf.constant([0, 1, 2], tf.int32)
    loss_op = losses.WeightedSigmoidClassificationLoss(True)
    loss = loss_op(prediction_tensor, target_tensor, weights=weights,
                   class_indices=class_indices)
    # Mismatches confined to the excluded class 3 contribute nothing.
    exp_loss = np.array([[0, 0, -math.log(.5), 0],
                         [-math.log(.5), 0, 0, 0]])
    with self.test_session() as sess:
      loss_output = sess.run(loss)
      self.assertAllClose(loss_output, exp_loss)
class WeightedSoftmaxClassificationLossTest(tf.test.TestCase):
  """Tests for losses.WeightedSoftmaxClassificationLoss."""

  def testReturnsCorrectLoss(self):
    prediction_tensor = tf.constant([[[-100, 100, -100],
                                      [100, -100, -100],
                                      [0, 0, -100],
                                      [-100, -100, 100]],
                                     [[-100, 0, 0],
                                      [-100, 100, -100],
                                      [-100, 100, -100],
                                      [100, -100, -100]]], tf.float32)
    target_tensor = tf.constant([[[0, 1, 0],
                                  [1, 0, 0],
                                  [1, 0, 0],
                                  [0, 0, 1]],
                                 [[0, 0, 1],
                                  [0, 1, 0],
                                  [0, 1, 0],
                                  [1, 0, 0]]], tf.float32)
    weights = tf.constant([[1, 1, .5, 1],
                           [1, 1, 1, 0]], tf.float32)
    loss_op = losses.WeightedSoftmaxClassificationLoss()
    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
    # Two tied-logit anchors (-log .5 each), one down-weighted to 0.5.
    exp_loss = -1.5 * math.log(.5)
    with self.test_session() as sess:
      loss_output = sess.run(loss)
      self.assertAllClose(loss_output, exp_loss)

  def testReturnsCorrectAnchorWiseLoss(self):
    prediction_tensor = tf.constant([[[-100, 100, -100],
                                      [100, -100, -100],
                                      [0, 0, -100],
                                      [-100, -100, 100]],
                                     [[-100, 0, 0],
                                      [-100, 100, -100],
                                      [-100, 100, -100],
                                      [100, -100, -100]]], tf.float32)
    target_tensor = tf.constant([[[0, 1, 0],
                                  [1, 0, 0],
                                  [1, 0, 0],
                                  [0, 0, 1]],
                                 [[0, 0, 1],
                                  [0, 1, 0],
                                  [0, 1, 0],
                                  [1, 0, 0]]], tf.float32)
    weights = tf.constant([[1, 1, .5, 1],
                           [1, 1, 1, 0]], tf.float32)
    loss_op = losses.WeightedSoftmaxClassificationLoss(True)
    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
    # np.array instead of the deprecated np.matrix.
    exp_loss = np.array([[0, 0, -0.5 * math.log(.5), 0],
                         [-math.log(.5), 0, 0, 0]])
    with self.test_session() as sess:
      loss_output = sess.run(loss)
      self.assertAllClose(loss_output, exp_loss)
class BootstrappedSigmoidClassificationLossTest(tf.test.TestCase):
  """Tests for losses.BootstrappedSigmoidClassificationLoss."""

  def testReturnsCorrectLossSoftBootstrapping(self):
    prediction_tensor = tf.constant([[[-100, 100, 0],
                                      [100, -100, -100],
                                      [100, -100, -100],
                                      [-100, -100, 100]],
                                     [[-100, -100, 100],
                                      [-100, 100, -100],
                                      [100, 100, 100],
                                      [0, 0, -1]]], tf.float32)
    target_tensor = tf.constant([[[0, 1, 0],
                                  [1, 0, 0],
                                  [1, 0, 0],
                                  [0, 0, 1]],
                                 [[0, 0, 1],
                                  [0, 1, 0],
                                  [1, 1, 1],
                                  [1, 0, 0]]], tf.float32)
    weights = tf.constant([[1, 1, 1, 1],
                           [1, 1, 1, 0]], tf.float32)
    alpha = tf.constant(.5, tf.float32)
    loss_op = losses.BootstrappedSigmoidClassificationLoss(
        alpha, bootstrap_type='soft')
    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
    exp_loss = -math.log(.5)
    with self.test_session() as sess:
      loss_output = sess.run(loss)
      self.assertAllClose(loss_output, exp_loss)

  def testReturnsCorrectLossHardBootstrapping(self):
    prediction_tensor = tf.constant([[[-100, 100, 0],
                                      [100, -100, -100],
                                      [100, -100, -100],
                                      [-100, -100, 100]],
                                     [[-100, -100, 100],
                                      [-100, 100, -100],
                                      [100, 100, 100],
                                      [0, 0, -1]]], tf.float32)
    target_tensor = tf.constant([[[0, 1, 0],
                                  [1, 0, 0],
                                  [1, 0, 0],
                                  [0, 0, 1]],
                                 [[0, 0, 1],
                                  [0, 1, 0],
                                  [1, 1, 1],
                                  [1, 0, 0]]], tf.float32)
    weights = tf.constant([[1, 1, 1, 1],
                           [1, 1, 1, 0]], tf.float32)
    alpha = tf.constant(.5, tf.float32)
    loss_op = losses.BootstrappedSigmoidClassificationLoss(
        alpha, bootstrap_type='hard')
    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
    exp_loss = -math.log(.5)
    with self.test_session() as sess:
      loss_output = sess.run(loss)
      self.assertAllClose(loss_output, exp_loss)

  def testReturnsCorrectAnchorWiseLoss(self):
    prediction_tensor = tf.constant([[[-100, 100, -100],
                                      [100, -100, -100],
                                      [100, 0, -100],
                                      [-100, -100, 100]],
                                     [[-100, 0, 100],
                                      [-100, 100, -100],
                                      [100, 100, 100],
                                      [0, 0, -1]]], tf.float32)
    target_tensor = tf.constant([[[0, 1, 0],
                                  [1, 0, 0],
                                  [1, 0, 0],
                                  [0, 0, 1]],
                                 [[0, 0, 1],
                                  [0, 1, 0],
                                  [1, 1, 1],
                                  [1, 0, 0]]], tf.float32)
    weights = tf.constant([[1, 1, 1, 1],
                           [1, 1, 1, 0]], tf.float32)
    alpha = tf.constant(.5, tf.float32)
    loss_op = losses.BootstrappedSigmoidClassificationLoss(
        alpha, bootstrap_type='hard', anchorwise_output=True)
    loss = loss_op(prediction_tensor, target_tensor, weights=weights)
    # np.array instead of the deprecated np.matrix.
    exp_loss = np.array([[0, 0, -math.log(.5), 0],
                         [-math.log(.5), 0, 0, 0]])
    with self.test_session() as sess:
      loss_output = sess.run(loss)
      self.assertAllClose(loss_output, exp_loss)
class HardExampleMinerTest(tf.test.TestCase):
  """Tests for losses.HardExampleMiner."""

  def testHardMiningWithSingleLossType(self):
    """Mining by location loss only: top-1 anchor per image is kept."""
    location_losses = tf.constant([[100, 90, 80, 0],
                                   [0, 1, 2, 3]], tf.float32)
    cls_losses = tf.constant([[0, 10, 50, 110],
                              [9, 6, 3, 0]], tf.float32)
    box_corners = tf.constant([[0.1, 0.1, 0.9, 0.9],
                               [0.1, 0.1, 0.9, 0.9],
                               [0.1, 0.1, 0.9, 0.9],
                               [0.1, 0.1, 0.9, 0.9]], tf.float32)
    decoded_boxlist_list = []
    decoded_boxlist_list.append(box_list.BoxList(box_corners))
    decoded_boxlist_list.append(box_list.BoxList(box_corners))
    # Uses only location loss to select hard examples
    loss_op = losses.HardExampleMiner(num_hard_examples=1,
                                      iou_threshold=0.0,
                                      loss_type='loc',
                                      cls_loss_weight=1,
                                      loc_loss_weight=1)
    (loc_loss, cls_loss) = loss_op(location_losses, cls_losses,
                                   decoded_boxlist_list)
    # Hardest anchors by loc loss are index 0 (loss 100) in image 0 and
    # index 3 (loss 3) in image 1; their cls losses happen to be 0.
    exp_loc_loss = 100 + 3
    exp_cls_loss = 0 + 0
    with self.test_session() as sess:
      loc_loss_output = sess.run(loc_loss)
      self.assertAllClose(loc_loss_output, exp_loc_loss)
      cls_loss_output = sess.run(cls_loss)
      self.assertAllClose(cls_loss_output, exp_cls_loss)

  def testHardMiningWithBothLossType(self):
    """Mining by the sum of location and classification losses."""
    location_losses = tf.constant([[100, 90, 80, 0],
                                   [0, 1, 2, 3]], tf.float32)
    cls_losses = tf.constant([[0, 10, 50, 110],
                              [9, 6, 3, 0]], tf.float32)
    box_corners = tf.constant([[0.1, 0.1, 0.9, 0.9],
                               [0.1, 0.1, 0.9, 0.9],
                               [0.1, 0.1, 0.9, 0.9],
                               [0.1, 0.1, 0.9, 0.9]], tf.float32)
    decoded_boxlist_list = []
    decoded_boxlist_list.append(box_list.BoxList(box_corners))
    decoded_boxlist_list.append(box_list.BoxList(box_corners))
    loss_op = losses.HardExampleMiner(num_hard_examples=1,
                                      iou_threshold=0.0,
                                      loss_type='both',
                                      cls_loss_weight=1,
                                      loc_loss_weight=1)
    (loc_loss, cls_loss) = loss_op(location_losses, cls_losses,
                                   decoded_boxlist_list)
    # Combined losses select index 2 (80+50) in image 0 and index 0 (0+9)
    # in image 1.
    exp_loc_loss = 80 + 0
    exp_cls_loss = 50 + 9
    with self.test_session() as sess:
      loc_loss_output = sess.run(loc_loss)
      self.assertAllClose(loc_loss_output, exp_loc_loss)
      cls_loss_output = sess.run(cls_loss)
      self.assertAllClose(cls_loss_output, exp_cls_loss)

  def testHardMiningNMS(self):
    """NMS with iou_threshold 0.5 suppresses duplicates of kept boxes."""
    location_losses = tf.constant([[100, 90, 80, 0],
                                   [0, 1, 2, 3]], tf.float32)
    cls_losses = tf.constant([[0, 10, 50, 110],
                              [9, 6, 3, 0]], tf.float32)
    # Boxes 0, 2, 3 are identical; box 1 is disjoint from them.
    box_corners = tf.constant([[0.1, 0.1, 0.9, 0.9],
                               [0.9, 0.9, 0.99, 0.99],
                               [0.1, 0.1, 0.9, 0.9],
                               [0.1, 0.1, 0.9, 0.9]], tf.float32)
    decoded_boxlist_list = []
    decoded_boxlist_list.append(box_list.BoxList(box_corners))
    decoded_boxlist_list.append(box_list.BoxList(box_corners))
    loss_op = losses.HardExampleMiner(num_hard_examples=2,
                                      iou_threshold=0.5,
                                      loss_type='cls',
                                      cls_loss_weight=1,
                                      loc_loss_weight=1)
    (loc_loss, cls_loss) = loss_op(location_losses, cls_losses,
                                   decoded_boxlist_list)
    # Image 0 keeps anchors 3 and 1 (2 and 0 overlap anchor 3); image 1
    # keeps anchors 0 and 1.
    exp_loc_loss = 0 + 90 + 0 + 1
    exp_cls_loss = 110 + 10 + 9 + 6
    with self.test_session() as sess:
      loc_loss_output = sess.run(loc_loss)
      self.assertAllClose(loc_loss_output, exp_loc_loss)
      cls_loss_output = sess.run(cls_loss)
      self.assertAllClose(cls_loss_output, exp_cls_loss)

  def testEnforceNegativesPerPositiveRatio(self):
    """Caps the number of mined negatives per mined positive anchor."""
    location_losses = tf.constant([[100, 90, 80, 0, 1, 2, 3, 10, 20,
                                    100, 20, 3]], tf.float32)
    cls_losses = tf.constant([[0, 0, 100, 0, 90, 70, 0, 60, 0,
                               17, 13, 0]], tf.float32)
    box_corners = tf.constant([[0.0, 0.0, 0.2, 0.1],
                               [0.0, 0.0, 0.2, 0.1],
                               [0.0, 0.0, 0.2, 0.1],
                               [0.0, 0.0, 0.2, 0.1],
                               [0.0, 0.0, 0.5, 0.1],
                               [0.0, 0.0, 0.6, 0.1],
                               [0.0, 0.0, 0.2, 0.1],
                               [0.0, 0.0, 0.8, 0.1],
                               [0.0, 0.0, 0.2, 0.1],
                               [0.0, 0.0, 1.0, 0.1],
                               [0.0, 0.0, 1.1, 0.1],
                               [0.0, 0.0, 0.2, 0.1]], tf.float32)
    # Anchors 0, 2, 5, 11 are positives (matched to rows); the rest are
    # negatives (-1).
    match_results = tf.constant([2, -1, 0, -1, -1, 1, -1, -1, -1, -1, -1, 3])
    match_list = [matcher.Match(match_results)]
    decoded_boxlist_list = []
    decoded_boxlist_list.append(box_list.BoxList(box_corners))
    max_negatives_per_positive_list = [0.0, 0.5, 1.0, 1.5, 10]
    exp_loc_loss_list = [80 + 2,
                         80 + 1 + 2,
                         80 + 1 + 2 + 10,
                         80 + 1 + 2 + 10 + 100,
                         80 + 1 + 2 + 10 + 100 + 20]
    exp_cls_loss_list = [100 + 70,
                         100 + 90 + 70,
                         100 + 90 + 70 + 60,
                         100 + 90 + 70 + 60 + 17,
                         100 + 90 + 70 + 60 + 17 + 13]
    for max_negatives_per_positive, exp_loc_loss, exp_cls_loss in zip(
        max_negatives_per_positive_list, exp_loc_loss_list, exp_cls_loss_list):
      loss_op = losses.HardExampleMiner(
          num_hard_examples=None, iou_threshold=0.9999, loss_type='cls',
          cls_loss_weight=1, loc_loss_weight=1,
          max_negatives_per_positive=max_negatives_per_positive)
      (loc_loss, cls_loss) = loss_op(location_losses, cls_losses,
                                     decoded_boxlist_list, match_list)
      loss_op.summarize()
      with self.test_session() as sess:
        loc_loss_output = sess.run(loc_loss)
        self.assertAllClose(loc_loss_output, exp_loc_loss)
        cls_loss_output = sess.run(cls_loss)
        self.assertAllClose(cls_loss_output, exp_cls_loss)

  def testEnforceNegativesPerPositiveRatioWithMinNegativesPerImage(self):
    """With no positives at all, min_negatives_per_image sets the budget."""
    location_losses = tf.constant([[100, 90, 80, 0, 1, 2, 3, 10, 20,
                                    100, 20, 3]], tf.float32)
    cls_losses = tf.constant([[0, 0, 100, 0, 90, 70, 0, 60, 0,
                               17, 13, 0]], tf.float32)
    box_corners = tf.constant([[0.0, 0.0, 0.2, 0.1],
                               [0.0, 0.0, 0.2, 0.1],
                               [0.0, 0.0, 0.2, 0.1],
                               [0.0, 0.0, 0.2, 0.1],
                               [0.0, 0.0, 0.5, 0.1],
                               [0.0, 0.0, 0.6, 0.1],
                               [0.0, 0.0, 0.2, 0.1],
                               [0.0, 0.0, 0.8, 0.1],
                               [0.0, 0.0, 0.2, 0.1],
                               [0.0, 0.0, 1.0, 0.1],
                               [0.0, 0.0, 1.1, 0.1],
                               [0.0, 0.0, 0.2, 0.1]], tf.float32)
    # Every anchor is an unmatched negative.
    match_results = tf.constant([-1] * 12)
    match_list = [matcher.Match(match_results)]
    decoded_boxlist_list = []
    decoded_boxlist_list.append(box_list.BoxList(box_corners))
    min_negatives_per_image_list = [0, 1, 2, 4, 5, 6]
    exp_loc_loss_list = [0,
                         80,
                         80 + 1,
                         80 + 1 + 2 + 10,
                         80 + 1 + 2 + 10 + 100,
                         80 + 1 + 2 + 10 + 100 + 20]
    exp_cls_loss_list = [0,
                         100,
                         100 + 90,
                         100 + 90 + 70 + 60,
                         100 + 90 + 70 + 60 + 17,
                         100 + 90 + 70 + 60 + 17 + 13]
    for min_negatives_per_image, exp_loc_loss, exp_cls_loss in zip(
        min_negatives_per_image_list, exp_loc_loss_list, exp_cls_loss_list):
      loss_op = losses.HardExampleMiner(
          num_hard_examples=None, iou_threshold=0.9999, loss_type='cls',
          cls_loss_weight=1, loc_loss_weight=1,
          max_negatives_per_positive=3,
          min_negatives_per_image=min_negatives_per_image)
      (loc_loss, cls_loss) = loss_op(location_losses, cls_losses,
                                     decoded_boxlist_list, match_list)
      with self.test_session() as sess:
        loc_loss_output = sess.run(loc_loss)
        self.assertAllClose(loc_loss_output, exp_loc_loss)
        cls_loss_output = sess.run(cls_loss)
        self.assertAllClose(cls_loss_output, exp_cls_loss)
# Run all test cases in this module.
if __name__ == '__main__':
  tf.test.main()
object_detection/core/matcher.py
0 → 100644
View file @
44fa1d37
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Matcher interface and Match class.
This module defines the Matcher interface and the Match object. The job of the
matcher is to match row and column indices based on the similarity matrix and
other optional parameters. Each column is matched to at most one row. There
are three possibilities for the matching:
1) match: A column matches a row.
2) no_match: A column does not match any row.
3) ignore: A column that is neither 'match' nor no_match.
The ignore case is regularly encountered in object detection: when an anchor has
a relatively small overlap with a ground-truth box, one neither wants to
consider this box a positive example (match) nor a negative example (no match).
The Match class is used to store the match results and it provides simple apis
to query the results.
"""
from
abc
import
ABCMeta
from
abc
import
abstractmethod
import
tensorflow
as
tf
class Match(object):
  """Class to store results from the matcher.

  This class is used to store the results from the matcher. It provides
  convenient methods to query the matching results.
  """

  def __init__(self, match_results):
    """Constructs a Match object.

    Args:
      match_results: Integer tensor of shape [N] with (1) match_results[i]>=0,
        meaning that column i is matched with row match_results[i].
        (2) match_results[i]=-1, meaning that column i is not matched.
        (3) match_results[i]=-2, meaning that column i is ignored.

    Raises:
      ValueError: if match_results does not have rank 1 or is not an
        integer int32 scalar tensor
    """
    if match_results.shape.ndims != 1:
      raise ValueError('match_results should have rank 1')
    # NOTE(review): only int32 is accepted here, even though the error
    # message mentions int64.
    if match_results.dtype != tf.int32:
      raise ValueError('match_results should be an int32 or int64 scalar '
                       'tensor')
    self._match_results = match_results

  @property
  def match_results(self):
    """The accessor for match results.

    Returns:
      the tensor which encodes the match results.
    """
    return self._match_results

  def matched_column_indices(self):
    """Returns column indices that match to some row.

    The indices returned by this op are always sorted in increasing order.

    Returns:
      column_indices: int32 tensor of shape [K] with column indices.
    """
    return self._reshape_and_cast(tf.where(tf.greater(self._match_results, -1)))

  def matched_column_indicator(self):
    """Returns column indices that are matched.

    Returns:
      column_indices: int32 tensor of shape [K] with column indices.
    """
    return tf.greater_equal(self._match_results, 0)

  def num_matched_columns(self):
    """Returns number (int32 scalar tensor) of matched columns."""
    return tf.size(self.matched_column_indices())

  def unmatched_column_indices(self):
    """Returns column indices that do not match any row.

    The indices returned by this op are always sorted in increasing order.

    Returns:
      column_indices: int32 tensor of shape [K] with column indices.
    """
    return self._reshape_and_cast(tf.where(tf.equal(self._match_results, -1)))

  def unmatched_column_indicator(self):
    """Returns column indices that are unmatched.

    Returns:
      column_indices: int32 tensor of shape [K] with column indices.
    """
    return tf.equal(self._match_results, -1)

  def num_unmatched_columns(self):
    """Returns number (int32 scalar tensor) of unmatched columns."""
    return tf.size(self.unmatched_column_indices())

  def ignored_column_indices(self):
    """Returns column indices that are ignored (neither Matched nor Unmatched).

    The indices returned by this op are always sorted in increasing order.

    Returns:
      column_indices: int32 tensor of shape [K] with column indices.
    """
    return self._reshape_and_cast(tf.where(self.ignored_column_indicator()))

  def ignored_column_indicator(self):
    """Returns boolean column indicator where True means the colum is ignored.

    Returns:
      column_indicator: boolean vector which is True for all ignored column
      indices.
    """
    return tf.equal(self._match_results, -2)

  def num_ignored_columns(self):
    """Returns number (int32 scalar tensor) of ignored columns."""
    return tf.size(self.ignored_column_indices())

  def unmatched_or_ignored_column_indices(self):
    """Returns column indices that are unmatched or ignored.

    The indices returned by this op are always sorted in increasing order.

    Returns:
      column_indices: int32 tensor of shape [K] with column indices.
    """
    # match_results < 0 covers both -1 (unmatched) and -2 (ignored).
    return self._reshape_and_cast(tf.where(tf.greater(0, self._match_results)))

  def matched_row_indices(self):
    """Returns row indices that match some column.

    The indices returned by this op are ordered so as to be in correspondence
    with the output of matched_column_indicator(). For example if
    self.matched_column_indicator() is [0,2], and self.matched_row_indices() is
    [7, 3], then we know that column 0 was matched to row 7 and column 2 was
    matched to row 3.

    Returns:
      row_indices: int32 tensor of shape [K] with row indices.
    """
    return self._reshape_and_cast(
        tf.gather(self._match_results, self.matched_column_indices()))

  def _reshape_and_cast(self, t):
    # Flattens tf.where output ([K, 1] int64) to a [K] int32 vector.
    return tf.cast(tf.reshape(t, [-1]), tf.int32)
class Matcher(object):
  """Abstract base class for matcher.
  """
  __metaclass__ = ABCMeta

  def match(self, similarity_matrix, scope=None, **params):
    """Computes matches among row and column indices and returns the result.

    Computes matches among the row and column indices based on the similarity
    matrix and optional arguments.

    Args:
      similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
        where higher value means more similar.
      scope: Op scope name. Defaults to 'Match' if None.
      **params: Additional keyword arguments for specific implementations of
        the Matcher.

    Returns:
      A Match object with the results of matching.
    """
    # Template method: wraps the subclass _match() result in a Match object
    # under a common name scope.
    with tf.name_scope(scope, 'Match', [similarity_matrix, params]) as scope:
      return Match(self._match(similarity_matrix, **params))

  @abstractmethod
  def _match(self, similarity_matrix, **params):
    """Method to be overriden by implementations.

    Args:
      similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
        where higher value means more similar.
      **params: Additional keyword arguments for specific implementations of
        the Matcher.

    Returns:
      match_results: Integer tensor of shape [M]: match_results[i]>=0 means
        that column i is matched to row match_results[i], match_results[i]=-1
        means that the column is not matched. match_results[i]=-2 means that
        the column is ignored (usually this happens when there is a very weak
        match which one neither wants as positive nor negative example).
    """
    pass
object_detection/core/matcher_test.py
0 → 100644
View file @
44fa1d37
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.core.matcher."""
import
numpy
as
np
import
tensorflow
as
tf
from
object_detection.core
import
matcher
class AnchorMatcherTest(tf.test.TestCase):
  """Tests for matcher.Match query methods.

  Uses match_results [3, 1, -1, 0, -1, 5, -2]: columns 0,1,3,5 are matched,
  columns 2,4 are unmatched (-1) and column 6 is ignored (-2).
  """

  # NOTE: assertEquals is a deprecated alias (removed in Python 3.12);
  # assertEqual is used throughout.

  def test_get_correct_matched_columnIndices(self):
    match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
    match = matcher.Match(match_results)
    expected_column_indices = [0, 1, 3, 5]
    matched_column_indices = match.matched_column_indices()
    self.assertEqual(matched_column_indices.dtype, tf.int32)
    with self.test_session() as sess:
      matched_column_indices = sess.run(matched_column_indices)
      self.assertAllEqual(matched_column_indices, expected_column_indices)

  def test_get_correct_counts(self):
    match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
    match = matcher.Match(match_results)
    exp_num_matched_columns = 4
    exp_num_unmatched_columns = 2
    exp_num_ignored_columns = 1
    num_matched_columns = match.num_matched_columns()
    num_unmatched_columns = match.num_unmatched_columns()
    num_ignored_columns = match.num_ignored_columns()
    self.assertEqual(num_matched_columns.dtype, tf.int32)
    self.assertEqual(num_unmatched_columns.dtype, tf.int32)
    self.assertEqual(num_ignored_columns.dtype, tf.int32)
    with self.test_session() as sess:
      (num_matched_columns_out, num_unmatched_columns_out,
       num_ignored_columns_out) = sess.run(
           [num_matched_columns, num_unmatched_columns, num_ignored_columns])
      self.assertAllEqual(num_matched_columns_out, exp_num_matched_columns)
      self.assertAllEqual(num_unmatched_columns_out,
                          exp_num_unmatched_columns)
      self.assertAllEqual(num_ignored_columns_out, exp_num_ignored_columns)

  def testGetCorrectUnmatchedColumnIndices(self):
    match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
    match = matcher.Match(match_results)
    expected_column_indices = [2, 4]
    unmatched_column_indices = match.unmatched_column_indices()
    self.assertEqual(unmatched_column_indices.dtype, tf.int32)
    with self.test_session() as sess:
      unmatched_column_indices = sess.run(unmatched_column_indices)
      self.assertAllEqual(unmatched_column_indices, expected_column_indices)

  def testGetCorrectMatchedRowIndices(self):
    match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
    match = matcher.Match(match_results)
    expected_row_indices = [3, 1, 0, 5]
    matched_row_indices = match.matched_row_indices()
    self.assertEqual(matched_row_indices.dtype, tf.int32)
    with self.test_session() as sess:
      matched_row_inds = sess.run(matched_row_indices)
      self.assertAllEqual(matched_row_inds, expected_row_indices)

  def test_get_correct_ignored_column_indices(self):
    match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
    match = matcher.Match(match_results)
    expected_column_indices = [6]
    ignored_column_indices = match.ignored_column_indices()
    self.assertEqual(ignored_column_indices.dtype, tf.int32)
    with self.test_session() as sess:
      ignored_column_indices = sess.run(ignored_column_indices)
      self.assertAllEqual(ignored_column_indices, expected_column_indices)

  def test_get_correct_matched_column_indicator(self):
    match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
    match = matcher.Match(match_results)
    expected_column_indicator = [True, True, False, True, False, True, False]
    matched_column_indicator = match.matched_column_indicator()
    self.assertEqual(matched_column_indicator.dtype, tf.bool)
    with self.test_session() as sess:
      matched_column_indicator = sess.run(matched_column_indicator)
      self.assertAllEqual(matched_column_indicator, expected_column_indicator)

  def test_get_correct_unmatched_column_indicator(self):
    match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
    match = matcher.Match(match_results)
    expected_column_indicator = [False, False, True, False, True, False, False]
    unmatched_column_indicator = match.unmatched_column_indicator()
    self.assertEqual(unmatched_column_indicator.dtype, tf.bool)
    with self.test_session() as sess:
      unmatched_column_indicator = sess.run(unmatched_column_indicator)
      self.assertAllEqual(unmatched_column_indicator,
                          expected_column_indicator)

  def test_get_correct_ignored_column_indicator(self):
    match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
    match = matcher.Match(match_results)
    expected_column_indicator = [False, False, False, False, False, False,
                                 True]
    ignored_column_indicator = match.ignored_column_indicator()
    self.assertEqual(ignored_column_indicator.dtype, tf.bool)
    with self.test_session() as sess:
      ignored_column_indicator = sess.run(ignored_column_indicator)
      self.assertAllEqual(ignored_column_indicator, expected_column_indicator)

  def test_get_correct_unmatched_ignored_column_indices(self):
    match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
    match = matcher.Match(match_results)
    expected_column_indices = [2, 4, 6]
    unmatched_ignored_column_indices = (
        match.unmatched_or_ignored_column_indices())
    self.assertEqual(unmatched_ignored_column_indices.dtype, tf.int32)
    with self.test_session() as sess:
      unmatched_ignored_column_indices = sess.run(
          unmatched_ignored_column_indices)
      self.assertAllEqual(unmatched_ignored_column_indices,
                          expected_column_indices)

  def test_all_columns_accounted_for(self):
    # Note: deliberately setting to small number so not always
    # all possibilities appear (matched, unmatched, ignored)
    num_matches = 10
    match_results = tf.random_uniform(
        [num_matches], minval=-2, maxval=5, dtype=tf.int32)
    match = matcher.Match(match_results)
    matched_column_indices = match.matched_column_indices()
    unmatched_column_indices = match.unmatched_column_indices()
    ignored_column_indices = match.ignored_column_indices()
    with self.test_session() as sess:
      matched, unmatched, ignored = sess.run([
          matched_column_indices, unmatched_column_indices,
          ignored_column_indices])
      # The three disjoint index sets must partition [0, num_matches).
      all_indices = np.hstack((matched, unmatched, ignored))
      all_indices_sorted = np.sort(all_indices)
      self.assertAllEqual(all_indices_sorted,
                          np.arange(num_matches, dtype=np.int32))
# Run all test cases in this module.
if __name__ == '__main__':
  tf.test.main()
object_detection/core/minibatch_sampler.py
0 → 100644
View file @
44fa1d37
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Base minibatch sampler module.
The job of the minibatch_sampler is to subsample a minibatch based on some
criterion.
The main function call is:
subsample(indicator, batch_size, **params).
Indicator is a 1d boolean tensor where True denotes which examples can be
sampled. It returns a boolean indicator where True denotes an example has been
sampled..
Subclasses should implement the Subsample function and can make use of the
@staticmethod SubsampleIndicator.
"""
from
abc
import
ABCMeta
from
abc
import
abstractmethod
import
tensorflow
as
tf
from
object_detection.utils
import
ops
class MinibatchSampler(object):
  """Abstract base class for subsampling minibatches."""
  # Python 2-style ABC declaration; subclasses must implement `subsample`.
  __metaclass__ = ABCMeta

  def __init__(self):
    """Constructs a minibatch sampler."""
    pass

  @abstractmethod
  def subsample(self, indicator, batch_size, **params):
    """Returns subsample of entries in indicator.

    Args:
      indicator: boolean tensor of shape [N] whose True entries can be sampled.
      batch_size: desired batch size.
      **params: additional keyword arguments for specific implementations of
          the MinibatchSampler.

    Returns:
      sample_indicator: boolean tensor of shape [N] whose True entries have been
      sampled. If sum(indicator) >= batch_size, sum(is_sampled) = batch_size
    """
    pass

  @staticmethod
  def subsample_indicator(indicator, num_samples):
    """Subsample indicator vector.

    Given a boolean indicator vector with M elements set to `True`, the function
    assigns all but `num_samples` of these previously `True` elements to
    `False`. If `num_samples` is greater than M, the original indicator vector
    is returned.

    Args:
      indicator: a 1-dimensional boolean tensor indicating which elements
        are allowed to be sampled and which are not.
      num_samples: int32 scalar tensor

    Returns:
      a boolean tensor with the same shape as input (indicator) tensor
    """
    # Positions of the True entries; shuffle so the subsample is random.
    indices = tf.where(indicator)
    indices = tf.random_shuffle(indices)
    indices = tf.reshape(indices, [-1])

    # Never ask for more samples than there are True entries.
    num_samples = tf.minimum(tf.size(indices), num_samples)
    # Keep the first `num_samples` shuffled positions.
    selected_indices = tf.slice(indices, [0], tf.reshape(num_samples, [1]))

    # Scatter the selected positions back into a dense 0/1 vector of the
    # original length.
    selected_indicator = ops.indices_to_dense_vector(selected_indices,
                                                     tf.shape(indicator)[0])

    # Convert the dense 0/1 vector to a boolean sampling mask.
    return tf.equal(selected_indicator, 1)
object_detection/core/minibatch_sampler_test.py
0 → 100644
View file @
44fa1d37
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for google3.research.vale.object_detection.minibatch_sampler."""
import
numpy
as
np
import
tensorflow
as
tf
from
object_detection.core
import
minibatch_sampler
class MinibatchSamplerTest(tf.test.TestCase):
  """Tests for MinibatchSampler.subsample_indicator."""

  # BUG FIX: the original tests used `self.assertTrue(np.sum(samples_out), N)`.
  # assertTrue treats its second positional argument as the failure *message*,
  # so those assertions only verified that at least one element was sampled,
  # not that exactly N were.  They are replaced with assertEqual below.

  def test_subsample_indicator_when_more_true_elements_than_num_samples(self):
    np_indicator = [True, False, True, False, True, True, False]
    indicator = tf.constant(np_indicator)
    samples = minibatch_sampler.MinibatchSampler.subsample_indicator(
        indicator, 3)
    with self.test_session() as sess:
      samples_out = sess.run(samples)
      # Exactly 3 of the 4 True entries must be retained.
      self.assertEqual(3, np.sum(samples_out))
      # Sampled entries must be a subset of the originally-True entries.
      self.assertAllEqual(samples_out,
                          np.logical_and(samples_out, np_indicator))

  def test_subsample_when_more_true_elements_than_num_samples_no_shape(self):
    # Same as above, but the indicator arrives via a shapeless placeholder.
    np_indicator = [True, False, True, False, True, True, False]
    indicator = tf.placeholder(tf.bool)
    feed_dict = {indicator: np_indicator}

    samples = minibatch_sampler.MinibatchSampler.subsample_indicator(
        indicator, 3)
    with self.test_session() as sess:
      samples_out = sess.run(samples, feed_dict=feed_dict)
      self.assertEqual(3, np.sum(samples_out))
      self.assertAllEqual(samples_out,
                          np.logical_and(samples_out, np_indicator))

  def test_subsample_indicator_when_less_true_elements_than_num_samples(self):
    np_indicator = [True, False, True, False, True, True, False]
    indicator = tf.constant(np_indicator)
    samples = minibatch_sampler.MinibatchSampler.subsample_indicator(
        indicator, 5)
    with self.test_session() as sess:
      samples_out = sess.run(samples)
      # Only 4 entries are True, so all 4 (not 5) are returned.
      self.assertEqual(4, np.sum(samples_out))
      self.assertAllEqual(samples_out,
                          np.logical_and(samples_out, np_indicator))

  def test_subsample_indicator_when_num_samples_is_zero(self):
    np_indicator = [True, False, True, False, True, True, False]
    indicator = tf.constant(np_indicator)
    samples_none = minibatch_sampler.MinibatchSampler.subsample_indicator(
        indicator, 0)
    with self.test_session() as sess:
      samples_none_out = sess.run(samples_none)
      # Requesting zero samples must yield an all-False indicator.
      self.assertAllEqual(
          np.zeros_like(samples_none_out, dtype=bool),
          samples_none_out)

  def test_subsample_indicator_when_indicator_all_false(self):
    indicator_empty = tf.zeros([0], dtype=tf.bool)
    samples_empty = minibatch_sampler.MinibatchSampler.subsample_indicator(
        indicator_empty, 4)
    with self.test_session() as sess:
      samples_empty_out = sess.run(samples_empty)
      # An empty indicator produces an empty sample mask.
      self.assertEqual(0, samples_empty_out.size)
# Allow running this test file directly as a script.
if __name__ == '__main__':
  tf.test.main()
object_detection/core/model.py
0 → 100644
View file @
44fa1d37
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Abstract detection model.
This file defines a generic base class for detection models. Programs that are
designed to work with arbitrary detection models should only depend on this
class. We intend for the functions in this class to follow tensor-in/tensor-out
design, thus all functions have tensors or lists/dictionaries holding tensors as
inputs and outputs.
Abstractly, detection models predict output tensors given input images
which can be passed to a loss function at training time or passed to a
postprocessing function at eval time. The computation graphs at a high level
consequently look as follows:
Training time:
inputs (images tensor) -> preprocess -> predict -> loss -> outputs (loss tensor)
Evaluation time:
inputs (images tensor) -> preprocess -> predict -> postprocess
-> outputs (boxes tensor, scores tensor, classes tensor, num_detections tensor)
DetectionModels must thus implement four functions (1) preprocess, (2) predict,
(3) postprocess and (4) loss. DetectionModels should make no assumptions about
the input size or aspect ratio --- they are responsible for doing any
resize/reshaping necessary (see docstring for the preprocess function).
Output classes are always integers in the range [0, num_classes). Any mapping
of these integers to semantic labels is to be handled outside of this class.
By default, DetectionModels produce bounding box detections; However, we support
a handful of auxiliary annotations associated with each bounding box, namely,
instance masks and keypoints.
"""
from
abc
import
ABCMeta
from
abc
import
abstractmethod
from
object_detection.core
import
standard_fields
as
fields
class DetectionModel(object):
  """Abstract base class for detection models."""
  # Python 2-style ABC declaration, consistent with the rest of this codebase.
  __metaclass__ = ABCMeta

  def __init__(self, num_classes):
    """Constructor.

    Args:
      num_classes: number of classes.  Note that num_classes *does not* include
        background categories that might be implicitly be predicted in various
        implementations.
    """
    self._num_classes = num_classes
    # Maps fields.BoxListFields keys to lists of groundtruth tensors; populated
    # by provide_groundtruth and read back via groundtruth_lists.
    self._groundtruth_lists = {}

  @property
  def num_classes(self):
    return self._num_classes

  def groundtruth_lists(self, field):
    """Access list of groundtruth tensors.

    Args:
      field: a string key, options are
        fields.BoxListFields.{boxes,classes,masks,keypoints}

    Returns:
      a list of tensors holding groundtruth information (see also
      provide_groundtruth function below), with one entry for each image in the
      batch.

    Raises:
      RuntimeError: if the field has not been provided via provide_groundtruth.
    """
    if field not in self._groundtruth_lists:
      # BUG FIX: the original passed `field` as a second constructor argument
      # (logging-style), producing the unformatted message
      # "('Groundtruth tensor %s has not been provided', 'boxes')".
      # Exceptions do not apply %-formatting; format explicitly.
      raise RuntimeError('Groundtruth tensor %s has not been provided' % field)
    return self._groundtruth_lists[field]

  @abstractmethod
  def preprocess(self, inputs):
    """Input preprocessing.

    To be overridden by implementations.

    This function is responsible for any scaling/shifting of input values that
    is necessary prior to running the detector on an input image.
    It is also responsible for any resizing that might be necessary as images
    are assumed to arrive in arbitrary sizes.  While this function could
    conceivably be part of the predict method (below), it is often convenient
    to keep these separate --- for example, we may want to preprocess on one
    device, place onto a queue, and let another device (e.g., the GPU) handle
    prediction.

    A few important notes about the preprocess function:
    + We assume that this operation does not have any trainable variables nor
      does it affect the groundtruth annotations in any way (thus data
      augmentation operations such as random cropping should be performed
      externally).
    + There is no assumption that the batchsize in this function is the same as
      the batch size in the predict function.  In fact, we recommend calling the
      preprocess function prior to calling any batching operations (which should
      happen outside of the model) and thus assuming that batch sizes are equal
      to 1 in the preprocess function.
    + There is also no explicit assumption that the output resolutions
      must be fixed across inputs --- this is to support "fully convolutional"
      settings in which input images can have different shapes/resolutions.

    Args:
      inputs: a [batch, height_in, width_in, channels] float32 tensor
        representing a batch of images with values between 0 and 255.0.

    Returns:
      preprocessed_inputs: a [batch, height_out, width_out, channels] float32
        tensor representing a batch of images.
    """
    pass

  @abstractmethod
  def predict(self, preprocessed_inputs):
    """Predict prediction tensors from inputs tensor.

    Outputs of this function can be passed to loss or postprocess functions.

    Args:
      preprocessed_inputs: a [batch, height, width, channels] float32 tensor
        representing a batch of images.

    Returns:
      prediction_dict: a dictionary holding prediction tensors to be
        passed to the Loss or Postprocess functions.
    """
    pass

  @abstractmethod
  def postprocess(self, prediction_dict, **params):
    """Convert predicted output tensors to final detections.

    Outputs adhere to the following conventions:
    * Classes are integers in [0, num_classes); background classes are removed
      and the first non-background class is mapped to 0.
    * Boxes are to be interpreted as being in [y_min, x_min, y_max, x_max]
      format and normalized relative to the image window.
    * `num_detections` is provided for settings where detections are padded to a
      fixed number of boxes.
    * We do not specifically assume any kind of probabilistic interpretation
      of the scores --- the only important thing is their relative ordering.
      Thus implementations of the postprocess function are free to output
      logits, probabilities, calibrated probabilities, or anything else.

    Args:
      prediction_dict: a dictionary holding prediction tensors.
      **params: Additional keyword arguments for specific implementations of
        DetectionModel.

    Returns:
      detections: a dictionary containing the following fields
        detection_boxes: [batch, max_detections, 4]
        detection_scores: [batch, max_detections]
        detection_classes: [batch, max_detections]
        instance_masks: [batch, max_detections, image_height, image_width]
          (optional)
        keypoints: [batch, max_detections, num_keypoints, 2] (optional)
        num_detections: [batch]
    """
    pass

  @abstractmethod
  def loss(self, prediction_dict):
    """Compute scalar loss tensors with respect to provided groundtruth.

    Calling this function requires that groundtruth tensors have been
    provided via the provide_groundtruth function.

    Args:
      prediction_dict: a dictionary holding predicted tensors

    Returns:
      a dictionary mapping strings (loss names) to scalar tensors representing
        loss values.
    """
    pass

  def provide_groundtruth(self,
                          groundtruth_boxes_list,
                          groundtruth_classes_list,
                          groundtruth_masks_list=None,
                          groundtruth_keypoints_list=None):
    """Provide groundtruth tensors.

    Args:
      groundtruth_boxes_list: a list of 2-D tf.float32 tensors of shape
        [num_boxes, 4] containing coordinates of the groundtruth boxes.
        Groundtruth boxes are provided in [y_min, x_min, y_max, x_max]
        format and assumed to be normalized and clipped
        relative to the image window with y_min <= y_max and x_min <= x_max.
      groundtruth_classes_list: a list of 2-D tf.float32 one-hot (or k-hot)
        tensors of shape [num_boxes, num_classes] containing the class targets
        with the 0th index assumed to map to the first non-background class.
      groundtruth_masks_list: a list of 3-D tf.float32 tensors of
        shape [max_detections, height_in, width_in] containing instance
        masks with values in {0, 1}.  If None, no masks are provided.
        Mask resolution `height_in`x`width_in` must agree with the resolution
        of the input image tensor provided to the `preprocess` function.
      groundtruth_keypoints_list: a list of tf.float32 tensors of
        shape [max_detections, num_keypoints, 2] containing keypoints.
        Keypoints are assumed to be provided in normalized coordinates and
        missing keypoints should be encoded as NaN.
    """
    self._groundtruth_lists[fields.BoxListFields.boxes] = groundtruth_boxes_list
    self._groundtruth_lists[
        fields.BoxListFields.classes] = groundtruth_classes_list
    # Optional annotations are only registered when supplied; a non-empty list
    # is truthy, so `if list:` also skips empty lists.
    if groundtruth_masks_list:
      self._groundtruth_lists[
          fields.BoxListFields.masks] = groundtruth_masks_list
    if groundtruth_keypoints_list:
      self._groundtruth_lists[
          fields.BoxListFields.keypoints] = groundtruth_keypoints_list

  @abstractmethod
  def restore_fn(self, checkpoint_path, from_detection_checkpoint=True):
    """Return callable for loading a foreign checkpoint into tensorflow graph.

    Loads variables from a different tensorflow graph (typically feature
    extractor variables).  This enables the model to initialize based on weights
    from another task.  For example, the feature extractor variables from a
    classification model can be used to bootstrap training of an object
    detector.  When loading from an object detection model, the checkpoint model
    should have the same parameters as this detection model with exception of
    the num_classes parameter.

    Args:
      checkpoint_path: path to checkpoint to restore.
      from_detection_checkpoint: whether to restore from a full detection
        checkpoint (with compatible variable names) or to restore from a
        classification checkpoint for initialization prior to training.

    Returns:
      a callable which takes a tf.Session as input and loads a checkpoint when
        run.
    """
    pass
object_detection/core/post_processing.py
0 → 100644
View file @
44fa1d37
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Post-processing operations on detected boxes."""
import
tensorflow
as
tf
from
object_detection.core
import
box_list
from
object_detection.core
import
box_list_ops
from
object_detection.core
import
standard_fields
as
fields
def multiclass_non_max_suppression(boxes,
                                   scores,
                                   score_thresh,
                                   iou_thresh,
                                   max_size_per_class,
                                   max_total_size=0,
                                   clip_window=None,
                                   change_coordinate_frame=False,
                                   masks=None,
                                   additional_fields=None,
                                   scope=None):
  """Multi-class version of non maximum suppression.

  This op greedily selects a subset of detection bounding boxes, pruning
  away boxes that have high IOU (intersection over union) overlap (> thresh)
  with already selected boxes.  It operates independently for each class for
  which scores are provided (via the scores field of the input box_list),
  pruning boxes with score less than a provided threshold prior to
  applying NMS.

  Please note that this operation is performed on *all* classes, therefore any
  background classes should be removed prior to calling this function.

  Args:
    boxes: A [k, q, 4] float32 tensor containing k detections. `q` can be either
      number of classes or 1 depending on whether a separate box is predicted
      per class.
    scores: A [k, num_classes] float32 tensor containing the scores for each of
      the k detections.
    score_thresh: scalar threshold for score (low scoring boxes are removed).
    iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap
      with previously selected boxes are removed).
    max_size_per_class: maximum number of retained boxes per class.
    max_total_size: maximum number of boxes retained over all classes. By
      default returns all boxes retained after capping boxes per class.
    clip_window: A float32 tensor of the form [y_min, x_min, y_max, x_max]
      representing the window to clip and normalize boxes to before performing
      non-max suppression.
    change_coordinate_frame: Whether to normalize coordinates after clipping
      relative to clip_window (this can only be set to True if a clip_window
      is provided)
    masks: (optional) a [k, q, mask_height, mask_width] float32 tensor
      containing box masks. `q` can be either number of classes or 1 depending
      on whether a separate mask is predicted per class.
    additional_fields: (optional) If not None, a dictionary that maps keys to
      tensors whose first dimensions are all of size `k`. After non-maximum
      suppression, all tensors corresponding to the selected boxes will be
      added to resulting BoxList.
    scope: name scope.

  Returns:
    a BoxList holding M boxes with a rank-1 scores field representing
      corresponding scores for each box with scores sorted in decreasing order
      and a rank-1 classes field representing a class label for each box.
      If masks, keypoints, keypoint_heatmaps is not None, the boxlist will
      contain masks, keypoints, keypoint_heatmaps corresponding to boxes.

  Raises:
    ValueError: if iou_thresh is not in [0, 1] or if input boxlist does not have
      a valid scores field.
  """
  # Static (graph-construction time) validation of argument shapes and ranges.
  if not 0 <= iou_thresh <= 1.0:
    raise ValueError('iou_thresh must be between 0 and 1')
  if scores.shape.ndims != 2:
    raise ValueError('scores field must be of rank 2')
  if scores.shape[1].value is None:
    raise ValueError('scores must have statically defined second '
                     'dimension')
  if boxes.shape.ndims != 3:
    raise ValueError('boxes must be of rank 3.')
  if not (boxes.shape[1].value == scores.shape[1].value or
          boxes.shape[1].value == 1):
    raise ValueError('second dimension of boxes must be either 1 or equal '
                     'to the second dimension of scores')
  if boxes.shape[2].value != 4:
    raise ValueError('last dimension of boxes must be of size 4.')
  if change_coordinate_frame and clip_window is None:
    raise ValueError('if change_coordinate_frame is True, then a clip_window'
                     'must be specified.')

  with tf.name_scope(scope, 'MultiClassNonMaxSuppression'):
    num_boxes = tf.shape(boxes)[0]
    num_scores = tf.shape(scores)[0]
    num_classes = scores.get_shape()[1]

    # Runtime check that boxes and scores agree on the number of detections k.
    length_assert = tf.Assert(
        tf.equal(num_boxes, num_scores),
        ['Incorrect scores field length: actual vs expected.',
         num_scores, num_boxes])

    selected_boxes_list = []
    per_class_boxes_list = tf.unstack(boxes, axis=1)
    if masks is not None:
      per_class_masks_list = tf.unstack(masks, axis=1)
    # When q == 1 the single box (and mask) column is shared by every class;
    # otherwise class c uses its own column c.
    boxes_ids = (range(num_classes) if len(per_class_boxes_list) > 1
                 else [0] * num_classes)
    for class_idx, boxes_idx in zip(range(num_classes), boxes_ids):
      per_class_boxes = per_class_boxes_list[boxes_idx]
      boxlist_and_class_scores = box_list.BoxList(per_class_boxes)
      # The control dependency guarantees the length assertion fires before
      # the scores are sliced.
      with tf.control_dependencies([length_assert]):
        class_scores = tf.reshape(
            tf.slice(scores, [0, class_idx],
                     tf.stack([num_scores, 1])), [-1])
      boxlist_and_class_scores.add_field(fields.BoxListFields.scores,
                                         class_scores)
      if masks is not None:
        per_class_masks = per_class_masks_list[boxes_idx]
        boxlist_and_class_scores.add_field(fields.BoxListFields.masks,
                                           per_class_masks)
      if additional_fields is not None:
        for key, tensor in additional_fields.items():
          boxlist_and_class_scores.add_field(key, tensor)
      # Drop low-scoring boxes before NMS; optionally clip (and re-normalize)
      # the survivors to the clip window.
      boxlist_filtered = box_list_ops.filter_greater_than(
          boxlist_and_class_scores, score_thresh)
      if clip_window is not None:
        boxlist_filtered = box_list_ops.clip_to_window(
            boxlist_filtered, clip_window)
        if change_coordinate_frame:
          boxlist_filtered = box_list_ops.change_coordinate_frame(
              boxlist_filtered, clip_window)
      max_selection_size = tf.minimum(max_size_per_class,
                                      boxlist_filtered.num_boxes())
      selected_indices = tf.image.non_max_suppression(
          boxlist_filtered.get(),
          boxlist_filtered.get_field(fields.BoxListFields.scores),
          max_selection_size,
          iou_threshold=iou_thresh)
      nms_result = box_list_ops.gather(boxlist_filtered, selected_indices)
      # Label every surviving box of this iteration with its class index.
      nms_result.add_field(
          fields.BoxListFields.classes, (tf.zeros_like(
              nms_result.get_field(fields.BoxListFields.scores)) + class_idx))
      selected_boxes_list.append(nms_result)
    # Merge per-class survivors and sort by score across classes.
    selected_boxes = box_list_ops.concatenate(selected_boxes_list)
    sorted_boxes = box_list_ops.sort_by_field(selected_boxes,
                                              fields.BoxListFields.scores)
    if max_total_size:
      max_total_size = tf.minimum(max_total_size,
                                  sorted_boxes.num_boxes())
      sorted_boxes = box_list_ops.gather(sorted_boxes,
                                         tf.range(max_total_size))
    return sorted_boxes
def batch_multiclass_non_max_suppression(boxes,
                                         scores,
                                         score_thresh,
                                         iou_thresh,
                                         max_size_per_class,
                                         max_total_size=0,
                                         clip_window=None,
                                         change_coordinate_frame=False,
                                         num_valid_boxes=None,
                                         masks=None,
                                         scope=None):
  """Multi-class version of non maximum suppression that operates on a batch.

  This op is similar to `multiclass_non_max_suppression` but operates on a batch
  of boxes and scores. See documentation for `multiclass_non_max_suppression`
  for details.

  Args:
    boxes: A [batch_size, num_anchors, q, 4] float32 tensor containing
      detections. If `q` is 1 then same boxes are used for all classes
        otherwise, if `q` is equal to number of classes, class-specific boxes
        are used.
    scores: A [batch_size, num_anchors, num_classes] float32 tensor containing
      the scores for each of the `num_anchors` detections.
    score_thresh: scalar threshold for score (low scoring boxes are removed).
    iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap
      with previously selected boxes are removed).
    max_size_per_class: maximum number of retained boxes per class.
    max_total_size: maximum number of boxes retained over all classes. By
      default returns all boxes retained after capping boxes per class.
    clip_window: A float32 tensor of the form [y_min, x_min, y_max, x_max]
      representing the window to clip boxes to before performing non-max
      suppression.
    change_coordinate_frame: Whether to normalize coordinates after clipping
      relative to clip_window (this can only be set to True if a clip_window
      is provided)
    num_valid_boxes: (optional) a Tensor of type `int32`. A 1-D tensor of shape
      [batch_size] representing the number of valid boxes to be considered
      for each image in the batch.  This parameter allows for ignoring zero
      paddings.
    masks: (optional) a [batch_size, num_anchors, q, mask_height, mask_width]
      float32 tensor containing box masks. `q` can be either number of classes
      or 1 depending on whether a separate mask is predicted per class.
    scope: tf scope name.

  Returns:
    A dictionary containing the following entries:
    'detection_boxes': A [batch_size, max_detections, 4] float32 tensor
      containing the non-max suppressed boxes.
    'detection_scores': A [batch_size, max_detections] float32 tensor containing
      the scores for the boxes.
    'detection_classes': A [batch_size, max_detections] float32 tensor
      containing the class for boxes.
    'num_detections': A [batch_size] float32 tensor indicating the number of
      valid detections per batch item. Only the top num_detections[i] entries in
      nms_boxes[i], nms_scores[i] and nms_class[i] are valid. the rest of the
      entries are zero paddings.
    'detection_masks': (optional) a
      [batch_size, max_detections, mask_height, mask_width] float32 tensor
      containing masks for each selected box.

  Raises:
    ValueError: if iou_thresh is not in [0, 1] or if input boxlist does not have
      a valid scores field.
  """
  q = boxes.shape[2].value
  num_classes = scores.shape[2].value
  if q != 1 and q != num_classes:
    raise ValueError('third dimension of boxes must be either 1 or equal '
                     'to the third dimension of scores')

  with tf.name_scope(scope, 'BatchMultiClassNonMaxSuppression'):
    per_image_boxes_list = tf.unstack(boxes)
    per_image_scores_list = tf.unstack(scores)
    # Placeholders (None) are used when the optional inputs are absent so that
    # the zip below always has four parallel lists of equal length.
    num_valid_boxes_list = len(per_image_boxes_list) * [None]
    per_image_masks_list = len(per_image_boxes_list) * [None]
    if num_valid_boxes is not None:
      num_valid_boxes_list = tf.unstack(num_valid_boxes)
    if masks is not None:
      per_image_masks_list = tf.unstack(masks)

    detection_boxes_list = []
    detection_scores_list = []
    detection_classes_list = []
    num_detections_list = []
    detection_masks_list = []
    # NOTE(review): the loop target below rebinds (shadows) the
    # `num_valid_boxes` parameter with the per-image value; the batch-level
    # tensor is not needed after the unstack above, but renaming would be
    # clearer.
    for (per_image_boxes, per_image_scores, per_image_masks,
         num_valid_boxes) in zip(per_image_boxes_list, per_image_scores_list,
                                 per_image_masks_list, num_valid_boxes_list):
      if num_valid_boxes is not None:
        # Strip zero-padding: keep only the first num_valid_boxes anchors.
        per_image_boxes = tf.reshape(
            tf.slice(per_image_boxes, 3 * [0],
                     tf.stack([num_valid_boxes, -1, -1])), [-1, q, 4])
        per_image_scores = tf.reshape(
            tf.slice(per_image_scores, [0, 0],
                     tf.stack([num_valid_boxes, -1])), [-1, num_classes])
        if masks is not None:
          per_image_masks = tf.reshape(
              tf.slice(per_image_masks, 4 * [0],
                       tf.stack([num_valid_boxes, -1, -1, -1])),
              [-1, q, masks.shape[3].value, masks.shape[4].value])
      # Run single-image NMS, then pad/clip back to a fixed size so the
      # per-image results can be stacked into batch tensors.
      nmsed_boxlist = multiclass_non_max_suppression(
          per_image_boxes,
          per_image_scores,
          score_thresh,
          iou_thresh,
          max_size_per_class,
          max_total_size,
          masks=per_image_masks,
          clip_window=clip_window,
          change_coordinate_frame=change_coordinate_frame)
      num_detections_list.append(tf.to_float(nmsed_boxlist.num_boxes()))
      padded_boxlist = box_list_ops.pad_or_clip_box_list(nmsed_boxlist,
                                                         max_total_size)
      detection_boxes_list.append(padded_boxlist.get())
      detection_scores_list.append(
          padded_boxlist.get_field(fields.BoxListFields.scores))
      detection_classes_list.append(
          padded_boxlist.get_field(fields.BoxListFields.classes))
      if masks is not None:
        detection_masks_list.append(
            padded_boxlist.get_field(fields.BoxListFields.masks))

    nms_dict = {
        'detection_boxes': tf.stack(detection_boxes_list),
        'detection_scores': tf.stack(detection_scores_list),
        'detection_classes': tf.stack(detection_classes_list),
        'num_detections': tf.stack(num_detections_list)
    }
    if masks is not None:
      nms_dict['detection_masks'] = tf.stack(detection_masks_list)
    return nms_dict
object_detection/core/post_processing_test.py
0 → 100644
View file @
44fa1d37
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for tensorflow_models.object_detection.core.post_processing."""
import
numpy
as
np
import
tensorflow
as
tf
from
object_detection.core
import
post_processing
from
object_detection.core
import
standard_fields
as
fields
class
MulticlassNonMaxSuppressionTest
(
tf
.
test
.
TestCase
):
  def test_with_invalid_scores_size(self):
    # Deliberate mismatch: 6 boxes but only 5 score rows, which must trigger
    # the runtime length assertion inside multiclass_non_max_suppression.
    boxes = tf.constant([[[0, 0, 1, 1]],
                         [[0, 0.1, 1, 1.1]],
                         [[0, -0.1, 1, 0.9]],
                         [[0, 10, 1, 11]],
                         [[0, 10.1, 1, 11.1]],
                         [[0, 100, 1, 101]]], tf.float32)
    scores = tf.constant([[.9], [.75], [.6], [.95], [.5]])
    iou_thresh = .5
    score_thresh = 0.6
    max_output_size = 3
    nms = post_processing.multiclass_non_max_suppression(
        boxes, scores, score_thresh, iou_thresh, max_output_size)
    with self.test_session() as sess:
      # The mismatch is only detectable when the graph runs, not at build time.
      with self.assertRaisesWithPredicateMatch(
          tf.errors.InvalidArgumentError, 'Incorrect scores field length'):
        sess.run(nms.get())
  def test_multiclass_nms_select_with_shared_boxes(self):
    # q == 1: a single box per anchor is shared across both classes.
    boxes = tf.constant([[[0, 0, 1, 1]],
                         [[0, 0.1, 1, 1.1]],
                         [[0, -0.1, 1, 0.9]],
                         [[0, 10, 1, 11]],
                         [[0, 10.1, 1, 11.1]],
                         [[0, 100, 1, 101]],
                         [[0, 1000, 1, 1002]],
                         [[0, 1000, 1, 1002.1]]], tf.float32)
    scores = tf.constant([[.9, 0.01], [.75, 0.05],
                          [.6, 0.01], [.95, 0],
                          [.5, 0.01], [.3, 0.01],
                          [.01, .85], [.01, .5]])
    score_thresh = 0.1
    iou_thresh = .5
    max_output_size = 4

    # Expected survivors, sorted by descending score across both classes.
    exp_nms_corners = [[0, 10, 1, 11],
                       [0, 0, 1, 1],
                       [0, 1000, 1, 1002],
                       [0, 100, 1, 101]]
    exp_nms_scores = [.95, .9, .85, .3]
    exp_nms_classes = [0, 0, 1, 0]

    nms = post_processing.multiclass_non_max_suppression(
        boxes, scores, score_thresh, iou_thresh, max_output_size)
    with self.test_session() as sess:
      nms_corners_output, nms_scores_output, nms_classes_output = sess.run(
          [nms.get(), nms.get_field(fields.BoxListFields.scores),
           nms.get_field(fields.BoxListFields.classes)])
      self.assertAllClose(nms_corners_output, exp_nms_corners)
      self.assertAllClose(nms_scores_output, exp_nms_scores)
      self.assertAllClose(nms_classes_output, exp_nms_classes)
  def test_multiclass_nms_select_with_shared_boxes_given_keypoints(self):
    # Same box/score setup as the shared-boxes test, plus a per-box keypoints
    # tensor passed through `additional_fields` — it must be gathered together
    # with the surviving boxes.
    boxes = tf.constant([[[0, 0, 1, 1]],
                         [[0, 0.1, 1, 1.1]],
                         [[0, -0.1, 1, 0.9]],
                         [[0, 10, 1, 11]],
                         [[0, 10.1, 1, 11.1]],
                         [[0, 100, 1, 101]],
                         [[0, 1000, 1, 1002]],
                         [[0, 1000, 1, 1002.1]]], tf.float32)
    scores = tf.constant([[.9, 0.01], [.75, 0.05],
                          [.6, 0.01], [.95, 0],
                          [.5, 0.01], [.3, 0.01],
                          [.01, .85], [.01, .5]])
    num_keypoints = 6
    # Box i carries keypoints filled with the value i, so the gathered rows
    # identify which boxes survived NMS.
    keypoints = tf.tile(
        tf.reshape(tf.range(8), [8, 1, 1]),
        [1, num_keypoints, 2])
    score_thresh = 0.1
    iou_thresh = .5
    max_output_size = 4

    exp_nms_corners = [[0, 10, 1, 11],
                       [0, 0, 1, 1],
                       [0, 1000, 1, 1002],
                       [0, 100, 1, 101]]
    exp_nms_scores = [.95, .9, .85, .3]
    exp_nms_classes = [0, 0, 1, 0]
    # Survivors are original boxes 3, 0, 6, 5 (in score order).
    exp_nms_keypoints_tensor = tf.tile(
        tf.reshape(tf.constant([3, 0, 6, 5], dtype=tf.float32), [4, 1, 1]),
        [1, num_keypoints, 2])

    nms = post_processing.multiclass_non_max_suppression(
        boxes,
        scores,
        score_thresh,
        iou_thresh,
        max_output_size,
        additional_fields={fields.BoxListFields.keypoints: keypoints})

    with self.test_session() as sess:
      (nms_corners_output,
       nms_scores_output,
       nms_classes_output,
       nms_keypoints,
       exp_nms_keypoints) = sess.run([
           nms.get(),
           nms.get_field(fields.BoxListFields.scores),
           nms.get_field(fields.BoxListFields.classes),
           nms.get_field(fields.BoxListFields.keypoints),
           exp_nms_keypoints_tensor
       ])
      self.assertAllClose(nms_corners_output, exp_nms_corners)
      self.assertAllClose(nms_scores_output, exp_nms_scores)
      self.assertAllClose(nms_classes_output, exp_nms_classes)
      self.assertAllEqual(nms_keypoints, exp_nms_keypoints)
def test_multiclass_nms_with_shared_boxes_given_keypoint_heatmaps(self):
  """NMS gathers a rank-4 keypoint-heatmap additional field per kept box.

  All heatmaps are ones, so the check is that the field is carried through
  NMS with the expected post-NMS shape [4, height, width, num_keypoints].
  """
  boxes = tf.constant([[[0, 0, 1, 1]],
                       [[0, 0.1, 1, 1.1]],
                       [[0, -0.1, 1, 0.9]],
                       [[0, 10, 1, 11]],
                       [[0, 10.1, 1, 11.1]],
                       [[0, 100, 1, 101]],
                       [[0, 1000, 1, 1002]],
                       [[0, 1000, 1, 1002.1]]], tf.float32)
  scores = tf.constant([[.9, 0.01], [.75, 0.05],
                        [.6, 0.01], [.95, 0],
                        [.5, 0.01], [.3, 0.01],
                        [.01, .85], [.01, .5]])
  num_boxes = tf.shape(boxes)[0]
  heatmap_height = 5
  heatmap_width = 5
  num_keypoints = 17
  keypoint_heatmaps = tf.ones(
      [num_boxes, heatmap_height, heatmap_width, num_keypoints],
      dtype=tf.float32)
  score_thresh = 0.1
  iou_thresh = .5
  max_output_size = 4
  exp_nms_corners = [[0, 10, 1, 11],
                     [0, 0, 1, 1],
                     [0, 1000, 1, 1002],
                     [0, 100, 1, 101]]
  exp_nms_scores = [.95, .9, .85, .3]
  exp_nms_classes = [0, 0, 1, 0]
  exp_nms_keypoint_heatmaps = np.ones(
      (4, heatmap_height, heatmap_width, num_keypoints), dtype=np.float32)
  nms = post_processing.multiclass_non_max_suppression(
      boxes, scores, score_thresh, iou_thresh, max_output_size,
      additional_fields={
          fields.BoxListFields.keypoint_heatmaps: keypoint_heatmaps})
  with self.test_session() as sess:
    (nms_corners_output, nms_scores_output, nms_classes_output,
     nms_keypoint_heatmaps) = sess.run(
         [nms.get(),
          nms.get_field(fields.BoxListFields.scores),
          nms.get_field(fields.BoxListFields.classes),
          nms.get_field(fields.BoxListFields.keypoint_heatmaps)])
    self.assertAllClose(nms_corners_output, exp_nms_corners)
    self.assertAllClose(nms_scores_output, exp_nms_scores)
    self.assertAllClose(nms_classes_output, exp_nms_classes)
    self.assertAllEqual(nms_keypoint_heatmaps, exp_nms_keypoint_heatmaps)
def test_multiclass_nms_with_additional_fields(self):
  """NMS carries an arbitrary (string-keyed) additional field through.

  Each anchor has a distinct "coarse box" (its box shifted by 0.1); the
  gathered coarse boxes therefore identify which anchors survived NMS.
  """
  boxes = tf.constant([[[0, 0, 1, 1]],
                       [[0, 0.1, 1, 1.1]],
                       [[0, -0.1, 1, 0.9]],
                       [[0, 10, 1, 11]],
                       [[0, 10.1, 1, 11.1]],
                       [[0, 100, 1, 101]],
                       [[0, 1000, 1, 1002]],
                       [[0, 1000, 1, 1002.1]]], tf.float32)
  scores = tf.constant([[.9, 0.01], [.75, 0.05],
                        [.6, 0.01], [.95, 0],
                        [.5, 0.01], [.3, 0.01],
                        [.01, .85], [.01, .5]])
  coarse_boxes_key = 'coarse_boxes'
  coarse_boxes = tf.constant([[0.1, 0.1, 1.1, 1.1],
                              [0.1, 0.2, 1.1, 1.2],
                              [0.1, -0.2, 1.1, 1.0],
                              [0.1, 10.1, 1.1, 11.1],
                              [0.1, 10.2, 1.1, 11.2],
                              [0.1, 100.1, 1.1, 101.1],
                              [0.1, 1000.1, 1.1, 1002.1],
                              [0.1, 1000.1, 1.1, 1002.2]], tf.float32)
  score_thresh = 0.1
  iou_thresh = .5
  max_output_size = 4
  exp_nms_corners = np.array([[0, 10, 1, 11],
                              [0, 0, 1, 1],
                              [0, 1000, 1, 1002],
                              [0, 100, 1, 101]], dtype=np.float32)
  # Coarse boxes of the surviving anchors, in descending-score order.
  exp_nms_coarse_corners = np.array([[0.1, 10.1, 1.1, 11.1],
                                     [0.1, 0.1, 1.1, 1.1],
                                     [0.1, 1000.1, 1.1, 1002.1],
                                     [0.1, 100.1, 1.1, 101.1]],
                                    dtype=np.float32)
  exp_nms_scores = [.95, .9, .85, .3]
  exp_nms_classes = [0, 0, 1, 0]
  nms = post_processing.multiclass_non_max_suppression(
      boxes, scores, score_thresh, iou_thresh, max_output_size,
      additional_fields={coarse_boxes_key: coarse_boxes})
  with self.test_session() as sess:
    (nms_corners_output, nms_scores_output, nms_classes_output,
     nms_coarse_corners) = sess.run(
         [nms.get(),
          nms.get_field(fields.BoxListFields.scores),
          nms.get_field(fields.BoxListFields.classes),
          nms.get_field(coarse_boxes_key)])
    self.assertAllClose(nms_corners_output, exp_nms_corners)
    self.assertAllClose(nms_scores_output, exp_nms_scores)
    self.assertAllClose(nms_classes_output, exp_nms_classes)
    self.assertAllEqual(nms_coarse_corners, exp_nms_coarse_corners)
def test_multiclass_nms_select_with_shared_boxes_given_masks(self):
  """NMS with per-class instance masks gathers the winning class's mask.

  Masks for anchor i are filled with the constant i, so the post-NMS masks
  (constants 3, 0, 6, 5) identify the surviving anchors.
  """
  boxes = tf.constant([[[0, 0, 1, 1]],
                       [[0, 0.1, 1, 1.1]],
                       [[0, -0.1, 1, 0.9]],
                       [[0, 10, 1, 11]],
                       [[0, 10.1, 1, 11.1]],
                       [[0, 100, 1, 101]],
                       [[0, 1000, 1, 1002]],
                       [[0, 1000, 1, 1002.1]]], tf.float32)
  scores = tf.constant([[.9, 0.01], [.75, 0.05],
                        [.6, 0.01], [.95, 0],
                        [.5, 0.01], [.3, 0.01],
                        [.01, .85], [.01, .5]])
  num_classes = 2
  mask_height = 3
  mask_width = 3
  # Masks shaped [8, num_classes, mask_height, mask_width]; anchor i's masks
  # contain the value i for every class.
  masks = tf.tile(
      tf.reshape(tf.range(8), [8, 1, 1, 1]),
      [1, num_classes, mask_height, mask_width])
  score_thresh = 0.1
  iou_thresh = .5
  max_output_size = 4
  exp_nms_corners = [[0, 10, 1, 11],
                     [0, 0, 1, 1],
                     [0, 1000, 1, 1002],
                     [0, 100, 1, 101]]
  exp_nms_scores = [.95, .9, .85, .3]
  exp_nms_classes = [0, 0, 1, 0]
  exp_nms_masks_tensor = tf.tile(
      tf.reshape(tf.constant([3, 0, 6, 5], dtype=tf.float32), [4, 1, 1]),
      [1, mask_height, mask_width])
  nms = post_processing.multiclass_non_max_suppression(
      boxes, scores, score_thresh, iou_thresh, max_output_size, masks=masks)
  with self.test_session() as sess:
    (nms_corners_output, nms_scores_output, nms_classes_output,
     nms_masks, exp_nms_masks) = sess.run([
        nms.get(),
        nms.get_field(fields.BoxListFields.scores),
        nms.get_field(fields.BoxListFields.classes),
        nms.get_field(fields.BoxListFields.masks),
        exp_nms_masks_tensor])
    self.assertAllClose(nms_corners_output, exp_nms_corners)
    self.assertAllClose(nms_scores_output, exp_nms_scores)
    self.assertAllClose(nms_classes_output, exp_nms_classes)
    self.assertAllEqual(nms_masks, exp_nms_masks)
def test_multiclass_nms_select_with_clip_window(self):
  """Boxes are clipped to the window; only one survives after NMS.

  Both inputs clip to the same window [5, 4, 8, 7], so they fully overlap
  after clipping and only the higher-scoring one is kept.
  """
  boxes = tf.constant([[[0, 0, 10, 10]],
                       [[1, 1, 11, 11]]], tf.float32)
  scores = tf.constant([[.9], [.75]])
  clip_window = tf.constant([5, 4, 8, 7], tf.float32)
  score_thresh = 0.0
  iou_thresh = 0.5
  max_output_size = 100
  exp_nms_corners = [[5, 4, 8, 7]]
  exp_nms_scores = [.9]
  exp_nms_classes = [0]
  nms = post_processing.multiclass_non_max_suppression(
      boxes, scores, score_thresh, iou_thresh, max_output_size,
      clip_window=clip_window)
  with self.test_session() as sess:
    nms_corners_output, nms_scores_output, nms_classes_output = sess.run(
        [nms.get(),
         nms.get_field(fields.BoxListFields.scores),
         nms.get_field(fields.BoxListFields.classes)])
    self.assertAllClose(nms_corners_output, exp_nms_corners)
    self.assertAllClose(nms_scores_output, exp_nms_scores)
    self.assertAllClose(nms_classes_output, exp_nms_classes)
def test_multiclass_nms_select_with_clip_window_change_coordinate_frame(self):
  """Same as the clip-window test, but output is re-normalized to the window.

  With change_coordinate_frame=True the clipped box [5, 4, 8, 7] is expressed
  relative to the window itself, i.e. the full window [0, 0, 1, 1].
  """
  boxes = tf.constant([[[0, 0, 10, 10]],
                       [[1, 1, 11, 11]]], tf.float32)
  scores = tf.constant([[.9], [.75]])
  clip_window = tf.constant([5, 4, 8, 7], tf.float32)
  score_thresh = 0.0
  iou_thresh = 0.5
  max_output_size = 100
  exp_nms_corners = [[0, 0, 1, 1]]
  exp_nms_scores = [.9]
  exp_nms_classes = [0]
  nms = post_processing.multiclass_non_max_suppression(
      boxes, scores, score_thresh, iou_thresh, max_output_size,
      clip_window=clip_window, change_coordinate_frame=True)
  with self.test_session() as sess:
    nms_corners_output, nms_scores_output, nms_classes_output = sess.run(
        [nms.get(),
         nms.get_field(fields.BoxListFields.scores),
         nms.get_field(fields.BoxListFields.classes)])
    self.assertAllClose(nms_corners_output, exp_nms_corners)
    self.assertAllClose(nms_scores_output, exp_nms_scores)
    self.assertAllClose(nms_classes_output, exp_nms_classes)
def test_multiclass_nms_select_with_per_class_cap(self):
  """A per-class cap of 2 drops the lowest-scoring class-0 survivor.

  Without the cap, class 0 would keep three boxes (.95, .9, .3); capping at
  2 per class removes the .3 detection, leaving 2 class-0 + 1 class-1.
  """
  boxes = tf.constant([[[0, 0, 1, 1]],
                       [[0, 0.1, 1, 1.1]],
                       [[0, -0.1, 1, 0.9]],
                       [[0, 10, 1, 11]],
                       [[0, 10.1, 1, 11.1]],
                       [[0, 100, 1, 101]],
                       [[0, 1000, 1, 1002]],
                       [[0, 1000, 1, 1002.1]]], tf.float32)
  scores = tf.constant([[.9, 0.01], [.75, 0.05],
                        [.6, 0.01], [.95, 0],
                        [.5, 0.01], [.3, 0.01],
                        [.01, .85], [.01, .5]])
  score_thresh = 0.1
  iou_thresh = .5
  max_size_per_class = 2
  exp_nms_corners = [[0, 10, 1, 11],
                     [0, 0, 1, 1],
                     [0, 1000, 1, 1002]]
  exp_nms_scores = [.95, .9, .85]
  exp_nms_classes = [0, 0, 1]
  nms = post_processing.multiclass_non_max_suppression(
      boxes, scores, score_thresh, iou_thresh, max_size_per_class)
  with self.test_session() as sess:
    nms_corners_output, nms_scores_output, nms_classes_output = sess.run(
        [nms.get(),
         nms.get_field(fields.BoxListFields.scores),
         nms.get_field(fields.BoxListFields.classes)])
    self.assertAllClose(nms_corners_output, exp_nms_corners)
    self.assertAllClose(nms_scores_output, exp_nms_scores)
    self.assertAllClose(nms_classes_output, exp_nms_classes)
def test_multiclass_nms_select_with_total_cap(self):
  """A total cap of 2 keeps only the two highest-scoring detections overall."""
  boxes = tf.constant([[[0, 0, 1, 1]],
                       [[0, 0.1, 1, 1.1]],
                       [[0, -0.1, 1, 0.9]],
                       [[0, 10, 1, 11]],
                       [[0, 10.1, 1, 11.1]],
                       [[0, 100, 1, 101]],
                       [[0, 1000, 1, 1002]],
                       [[0, 1000, 1, 1002.1]]], tf.float32)
  scores = tf.constant([[.9, 0.01], [.75, 0.05],
                        [.6, 0.01], [.95, 0],
                        [.5, 0.01], [.3, 0.01],
                        [.01, .85], [.01, .5]])
  score_thresh = 0.1
  iou_thresh = .5
  max_size_per_class = 4
  max_total_size = 2
  exp_nms_corners = [[0, 10, 1, 11],
                     [0, 0, 1, 1]]
  exp_nms_scores = [.95, .9]
  exp_nms_classes = [0, 0]
  nms = post_processing.multiclass_non_max_suppression(
      boxes, scores, score_thresh, iou_thresh, max_size_per_class,
      max_total_size)
  with self.test_session() as sess:
    nms_corners_output, nms_scores_output, nms_classes_output = sess.run(
        [nms.get(),
         nms.get_field(fields.BoxListFields.scores),
         nms.get_field(fields.BoxListFields.classes)])
    self.assertAllClose(nms_corners_output, exp_nms_corners)
    self.assertAllClose(nms_scores_output, exp_nms_scores)
    self.assertAllClose(nms_classes_output, exp_nms_classes)
def test_multiclass_nms_threshold_then_select_with_shared_boxes(self):
  """Score thresholding first removes the .01 boxes, then NMS keeps top 3.

  Single-class scores: the two 1000-range boxes fall below score_thresh, and
  the overlapping near-duplicates are suppressed, leaving 3 distinct boxes.
  """
  boxes = tf.constant([[[0, 0, 1, 1]],
                       [[0, 0.1, 1, 1.1]],
                       [[0, -0.1, 1, 0.9]],
                       [[0, 10, 1, 11]],
                       [[0, 10.1, 1, 11.1]],
                       [[0, 100, 1, 101]],
                       [[0, 1000, 1, 1002]],
                       [[0, 1000, 1, 1002.1]]], tf.float32)
  scores = tf.constant([[.9], [.75], [.6], [.95], [.5], [.3], [.01], [.01]])
  score_thresh = 0.1
  iou_thresh = .5
  max_output_size = 3
  exp_nms = [[0, 10, 1, 11],
             [0, 0, 1, 1],
             [0, 100, 1, 101]]
  nms = post_processing.multiclass_non_max_suppression(
      boxes, scores, score_thresh, iou_thresh, max_output_size)
  with self.test_session() as sess:
    nms_output = sess.run(nms.get())
    self.assertAllClose(nms_output, exp_nms)
def test_multiclass_nms_select_with_separate_boxes(self):
  """NMS with per-class box predictions (boxes shaped [N, num_classes, 4]).

  The class-1 winner uses its own (wider) box [0, 999, 2, 1004] rather than
  the class-0 box for the same anchor.
  """
  boxes = tf.constant([[[0, 0, 1, 1], [0, 0, 4, 5]],
                       [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
                       [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
                       [[0, 10, 1, 11], [0, 10, 1, 11]],
                       [[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
                       [[0, 100, 1, 101], [0, 100, 1, 101]],
                       [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
                       [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]],
                      tf.float32)
  scores = tf.constant([[.9, 0.01], [.75, 0.05],
                        [.6, 0.01], [.95, 0],
                        [.5, 0.01], [.3, 0.01],
                        [.01, .85], [.01, .5]])
  score_thresh = 0.1
  iou_thresh = .5
  max_output_size = 4
  exp_nms_corners = [[0, 10, 1, 11],
                     [0, 0, 1, 1],
                     [0, 999, 2, 1004],
                     [0, 100, 1, 101]]
  exp_nms_scores = [.95, .9, .85, .3]
  exp_nms_classes = [0, 0, 1, 0]
  nms = post_processing.multiclass_non_max_suppression(
      boxes, scores, score_thresh, iou_thresh, max_output_size)
  with self.test_session() as sess:
    nms_corners_output, nms_scores_output, nms_classes_output = sess.run(
        [nms.get(),
         nms.get_field(fields.BoxListFields.scores),
         nms.get_field(fields.BoxListFields.classes)])
    self.assertAllClose(nms_corners_output, exp_nms_corners)
    self.assertAllClose(nms_scores_output, exp_nms_scores)
    self.assertAllClose(nms_classes_output, exp_nms_classes)
def test_batch_multiclass_nms_with_batch_size_1(self):
  """Batched NMS with a single image matches the non-batched expectations.

  Boxes are shaped [1, 8, 2, 4] (batch of one image, per-class boxes); the
  output tensors carry a leading batch dimension and num_detections == [4].
  """
  boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
                        [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
                        [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
                        [[0, 10, 1, 11], [0, 10, 1, 11]],
                        [[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
                        [[0, 100, 1, 101], [0, 100, 1, 101]],
                        [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
                        [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
                      tf.float32)
  scores = tf.constant([[[.9, 0.01], [.75, 0.05],
                         [.6, 0.01], [.95, 0],
                         [.5, 0.01], [.3, 0.01],
                         [.01, .85], [.01, .5]]])
  score_thresh = 0.1
  iou_thresh = .5
  max_output_size = 4
  exp_nms_corners = [[[0, 10, 1, 11],
                      [0, 0, 1, 1],
                      [0, 999, 2, 1004],
                      [0, 100, 1, 101]]]
  exp_nms_scores = [[.95, .9, .85, .3]]
  exp_nms_classes = [[0, 0, 1, 0]]
  nms_dict = post_processing.batch_multiclass_non_max_suppression(
      boxes, scores, score_thresh, iou_thresh,
      max_size_per_class=max_output_size, max_total_size=max_output_size)
  with self.test_session() as sess:
    nms_output = sess.run(nms_dict)
    self.assertAllClose(nms_output['detection_boxes'], exp_nms_corners)
    self.assertAllClose(nms_output['detection_scores'], exp_nms_scores)
    self.assertAllClose(nms_output['detection_classes'], exp_nms_classes)
    # assertAllClose (not assertEqual) performs an element-wise comparison
    # of the returned numpy array against the expected list, consistent
    # with test_batch_multiclass_nms_with_batch_size_2.
    self.assertAllClose(nms_output['num_detections'], [4])
def test_batch_multiclass_nms_with_batch_size_2(self):
  """Batched NMS pads each image's detections to max_total_size with zeros.

  Image 0 yields 2 detections and image 1 yields 3; the remaining slots in
  boxes/scores/classes are zero-filled and num_detections reports [2, 3].
  """
  boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
                        [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
                        [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
                        [[0, 10, 1, 11], [0, 10, 1, 11]]],
                       [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
                        [[0, 100, 1, 101], [0, 100, 1, 101]],
                        [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
                        [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
                      tf.float32)
  scores = tf.constant([[[.9, 0.01], [.75, 0.05],
                         [.6, 0.01], [.95, 0]],
                        [[.5, 0.01], [.3, 0.01],
                         [.01, .85], [.01, .5]]])
  score_thresh = 0.1
  iou_thresh = .5
  max_output_size = 4
  exp_nms_corners = [[[0, 10, 1, 11],
                      [0, 0, 1, 1],
                      [0, 0, 0, 0],
                      [0, 0, 0, 0]],
                     [[0, 999, 2, 1004],
                      [0, 10.1, 1, 11.1],
                      [0, 100, 1, 101],
                      [0, 0, 0, 0]]]
  exp_nms_scores = [[.95, .9, 0, 0],
                    [.85, .5, .3, 0]]
  exp_nms_classes = [[0, 0, 0, 0],
                     [1, 0, 0, 0]]
  nms_dict = post_processing.batch_multiclass_non_max_suppression(
      boxes, scores, score_thresh, iou_thresh,
      max_size_per_class=max_output_size, max_total_size=max_output_size)
  with self.test_session() as sess:
    nms_output = sess.run(nms_dict)
    self.assertAllClose(nms_output['detection_boxes'], exp_nms_corners)
    self.assertAllClose(nms_output['detection_scores'], exp_nms_scores)
    self.assertAllClose(nms_output['detection_classes'], exp_nms_classes)
    self.assertAllClose(nms_output['num_detections'], [2, 3])
def test_batch_multiclass_nms_with_masks(self):
  """Batched NMS also gathers per-class instance masks for kept detections.

  Masks are shaped [batch, num_anchors, num_classes, 2, 2] with distinct
  values per anchor/class, so the gathered detection_masks identify which
  anchor and class each kept detection came from; padded slots are zero.
  """
  boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
                        [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
                        [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
                        [[0, 10, 1, 11], [0, 10, 1, 11]]],
                       [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
                        [[0, 100, 1, 101], [0, 100, 1, 101]],
                        [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
                        [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
                      tf.float32)
  scores = tf.constant([[[.9, 0.01], [.75, 0.05],
                         [.6, 0.01], [.95, 0]],
                        [[.5, 0.01], [.3, 0.01],
                         [.01, .85], [.01, .5]]])
  masks = tf.constant([[[[[0, 1], [2, 3]], [[1, 2], [3, 4]]],
                        [[[2, 3], [4, 5]], [[3, 4], [5, 6]]],
                        [[[4, 5], [6, 7]], [[5, 6], [7, 8]]],
                        [[[6, 7], [8, 9]], [[7, 8], [9, 10]]]],
                       [[[[8, 9], [10, 11]], [[9, 10], [11, 12]]],
                        [[[10, 11], [12, 13]], [[11, 12], [13, 14]]],
                        [[[12, 13], [14, 15]], [[13, 14], [15, 16]]],
                        [[[14, 15], [16, 17]], [[15, 16], [17, 18]]]]],
                      tf.float32)
  score_thresh = 0.1
  iou_thresh = .5
  max_output_size = 4
  exp_nms_corners = [[[0, 10, 1, 11],
                      [0, 0, 1, 1],
                      [0, 0, 0, 0],
                      [0, 0, 0, 0]],
                     [[0, 999, 2, 1004],
                      [0, 10.1, 1, 11.1],
                      [0, 100, 1, 101],
                      [0, 0, 0, 0]]]
  exp_nms_scores = [[.95, .9, 0, 0],
                    [.85, .5, .3, 0]]
  exp_nms_classes = [[0, 0, 0, 0],
                     [1, 0, 0, 0]]
  exp_nms_masks = [[[[6, 7], [8, 9]],
                    [[0, 1], [2, 3]],
                    [[0, 0], [0, 0]],
                    [[0, 0], [0, 0]]],
                   [[[13, 14], [15, 16]],
                    [[8, 9], [10, 11]],
                    [[10, 11], [12, 13]],
                    [[0, 0], [0, 0]]]]
  nms_dict = post_processing.batch_multiclass_non_max_suppression(
      boxes, scores, score_thresh, iou_thresh,
      max_size_per_class=max_output_size, max_total_size=max_output_size,
      masks=masks)
  with self.test_session() as sess:
    nms_output = sess.run(nms_dict)
    self.assertAllClose(nms_output['detection_boxes'], exp_nms_corners)
    self.assertAllClose(nms_output['detection_scores'], exp_nms_scores)
    self.assertAllClose(nms_output['detection_classes'], exp_nms_classes)
    self.assertAllClose(nms_output['num_detections'], [2, 3])
    self.assertAllClose(nms_output['detection_masks'], exp_nms_masks)
def test_batch_multiclass_nms_with_masks_and_num_valid_boxes(self):
  """num_valid_boxes restricts NMS to the first valid anchors per image.

  With num_valid_boxes == [1, 1] only anchor 0 of each image is considered,
  so each image yields exactly one detection (with its mask); all other
  output slots are zero-padded.
  """
  boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
                        [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
                        [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
                        [[0, 10, 1, 11], [0, 10, 1, 11]]],
                       [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
                        [[0, 100, 1, 101], [0, 100, 1, 101]],
                        [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
                        [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
                      tf.float32)
  scores = tf.constant([[[.9, 0.01], [.75, 0.05],
                         [.6, 0.01], [.95, 0]],
                        [[.5, 0.01], [.3, 0.01],
                         [.01, .85], [.01, .5]]])
  masks = tf.constant([[[[[0, 1], [2, 3]], [[1, 2], [3, 4]]],
                        [[[2, 3], [4, 5]], [[3, 4], [5, 6]]],
                        [[[4, 5], [6, 7]], [[5, 6], [7, 8]]],
                        [[[6, 7], [8, 9]], [[7, 8], [9, 10]]]],
                       [[[[8, 9], [10, 11]], [[9, 10], [11, 12]]],
                        [[[10, 11], [12, 13]], [[11, 12], [13, 14]]],
                        [[[12, 13], [14, 15]], [[13, 14], [15, 16]]],
                        [[[14, 15], [16, 17]], [[15, 16], [17, 18]]]]],
                      tf.float32)
  num_valid_boxes = tf.constant([1, 1], tf.int32)
  score_thresh = 0.1
  iou_thresh = .5
  max_output_size = 4
  exp_nms_corners = [[[0, 0, 1, 1],
                      [0, 0, 0, 0],
                      [0, 0, 0, 0],
                      [0, 0, 0, 0]],
                     [[0, 10.1, 1, 11.1],
                      [0, 0, 0, 0],
                      [0, 0, 0, 0],
                      [0, 0, 0, 0]]]
  exp_nms_scores = [[.9, 0, 0, 0],
                    [.5, 0, 0, 0]]
  exp_nms_classes = [[0, 0, 0, 0],
                     [0, 0, 0, 0]]
  exp_nms_masks = [[[[0, 1], [2, 3]],
                    [[0, 0], [0, 0]],
                    [[0, 0], [0, 0]],
                    [[0, 0], [0, 0]]],
                   [[[8, 9], [10, 11]],
                    [[0, 0], [0, 0]],
                    [[0, 0], [0, 0]],
                    [[0, 0], [0, 0]]]]
  nms_dict = post_processing.batch_multiclass_non_max_suppression(
      boxes, scores, score_thresh, iou_thresh,
      max_size_per_class=max_output_size, max_total_size=max_output_size,
      num_valid_boxes=num_valid_boxes, masks=masks)
  with self.test_session() as sess:
    nms_output = sess.run(nms_dict)
    self.assertAllClose(nms_output['detection_boxes'], exp_nms_corners)
    self.assertAllClose(nms_output['detection_scores'], exp_nms_scores)
    self.assertAllClose(nms_output['detection_classes'], exp_nms_classes)
    self.assertAllClose(nms_output['num_detections'], [1, 1])
    self.assertAllClose(nms_output['detection_masks'], exp_nms_masks)
if __name__ == '__main__':
  # Run all test cases in this module under the TensorFlow test runner.
  tf.test.main()
object_detection/core/prefetcher.py
0 → 100644
View file @
44fa1d37
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Provides functions to prefetch tensors to feed into models."""
import
tensorflow
as
tf
def prefetch(tensor_dict, capacity):
  """Creates a prefetch queue for tensors.

  Builds a PaddingFIFOQueue holding dictionaries with the same keys, dtypes
  and static shapes as `tensor_dict`, registers a QueueRunner that keeps the
  queue filled by repeatedly enqueueing `tensor_dict`, and exports a scalar
  summary reporting how full the queue is. Consumers obtain prefetched
  values by calling `dequeue()` on the returned queue.

  Example input pipeline when you don't need batching:
    tensor_dict = preprocessor.preprocess(decoder.decode(string_tensor), ...)
    prefetch_queue = prefetcher.prefetch(tensor_dict, capacity=20)
    tensor_dict = prefetch_queue.dequeue()
  For input pipelines with batching, refer to core/batcher.py.

  Args:
    tensor_dict: a dictionary of tensors to prefetch.
    capacity: the size of the prefetch queue.

  Returns:
    a FIFO prefetcher queue
  """
  field_names = list(tensor_dict.keys())
  field_dtypes = [tensor.dtype for tensor in tensor_dict.values()]
  field_shapes = [tensor.get_shape() for tensor in tensor_dict.values()]
  queue = tf.PaddingFIFOQueue(capacity,
                              dtypes=field_dtypes,
                              shapes=field_shapes,
                              names=field_names,
                              name='prefetch_queue')
  enqueue_op = queue.enqueue(tensor_dict)
  runner = tf.train.queue_runner.QueueRunner(queue, [enqueue_op])
  tf.train.queue_runner.add_queue_runner(runner)
  # Report queue occupancy as a fraction in [0, 1] for monitoring.
  tf.summary.scalar('queue/%s/fraction_of_%d_full' % (queue.name, capacity),
                    tf.to_float(queue.size()) * (1. / capacity))
  return queue
object_detection/core/prefetcher_test.py
0 → 100644
View file @
44fa1d37
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.core.prefetcher."""
import
tensorflow
as
tf
from
object_detection.core
import
prefetcher
# Shorthand for TF-Slim, used below for its queue-runner context manager.
slim = tf.contrib.slim
class PrefetcherTest(tf.test.TestCase):
  """Tests for prefetcher.prefetch queue construction and dequeueing."""

  def test_prefetch_tensors_with_fully_defined_shapes(self):
    """Prefetched tensors keep their static shapes and dequeue cleanly.

    The `counter` tensor exhausts after num_batches increments, so the
    queue runner stops and a further dequeue raises OutOfRangeError.
    """
    with self.test_session() as sess:
      batch_size = 10
      image_size = 32
      num_batches = 5
      examples = tf.Variable(tf.constant(0, dtype=tf.int64))
      counter = examples.count_up_to(num_batches)
      image = tf.random_normal([batch_size, image_size,
                                image_size, 3],
                               dtype=tf.float32,
                               name='images')
      label = tf.random_uniform([batch_size, 1], 0, 10,
                                dtype=tf.int32, name='labels')

      prefetch_queue = prefetcher.prefetch(tensor_dict={'counter': counter,
                                                        'image': image,
                                                        'label': label},
                                           capacity=100)
      tensor_dict = prefetch_queue.dequeue()

      self.assertAllEqual(tensor_dict['image'].get_shape().as_list(),
                          [batch_size, image_size, image_size, 3])
      self.assertAllEqual(tensor_dict['label'].get_shape().as_list(),
                          [batch_size, 1])

      # tf.initialize_all_variables() is deprecated; use the supported op.
      tf.global_variables_initializer().run()
      with slim.queues.QueueRunners(sess):
        for _ in range(num_batches):
          results = sess.run(tensor_dict)
          self.assertEqual(results['image'].shape,
                           (batch_size, image_size, image_size, 3))
          self.assertEqual(results['label'].shape, (batch_size, 1))
        # The counter variable is exhausted after num_batches dequeues.
        with self.assertRaises(tf.errors.OutOfRangeError):
          sess.run(tensor_dict)

  def test_prefetch_tensors_with_partially_defined_shapes(self):
    """Tensors with None dimensions still dequeue with their runtime shapes.

    Variable-valued dimensions force the static shapes to be partially
    defined ([batch, None, None, 3] / [batch, None]); the dequeued numpy
    results must still have the concrete runtime shapes.
    """
    with self.test_session() as sess:
      batch_size = 10
      image_size = 32
      num_batches = 5
      examples = tf.Variable(tf.constant(0, dtype=tf.int64))
      counter = examples.count_up_to(num_batches)
      image = tf.random_normal([batch_size,
                                tf.Variable(image_size),
                                tf.Variable(image_size), 3],
                               dtype=tf.float32,
                               name='image')
      image.set_shape([batch_size, None, None, 3])
      label = tf.random_uniform([batch_size, tf.Variable(1)], 0,
                                10, dtype=tf.int32, name='label')
      label.set_shape([batch_size, None])

      prefetch_queue = prefetcher.prefetch(tensor_dict={'counter': counter,
                                                        'image': image,
                                                        'label': label},
                                           capacity=100)
      tensor_dict = prefetch_queue.dequeue()

      self.assertAllEqual(tensor_dict['image'].get_shape().as_list(),
                          [batch_size, None, None, 3])
      self.assertAllEqual(tensor_dict['label'].get_shape().as_list(),
                          [batch_size, None])

      # tf.initialize_all_variables() is deprecated; use the supported op.
      tf.global_variables_initializer().run()
      with slim.queues.QueueRunners(sess):
        for _ in range(num_batches):
          results = sess.run(tensor_dict)
          self.assertEqual(results['image'].shape,
                           (batch_size, image_size, image_size, 3))
          self.assertEqual(results['label'].shape, (batch_size, 1))
        with self.assertRaises(tf.errors.OutOfRangeError):
          sess.run(tensor_dict)
if __name__ == '__main__':
  # Run all test cases in this module under the TensorFlow test runner.
  tf.test.main()
object_detection/core/preprocessor.py
0 → 100644
View file @
44fa1d37
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Preprocess images and bounding boxes for detection.
We perform two sets of operations in preprocessing stage:
(a) operations that are applied to both training and testing data,
(b) operations that are applied only to training data for the purpose of
data augmentation.
A preprocessing function receives a set of inputs,
e.g. an image and bounding boxes,
performs an operation on them, and returns them.
Some examples are: randomly cropping the image, randomly mirroring the image,
randomly changing the brightness, contrast, hue and
randomly jittering the bounding boxes.
The preprocess function receives a tensor_dict which is a dictionary that maps
different field names to their tensors. For example,
tensor_dict[fields.InputDataFields.image] holds the image tensor.
The image is a rank 4 tensor: [1, height, width, channels] with
dtype=tf.float32. The groundtruth_boxes is a rank 2 tensor: [N, 4] where
in each row there is a box with [ymin xmin ymax xmax].
Boxes are in normalized coordinates meaning
their coordinate values range in [0, 1]
Important Note: In tensor_dict, images is a rank 4 tensor, but preprocessing
functions receive a rank 3 tensor for processing the image. Thus, inside the
preprocess function we squeeze the image to become a rank 3 tensor and then
we pass it to the functions. At the end of the preprocess we expand the image
back to rank 4.
"""
import
sys
import
tensorflow
as
tf
from
tensorflow.python.ops
import
control_flow_ops
from
object_detection.core
import
box_list
from
object_detection.core
import
box_list_ops
from
object_detection.core
import
keypoint_ops
from
object_detection.core
import
standard_fields
as
fields
def _apply_with_random_selector(x, func, num_cases):
  """Computes func(x, sel), with sel sampled from [0...num_cases-1].

  Args:
    x: input Tensor.
    func: Python function to apply.
    num_cases: Python int32, number of cases to sample sel from.

  Returns:
    The result of func(x, sel), where func receives the value of the
    selector as a python integer, but sel is sampled dynamically.
  """
  rand_sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32)
  # Pass the real x only to one of the func calls.
  # switch() routes x to its second output only for the case where
  # rand_sel == case, so exactly one func branch receives live data;
  # merge() then returns the single branch that actually executed.
  return control_flow_ops.merge([
      func(control_flow_ops.switch(x, tf.equal(rand_sel, case))[1], case)
      for case in range(num_cases)])[0]
def _apply_with_random_selector_tuples(x, func, num_cases):
  """Computes func(x, sel), with sel sampled from [0...num_cases-1].

  Tuple analogue of _apply_with_random_selector: every tensor in `x` is
  routed through a control_flow_ops.switch so that only the branch whose
  case equals the sampled selector receives live data, and each output
  position is merged back across branches.

  Args:
    x: A tuple of input tensors.
    func: Python function to apply.
    num_cases: Python int32, number of cases to sample sel from.

  Returns:
    The result of func(x, sel), where func receives the value of the
    selector as a python integer, but sel is sampled dynamically.
  """
  num_inputs = len(x)
  rand_sel = tf.random_uniform([], maxval=num_cases, dtype=tf.int32)
  # Pass the real x only to one of the func calls.
  # Idiomatic list literal with an anonymous loop variable (the original
  # used `list() for t in x` with an unused binding).
  tuples = [[] for _ in range(num_inputs)]
  for case in range(num_cases):
    new_x = [control_flow_ops.switch(t, tf.equal(rand_sel, case))[1]
             for t in x]
    output = func(tuple(new_x), case)
    for j in range(num_inputs):
      tuples[j].append(output[j])
  # merge() selects, per output position, the one branch that executed.
  for i in range(num_inputs):
    tuples[i] = control_flow_ops.merge(tuples[i])[0]
  return tuple(tuples)
def _random_integer(minval, maxval, seed):
  """Returns a random 0-D tensor between minval and maxval.

  Args:
    minval: minimum value of the random tensor.
    maxval: maximum value of the random tensor.
    seed: random seed.

  Returns:
    A random 0-D tensor between minval and maxval.
  """
  sample = tf.random_uniform(shape=[],
                             minval=minval,
                             maxval=maxval,
                             dtype=tf.int32,
                             seed=seed)
  return sample
def normalize_image(image, original_minval, original_maxval, target_minval,
                    target_maxval):
  """Normalizes pixel values in the image.

  Linearly rescales pixel values from the current
  [original_minval, original_maxval] range to the
  [target_minval, target_maxval] range.

  Args:
    image: rank 3 float32 tensor containing 1
           image -> [height, width, channels].
    original_minval: current image minimum value.
    original_maxval: current image maximum value.
    target_minval: target image minimum value.
    target_maxval: target image maximum value.

  Returns:
    image: image which is the same shape as input image.
  """
  with tf.name_scope('NormalizeImage', values=[image]):
    orig_min = float(original_minval)
    orig_max = float(original_maxval)
    tgt_min = float(target_minval)
    tgt_max = float(target_maxval)
    # Affine map: shift to zero, scale the range, shift to the target min.
    scale = (tgt_max - tgt_min) / (orig_max - orig_min)
    image = tf.to_float(image)
    image = (image - orig_min) * scale + tgt_min
    return image
def flip_boxes(boxes):
  """Left-right flip the boxes.

  Args:
    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
           Boxes are in normalized form meaning their coordinates vary
           between [0, 1]. Each row is in the form of
           [ymin, xmin, ymax, xmax].

  Returns:
    Flipped boxes.
  """
  # Split out the four coordinates, mirror the x-extent about the vertical
  # center line (x -> 1 - x), and swap xmin/xmax so the box stays well-formed.
  ymin, xmin, ymax, xmax = tf.split(
      value=boxes, num_or_size_splits=4, axis=1)
  mirrored_xmin = tf.subtract(1.0, xmax)
  mirrored_xmax = tf.subtract(1.0, xmin)
  return tf.concat([ymin, mirrored_xmin, ymax, mirrored_xmax], 1)
def retain_boxes_above_threshold(
    boxes, labels, label_scores, masks=None, keypoints=None, threshold=0.0):
  """Retains boxes whose label score is above a given threshold.

  If the label score for a box is missing (represented by NaN), the box is
  retained. The boxes that don't pass the threshold will not appear in the
  returned tensor.

  Args:
    boxes: float32 tensor of shape [num_instance, 4] representing boxes
      location in normalized coordinates.
    labels: rank 1 int32 tensor of shape [num_instance] containing the object
      classes.
    label_scores: float32 tensor of shape [num_instance] representing the
      score for each box.
    masks: (optional) rank 3 float32 tensor with shape
      [num_instances, height, width] containing instance masks. The masks are
      of the same height, width as the input `image`.
    keypoints: (optional) rank 3 float32 tensor with shape
      [num_instances, num_keypoints, 2]. The keypoints are in y-x normalized
      coordinates.
    threshold: scalar python float.

  Returns:
    retained_boxes: [num_retained_instance, 4]
    retained_labels: [num_retained_instance]
    retained_label_scores: [num_retained_instance]

    If masks, or keypoints are not None, the function also returns:
    retained_masks: [num_retained_instance, height, width]
    retained_keypoints: [num_retained_instance, num_keypoints, 2]
  """
  with tf.name_scope('RetainBoxesAboveThreshold',
                     values=[boxes, labels, label_scores]):
    # NaN scores are treated as "score unknown" and always kept.
    keep_mask = tf.logical_or(label_scores > threshold,
                              tf.is_nan(label_scores))
    keep_indices = tf.squeeze(tf.where(keep_mask), axis=1)
    result = [
        tf.gather(boxes, keep_indices),
        tf.gather(labels, keep_indices),
        tf.gather(label_scores, keep_indices),
    ]
    if masks is not None:
      result.append(tf.gather(masks, keep_indices))
    if keypoints is not None:
      result.append(tf.gather(keypoints, keep_indices))
    return result
def _flip_masks(masks):
  """Left-right flips masks.

  Args:
    masks: rank 3 float32 tensor with shape
      [num_instances, height, width] representing instance masks.

  Returns:
    flipped masks: rank 3 float32 tensor with shape
      [num_instances, height, width] representing instance masks.
  """
  # Reverse the last (width) dimension for every instance.
  flipped = masks[:, :, ::-1]
  return flipped
def random_horizontal_flip(image,
                           boxes=None,
                           masks=None,
                           keypoints=None,
                           keypoint_flip_permutation=None,
                           seed=None):
  """Randomly decides whether to mirror the image and detections or not.

  The probability of flipping the image is 50%.

  Args:
    image: rank 3 float32 tensor with shape [height, width, channels].
    boxes: (optional) rank 2 float32 tensor with shape [N, 4]
           containing the bounding boxes.
           Boxes are in normalized form meaning their coordinates vary
           between [0, 1]. Each row is in the form of
           [ymin, xmin, ymax, xmax].
    masks: (optional) rank 3 float32 tensor with shape
           [num_instances, height, width] containing instance masks. The masks
           are of the same height, width as the input `image`.
    keypoints: (optional) rank 3 float32 tensor with shape
               [num_instances, num_keypoints, 2]. The keypoints are in y-x
               normalized coordinates.
    keypoint_flip_permutation: rank 1 int32 tensor containing keypoint flip
                               permutation.
    seed: random seed

  Returns:
    image: image which is the same shape as input image.

    If boxes, masks, keypoints, and keypoint_flip_permutation is not None,
    the function also returns the following tensors.

    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
           Boxes are in normalized form meaning their coordinates vary
           between [0, 1].
    masks: rank 3 float32 tensor with shape [num_instances, height, width]
           containing instance masks.
    keypoints: rank 3 float32 tensor with shape
               [num_instances, num_keypoints, 2]

  Raises:
    ValueError: if keypoints are provided but keypoint_flip_permutation is not.
  """
  if keypoints is not None and keypoint_flip_permutation is None:
    raise ValueError(
        'keypoints are provided but keypoints_flip_permutation is not provided')
  with tf.name_scope('RandomHorizontalFlip', values=[image, boxes]):
    outputs = []
    # Draw in [0, 1); flip when the draw exceeds 0.5 AND the image actually
    # contains boxes. NOTE(review): tf.size(boxes) is evaluated
    # unconditionally, so despite the None default, boxes is effectively
    # required by this implementation — confirm with callers.
    coin_toss = tf.random_uniform([], seed=seed)
    do_flip = tf.logical_and(
        tf.greater(tf.size(boxes), 0), tf.greater(coin_toss, 0.5))
    # Flip the image.
    image = tf.cond(do_flip,
                    lambda: tf.image.flip_left_right(image),
                    lambda: image)
    outputs.append(image)
    # Flip the boxes.
    if boxes is not None:
      boxes = tf.cond(do_flip, lambda: flip_boxes(boxes), lambda: boxes)
      outputs.append(boxes)
    # Flip the masks.
    if masks is not None:
      masks = tf.cond(do_flip, lambda: _flip_masks(masks), lambda: masks)
      outputs.append(masks)
    # Flip the keypoints, remapping them with the supplied permutation.
    if keypoints is not None and keypoint_flip_permutation is not None:
      permutation = keypoint_flip_permutation
      keypoints = tf.cond(
          do_flip,
          lambda: keypoint_ops.flip_horizontal(keypoints, 0.5, permutation),
          lambda: keypoints)
      outputs.append(keypoints)
    return tuple(outputs)
def random_pixel_value_scale(image, minval=0.9, maxval=1.1, seed=None):
  """Scales each value in the pixels of the image.

  This function scales each pixel independent of the other ones.
  For each value in image tensor, draws a random number between
  minval and maxval and multiplies the values with them.

  Args:
    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
           with pixel values varying between [0, 1].
    minval: lower ratio of scaling pixel values.
    maxval: upper ratio of scaling pixel values.
    seed: random seed.

  Returns:
    image: image which is the same shape as input image.
  """
  with tf.name_scope('RandomPixelValueScale', values=[image]):
    # One independent multiplier per pixel value.
    per_pixel_scale = tf.random_uniform(
        tf.shape(image),
        minval=minval,
        maxval=maxval,
        dtype=tf.float32,
        seed=seed)
    scaled = tf.multiply(image, per_pixel_scale)
    # Keep the result inside the valid [0, 1] pixel range.
    return tf.clip_by_value(scaled, 0.0, 1.0)
def random_image_scale(image,
                       masks=None,
                       min_scale_ratio=0.5,
                       max_scale_ratio=2.0,
                       seed=None):
  """Scales the image size.

  Args:
    image: rank 3 float32 tensor contains 1 image -> [height, width, channels].
    masks: (optional) rank 3 float32 tensor containing masks with
      size [height, width, num_masks]. The value is set to None if there are
      no masks.
    min_scale_ratio: minimum scaling ratio.
    max_scale_ratio: maximum scaling ratio.
    seed: random seed.

  Returns:
    image: image which is the same rank as input image.
    masks: If masks is not none, resized masks which are the same rank as
      input masks will be returned.
  """
  with tf.name_scope('RandomImageScale', values=[image]):
    result = []
    image_shape = tf.shape(image)
    image_height = image_shape[0]
    image_width = image_shape[1]
    # Same scalar coefficient applied to both dimensions preserves the
    # aspect ratio.
    size_coef = tf.random_uniform([],
                                  minval=min_scale_ratio,
                                  maxval=max_scale_ratio,
                                  dtype=tf.float32,
                                  seed=seed)
    image_newysize = tf.to_int32(
        tf.multiply(tf.to_float(image_height), size_coef))
    image_newxsize = tf.to_int32(
        tf.multiply(tf.to_float(image_width), size_coef))
    image = tf.image.resize_images(
        image, [image_newysize, image_newxsize], align_corners=True)
    result.append(image)
    # Bug fix: the original `if masks:` evaluated the truthiness of a
    # tf.Tensor, which raises a TypeError whenever masks are actually passed.
    # Test against None explicitly instead.
    if masks is not None:
      # Nearest-neighbor keeps mask values binary/categorical after resize.
      masks = tf.image.resize_nearest_neighbor(
          masks, [image_newysize, image_newxsize], align_corners=True)
      result.append(masks)
    return tuple(result)
def random_rgb_to_gray(image, probability=0.1, seed=None):
  """Changes the image from RGB to Grayscale with the given probability.

  Args:
    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
           with pixel values varying between [0, 1].
    probability: the probability of returning a grayscale image.
            The probability should be a number between [0, 1].
    seed: random seed.

  Returns:
    image: image which is the same shape as input image.
  """
  def _to_three_channel_gray(img):
    # Collapse to single-channel grayscale, then tile back to three channels
    # so the output shape matches the RGB input.
    return tf.image.grayscale_to_rgb(tf.image.rgb_to_grayscale(img))

  with tf.name_scope('RandomRGBtoGray', values=[image]):
    # Draw in [0, 1); convert to gray only when the draw falls at or below
    # `probability`.
    draw = tf.random_uniform([], seed=seed)
    image = tf.cond(
        tf.greater(draw, probability),
        lambda: image,
        lambda: _to_three_channel_gray(image))
    return image
def random_adjust_brightness(image, max_delta=0.2):
  """Randomly adjusts brightness.

  Makes sure the output image is still between 0 and 1.

  Args:
    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
           with pixel values varying between [0, 1].
    max_delta: how much to change the brightness. A value between [0, 1).

  Returns:
    image: image which is the same shape as input image.
  """
  with tf.name_scope('RandomAdjustBrightness', values=[image]):
    adjusted = tf.image.random_brightness(image, max_delta)
    # random_brightness can push values outside [0, 1]; clamp back.
    return tf.clip_by_value(adjusted, clip_value_min=0.0, clip_value_max=1.0)
def random_adjust_contrast(image, min_delta=0.8, max_delta=1.25):
  """Randomly adjusts contrast.

  Makes sure the output image is still between 0 and 1.

  Args:
    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
           with pixel values varying between [0, 1].
    min_delta: see max_delta.
    max_delta: how much to change the contrast. Contrast will change with a
               value between min_delta and max_delta. This value will be
               multiplied to the current contrast of the image.

  Returns:
    image: image which is the same shape as input image.
  """
  with tf.name_scope('RandomAdjustContrast', values=[image]):
    adjusted = tf.image.random_contrast(image, min_delta, max_delta)
    # random_contrast can push values outside [0, 1]; clamp back.
    return tf.clip_by_value(adjusted, clip_value_min=0.0, clip_value_max=1.0)
def random_adjust_hue(image, max_delta=0.02):
  """Randomly adjusts hue.

  Makes sure the output image is still between 0 and 1.

  Args:
    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
           with pixel values varying between [0, 1].
    max_delta: change hue randomly with a value between 0 and max_delta.

  Returns:
    image: image which is the same shape as input image.
  """
  with tf.name_scope('RandomAdjustHue', values=[image]):
    adjusted = tf.image.random_hue(image, max_delta)
    # random_hue can push values outside [0, 1]; clamp back.
    return tf.clip_by_value(adjusted, clip_value_min=0.0, clip_value_max=1.0)
def random_adjust_saturation(image, min_delta=0.8, max_delta=1.25):
  """Randomly adjusts saturation.

  Makes sure the output image is still between 0 and 1.

  Args:
    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
           with pixel values varying between [0, 1].
    min_delta: see max_delta.
    max_delta: how much to change the saturation. Saturation will change with
               a value between min_delta and max_delta. This value will be
               multiplied to the current saturation of the image.

  Returns:
    image: image which is the same shape as input image.
  """
  with tf.name_scope('RandomAdjustSaturation', values=[image]):
    adjusted = tf.image.random_saturation(image, min_delta, max_delta)
    # random_saturation can push values outside [0, 1]; clamp back.
    return tf.clip_by_value(adjusted, clip_value_min=0.0, clip_value_max=1.0)
def random_distort_color(image, color_ordering=0):
  """Randomly distorts color.

  Randomly distorts color using a combination of brightness, hue, contrast
  and saturation changes. Makes sure the output image is still between 0
  and 1.

  Args:
    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
           with pixel values varying between [0, 1].
    color_ordering: Python int, a type of distortion (valid values: 0, 1).

  Returns:
    image: image which is the same shape as input image.

  Raises:
    ValueError: if color_ordering is not in {0, 1}.
  """
  with tf.name_scope('RandomDistortColor', values=[image]):
    # Validate up front; color_ordering is a plain Python int.
    if color_ordering not in (0, 1):
      raise ValueError('color_ordering must be in {0, 1}')
    if color_ordering == 0:
      # Order: brightness -> saturation -> hue -> contrast.
      image = tf.image.random_brightness(image, max_delta=32. / 255.)
      image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
      image = tf.image.random_hue(image, max_delta=0.2)
      image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
    else:
      # Order: brightness -> contrast -> saturation -> hue.
      image = tf.image.random_brightness(image, max_delta=32. / 255.)
      image = tf.image.random_contrast(image, lower=0.5, upper=1.5)
      image = tf.image.random_saturation(image, lower=0.5, upper=1.5)
      image = tf.image.random_hue(image, max_delta=0.2)
    # The random_* ops do not necessarily clamp.
    image = tf.clip_by_value(image, 0.0, 1.0)
    return image
def random_jitter_boxes(boxes, ratio=0.05, seed=None):
  """Randomly jitter boxes in image.

  Args:
    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
           Boxes are in normalized form meaning their coordinates vary
           between [0, 1]. Each row is in the form of
           [ymin, xmin, ymax, xmax].
    ratio: The ratio of the box width and height that the corners can jitter.
           For example if the width is 100 pixels and ratio is 0.05,
           the corners can jitter up to 5 pixels in the x direction.
    seed: random seed.

  Returns:
    boxes: boxes which is the same shape as input boxes.
  """
  def random_jitter_box(box, ratio, seed):
    """Randomly jitter box.

    Args:
      box: bounding box [1, 1, 4].
      ratio: max ratio between jittered box and original box,
        a number between [0, 0.5].
      seed: random seed.

    Returns:
      jittered_box: jittered box.
    """
    # Independent perturbation in [-ratio, ratio) for each of the 4 corners.
    rand_numbers = tf.random_uniform(
        [1, 1, 4], minval=-ratio, maxval=ratio, dtype=tf.float32, seed=seed)
    # Box layout is [ymin, xmin, ymax, xmax], so width = xmax - xmin and
    # height = ymax - ymin.
    box_width = tf.subtract(box[0, 0, 3], box[0, 0, 1])
    box_height = tf.subtract(box[0, 0, 2], box[0, 0, 0])
    # Scale the y-perturbations by height and the x-perturbations by width.
    hw_coefs = tf.stack([box_height, box_width, box_height, box_width])
    hw_rand_coefs = tf.multiply(hw_coefs, rand_numbers)
    jittered_box = tf.add(box, hw_rand_coefs)
    # Keep the jittered box inside the normalized [0, 1] image frame.
    jittered_box = tf.clip_by_value(jittered_box, 0.0, 1.0)
    return jittered_box

  with tf.name_scope('RandomJitterBoxes', values=[boxes]):
    # boxes are [N, 4]. Lets first make them [N, 1, 1, 4]
    boxes_shape = tf.shape(boxes)
    boxes = tf.expand_dims(boxes, 1)
    boxes = tf.expand_dims(boxes, 2)
    # Jitter each box independently, then restore the original [N, 4] shape.
    distorted_boxes = tf.map_fn(
        lambda x: random_jitter_box(x, ratio, seed), boxes, dtype=tf.float32)
    distorted_boxes = tf.reshape(distorted_boxes, boxes_shape)
    return distorted_boxes
def _strict_random_crop_image(image,
                              boxes,
                              labels,
                              masks=None,
                              keypoints=None,
                              min_object_covered=1.0,
                              aspect_ratio_range=(0.75, 1.33),
                              area_range=(0.1, 1.0),
                              overlap_thresh=0.3):
  """Performs random crop.

  Note: boxes will be clipped to the crop. Keypoint coordinates that are
  outside the crop will be set to NaN, which is consistent with the original
  keypoint encoding for non-existing keypoints. This function always crops
  the image and is supposed to be used by `random_crop_image` function which
  sometimes returns image unchanged.

  Args:
    image: rank 3 float32 tensor containing 1 image ->
           [height, width, channels] with pixel values varying between [0, 1].
    boxes: rank 2 float32 tensor containing the bounding boxes with shape
           [num_instances, 4].
           Boxes are in normalized form meaning their coordinates vary
           between [0, 1]. Each row is in the form of
           [ymin, xmin, ymax, xmax].
    labels: rank 1 int32 tensor containing the object classes.
    masks: (optional) rank 3 float32 tensor with shape
           [num_instances, height, width] containing instance masks. The masks
           are of the same height, width as the input `image`.
    keypoints: (optional) rank 3 float32 tensor with shape
               [num_instances, num_keypoints, 2]. The keypoints are in y-x
               normalized coordinates.
    min_object_covered: the cropped image must cover at least this fraction of
                        at least one of the input bounding boxes.
    aspect_ratio_range: allowed range for aspect ratio of cropped image.
    area_range: allowed range for area ratio between cropped image and the
                original image.
    overlap_thresh: minimum overlap thresh with new cropped
                    image to keep the box.

  Returns:
    image: image which is the same rank as input image.
    boxes: boxes which is the same rank as input boxes.
           Boxes are in normalized form.
    labels: new labels.

    If masks, or keypoints is not None, the function also returns:
    masks: rank 3 float32 tensor with shape [num_instances, height, width]
           containing instance masks.
    keypoints: rank 3 float32 tensor with shape
               [num_instances, num_keypoints, 2]
  """
  with tf.name_scope('RandomCropImage', values=[image, boxes]):
    image_shape = tf.shape(image)

    # boxes are [N, 4]. Lets first make them [N, 1, 4].
    boxes_expanded = tf.expand_dims(
        tf.clip_by_value(boxes, clip_value_min=0.0, clip_value_max=1.0), 1)

    sample_distorted_bounding_box = tf.image.sample_distorted_bounding_box(
        image_shape,
        bounding_boxes=boxes_expanded,
        min_object_covered=min_object_covered,
        aspect_ratio_range=aspect_ratio_range,
        area_range=area_range,
        max_attempts=100,
        use_image_if_no_bounding_boxes=True)

    im_box_begin, im_box_size, im_box = sample_distorted_bounding_box

    new_image = tf.slice(image, im_box_begin, im_box_size)
    new_image.set_shape([None, None, image.get_shape()[2]])

    # [1, 4]
    im_box_rank2 = tf.squeeze(im_box, squeeze_dims=[0])
    # [4]
    im_box_rank1 = tf.squeeze(im_box)

    boxlist = box_list.BoxList(boxes)
    boxlist.add_field('labels', labels)

    im_boxlist = box_list.BoxList(im_box_rank2)

    # remove boxes that are outside cropped image
    boxlist, inside_window_ids = box_list_ops.prune_completely_outside_window(
        boxlist, im_box_rank1)

    # remove boxes that are outside image
    overlapping_boxlist, keep_ids = box_list_ops.prune_non_overlapping_boxes(
        boxlist, im_boxlist, overlap_thresh)

    # change the coordinate of the remaining boxes
    new_labels = overlapping_boxlist.get_field('labels')
    new_boxlist = box_list_ops.change_coordinate_frame(overlapping_boxlist,
                                                       im_box_rank1)
    new_boxes = new_boxlist.get()
    new_boxes = tf.clip_by_value(
        new_boxes, clip_value_min=0.0, clip_value_max=1.0)

    result = [new_image, new_boxes, new_labels]

    if masks is not None:
      masks_of_boxes_inside_window = tf.gather(masks, inside_window_ids)
      masks_of_boxes_completely_inside_window = tf.gather(
          masks_of_boxes_inside_window, keep_ids)
      # Bug fix: masks are [num_instances, height, width], so the slice over
      # the first (instance) axis must start at 0 and keep all instances
      # (size -1). The previous code used im_box_begin[2]/im_box_size[2] —
      # the image's *channel* begin/size — which truncated the instance axis
      # to the number of image channels (e.g. 3).
      masks_box_begin = [0, im_box_begin[0], im_box_begin[1]]
      masks_box_size = [-1, im_box_size[0], im_box_size[1]]
      new_masks = tf.slice(masks_of_boxes_completely_inside_window,
                           masks_box_begin, masks_box_size)
      result.append(new_masks)

    if keypoints is not None:
      keypoints_of_boxes_inside_window = tf.gather(keypoints,
                                                   inside_window_ids)
      keypoints_of_boxes_completely_inside_window = tf.gather(
          keypoints_of_boxes_inside_window, keep_ids)
      new_keypoints = keypoint_ops.change_coordinate_frame(
          keypoints_of_boxes_completely_inside_window, im_box_rank1)
      # Keypoints falling outside the crop become NaN.
      new_keypoints = keypoint_ops.prune_outside_window(
          new_keypoints, [0.0, 0.0, 1.0, 1.0])
      result.append(new_keypoints)

    return tuple(result)
def random_crop_image(image,
                      boxes,
                      labels,
                      masks=None,
                      keypoints=None,
                      min_object_covered=1.0,
                      aspect_ratio_range=(0.75, 1.33),
                      area_range=(0.1, 1.0),
                      overlap_thresh=0.3,
                      random_coef=0.0,
                      seed=None):
  """Randomly crops the image.

  Given the input image and its bounding boxes, this op randomly
  crops a subimage.  Given a user-provided set of input constraints,
  the crop window is resampled until it satisfies these constraints.
  If within 100 trials it is unable to find a valid crop, the original
  image is returned. See the Args section for a description of the input
  constraints. Both input boxes and returned Boxes are in normalized
  form (e.g., lie in the unit square [0, 1]).
  This function will return the original image with probability random_coef.

  Note: boxes will be clipped to the crop. Keypoint coordinates that are
  outside the crop will be set to NaN, which is consistent with the original
  keypoint encoding for non-existing keypoints.

  Args:
    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
           with pixel values varying between [0, 1].
    boxes: rank 2 float32 tensor containing the bounding boxes with shape
           [num_instances, 4].
           Boxes are in normalized form meaning their coordinates vary
           between [0, 1]. Each row is in the form of
           [ymin, xmin, ymax, xmax].
    labels: rank 1 int32 tensor containing the object classes.
    masks: (optional) rank 3 float32 tensor with shape
           [num_instances, height, width] containing instance masks. The masks
           are of the same height, width as the input `image`.
    keypoints: (optional) rank 3 float32 tensor with shape
               [num_instances, num_keypoints, 2]. The keypoints are in y-x
               normalized coordinates.
    min_object_covered: the cropped image must cover at least this fraction of
                        at least one of the input bounding boxes.
    aspect_ratio_range: allowed range for aspect ratio of cropped image.
    area_range: allowed range for area ratio between cropped image and the
                original image.
    overlap_thresh: minimum overlap thresh with new cropped
                    image to keep the box.
    random_coef: a random coefficient that defines the chance of getting the
                 original image. If random_coef is 0, we will always get the
                 cropped image, and if it is 1.0, we will always get the
                 original image.
    seed: random seed.

  Returns:
    image: Image shape will be [new_height, new_width, channels].
    boxes: boxes which is the same rank as input boxes. Boxes are in
           normalized form.
    labels: new labels.

    If masks, or keypoints are not None, the function also returns:
    masks: rank 3 float32 tensor with shape [num_instances, height, width]
           containing instance masks.
    keypoints: rank 3 float32 tensor with shape
               [num_instances, num_keypoints, 2]
  """
  def strict_random_crop_image_fn():
    # Closure over all arguments so it can be used directly as a tf.cond
    # branch below.
    return _strict_random_crop_image(
        image,
        boxes,
        labels,
        masks=masks,
        keypoints=keypoints,
        min_object_covered=min_object_covered,
        aspect_ratio_range=aspect_ratio_range,
        area_range=area_range,
        overlap_thresh=overlap_thresh)

  # avoids tf.cond to make faster RCNN training on borg. See b/140057645.
  if random_coef < sys.float_info.min:
    # random_coef is effectively zero: the crop always happens, so skip the
    # tf.cond entirely and build the crop subgraph unconditionally.
    result = strict_random_crop_image_fn()
  else:
    # Crop only when a uniform draw exceeds random_coef; otherwise pass the
    # inputs through unchanged. Both branches must return tuples of the same
    # length, hence the optional masks/keypoints entries below.
    do_a_crop_random = tf.random_uniform([], seed=seed)
    do_a_crop_random = tf.greater(do_a_crop_random, random_coef)

    outputs = [image, boxes, labels]

    if masks is not None:
      outputs.append(masks)
    if keypoints is not None:
      outputs.append(keypoints)

    result = tf.cond(do_a_crop_random,
                     strict_random_crop_image_fn,
                     lambda: tuple(outputs))
  return result
def random_pad_image(image,
                     boxes,
                     min_image_size=None,
                     max_image_size=None,
                     pad_color=None,
                     seed=None):
  """Randomly pads the image.

  This function randomly pads the image with zeros. The final size of the
  padded image will be between min_image_size and max_image_size.
  if min_image_size is smaller than the input image size, min_image_size will
  be set to the input image size. The same for max_image_size. The input image
  will be located at a uniformly random location inside the padded image.
  The relative location of the boxes to the original image will remain the
  same.

  Args:
    image: rank 3 float32 tensor containing 1 image ->
           [height, width, channels] with pixel values varying between [0, 1].
    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
           Boxes are in normalized form meaning their coordinates vary
           between [0, 1]. Each row is in the form of
           [ymin, xmin, ymax, xmax].
    min_image_size: a tensor of size [min_height, min_width], type tf.int32.
                    If passed as None, will be set to image size
                    [height, width].
    max_image_size: a tensor of size [max_height, max_width], type tf.int32.
                    If passed as None, will be set to twice the
                    image [height * 2, width * 2].
    pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32.
               if set as None, it will be set to average color of the input
               image.
    seed: random seed.

  Returns:
    image: Image shape will be [new_height, new_width, channels].
    boxes: boxes which is the same rank as input boxes. Boxes are in
           normalized form.
  """
  if pad_color is None:
    # Default padding color: per-channel mean over the spatial dimensions.
    pad_color = tf.reduce_mean(image, reduction_indices=[0, 1])

  image_shape = tf.shape(image)
  image_height = image_shape[0]
  image_width = image_shape[1]

  if max_image_size is None:
    max_image_size = tf.stack([image_height * 2, image_width * 2])
  # Never allow the padded size to be smaller than the input image.
  max_image_size = tf.maximum(max_image_size,
                              tf.stack([image_height, image_width]))

  if min_image_size is None:
    min_image_size = tf.stack([image_height, image_width])
  min_image_size = tf.maximum(min_image_size,
                              tf.stack([image_height, image_width]))

  # Sample the target size uniformly in [min, max); when min == max the
  # range is empty, so fall back to the max directly.
  target_height = tf.cond(
      max_image_size[0] > min_image_size[0],
      lambda: _random_integer(min_image_size[0], max_image_size[0], seed),
      lambda: max_image_size[0])

  target_width = tf.cond(
      max_image_size[1] > min_image_size[1],
      lambda: _random_integer(min_image_size[1], max_image_size[1], seed),
      lambda: max_image_size[1])

  # Place the original image at a uniformly random offset inside the target.
  offset_height = tf.cond(
      target_height > image_height,
      lambda: _random_integer(0, target_height - image_height, seed),
      lambda: tf.constant(0, dtype=tf.int32))

  offset_width = tf.cond(
      target_width > image_width,
      lambda: _random_integer(0, target_width - image_width, seed),
      lambda: tf.constant(0, dtype=tf.int32))

  new_image = tf.image.pad_to_bounding_box(
      image,
      offset_height=offset_height,
      offset_width=offset_width,
      target_height=target_height,
      target_width=target_width)

  # Setting color of the padded pixels
  # pad_to_bounding_box pads with zeros; build a mask of the padded region
  # (where the padded all-ones image is 0) and add pad_color there.
  image_ones = tf.ones_like(image)
  image_ones_padded = tf.image.pad_to_bounding_box(
      image_ones,
      offset_height=offset_height,
      offset_width=offset_width,
      target_height=target_height,
      target_width=target_width)
  image_color_paded = (1.0 - image_ones_padded) * pad_color
  new_image += image_color_paded

  # setting boxes
  # Express the padded canvas as a window in the original image's normalized
  # coordinate frame, then re-express the boxes relative to that window.
  new_window = tf.to_float(
      tf.stack([
          -offset_height, -offset_width, target_height - offset_height,
          target_width - offset_width
      ]))
  new_window /= tf.to_float(
      tf.stack([image_height, image_width, image_height, image_width]))
  boxlist = box_list.BoxList(boxes)
  new_boxlist = box_list_ops.change_coordinate_frame(boxlist, new_window)
  new_boxes = new_boxlist.get()

  return new_image, new_boxes
def random_crop_pad_image(image,
                          boxes,
                          labels,
                          min_object_covered=1.0,
                          aspect_ratio_range=(0.75, 1.33),
                          area_range=(0.1, 1.0),
                          overlap_thresh=0.3,
                          random_coef=0.0,
                          min_padded_size_ratio=None,
                          max_padded_size_ratio=None,
                          pad_color=None,
                          seed=None):
  """Randomly crops and pads the image.

  Given an input image and its bounding boxes, this op first randomly crops
  the image and then randomly pads the image with background values.
  Parameters min_padded_size_ratio and max_padded_size_ratio, determine the
  range of the final output image size. Specifically, the final image size
  will have a size in the range of min_padded_size_ratio * tf.shape(image)
  and max_padded_size_ratio * tf.shape(image). Note that these ratios are
  with respect to the size of the original image, so we can't capture the
  same effect easily by independently applying RandomCropImage
  followed by RandomPadImage.

  Args:
    image: rank 3 float32 tensor containing 1 image ->
           [height, width, channels] with pixel values varying between [0, 1].
    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
           Boxes are in normalized form meaning their coordinates vary
           between [0, 1]. Each row is in the form of
           [ymin, xmin, ymax, xmax].
    labels: rank 1 int32 tensor containing the object classes.
    min_object_covered: the cropped image must cover at least this fraction of
                        at least one of the input bounding boxes.
    aspect_ratio_range: allowed range for aspect ratio of cropped image.
    area_range: allowed range for area ratio between cropped image and the
                original image.
    overlap_thresh: minimum overlap thresh with new cropped
                    image to keep the box.
    random_coef: a random coefficient that defines the chance of getting the
                 original image. If random_coef is 0, we will always get the
                 cropped image, and if it is 1.0, we will always get the
                 original image.
    min_padded_size_ratio: min ratio of padded image height and width to the
                           input image's height and width. If None, it will
                           be set to [0.0, 0.0].
    max_padded_size_ratio: max ratio of padded image height and width to the
                           input image's height and width. If None, it will
                           be set to [2.0, 2.0].
    pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32.
               if set as None, it will be set to average color of the
               randomly cropped image.
    seed: random seed.

  Returns:
    padded_image: padded image.
    padded_boxes: boxes which is the same rank as input boxes. Boxes are in
                  normalized form.
    cropped_labels: cropped labels.
  """
  # Record the ORIGINAL spatial size before cropping: the padded-size ratios
  # are defined relative to it, not to the crop.
  original_shape = tf.shape(image)
  original_hw = tf.stack([original_shape[0], original_shape[1]])

  if min_padded_size_ratio is None:
    min_padded_size_ratio = tf.constant([0.0, 0.0], tf.float32)
  if max_padded_size_ratio is None:
    max_padded_size_ratio = tf.constant([2.0, 2.0], tf.float32)

  cropped_image, cropped_boxes, cropped_labels = random_crop_image(
      image=image,
      boxes=boxes,
      labels=labels,
      min_object_covered=min_object_covered,
      aspect_ratio_range=aspect_ratio_range,
      area_range=area_range,
      overlap_thresh=overlap_thresh,
      random_coef=random_coef,
      seed=seed)

  min_image_size = tf.to_int32(
      tf.to_float(original_hw) * min_padded_size_ratio)
  max_image_size = tf.to_int32(
      tf.to_float(original_hw) * max_padded_size_ratio)

  padded_image, padded_boxes = random_pad_image(
      cropped_image,
      cropped_boxes,
      min_image_size=min_image_size,
      max_image_size=max_image_size,
      pad_color=pad_color,
      seed=seed)

  return padded_image, padded_boxes, cropped_labels
def random_crop_to_aspect_ratio(image,
                                boxes,
                                labels,
                                masks=None,
                                keypoints=None,
                                aspect_ratio=1.0,
                                overlap_thresh=0.3,
                                seed=None):
  """Randomly crops an image to the specified aspect ratio.

  Randomly crops the a portion of the image such that the crop is of the
  specified aspect ratio, and the crop is as large as possible. If the
  specified aspect ratio is larger than the aspect ratio of the image, this op
  will randomly remove rows from the top and bottom of the image. If the
  specified aspect ratio is less than the aspect ratio of the image, this op
  will randomly remove cols from the left and right of the image. If the
  specified aspect ratio is the same as the aspect ratio of the image, this op
  will return the image.

  Args:
    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
           with pixel values varying between [0, 1].
    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
           Boxes are in normalized form meaning their coordinates vary
           between [0, 1].
           Each row is in the form of [ymin, xmin, ymax, xmax].
    labels: rank 1 int32 tensor containing the object classes.
    masks: (optional) rank 3 float32 tensor with shape
           [num_instances, height, width] containing instance masks. The masks
           are of the same height, width as the input `image`.
    keypoints: (optional) rank 3 float32 tensor with shape
               [num_instances, num_keypoints, 2]. The keypoints are in y-x
               normalized coordinates.
    aspect_ratio: the aspect ratio of cropped image.
    overlap_thresh: minimum overlap thresh with new cropped
                    image to keep the box.
    seed: random seed.

  Returns:
    image: image which is the same rank as input image.
    boxes: boxes which is the same rank as input boxes.
           Boxes are in normalized form.
    labels: new labels.

    If masks, or keypoints is not None, the function also returns:
    masks: rank 3 float32 tensor with shape [num_instances, height, width]
           containing instance masks.
    keypoints: rank 3 float32 tensor with shape
               [num_instances, num_keypoints, 2]

  Raises:
    ValueError: If image is not a 3D tensor.
  """
  if len(image.get_shape()) != 3:
    raise ValueError('Image should be 3D tensor')

  with tf.name_scope('RandomCropToAspectRatio', values=[image]):
    image_shape = tf.shape(image)
    orig_height = image_shape[0]
    orig_width = image_shape[1]
    # Aspect ratio is width / height throughout this function.
    orig_aspect_ratio = tf.to_float(orig_width) / tf.to_float(orig_height)
    new_aspect_ratio = tf.constant(aspect_ratio, dtype=tf.float32)

    def target_height_fn():
      # Requested ratio is wider than the image: keep width, shrink height so
      # that width / target_height == new_aspect_ratio.
      return tf.to_int32(
          tf.round(
              tf.to_float(orig_height) * orig_aspect_ratio / new_aspect_ratio))

    target_height = tf.cond(orig_aspect_ratio >= new_aspect_ratio,
                            lambda: orig_height, target_height_fn)

    def target_width_fn():
      # Requested ratio is narrower than the image: keep height, shrink width
      # so that target_width / height == new_aspect_ratio.
      return tf.to_int32(
          tf.round(
              tf.to_float(orig_width) * new_aspect_ratio / orig_aspect_ratio))

    target_width = tf.cond(orig_aspect_ratio <= new_aspect_ratio,
                           lambda: orig_width, target_width_fn)

    # either offset_height = 0 and offset_width is randomly chosen from
    # [0, orig_width - target_width], or else offset_width = 0 and
    # offset_height is randomly chosen from [0, orig_height - target_height]
    offset_height = _random_integer(0, orig_height - target_height + 1, seed)
    offset_width = _random_integer(0, orig_width - target_width + 1, seed)

    new_image = tf.image.crop_to_bounding_box(
        image, offset_height, offset_width, target_height, target_width)

    # Crop window expressed in the original image's normalized coordinates,
    # [ymin, xmin, ymax, xmax].
    im_box = tf.stack([
        tf.to_float(offset_height) / tf.to_float(orig_height),
        tf.to_float(offset_width) / tf.to_float(orig_width),
        tf.to_float(offset_height + target_height) / tf.to_float(orig_height),
        tf.to_float(offset_width + target_width) / tf.to_float(orig_width)
    ])

    boxlist = box_list.BoxList(boxes)
    # Carry labels along so pruning keeps boxes and labels aligned.
    boxlist.add_field('labels', labels)

    im_boxlist = box_list.BoxList(tf.expand_dims(im_box, 0))

    # remove boxes whose overlap with the image is less than overlap_thresh
    overlapping_boxlist, keep_ids = box_list_ops.prune_non_overlapping_boxes(
        boxlist, im_boxlist, overlap_thresh)

    # change the coordinate of the remaining boxes
    new_labels = overlapping_boxlist.get_field('labels')
    new_boxlist = box_list_ops.change_coordinate_frame(overlapping_boxlist,
                                                       im_box)
    # Clip boxes to the crop window (the whole [0, 1] frame after the
    # coordinate change above).
    new_boxlist = box_list_ops.clip_to_window(
        new_boxlist, tf.constant([0.0, 0.0, 1.0, 1.0], tf.float32))
    new_boxes = new_boxlist.get()

    result = [new_image, new_boxes, new_labels]

    if masks is not None:
      # Keep only the masks whose boxes survived pruning, then crop them with
      # the same pixel window as the image (leading -1 keeps all instances).
      masks_inside_window = tf.gather(masks, keep_ids)
      masks_box_begin = tf.stack([0, offset_height, offset_width])
      masks_box_size = tf.stack([-1, target_height, target_width])
      new_masks = tf.slice(masks_inside_window, masks_box_begin,
                           masks_box_size)
      result.append(new_masks)

    if keypoints is not None:
      # Keypoints follow the same keep_ids, are re-expressed in the crop's
      # coordinate frame, and any falling outside the crop are pruned.
      keypoints_inside_window = tf.gather(keypoints, keep_ids)
      new_keypoints = keypoint_ops.change_coordinate_frame(
          keypoints_inside_window, im_box)
      new_keypoints = keypoint_ops.prune_outside_window(new_keypoints,
                                                        [0.0, 0.0, 1.0, 1.0])
      result.append(new_keypoints)

    return tuple(result)
def random_black_patches(image,
                         max_black_patches=10,
                         probability=0.5,
                         size_to_image_ratio=0.1,
                         random_seed=None):
  """Randomly adds some black patches to the image.

  This op adds up to max_black_patches square black patches of a fixed size
  to the image where size is specified via the size_to_image_ratio parameter.

  Args:
    image: rank 3 float32 tensor containing 1 image -> [height, width,
           channels] with pixel values varying between [0, 1].
    max_black_patches: number of times that the function tries to add a
                       black box to the image.
    probability: at each try, what is the chance of adding a box.
    size_to_image_ratio: Determines the ratio of the size of the black patches
                         to the size of the image.
                         box_size = size_to_image_ratio *
                                    min(image_width, image_height)
    random_seed: random seed.

  Returns:
    image
  """
  def add_black_patch_to_image(image):
    """Function for adding one patch to the image.

    Args:
      image: image

    Returns:
      image with a randomly added black box
    """
    image_shape = tf.shape(image)
    image_height = image_shape[0]
    image_width = image_shape[1]
    # Patch side length relative to the shorter image dimension.
    box_size = tf.to_int32(
        tf.multiply(
            tf.minimum(tf.to_float(image_height), tf.to_float(image_width)),
            size_to_image_ratio))
    # Top-left corner is drawn in normalized coordinates; the maxval keeps the
    # whole patch inside the image.
    normalized_y_min = tf.random_uniform(
        [], minval=0.0, maxval=(1.0 - size_to_image_ratio), seed=random_seed)
    normalized_x_min = tf.random_uniform(
        [], minval=0.0, maxval=(1.0 - size_to_image_ratio), seed=random_seed)
    y_min = tf.to_int32(normalized_y_min * tf.to_float(image_height))
    x_min = tf.to_int32(normalized_x_min * tf.to_float(image_width))
    # Build a mask that is 0 inside the patch and 1 elsewhere, then multiply:
    # pixels under the patch become black.
    black_box = tf.ones([box_size, box_size, 3], dtype=tf.float32)
    mask = 1.0 - tf.image.pad_to_bounding_box(black_box, y_min, x_min,
                                              image_height, image_width)
    image = tf.multiply(image, mask)
    return image

  with tf.name_scope('RandomBlackPatchInImage', values=[image]):
    for _ in range(max_black_patches):
      # Each attempt independently decides (with 1 - probability chance of
      # keeping the image unchanged) whether to add one patch.
      random_prob = tf.random_uniform(
          [], minval=0.0, maxval=1.0, dtype=tf.float32, seed=random_seed)
      image = tf.cond(
          tf.greater(random_prob, probability), lambda: image,
          lambda: add_black_patch_to_image(image))
    return image
def image_to_float(image):
  """Used in Faster R-CNN. Casts image pixel values to float.

  Args:
    image: input image which might be in tf.uint8 or sth else format

  Returns:
    image: image in tf.float32 format.
  """
  with tf.name_scope('ImageToFloat', values=[image]):
    return tf.to_float(image)
def random_resize_method(image, target_size):
  """Uses a random resize method to resize the image to target size.

  Args:
    image: a rank 3 tensor.
    target_size: a list of [target_height, target_width]

  Returns:
    resized image.
  """
  # _apply_with_random_selector picks one of the 4 tf.image.ResizeMethod
  # cases (0..3) at random and applies the selector with it.
  def _resize_with_method(input_image, method):
    return tf.image.resize_images(input_image, target_size, method)

  return _apply_with_random_selector(image, _resize_with_method, num_cases=4)
def resize_to_range(image,
                    masks=None,
                    min_dimension=None,
                    max_dimension=None,
                    align_corners=False):
  """Resizes an image so its dimensions are within the provided value.

  The output size can be described by two cases:
  1. If the image can be rescaled so its minimum dimension is equal to the
     provided value without the other dimension exceeding max_dimension,
     then do so.
  2. Otherwise, resize so the largest dimension is equal to max_dimension.

  Args:
    image: A 3D tensor of shape [height, width, channels]
    masks: (optional) rank 3 float32 tensor with shape
           [num_instances, height, width] containing instance masks.
    min_dimension: (optional) (scalar) desired size of the smaller image
                   dimension.
    max_dimension: (optional) (scalar) maximum allowed size
                   of the larger image dimension.
    align_corners: bool. If true, exactly align all 4 corners of the input
                   and output. Defaults to False.

  Returns:
    A 3D tensor of shape [new_height, new_width, channels],
    where the image has been resized (with bilinear interpolation) so that
    min(new_height, new_width) == min_dimension or
    max(new_height, new_width) == max_dimension.

    If masks is not None, also outputs masks:
    A 3D tensor of shape [num_instances, new_height, new_width]

  Raises:
    ValueError: if the image is not a 3D tensor.
  """
  if len(image.get_shape()) != 3:
    raise ValueError('Image should be 3D tensor')

  with tf.name_scope('ResizeToRange', values=[image, min_dimension]):
    image_shape = tf.shape(image)
    orig_height = tf.to_float(image_shape[0])
    orig_width = tf.to_float(image_shape[1])
    orig_min_dim = tf.minimum(orig_height, orig_width)

    # Calculates the larger of the possible sizes
    min_dimension = tf.constant(min_dimension, dtype=tf.float32)
    large_scale_factor = min_dimension / orig_min_dim
    # Scaling orig_(height|width) by large_scale_factor will make the smaller
    # dimension equal to min_dimension, save for floating point rounding
    # errors. For reasonably-sized images, taking the nearest integer will
    # reliably eliminate this error.
    large_height = tf.to_int32(tf.round(orig_height * large_scale_factor))
    large_width = tf.to_int32(tf.round(orig_width * large_scale_factor))
    large_size = tf.stack([large_height, large_width])

    if max_dimension:
      # Calculates the smaller of the possible sizes, use that if the larger
      # is too big.
      orig_max_dim = tf.maximum(orig_height, orig_width)
      max_dimension = tf.constant(max_dimension, dtype=tf.float32)
      small_scale_factor = max_dimension / orig_max_dim
      # Scaling orig_(height|width) by small_scale_factor will make the larger
      # dimension equal to max_dimension, save for floating point rounding
      # errors. For reasonably-sized images, taking the nearest integer will
      # reliably eliminate this error.
      small_height = tf.to_int32(tf.round(orig_height * small_scale_factor))
      small_width = tf.to_int32(tf.round(orig_width * small_scale_factor))
      small_size = tf.stack([small_height, small_width])

      # Prefer the min_dimension-based size; fall back to the
      # max_dimension-based size only when the former would exceed the cap.
      new_size = tf.cond(
          tf.to_float(tf.reduce_max(large_size)) > max_dimension,
          lambda: small_size, lambda: large_size)
    else:
      new_size = large_size

    new_image = tf.image.resize_images(image, new_size,
                                       align_corners=align_corners)

    result = new_image
    if masks is not None:
      num_instances = tf.shape(masks)[0]

      def resize_masks_branch():
        # Masks use nearest-neighbor resizing (via a temporary channel dim)
        # so values stay binary.
        new_masks = tf.expand_dims(masks, 3)
        new_masks = tf.image.resize_nearest_neighbor(
            new_masks, new_size, align_corners=align_corners)
        new_masks = tf.squeeze(new_masks, axis=3)
        return new_masks

      def reshape_masks_branch():
        # With zero instances a resize op would fail; a reshape to the new
        # spatial size (still zero elements) keeps shapes consistent.
        new_masks = tf.reshape(masks, [0, new_size[0], new_size[1]])
        return new_masks

      masks = tf.cond(num_instances > 0, resize_masks_branch,
                      reshape_masks_branch)
      result = [new_image, masks]

    return result
def scale_boxes_to_pixel_coordinates(image, boxes, keypoints=None):
  """Scales boxes from normalized to pixel coordinates.

  Args:
    image: A 3D float32 tensor of shape [height, width, channels].
    boxes: A 2D float32 tensor of shape [num_boxes, 4] containing the bounding
      boxes in normalized coordinates. Each row is of the form
      [ymin, xmin, ymax, xmax].
    keypoints: (optional) rank 3 float32 tensor with shape
      [num_instances, num_keypoints, 2]. The keypoints are in y-x normalized
      coordinates.

  Returns:
    image: unchanged input image.
    scaled_boxes: a 2D float32 tensor of shape [num_boxes, 4] containing the
      bounding boxes in pixel coordinates.
    scaled_keypoints: a 3D float32 tensor with shape
      [num_instances, num_keypoints, 2] containing the keypoints in pixel
      coordinates.
  """
  image_shape = tf.shape(image)
  height = image_shape[0]
  width = image_shape[1]
  # Multiply normalized coordinates by the image's pixel dimensions.
  pixel_boxes = box_list_ops.scale(box_list.BoxList(boxes), height,
                                   width).get()
  outputs = [image, pixel_boxes]
  if keypoints is not None:
    outputs.append(keypoint_ops.scale(keypoints, height, width))
  return tuple(outputs)
# pylint: disable=g-doc-return-or-yield
def resize_image(image,
                 masks=None,
                 new_height=600,
                 new_width=1024,
                 method=tf.image.ResizeMethod.BILINEAR,
                 align_corners=False):
  """See `tf.image.resize_images` for detailed doc."""
  with tf.name_scope(
      'ResizeImage',
      values=[image, new_height, new_width, method, align_corners]):
    new_image = tf.image.resize_images(image, [new_height, new_width],
                                       method=method,
                                       align_corners=align_corners)
    result = new_image
    if masks is not None:
      num_instances = tf.shape(masks)[0]
      new_size = tf.constant([new_height, new_width], dtype=tf.int32)

      def resize_masks_branch():
        # Masks use nearest-neighbor resizing (via a temporary channel dim)
        # so values stay binary, regardless of `method` used for the image.
        new_masks = tf.expand_dims(masks, 3)
        new_masks = tf.image.resize_nearest_neighbor(
            new_masks, new_size, align_corners=align_corners)
        new_masks = tf.squeeze(new_masks, axis=3)
        return new_masks

      def reshape_masks_branch():
        # With zero instances a resize op would fail; a reshape to the new
        # spatial size (still zero elements) keeps shapes consistent.
        new_masks = tf.reshape(masks, [0, new_size[0], new_size[1]])
        return new_masks

      masks = tf.cond(num_instances > 0, resize_masks_branch,
                      reshape_masks_branch)
      result = [new_image, masks]

    return result
def subtract_channel_mean(image, means=None):
  """Normalizes an image by subtracting a mean from each channel.

  Args:
    image: A 3D tensor of shape [height, width, channels]
    means: float list containing a mean for each channel

  Returns:
    normalized_images: a tensor of shape [height, width, channels]

  Raises:
    ValueError: if image is not a 3D tensor or if the number of means is not
      equal to the number of channels.
  """
  with tf.name_scope('SubtractChannelMean', values=[image, means]):
    if len(image.get_shape()) != 3:
      raise ValueError('Input must be of size [height, width, channels]')
    if len(means) != image.get_shape()[-1]:
      raise ValueError('len(means) must match the number of channels')
    # [[means]] has shape [1, 1, channels], so the subtraction broadcasts the
    # per-channel means across height and width.
    return image - [[means]]
def one_hot_encoding(labels, num_classes=None):
  """One-hot encodes the multiclass labels.

  Example usage:
    labels = tf.constant([1, 4], dtype=tf.int32)
    one_hot = OneHotEncoding(labels, num_classes=5)
    one_hot.eval()    # evaluates to [0, 1, 0, 0, 1]

  Args:
    labels: A tensor of shape [None] corresponding to the labels.
    num_classes: Number of classes in the dataset.

  Returns:
    onehot_labels: a tensor of shape [num_classes] corresponding to the one
      hot encoding of the labels.

  Raises:
    ValueError: if num_classes is not specified.
  """
  with tf.name_scope('OneHotEncoding', values=[labels]):
    if num_classes is None:
      raise ValueError('num_classes must be specified')

    # Encode each label as a one-hot row, then collapse the rows with a max
    # so the output marks every class present in `labels`.
    per_label_one_hot = tf.one_hot(labels, num_classes, 1, 0)
    return tf.reduce_max(per_label_one_hot, 0)
def rgb_to_gray(image):
  """Converts a 3 channel RGB image to a 1 channel grayscale image.

  Args:
    image: Rank 3 float32 tensor containing 1 image -> [height, width, 3]
           with pixel values varying between [0, 1].

  Returns:
    image: A single channel grayscale image -> [height, width, 1].
  """
  gray_image = tf.image.rgb_to_grayscale(image)
  return gray_image
def ssd_random_crop(image,
                    boxes,
                    labels,
                    masks=None,
                    keypoints=None,
                    min_object_covered=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
                    aspect_ratio_range=((0.5, 2.0),) * 7,
                    area_range=((0.1, 1.0),) * 7,
                    overlap_thresh=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
                    random_coef=(0.15,) * 7,
                    seed=None):
  """Random crop preprocessing with default parameters as in SSD paper.

  Liu et al., SSD: Single shot multibox detector.
  For further information on random crop preprocessing refer to RandomCrop
  function above.

  Args:
    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
           with pixel values varying between [0, 1].
    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
           Boxes are in normalized form meaning their coordinates vary
           between [0, 1].
           Each row is in the form of [ymin, xmin, ymax, xmax].
    labels: rank 1 int32 tensor containing the object classes.
    masks: (optional) rank 3 float32 tensor with shape
           [num_instances, height, width] containing instance masks. The masks
           are of the same height, width as the input `image`.
    keypoints: (optional) rank 3 float32 tensor with shape
               [num_instances, num_keypoints, 2]. The keypoints are in y-x
               normalized coordinates.
    min_object_covered: the cropped image must cover at least this fraction of
                        at least one of the input bounding boxes.
    aspect_ratio_range: allowed range for aspect ratio of cropped image.
    area_range: allowed range for area ratio between cropped image and the
                original image.
    overlap_thresh: minimum overlap thresh with new cropped
                    image to keep the box.
    random_coef: a random coefficient that defines the chance of getting the
                 original image. If random_coef is 0, we will always get the
                 cropped image, and if it is 1.0, we will always get the
                 original image.
    seed: random seed.

  Returns:
    image: image which is the same rank as input image.
    boxes: boxes which is the same rank as input boxes.
           Boxes are in normalized form.
    labels: new labels.

    If masks, or keypoints is not None, the function also returns:
    masks: rank 3 float32 tensor with shape [num_instances, height, width]
           containing instance masks.
    keypoints: rank 3 float32 tensor with shape
               [num_instances, num_keypoints, 2]
  """
  def random_crop_selector(selected_result, index):
    """Applies random_crop_image to selected result.

    Args:
      selected_result: A tuple containing image, boxes, labels, keypoints (if
        not None), and masks (if not None).
      index: The index that was randomly selected.

    Returns: A tuple containing image, boxes, labels, keypoints (if not None),
      and masks (if not None).
    """
    # The tuple only contains non-None members, so the positions of masks and
    # keypoints shift depending on which optional inputs were supplied; `i`
    # tracks the next position to read.
    i = 3
    image, boxes, labels = selected_result[:i]
    selected_masks = None
    selected_keypoints = None
    if masks is not None:
      selected_masks = selected_result[i]
      i += 1
    if keypoints is not None:
      selected_keypoints = selected_result[i]

    # Each parameter tuple is indexed by the randomly selected case, giving
    # one SSD crop configuration per case.
    return random_crop_image(
        image=image,
        boxes=boxes,
        labels=labels,
        masks=selected_masks,
        keypoints=selected_keypoints,
        min_object_covered=min_object_covered[index],
        aspect_ratio_range=aspect_ratio_range[index],
        area_range=area_range[index],
        overlap_thresh=overlap_thresh[index],
        random_coef=random_coef[index],
        seed=seed)

  result = _apply_with_random_selector_tuples(
      tuple(
          t for t in (image, boxes, labels, masks, keypoints)
          if t is not None),
      random_crop_selector,
      num_cases=len(min_object_covered))
  return result
def ssd_random_crop_pad(image,
                        boxes,
                        labels,
                        min_object_covered=(0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
                        aspect_ratio_range=((0.5, 2.0),) * 6,
                        area_range=((0.1, 1.0),) * 6,
                        overlap_thresh=(0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
                        random_coef=(0.15,) * 6,
                        min_padded_size_ratio=(None,) * 6,
                        max_padded_size_ratio=(None,) * 6,
                        pad_color=(None,) * 6,
                        seed=None):
  """Random crop preprocessing with default parameters as in SSD paper.

  Liu et al., SSD: Single shot multibox detector.
  For further information on random crop preprocessing refer to RandomCrop
  function above.

  Args:
    image: rank 3 float32 tensor containing 1 image -> [height, width,
           channels] with pixel values varying between [0, 1].
    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
           Boxes are in normalized form meaning their coordinates vary
           between [0, 1].
           Each row is in the form of [ymin, xmin, ymax, xmax].
    labels: rank 1 int32 tensor containing the object classes.
    min_object_covered: the cropped image must cover at least this fraction of
                        at least one of the input bounding boxes.
    aspect_ratio_range: allowed range for aspect ratio of cropped image.
    area_range: allowed range for area ratio between cropped image and the
                original image.
    overlap_thresh: minimum overlap thresh with new cropped
                    image to keep the box.
    random_coef: a random coefficient that defines the chance of getting the
                 original image. If random_coef is 0, we will always get the
                 cropped image, and if it is 1.0, we will always get the
                 original image.
    min_padded_size_ratio: min ratio of padded image height and width to the
                           input image's height and width. If None, it will
                           be set to [0.0, 0.0].
    max_padded_size_ratio: max ratio of padded image height and width to the
                           input image's height and width. If None, it will
                           be set to [2.0, 2.0].
    pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32.
               if set as None, it will be set to average color of the randomly
               cropped image.
    seed: random seed.

  Returns:
    image: Image shape will be [new_height, new_width, channels].
    boxes: boxes which is the same rank as input boxes. Boxes are in
           normalized form.
    new_labels: new labels.
  """
  def random_crop_pad_selector(image_boxes_labels, index):
    # `index` is the randomly selected case; every per-case parameter tuple is
    # indexed by it so each case corresponds to one crop/pad configuration.
    image, boxes, labels = image_boxes_labels
    return random_crop_pad_image(
        image,
        boxes,
        labels,
        min_object_covered=min_object_covered[index],
        aspect_ratio_range=aspect_ratio_range[index],
        area_range=area_range[index],
        overlap_thresh=overlap_thresh[index],
        random_coef=random_coef[index],
        min_padded_size_ratio=min_padded_size_ratio[index],
        max_padded_size_ratio=max_padded_size_ratio[index],
        pad_color=pad_color[index],
        seed=seed)

  new_image, new_boxes, new_labels = _apply_with_random_selector_tuples(
      (image, boxes, labels),
      random_crop_pad_selector,
      num_cases=len(min_object_covered))
  return new_image, new_boxes, new_labels
def ssd_random_crop_fixed_aspect_ratio(
    image,
    boxes,
    labels,
    masks=None,
    keypoints=None,
    min_object_covered=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
    aspect_ratio=1.0,
    area_range=((0.1, 1.0),) * 7,
    overlap_thresh=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
    random_coef=(0.15,) * 7,
    seed=None):
  """Random crop preprocessing with default parameters as in SSD paper.

  Liu et al., SSD: Single shot multibox detector.
  For further information on random crop preprocessing refer to RandomCrop
  function above.

  The only difference is that the aspect ratio of the crops are fixed.

  Args:
    image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
           with pixel values varying between [0, 1].
    boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
           Boxes are in normalized form meaning their coordinates vary
           between [0, 1].
           Each row is in the form of [ymin, xmin, ymax, xmax].
    labels: rank 1 int32 tensor containing the object classes.
    masks: (optional) rank 3 float32 tensor with shape
           [num_instances, height, width] containing instance masks. The masks
           are of the same height, width as the input `image`.
    keypoints: (optional) rank 3 float32 tensor with shape
               [num_instances, num_keypoints, 2]. The keypoints are in y-x
               normalized coordinates.
    min_object_covered: the cropped image must cover at least this fraction of
                        at least one of the input bounding boxes.
    aspect_ratio: aspect ratio of the cropped image.
    area_range: allowed range for area ratio between cropped image and the
                original image.
    overlap_thresh: minimum overlap thresh with new cropped
                    image to keep the box.
    random_coef: a random coefficient that defines the chance of getting the
                 original image. If random_coef is 0, we will always get the
                 cropped image, and if it is 1.0, we will always get the
                 original image.
    seed: random seed.

  Returns:
    image: image which is the same rank as input image.
    boxes: boxes which is the same rank as input boxes.
           Boxes are in normalized form.
    labels: new labels.

    If masks, or keypoints is not None, the function also returns:
    masks: rank 3 float32 tensor with shape [num_instances, height, width]
           containing instance masks.
    keypoints: rank 3 float32 tensor with shape
               [num_instances, num_keypoints, 2]
  """
  # Force every ssd_random_crop case to use the single fixed aspect ratio.
  aspect_ratio_range = ((aspect_ratio, aspect_ratio),) * len(area_range)

  crop_result = ssd_random_crop(image, boxes, labels, masks, keypoints,
                                min_object_covered, aspect_ratio_range,
                                area_range, overlap_thresh, random_coef, seed)
  # ssd_random_crop returns a variable-length tuple: optional masks and
  # keypoints only appear when they were passed in, so `i` tracks the next
  # position to read.
  i = 3
  new_image, new_boxes, new_labels = crop_result[:i]
  new_masks = None
  new_keypoints = None
  if masks is not None:
    new_masks = crop_result[i]
    i += 1
  if keypoints is not None:
    new_keypoints = crop_result[i]

  # A second crop enforces the exact aspect ratio on the result.
  result = random_crop_to_aspect_ratio(
      new_image,
      new_boxes,
      new_labels,
      new_masks,
      new_keypoints,
      aspect_ratio=aspect_ratio,
      seed=seed)

  return result
def get_default_func_arg_map(include_instance_masks=False,
                             include_keypoints=False):
  """Returns the default mapping from a preprocessor function to its args.

  Args:
    include_instance_masks: If True, preprocessing functions will modify the
      instance masks, too.
    include_keypoints: If True, preprocessing functions will modify the
      keypoints, too.

  Returns:
    A map from preprocessing functions to the arguments they receive.
  """
  # When the optional inputs are excluded these stay None; `preprocess` skips
  # None entries when pulling arguments out of the tensor dict.
  groundtruth_instance_masks = None
  if include_instance_masks:
    groundtruth_instance_masks = (
        fields.InputDataFields.groundtruth_instance_masks)

  groundtruth_keypoints = None
  if include_keypoints:
    groundtruth_keypoints = fields.InputDataFields.groundtruth_keypoints

  # Each entry maps a preprocessing function to the tensor-dict field names
  # it consumes (and returns), in positional-argument order.
  prep_func_arg_map = {
      normalize_image: (fields.InputDataFields.image,),
      random_horizontal_flip: (
          fields.InputDataFields.image,
          fields.InputDataFields.groundtruth_boxes,
          groundtruth_instance_masks,
          groundtruth_keypoints,),
      random_pixel_value_scale: (fields.InputDataFields.image,),
      random_image_scale: (
          fields.InputDataFields.image,
          groundtruth_instance_masks,),
      random_rgb_to_gray: (fields.InputDataFields.image,),
      random_adjust_brightness: (fields.InputDataFields.image,),
      random_adjust_contrast: (fields.InputDataFields.image,),
      random_adjust_hue: (fields.InputDataFields.image,),
      random_adjust_saturation: (fields.InputDataFields.image,),
      random_distort_color: (fields.InputDataFields.image,),
      random_jitter_boxes: (fields.InputDataFields.groundtruth_boxes,),
      random_crop_image: (
          fields.InputDataFields.image,
          fields.InputDataFields.groundtruth_boxes,
          fields.InputDataFields.groundtruth_classes,
          groundtruth_instance_masks,
          groundtruth_keypoints,),
      random_pad_image: (fields.InputDataFields.image,
                         fields.InputDataFields.groundtruth_boxes),
      random_crop_pad_image: (fields.InputDataFields.image,
                              fields.InputDataFields.groundtruth_boxes,
                              fields.InputDataFields.groundtruth_classes),
      random_crop_to_aspect_ratio: (
          fields.InputDataFields.image,
          fields.InputDataFields.groundtruth_boxes,
          fields.InputDataFields.groundtruth_classes,
          groundtruth_instance_masks,
          groundtruth_keypoints,),
      random_black_patches: (fields.InputDataFields.image,),
      retain_boxes_above_threshold: (
          fields.InputDataFields.groundtruth_boxes,
          fields.InputDataFields.groundtruth_classes,
          fields.InputDataFields.groundtruth_label_scores,
          groundtruth_instance_masks,
          groundtruth_keypoints,),
      image_to_float: (fields.InputDataFields.image,),
      random_resize_method: (fields.InputDataFields.image,),
      resize_to_range: (
          fields.InputDataFields.image,
          groundtruth_instance_masks,),
      scale_boxes_to_pixel_coordinates: (
          fields.InputDataFields.image,
          fields.InputDataFields.groundtruth_boxes,
          groundtruth_keypoints,),
      flip_boxes: (fields.InputDataFields.groundtruth_boxes,),
      resize_image: (
          fields.InputDataFields.image,
          groundtruth_instance_masks,),
      subtract_channel_mean: (fields.InputDataFields.image,),
      one_hot_encoding: (fields.InputDataFields.groundtruth_image_classes,),
      rgb_to_gray: (fields.InputDataFields.image,),
      ssd_random_crop: (
          fields.InputDataFields.image,
          fields.InputDataFields.groundtruth_boxes,
          fields.InputDataFields.groundtruth_classes,
          groundtruth_instance_masks,
          groundtruth_keypoints,),
      ssd_random_crop_pad: (fields.InputDataFields.image,
                            fields.InputDataFields.groundtruth_boxes,
                            fields.InputDataFields.groundtruth_classes),
      ssd_random_crop_fixed_aspect_ratio: (
          fields.InputDataFields.image,
          fields.InputDataFields.groundtruth_boxes,
          fields.InputDataFields.groundtruth_classes,
          groundtruth_instance_masks,
          groundtruth_keypoints,),
  }

  return prep_func_arg_map
def preprocess(tensor_dict, preprocess_options, func_arg_map=None):
  """Preprocess images and bounding boxes.

  Various types of preprocessing based on the preprocess_options list,
  e.g. "crop image" (affects image and possibly boxes), "white balance
  image" (affects only image), etc. If preprocess_options is empty, no
  preprocessing is done.

  Args:
    tensor_dict: dictionary that contains images, boxes, and can contain other
                 things as well.
                 images-> rank 4 float32 tensor contains
                          1 image -> [1, height, width, 3].
                          with pixel values varying between [0, 1]
                 boxes-> rank 2 float32 tensor containing
                         the bounding boxes -> [N, 4].
                         Boxes are in normalized form meaning
                         their coordinates vary between [0, 1].
                         Each row is in the form
                         of [ymin, xmin, ymax, xmax].
    preprocess_options: It is a list of tuples, where each tuple contains a
                        function and a dictionary that contains arguments and
                        their values.
    func_arg_map: mapping from preprocessing functions to arguments that they
                  expect to receive and return. Defaults to
                  get_default_func_arg_map().

  Returns:
    tensor_dict: which contains the preprocessed images, bounding boxes, etc.

  Raises:
    ValueError: (a) If the functions passed to Preprocess
                    are not in func_arg_map.
                (b) If the arguments that a function needs
                    do not exist in tensor_dict.
                (c) If image in tensor_dict is not rank 4
  """
  if func_arg_map is None:
    func_arg_map = get_default_func_arg_map()

  # The preprocessing functions expect a rank 3 image, so temporarily strip
  # the leading batch dimension (rank 4 -> rank 3).
  if fields.InputDataFields.image in tensor_dict:
    images = tensor_dict[fields.InputDataFields.image]
    if len(images.get_shape()) != 4:
      raise ValueError('images in tensor_dict should be rank 4')
    # `axis` replaces the deprecated `squeeze_dims` keyword argument.
    image = tf.squeeze(images, axis=[0])
    tensor_dict[fields.InputDataFields.image] = image

  # Preprocess inputs based on preprocess_options.
  for func, params in preprocess_options:
    if func not in func_arg_map:
      raise ValueError('The function %s does not exist in func_arg_map' %
                       (func.__name__))
    arg_names = func_arg_map[func]
    for a in arg_names:
      if a is not None and a not in tensor_dict:
        raise ValueError('The function %s requires argument %s' %
                         (func.__name__, a))
    # A None placeholder in arg_names is forwarded as a literal None value.
    args = [tensor_dict[a] if a is not None else None for a in arg_names]
    results = func(*args, **params)
    if not isinstance(results, (list, tuple)):
      results = (results,)
    # Removes None args since the return values will not contain those.
    arg_names = [arg_name for arg_name in arg_names if arg_name is not None]
    for res, arg_name in zip(results, arg_names):
      tensor_dict[arg_name] = res

  # Restore the leading batch dimension (rank 3 -> rank 4) so the output image
  # is shaped like the input.
  if fields.InputDataFields.image in tensor_dict:
    image = tensor_dict[fields.InputDataFields.image]
    images = tf.expand_dims(image, 0)
    tensor_dict[fields.InputDataFields.image] = images

  return tensor_dict
object_detection/core/preprocessor_test.py
0 → 100644
View file @
44fa1d37
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.core.preprocessor."""
import
numpy
as
np
import
six
import
tensorflow
as
tf
from
object_detection.core
import
preprocessor
from
object_detection.core
import
standard_fields
as
fields
if
six
.
PY2
:
import
mock
# pylint: disable=g-import-not-at-top
else
:
from
unittest
import
mock
# pylint: disable=g-import-not-at-top
class
PreprocessorTest
(
tf
.
test
.
TestCase
):
  def createColorfulTestImage(self):
    """Builds a 1x200x400x3 uint8 image of four 100x200 colored quadrants."""
    ch255 = tf.fill([1, 100, 200, 1], tf.constant(255, dtype=tf.uint8))
    ch128 = tf.fill([1, 100, 200, 1], tf.constant(128, dtype=tf.uint8))
    ch0 = tf.fill([1, 100, 200, 1], tf.constant(0, dtype=tf.uint8))
    imr = tf.concat([ch255, ch0, ch0], 3)  # (255, 0, 0): red
    img = tf.concat([ch255, ch255, ch0], 3)  # (255, 255, 0): yellow
    imb = tf.concat([ch255, ch0, ch255], 3)  # (255, 0, 255): magenta
    imw = tf.concat([ch128, ch128, ch128], 3)  # (128, 128, 128): gray
    imu = tf.concat([imr, img], 2)  # top half, side by side along width
    imd = tf.concat([imb, imw], 2)  # bottom half
    im = tf.concat([imu, imd], 1)  # stack halves along height
    return im
  def createTestImages(self):
    """Returns a fixed 1x4x4x3 uint8 test image batch."""
    images_r = tf.constant([[[128, 128, 128, 128], [0, 0, 128, 128],
                             [0, 128, 128, 128], [192, 192, 128, 128]]],
                           dtype=tf.uint8)
    images_r = tf.expand_dims(images_r, 3)
    images_g = tf.constant([[[0, 0, 128, 128], [0, 0, 128, 128],
                             [0, 128, 192, 192], [192, 192, 128, 192]]],
                           dtype=tf.uint8)
    images_g = tf.expand_dims(images_g, 3)
    images_b = tf.constant([[[128, 128, 192, 0], [0, 0, 128, 192],
                             [0, 128, 128, 0], [192, 192, 192, 128]]],
                           dtype=tf.uint8)
    images_b = tf.expand_dims(images_b, 3)
    # Stack the three single-channel planes into an RGB image.
    images = tf.concat([images_r, images_g, images_b], 3)
    return images
def
createTestBoxes
(
self
):
boxes
=
tf
.
constant
(
[[
0.0
,
0.25
,
0.75
,
1.0
],
[
0.25
,
0.5
,
0.75
,
1.0
]],
dtype
=
tf
.
float32
)
return
boxes
def
createTestLabelScores
(
self
):
return
tf
.
constant
([
1.0
,
0.5
],
dtype
=
tf
.
float32
)
def
createTestLabelScoresWithMissingScore
(
self
):
return
tf
.
constant
([
0.5
,
np
.
nan
],
dtype
=
tf
.
float32
)
  def createTestMasks(self):
    """Returns two 3x3 float32 instance masks with values 0.0 / 255.0."""
    mask = np.array([
        [[255.0, 0.0, 0.0],
         [255.0, 0.0, 0.0],
         [255.0, 0.0, 0.0]],
        [[255.0, 255.0, 0.0],
         [255.0, 255.0, 0.0],
         [255.0, 255.0, 0.0]]])
    return tf.constant(mask, dtype=tf.float32)
  def createTestKeypoints(self):
    """Returns keypoints for two instances, three 2-D points each."""
    keypoints = np.array([
        [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
        [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]],
    ])
    return tf.constant(keypoints, dtype=tf.float32)
  def createTestKeypointsInsideCrop(self):
    """Returns keypoints (0.4-0.6 range) expected to survive a center crop."""
    keypoints = np.array([
        [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]],
        [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]],
    ])
    return tf.constant(keypoints, dtype=tf.float32)
  def createTestKeypointsOutsideCrop(self):
    """Returns keypoints (0.1-0.3 range) expected to fall outside a crop."""
    keypoints = np.array([
        [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
        [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
    ])
    return tf.constant(keypoints, dtype=tf.float32)
def
createKeypointFlipPermutation
(
self
):
return
np
.
array
([
0
,
2
,
1
],
dtype
=
np
.
int32
)
def
createTestLabels
(
self
):
labels
=
tf
.
constant
([
1
,
2
],
dtype
=
tf
.
int32
)
return
labels
def
createTestBoxesOutOfImage
(
self
):
boxes
=
tf
.
constant
(
[[
-
0.1
,
0.25
,
0.75
,
1
],
[
0.25
,
0.5
,
0.75
,
1.1
]],
dtype
=
tf
.
float32
)
return
boxes
  def expectedImagesAfterNormalization(self):
    """createTestImages() after normalize_image from [0, 256] to [-1, 1]."""
    images_r = tf.constant([[[0, 0, 0, 0], [-1, -1, 0, 0],
                             [-1, 0, 0, 0], [0.5, 0.5, 0, 0]]],
                           dtype=tf.float32)
    images_r = tf.expand_dims(images_r, 3)
    images_g = tf.constant([[[-1, -1, 0, 0], [-1, -1, 0, 0],
                             [-1, 0, 0.5, 0.5], [0.5, 0.5, 0, 0.5]]],
                           dtype=tf.float32)
    images_g = tf.expand_dims(images_g, 3)
    images_b = tf.constant([[[0, 0, 0.5, -1], [-1, -1, 0, 0.5],
                             [-1, 0, 0, -1], [0.5, 0.5, 0.5, 0]]],
                           dtype=tf.float32)
    images_b = tf.expand_dims(images_b, 3)
    images = tf.concat([images_r, images_g, images_b], 3)
    return images
  def expectedMaxImageAfterColorScale(self):
    """Normalized test images shifted by +0.1 (upper color-scale bound)."""
    images_r = tf.constant([[[0.1, 0.1, 0.1, 0.1], [-0.9, -0.9, 0.1, 0.1],
                             [-0.9, 0.1, 0.1, 0.1], [0.6, 0.6, 0.1, 0.1]]],
                           dtype=tf.float32)
    images_r = tf.expand_dims(images_r, 3)
    images_g = tf.constant([[[-0.9, -0.9, 0.1, 0.1], [-0.9, -0.9, 0.1, 0.1],
                             [-0.9, 0.1, 0.6, 0.6], [0.6, 0.6, 0.1, 0.6]]],
                           dtype=tf.float32)
    images_g = tf.expand_dims(images_g, 3)
    images_b = tf.constant([[[0.1, 0.1, 0.6, -0.9], [-0.9, -0.9, 0.1, 0.6],
                             [-0.9, 0.1, 0.1, -0.9], [0.6, 0.6, 0.6, 0.1]]],
                           dtype=tf.float32)
    images_b = tf.expand_dims(images_b, 3)
    images = tf.concat([images_r, images_g, images_b], 3)
    return images
  def expectedMinImageAfterColorScale(self):
    """Normalized test images shifted by -0.1 (lower color-scale bound)."""
    images_r = tf.constant([[[-0.1, -0.1, -0.1, -0.1], [-1, -1, -0.1, -0.1],
                             [-1, -0.1, -0.1, -0.1], [0.4, 0.4, -0.1, -0.1]]],
                           dtype=tf.float32)
    images_r = tf.expand_dims(images_r, 3)
    images_g = tf.constant([[[-1, -1, -0.1, -0.1], [-1, -1, -0.1, -0.1],
                             [-1, -0.1, 0.4, 0.4], [0.4, 0.4, -0.1, 0.4]]],
                           dtype=tf.float32)
    images_g = tf.expand_dims(images_g, 3)
    images_b = tf.constant([[[-0.1, -0.1, 0.4, -1], [-1, -1, -0.1, 0.4],
                             [-1, -0.1, -0.1, -1], [0.4, 0.4, 0.4, -0.1]]],
                           dtype=tf.float32)
    images_b = tf.expand_dims(images_b, 3)
    images = tf.concat([images_r, images_g, images_b], 3)
    return images
  def expectedImagesAfterMirroring(self):
    """expectedImagesAfterNormalization() flipped left-right."""
    images_r = tf.constant([[[0, 0, 0, 0], [0, 0, -1, -1],
                             [0, 0, 0, -1], [0, 0, 0.5, 0.5]]],
                           dtype=tf.float32)
    images_r = tf.expand_dims(images_r, 3)
    images_g = tf.constant([[[0, 0, -1, -1], [0, 0, -1, -1],
                             [0.5, 0.5, 0, -1], [0.5, 0, 0.5, 0.5]]],
                           dtype=tf.float32)
    images_g = tf.expand_dims(images_g, 3)
    images_b = tf.constant([[[-1, 0.5, 0, 0], [0.5, 0, -1, -1],
                             [-1, 0, 0, -1], [0, 0.5, 0.5, 0.5]]],
                           dtype=tf.float32)
    images_b = tf.expand_dims(images_b, 3)
    images = tf.concat([images_r, images_g, images_b], 3)
    return images
def
expectedBoxesAfterMirroring
(
self
):
boxes
=
tf
.
constant
([[
0.0
,
0.0
,
0.75
,
0.75
],
[
0.25
,
0.0
,
0.75
,
0.5
]],
dtype
=
tf
.
float32
)
return
boxes
def
expectedBoxesAfterXY
(
self
):
boxes
=
tf
.
constant
([[
0.25
,
0.0
,
1.0
,
0.75
],
[
0.5
,
0.25
,
1
,
0.75
]],
dtype
=
tf
.
float32
)
return
boxes
  def expectedMasksAfterMirroring(self):
    """createTestMasks() flipped left-right along the last axis."""
    mask = np.array([
        [[0.0, 0.0, 255.0],
         [0.0, 0.0, 255.0],
         [0.0, 0.0, 255.0]],
        [[0.0, 255.0, 255.0],
         [0.0, 255.0, 255.0],
         [0.0, 255.0, 255.0]]])
    return tf.constant(mask, dtype=tf.float32)
def
expectedLabelScoresAfterThresholding
(
self
):
return
tf
.
constant
([
1.0
],
dtype
=
tf
.
float32
)
def
expectedBoxesAfterThresholding
(
self
):
return
tf
.
constant
([[
0.0
,
0.25
,
0.75
,
1.0
]],
dtype
=
tf
.
float32
)
def
expectedLabelsAfterThresholding
(
self
):
return
tf
.
constant
([
1
],
dtype
=
tf
.
float32
)
  def expectedMasksAfterThresholding(self):
    """The mask of the single box retained after score thresholding."""
    mask = np.array([
        [[255.0, 0.0, 0.0],
         [255.0, 0.0, 0.0],
         [255.0, 0.0, 0.0]]])
    return tf.constant(mask, dtype=tf.float32)
  def expectedKeypointsAfterThresholding(self):
    """Keypoints of the single box retained after score thresholding."""
    keypoints = np.array([
        [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]]
    ])
    return tf.constant(keypoints, dtype=tf.float32)
def
expectedLabelScoresAfterThresholdingWithMissingScore
(
self
):
return
tf
.
constant
([
np
.
nan
],
dtype
=
tf
.
float32
)
def
expectedBoxesAfterThresholdingWithMissingScore
(
self
):
return
tf
.
constant
([[
0.25
,
0.5
,
0.75
,
1
]],
dtype
=
tf
.
float32
)
def
expectedLabelsAfterThresholdingWithMissingScore
(
self
):
return
tf
.
constant
([
2
],
dtype
=
tf
.
float32
)
  def testNormalizeImage(self):
    """normalize_image maps [0, 256] pixels to [-1, 1] with shape preserved."""
    preprocess_options = [(preprocessor.normalize_image, {
        'original_minval': 0,
        'original_maxval': 256,
        'target_minval': -1,
        'target_maxval': 1
    })]
    images = self.createTestImages()
    tensor_dict = {fields.InputDataFields.image: images}
    tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
    images = tensor_dict[fields.InputDataFields.image]
    images_expected = self.expectedImagesAfterNormalization()
    with self.test_session() as sess:
      (images_, images_expected_) = sess.run([images, images_expected])
      images_shape_ = images_.shape
      images_expected_shape_ = images_expected_.shape
      expected_shape = [1, 4, 4, 3]
      self.assertAllEqual(images_expected_shape_, images_shape_)
      self.assertAllEqual(images_shape_, expected_shape)
      self.assertAllClose(images_, images_expected_)
  def testRetainBoxesAboveThreshold(self):
    """Boxes/labels/scores below the 0.6 score threshold are dropped."""
    boxes = self.createTestBoxes()
    labels = self.createTestLabels()
    label_scores = self.createTestLabelScores()
    (retained_boxes, retained_labels,
     retained_label_scores) = preprocessor.retain_boxes_above_threshold(
         boxes, labels, label_scores, threshold=0.6)
    with self.test_session() as sess:
      (retained_boxes_, retained_labels_, retained_label_scores_,
       expected_retained_boxes_, expected_retained_labels_,
       expected_retained_label_scores_) = sess.run([
           retained_boxes, retained_labels, retained_label_scores,
           self.expectedBoxesAfterThresholding(),
           self.expectedLabelsAfterThresholding(),
           self.expectedLabelScoresAfterThresholding()])
      self.assertAllClose(retained_boxes_, expected_retained_boxes_)
      self.assertAllClose(retained_labels_, expected_retained_labels_)
      self.assertAllClose(retained_label_scores_,
                          expected_retained_label_scores_)
  def testRetainBoxesAboveThresholdWithMasks(self):
    """Masks are filtered consistently with their boxes' score threshold."""
    boxes = self.createTestBoxes()
    labels = self.createTestLabels()
    label_scores = self.createTestLabelScores()
    masks = self.createTestMasks()
    _, _, _, retained_masks = preprocessor.retain_boxes_above_threshold(
        boxes, labels, label_scores, masks, threshold=0.6)
    with self.test_session() as sess:
      retained_masks_, expected_retained_masks_ = sess.run([
          retained_masks, self.expectedMasksAfterThresholding()])
      self.assertAllClose(retained_masks_, expected_retained_masks_)
  def testRetainBoxesAboveThresholdWithKeypoints(self):
    """Keypoints are filtered consistently with their boxes' threshold."""
    boxes = self.createTestBoxes()
    labels = self.createTestLabels()
    label_scores = self.createTestLabelScores()
    keypoints = self.createTestKeypoints()
    (_, _, _, retained_keypoints) = preprocessor.retain_boxes_above_threshold(
        boxes, labels, label_scores, keypoints=keypoints, threshold=0.6)
    with self.test_session() as sess:
      (retained_keypoints_, expected_retained_keypoints_) = sess.run([
          retained_keypoints, self.expectedKeypointsAfterThresholding()])
      self.assertAllClose(retained_keypoints_, expected_retained_keypoints_)
  def testRetainBoxesAboveThresholdWithMissingScore(self):
    """A NaN (missing) score is retained by the threshold filter."""
    boxes = self.createTestBoxes()
    labels = self.createTestLabels()
    label_scores = self.createTestLabelScoresWithMissingScore()
    (retained_boxes, retained_labels,
     retained_label_scores) = preprocessor.retain_boxes_above_threshold(
         boxes, labels, label_scores, threshold=0.6)
    with self.test_session() as sess:
      (retained_boxes_, retained_labels_, retained_label_scores_,
       expected_retained_boxes_, expected_retained_labels_,
       expected_retained_label_scores_) = sess.run([
           retained_boxes, retained_labels, retained_label_scores,
           self.expectedBoxesAfterThresholdingWithMissingScore(),
           self.expectedLabelsAfterThresholdingWithMissingScore(),
           self.expectedLabelScoresAfterThresholdingWithMissingScore()])
      self.assertAllClose(retained_boxes_, expected_retained_boxes_)
      self.assertAllClose(retained_labels_, expected_retained_labels_)
      self.assertAllClose(retained_label_scores_,
                          expected_retained_label_scores_)
  def testRandomFlipBoxes(self):
    """Boxes must equal either the flipped or the unflipped expectation.

    NOTE(review): no flip op is actually applied to `boxes` here, so
    tf.squared_difference(boxes, boxes_expected2) is identically zero and
    the assertion is vacuous -- confirm a flip call was not dropped.
    """
    boxes = self.createTestBoxes()
    # Case where the boxes are flipped.
    boxes_expected1 = self.expectedBoxesAfterMirroring()
    # Case where the boxes are not flipped.
    boxes_expected2 = boxes
    # After elementwise multiplication, the result should be all-zero since one
    # of them is all-zero.
    boxes_diff = tf.multiply(
        tf.squared_difference(boxes, boxes_expected1),
        tf.squared_difference(boxes, boxes_expected2))
    expected_result = tf.zeros_like(boxes_diff)
    with self.test_session() as sess:
      (boxes_diff, expected_result) = sess.run([boxes_diff, expected_result])
      self.assertAllEqual(boxes_diff, expected_result)
  def testFlipMasks(self):
    """_flip_masks mirrors each mask left-right."""
    test_mask = self.createTestMasks()
    flipped_mask = preprocessor._flip_masks(test_mask)
    expected_mask = self.expectedMasksAfterMirroring()
    with self.test_session() as sess:
      flipped_mask, expected_mask = sess.run([flipped_mask, expected_mask])
      self.assertAllEqual(flipped_mask.flatten(), expected_mask.flatten())
  def testRandomHorizontalFlip(self):
    """Output is either the mirrored or the untouched image/boxes pair."""
    preprocess_options = [(preprocessor.random_horizontal_flip, {})]
    images = self.expectedImagesAfterNormalization()
    boxes = self.createTestBoxes()
    tensor_dict = {fields.InputDataFields.image: images,
                   fields.InputDataFields.groundtruth_boxes: boxes}
    images_expected1 = self.expectedImagesAfterMirroring()
    boxes_expected1 = self.expectedBoxesAfterMirroring()
    images_expected2 = images
    boxes_expected2 = boxes
    tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
    images = tensor_dict[fields.InputDataFields.image]
    boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
    # One of the two squared differences is all-zero, so the elementwise
    # product must be all-zero whichever branch the random flip took.
    boxes_diff1 = tf.squared_difference(boxes, boxes_expected1)
    boxes_diff2 = tf.squared_difference(boxes, boxes_expected2)
    boxes_diff = tf.multiply(boxes_diff1, boxes_diff2)
    boxes_diff_expected = tf.zeros_like(boxes_diff)
    images_diff1 = tf.squared_difference(images, images_expected1)
    images_diff2 = tf.squared_difference(images, images_expected2)
    images_diff = tf.multiply(images_diff1, images_diff2)
    images_diff_expected = tf.zeros_like(images_diff)
    with self.test_session() as sess:
      (images_diff_, images_diff_expected_, boxes_diff_,
       boxes_diff_expected_) = sess.run([images_diff, images_diff_expected,
                                         boxes_diff, boxes_diff_expected])
      self.assertAllClose(boxes_diff_, boxes_diff_expected_)
      self.assertAllClose(images_diff_, images_diff_expected_)
  def testRunRandomHorizontalFlipWithMaskAndKeypoints(self):
    """Smoke test: flip with masks + keypoints runs and returns tensors."""
    # NOTE(review): this assignment is immediately overwritten below.
    preprocess_options = [(preprocessor.random_horizontal_flip, {})]
    image_height = 3
    image_width = 3
    images = tf.random_uniform([1, image_height, image_width, 3])
    boxes = self.createTestBoxes()
    masks = self.createTestMasks()
    keypoints = self.createTestKeypoints()
    keypoint_flip_permutation = self.createKeypointFlipPermutation()
    tensor_dict = {
        fields.InputDataFields.image: images,
        fields.InputDataFields.groundtruth_boxes: boxes,
        fields.InputDataFields.groundtruth_instance_masks: masks,
        fields.InputDataFields.groundtruth_keypoints: keypoints
    }
    preprocess_options = [
        (preprocessor.random_horizontal_flip,
         {'keypoint_flip_permutation': keypoint_flip_permutation})]
    preprocessor_arg_map = preprocessor.get_default_func_arg_map(
        include_instance_masks=True, include_keypoints=True)
    tensor_dict = preprocessor.preprocess(
        tensor_dict, preprocess_options, func_arg_map=preprocessor_arg_map)
    boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
    masks = tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
    keypoints = tensor_dict[fields.InputDataFields.groundtruth_keypoints]
    with self.test_session() as sess:
      boxes, masks, keypoints = sess.run([boxes, masks, keypoints])
      self.assertTrue(boxes is not None)
      self.assertTrue(masks is not None)
      self.assertTrue(keypoints is not None)
  def testRandomPixelValueScale(self):
    """Scaled pixels stay within [0.9, 1.1] of the normalized originals."""
    preprocessing_options = []
    preprocessing_options.append((preprocessor.normalize_image, {
        'original_minval': 0,
        'original_maxval': 255,
        'target_minval': 0,
        'target_maxval': 1
    }))
    preprocessing_options.append((preprocessor.random_pixel_value_scale, {}))
    images = self.createTestImages()
    tensor_dict = {fields.InputDataFields.image: images}
    tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
    # Bounds computed from the ORIGINAL uint8 images before rebinding below.
    images_min = tf.to_float(images) * 0.9 / 255.0
    images_max = tf.to_float(images) * 1.1 / 255.0
    images = tensor_dict[fields.InputDataFields.image]
    values_greater = tf.greater_equal(images, images_min)
    values_less = tf.less_equal(images, images_max)
    values_true = tf.fill([1, 4, 4, 3], True)
    with self.test_session() as sess:
      (values_greater_, values_less_, values_true_) = sess.run(
          [values_greater, values_less, values_true])
      self.assertAllClose(values_greater_, values_true_)
      self.assertAllClose(values_less_, values_true_)
  def testRandomImageScale(self):
    """Scaled height/width stay within the default [0.5x, 2.0x] range."""
    preprocess_options = [(preprocessor.random_image_scale, {})]
    images_original = self.createTestImages()
    tensor_dict = {fields.InputDataFields.image: images_original}
    tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
    images_scaled = tensor_dict[fields.InputDataFields.image]
    images_original_shape = tf.shape(images_original)
    images_scaled_shape = tf.shape(images_scaled)
    with self.test_session() as sess:
      (images_original_shape_, images_scaled_shape_) = sess.run(
          [images_original_shape, images_scaled_shape])
      self.assertTrue(
          images_original_shape_[1] * 0.5 <= images_scaled_shape_[1])
      self.assertTrue(
          images_original_shape_[1] * 2.0 >= images_scaled_shape_[1])
      self.assertTrue(
          images_original_shape_[2] * 0.5 <= images_scaled_shape_[2])
      self.assertTrue(
          images_original_shape_[2] * 2.0 >= images_scaled_shape_[2])
  def testRandomRGBtoGray(self):
    """Output channels are either the original RGB or all equal (gray)."""
    preprocess_options = [(preprocessor.random_rgb_to_gray, {})]
    images_original = self.createTestImages()
    tensor_dict = {fields.InputDataFields.image: images_original}
    tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
    images_gray = tensor_dict[fields.InputDataFields.image]
    images_gray_r, images_gray_g, images_gray_b = tf.split(
        value=images_gray, num_or_size_splits=3, axis=3)
    images_r, images_g, images_b = tf.split(
        value=images_original, num_or_size_splits=3, axis=3)
    # Each product pairs "channel unchanged" with "channels equalized"; one
    # factor is all-zero in either outcome, so each product must be zero.
    images_r_diff1 = tf.squared_difference(tf.to_float(images_r),
                                           tf.to_float(images_gray_r))
    images_r_diff2 = tf.squared_difference(tf.to_float(images_gray_r),
                                           tf.to_float(images_gray_g))
    images_r_diff = tf.multiply(images_r_diff1, images_r_diff2)
    images_g_diff1 = tf.squared_difference(tf.to_float(images_g),
                                           tf.to_float(images_gray_g))
    images_g_diff2 = tf.squared_difference(tf.to_float(images_gray_g),
                                           tf.to_float(images_gray_b))
    images_g_diff = tf.multiply(images_g_diff1, images_g_diff2)
    images_b_diff1 = tf.squared_difference(tf.to_float(images_b),
                                           tf.to_float(images_gray_b))
    images_b_diff2 = tf.squared_difference(tf.to_float(images_gray_b),
                                           tf.to_float(images_gray_r))
    images_b_diff = tf.multiply(images_b_diff1, images_b_diff2)
    image_zero1 = tf.constant(0, dtype=tf.float32, shape=[1, 4, 4, 1])
    with self.test_session() as sess:
      (images_r_diff_, images_g_diff_, images_b_diff_,
       image_zero1_) = sess.run(
           [images_r_diff, images_g_diff, images_b_diff, image_zero1])
      self.assertAllClose(images_r_diff_, image_zero1_)
      self.assertAllClose(images_g_diff_, image_zero1_)
      self.assertAllClose(images_b_diff_, image_zero1_)
  def testRandomAdjustBrightness(self):
    """random_adjust_brightness preserves the image shape."""
    preprocessing_options = []
    preprocessing_options.append((preprocessor.normalize_image, {
        'original_minval': 0,
        'original_maxval': 255,
        'target_minval': 0,
        'target_maxval': 1
    }))
    preprocessing_options.append((preprocessor.random_adjust_brightness, {}))
    images_original = self.createTestImages()
    tensor_dict = {fields.InputDataFields.image: images_original}
    tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
    images_bright = tensor_dict[fields.InputDataFields.image]
    image_original_shape = tf.shape(images_original)
    image_bright_shape = tf.shape(images_bright)
    with self.test_session() as sess:
      (image_original_shape_, image_bright_shape_) = sess.run(
          [image_original_shape, image_bright_shape])
      self.assertAllEqual(image_original_shape_, image_bright_shape_)
  def testRandomAdjustContrast(self):
    """random_adjust_contrast preserves the image shape."""
    preprocessing_options = []
    preprocessing_options.append((preprocessor.normalize_image, {
        'original_minval': 0,
        'original_maxval': 255,
        'target_minval': 0,
        'target_maxval': 1
    }))
    preprocessing_options.append((preprocessor.random_adjust_contrast, {}))
    images_original = self.createTestImages()
    tensor_dict = {fields.InputDataFields.image: images_original}
    tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
    images_contrast = tensor_dict[fields.InputDataFields.image]
    image_original_shape = tf.shape(images_original)
    image_contrast_shape = tf.shape(images_contrast)
    with self.test_session() as sess:
      (image_original_shape_, image_contrast_shape_) = sess.run(
          [image_original_shape, image_contrast_shape])
      self.assertAllEqual(image_original_shape_, image_contrast_shape_)
  def testRandomAdjustHue(self):
    """random_adjust_hue preserves the image shape."""
    preprocessing_options = []
    preprocessing_options.append((preprocessor.normalize_image, {
        'original_minval': 0,
        'original_maxval': 255,
        'target_minval': 0,
        'target_maxval': 1
    }))
    preprocessing_options.append((preprocessor.random_adjust_hue, {}))
    images_original = self.createTestImages()
    tensor_dict = {fields.InputDataFields.image: images_original}
    tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
    images_hue = tensor_dict[fields.InputDataFields.image]
    image_original_shape = tf.shape(images_original)
    image_hue_shape = tf.shape(images_hue)
    with self.test_session() as sess:
      (image_original_shape_, image_hue_shape_) = sess.run(
          [image_original_shape, image_hue_shape])
      self.assertAllEqual(image_original_shape_, image_hue_shape_)
  def testRandomDistortColor(self):
    """random_distort_color preserves the image shape."""
    preprocessing_options = []
    preprocessing_options.append((preprocessor.normalize_image, {
        'original_minval': 0,
        'original_maxval': 255,
        'target_minval': 0,
        'target_maxval': 1
    }))
    preprocessing_options.append((preprocessor.random_distort_color, {}))
    images_original = self.createTestImages()
    images_original_shape = tf.shape(images_original)
    tensor_dict = {fields.InputDataFields.image: images_original}
    tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
    images_distorted_color = tensor_dict[fields.InputDataFields.image]
    images_distorted_color_shape = tf.shape(images_distorted_color)
    with self.test_session() as sess:
      (images_original_shape_, images_distorted_color_shape_) = sess.run(
          [images_original_shape, images_distorted_color_shape])
      self.assertAllEqual(images_original_shape_,
                          images_distorted_color_shape_)
  def testRandomJitterBoxes(self):
    """random_jitter_boxes preserves the boxes tensor shape."""
    preprocessing_options = []
    preprocessing_options.append((preprocessor.random_jitter_boxes, {}))
    boxes = self.createTestBoxes()
    boxes_shape = tf.shape(boxes)
    tensor_dict = {fields.InputDataFields.groundtruth_boxes: boxes}
    tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
    distorted_boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
    distorted_boxes_shape = tf.shape(distorted_boxes)
    with self.test_session() as sess:
      (boxes_shape_, distorted_boxes_shape_) = sess.run(
          [boxes_shape, distorted_boxes_shape])
      self.assertAllEqual(boxes_shape_, distorted_boxes_shape_)
  def testRandomCropImage(self):
    """random_crop_image keeps tensor ranks and the 3-channel image depth."""
    preprocessing_options = []
    preprocessing_options.append((preprocessor.normalize_image, {
        'original_minval': 0,
        'original_maxval': 255,
        'target_minval': 0,
        'target_maxval': 1
    }))
    preprocessing_options.append((preprocessor.random_crop_image, {}))
    images = self.createTestImages()
    boxes = self.createTestBoxes()
    labels = self.createTestLabels()
    tensor_dict = {
        fields.InputDataFields.image: images,
        fields.InputDataFields.groundtruth_boxes: boxes,
        fields.InputDataFields.groundtruth_classes: labels
    }
    distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
                                                    preprocessing_options)
    distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
    distorted_boxes = distorted_tensor_dict[
        fields.InputDataFields.groundtruth_boxes]
    boxes_rank = tf.rank(boxes)
    distorted_boxes_rank = tf.rank(distorted_boxes)
    images_rank = tf.rank(images)
    distorted_images_rank = tf.rank(distorted_images)
    self.assertEqual(3, distorted_images.get_shape()[3])
    with self.test_session() as sess:
      (boxes_rank_, distorted_boxes_rank_, images_rank_,
       distorted_images_rank_) = sess.run([
           boxes_rank, distorted_boxes_rank, images_rank,
           distorted_images_rank])
      self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
      self.assertAllEqual(images_rank_, distorted_images_rank_)
  def testRandomCropImageGrayscale(self):
    """Cropping after rgb_to_gray keeps ranks and a 1-channel image depth."""
    preprocessing_options = [(preprocessor.rgb_to_gray, {}),
                             (preprocessor.normalize_image, {
                                 'original_minval': 0,
                                 'original_maxval': 255,
                                 'target_minval': 0,
                                 'target_maxval': 1,
                             }),
                             (preprocessor.random_crop_image, {})]
    images = self.createTestImages()
    boxes = self.createTestBoxes()
    labels = self.createTestLabels()
    tensor_dict = {
        fields.InputDataFields.image: images,
        fields.InputDataFields.groundtruth_boxes: boxes,
        fields.InputDataFields.groundtruth_classes: labels
    }
    distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
                                                    preprocessing_options)
    distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
    distorted_boxes = distorted_tensor_dict[
        fields.InputDataFields.groundtruth_boxes]
    boxes_rank = tf.rank(boxes)
    distorted_boxes_rank = tf.rank(distorted_boxes)
    images_rank = tf.rank(images)
    distorted_images_rank = tf.rank(distorted_images)
    self.assertEqual(1, distorted_images.get_shape()[3])
    with self.test_session() as sess:
      session_results = sess.run([
          boxes_rank, distorted_boxes_rank, images_rank,
          distorted_images_rank])
      (boxes_rank_, distorted_boxes_rank_, images_rank_,
       distorted_images_rank_) = session_results
      self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
      self.assertAllEqual(images_rank_, distorted_images_rank_)
  def testRandomCropImageWithBoxOutOfImage(self):
    """random_crop_image tolerates boxes extending outside the image."""
    preprocessing_options = []
    preprocessing_options.append((preprocessor.normalize_image, {
        'original_minval': 0,
        'original_maxval': 255,
        'target_minval': 0,
        'target_maxval': 1
    }))
    preprocessing_options.append((preprocessor.random_crop_image, {}))
    images = self.createTestImages()
    boxes = self.createTestBoxesOutOfImage()
    labels = self.createTestLabels()
    tensor_dict = {
        fields.InputDataFields.image: images,
        fields.InputDataFields.groundtruth_boxes: boxes,
        fields.InputDataFields.groundtruth_classes: labels
    }
    distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
                                                    preprocessing_options)
    distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
    distorted_boxes = distorted_tensor_dict[
        fields.InputDataFields.groundtruth_boxes]
    boxes_rank = tf.rank(boxes)
    distorted_boxes_rank = tf.rank(distorted_boxes)
    images_rank = tf.rank(images)
    distorted_images_rank = tf.rank(distorted_images)
    with self.test_session() as sess:
      (boxes_rank_, distorted_boxes_rank_, images_rank_,
       distorted_images_rank_) = sess.run(
           [boxes_rank, distorted_boxes_rank, images_rank,
            distorted_images_rank])
      self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
      self.assertAllEqual(images_rank_, distorted_images_rank_)
  def testRandomCropImageWithRandomCoefOne(self):
    """With random_coef=1.0 the crop is a no-op: outputs equal inputs."""
    preprocessing_options = [(preprocessor.normalize_image, {
        'original_minval': 0,
        'original_maxval': 255,
        'target_minval': 0,
        'target_maxval': 1
    })]
    images = self.createTestImages()
    boxes = self.createTestBoxes()
    labels = self.createTestLabels()
    tensor_dict = {
        fields.InputDataFields.image: images,
        fields.InputDataFields.groundtruth_boxes: boxes,
        fields.InputDataFields.groundtruth_classes: labels
    }
    tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
    images = tensor_dict[fields.InputDataFields.image]
    preprocessing_options = [(preprocessor.random_crop_image, {
        'random_coef': 1.0
    })]
    distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
                                                    preprocessing_options)
    distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
    distorted_boxes = distorted_tensor_dict[
        fields.InputDataFields.groundtruth_boxes]
    distorted_labels = distorted_tensor_dict[
        fields.InputDataFields.groundtruth_classes]
    boxes_shape = tf.shape(boxes)
    distorted_boxes_shape = tf.shape(distorted_boxes)
    images_shape = tf.shape(images)
    distorted_images_shape = tf.shape(distorted_images)
    with self.test_session() as sess:
      (boxes_shape_, distorted_boxes_shape_, images_shape_,
       distorted_images_shape_, images_, distorted_images_,
       boxes_, distorted_boxes_, labels_, distorted_labels_) = sess.run(
           [boxes_shape, distorted_boxes_shape, images_shape,
            distorted_images_shape, images, distorted_images, boxes,
            distorted_boxes, labels, distorted_labels])
      self.assertAllEqual(boxes_shape_, distorted_boxes_shape_)
      self.assertAllEqual(images_shape_, distorted_images_shape_)
      self.assertAllClose(images_, distorted_images_)
      self.assertAllClose(boxes_, distorted_boxes_)
      self.assertAllEqual(labels_, distorted_labels_)
def testRandomCropWithMockSampleDistortedBoundingBox(self):
  """Checks crop output for a deterministic (mocked) sampled crop window."""
  preprocessing_options = [(preprocessor.normalize_image, {
      'original_minval': 0,
      'original_maxval': 255,
      'target_minval': 0,
      'target_maxval': 1
  })]
  images = self.createColorfulTestImage()
  boxes = tf.constant([[0.1, 0.1, 0.8, 0.3],
                       [0.2, 0.4, 0.75, 0.75],
                       [0.3, 0.1, 0.4, 0.7]], dtype=tf.float32)
  labels = tf.constant([1, 7, 11], dtype=tf.int32)
  tensor_dict = {
      fields.InputDataFields.image: images,
      fields.InputDataFields.groundtruth_boxes: boxes,
      fields.InputDataFields.groundtruth_classes: labels
  }
  tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
  images = tensor_dict[fields.InputDataFields.image]

  preprocessing_options = [(preprocessor.random_crop_image, {})]
  # Pin the crop window so the expected boxes/labels below are deterministic.
  with mock.patch.object(
      tf.image,
      'sample_distorted_bounding_box') as mock_sample_distorted_bounding_box:
    mock_sample_distorted_bounding_box.return_value = (
        tf.constant([6, 143, 0], dtype=tf.int32),
        tf.constant([190, 237, -1], dtype=tf.int32),
        tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
    distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
                                                    preprocessing_options)
    distorted_boxes = distorted_tensor_dict[
        fields.InputDataFields.groundtruth_boxes]
    distorted_labels = distorted_tensor_dict[
        fields.InputDataFields.groundtruth_classes]
    # The first ground-truth box falls outside the crop and is dropped.
    expected_boxes = tf.constant(
        [[0.178947, 0.07173, 0.75789469, 0.66244733],
         [0.28421, 0.0, 0.38947365, 0.57805908]], dtype=tf.float32)
    expected_labels = tf.constant([7, 11], dtype=tf.int32)

    with self.test_session() as sess:
      (distorted_boxes_, distorted_labels_, expected_boxes_,
       expected_labels_) = sess.run([distorted_boxes, distorted_labels,
                                     expected_boxes, expected_labels])
      self.assertAllClose(distorted_boxes_, expected_boxes_)
      self.assertAllEqual(distorted_labels_, expected_labels_)
def testStrictRandomCropImageWithMasks(self):
  """_strict_random_crop_image must crop instance masks with the image."""
  image = self.createColorfulTestImage()[0]
  boxes = self.createTestBoxes()
  labels = self.createTestLabels()
  masks = tf.random_uniform([2, 200, 400], dtype=tf.float32)
  with mock.patch.object(
      tf.image,
      'sample_distorted_bounding_box') as mock_sample_distorted_bounding_box:
    mock_sample_distorted_bounding_box.return_value = (
        tf.constant([6, 143, 0], dtype=tf.int32),
        tf.constant([190, 237, -1], dtype=tf.int32),
        tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
    new_image, new_boxes, new_labels, new_masks = (
        preprocessor._strict_random_crop_image(
            image, boxes, labels, masks=masks))
    with self.test_session() as sess:
      new_image, new_boxes, new_labels, new_masks = sess.run(
          [new_image, new_boxes, new_labels, new_masks])
      expected_boxes = np.array([
          [0.0, 0.0, 0.75789469, 1.0],
          [0.23157893, 0.24050637, 0.75789469, 1.0],
      ], dtype=np.float32)
      # Masks must share the cropped spatial extent of the image.
      self.assertAllEqual(new_image.shape, [190, 237, 3])
      self.assertAllEqual(new_masks.shape, [2, 190, 237])
      self.assertAllClose(new_boxes.flatten(), expected_boxes.flatten())
def testStrictRandomCropImageWithKeypoints(self):
  """_strict_random_crop_image must remap keypoints into crop coordinates."""
  image = self.createColorfulTestImage()[0]
  boxes = self.createTestBoxes()
  labels = self.createTestLabels()
  keypoints = self.createTestKeypoints()
  with mock.patch.object(
      tf.image,
      'sample_distorted_bounding_box') as mock_sample_distorted_bounding_box:
    mock_sample_distorted_bounding_box.return_value = (
        tf.constant([6, 143, 0], dtype=tf.int32),
        tf.constant([190, 237, -1], dtype=tf.int32),
        tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
    new_image, new_boxes, new_labels, new_keypoints = (
        preprocessor._strict_random_crop_image(
            image, boxes, labels, keypoints=keypoints))
    with self.test_session() as sess:
      new_image, new_boxes, new_labels, new_keypoints = sess.run(
          [new_image, new_boxes, new_labels, new_keypoints])
      expected_boxes = np.array([
          [0.0, 0.0, 0.75789469, 1.0],
          [0.23157893, 0.24050637, 0.75789469, 1.0],
      ], dtype=np.float32)
      # Keypoints of the first instance land outside the crop -> NaN.
      expected_keypoints = np.array([
          [[np.nan, np.nan],
           [np.nan, np.nan],
           [np.nan, np.nan]],
          [[0.38947368, 0.07173],
           [0.49473682, 0.24050637],
           [0.60000002, 0.40928277]]
      ], dtype=np.float32)
      self.assertAllEqual(new_image.shape, [190, 237, 3])
      self.assertAllClose(new_boxes.flatten(), expected_boxes.flatten())
      self.assertAllClose(new_keypoints.flatten(),
                          expected_keypoints.flatten())
def testRunRandomCropImageWithMasks(self):
  """random_crop_image via preprocess() must also crop instance masks."""
  image = self.createColorfulTestImage()
  boxes = self.createTestBoxes()
  labels = self.createTestLabels()
  masks = tf.random_uniform([2, 200, 400], dtype=tf.float32)
  tensor_dict = {
      fields.InputDataFields.image: image,
      fields.InputDataFields.groundtruth_boxes: boxes,
      fields.InputDataFields.groundtruth_classes: labels,
      fields.InputDataFields.groundtruth_instance_masks: masks,
  }
  preprocessor_arg_map = preprocessor.get_default_func_arg_map(
      include_instance_masks=True)
  preprocessing_options = [(preprocessor.random_crop_image, {})]
  with mock.patch.object(
      tf.image,
      'sample_distorted_bounding_box') as mock_sample_distorted_bounding_box:
    mock_sample_distorted_bounding_box.return_value = (
        tf.constant([6, 143, 0], dtype=tf.int32),
        tf.constant([190, 237, -1], dtype=tf.int32),
        tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
    distorted_tensor_dict = preprocessor.preprocess(
        tensor_dict, preprocessing_options,
        func_arg_map=preprocessor_arg_map)
    distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
    distorted_boxes = distorted_tensor_dict[
        fields.InputDataFields.groundtruth_boxes]
    distorted_labels = distorted_tensor_dict[
        fields.InputDataFields.groundtruth_classes]
    distorted_masks = distorted_tensor_dict[
        fields.InputDataFields.groundtruth_instance_masks]
    with self.test_session() as sess:
      (distorted_image_, distorted_boxes_, distorted_labels_,
       distorted_masks_) = sess.run(
           [distorted_image, distorted_boxes, distorted_labels,
            distorted_masks])
      expected_boxes = np.array([
          [0.0, 0.0, 0.75789469, 1.0],
          [0.23157893, 0.24050637, 0.75789469, 1.0],
      ], dtype=np.float32)
      self.assertAllEqual(distorted_image_.shape, [1, 190, 237, 3])
      self.assertAllEqual(distorted_masks_.shape, [2, 190, 237])
      self.assertAllEqual(distorted_labels_, [1, 2])
      self.assertAllClose(distorted_boxes_.flatten(),
                          expected_boxes.flatten())
def testRunRandomCropImageWithKeypointsInsideCrop(self):
  """Keypoints inside the crop window must survive with remapped coords."""
  image = self.createColorfulTestImage()
  boxes = self.createTestBoxes()
  labels = self.createTestLabels()
  keypoints = self.createTestKeypointsInsideCrop()
  tensor_dict = {
      fields.InputDataFields.image: image,
      fields.InputDataFields.groundtruth_boxes: boxes,
      fields.InputDataFields.groundtruth_classes: labels,
      fields.InputDataFields.groundtruth_keypoints: keypoints
  }
  preprocessor_arg_map = preprocessor.get_default_func_arg_map(
      include_keypoints=True)
  preprocessing_options = [(preprocessor.random_crop_image, {})]
  with mock.patch.object(
      tf.image,
      'sample_distorted_bounding_box') as mock_sample_distorted_bounding_box:
    mock_sample_distorted_bounding_box.return_value = (
        tf.constant([6, 143, 0], dtype=tf.int32),
        tf.constant([190, 237, -1], dtype=tf.int32),
        tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
    distorted_tensor_dict = preprocessor.preprocess(
        tensor_dict, preprocessing_options,
        func_arg_map=preprocessor_arg_map)
    distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
    distorted_boxes = distorted_tensor_dict[
        fields.InputDataFields.groundtruth_boxes]
    distorted_labels = distorted_tensor_dict[
        fields.InputDataFields.groundtruth_classes]
    distorted_keypoints = distorted_tensor_dict[
        fields.InputDataFields.groundtruth_keypoints]
    with self.test_session() as sess:
      (distorted_image_, distorted_boxes_, distorted_labels_,
       distorted_keypoints_) = sess.run(
           [distorted_image, distorted_boxes, distorted_labels,
            distorted_keypoints])
      expected_boxes = np.array([
          [0.0, 0.0, 0.75789469, 1.0],
          [0.23157893, 0.24050637, 0.75789469, 1.0],
      ], dtype=np.float32)
      # Both instances keep their keypoints, re-expressed in crop coords.
      expected_keypoints = np.array([
          [[0.38947368, 0.07173],
           [0.49473682, 0.24050637],
           [0.60000002, 0.40928277]],
          [[0.38947368, 0.07173],
           [0.49473682, 0.24050637],
           [0.60000002, 0.40928277]]
      ])
      self.assertAllEqual(distorted_image_.shape, [1, 190, 237, 3])
      self.assertAllEqual(distorted_labels_, [1, 2])
      self.assertAllClose(distorted_boxes_.flatten(),
                          expected_boxes.flatten())
      self.assertAllClose(distorted_keypoints_.flatten(),
                          expected_keypoints.flatten())
def testRunRandomCropImageWithKeypointsOutsideCrop(self):
  """Keypoints outside the crop window must all be mapped to NaN."""
  image = self.createColorfulTestImage()
  boxes = self.createTestBoxes()
  labels = self.createTestLabels()
  keypoints = self.createTestKeypointsOutsideCrop()
  tensor_dict = {
      fields.InputDataFields.image: image,
      fields.InputDataFields.groundtruth_boxes: boxes,
      fields.InputDataFields.groundtruth_classes: labels,
      fields.InputDataFields.groundtruth_keypoints: keypoints
  }
  preprocessor_arg_map = preprocessor.get_default_func_arg_map(
      include_keypoints=True)
  preprocessing_options = [(preprocessor.random_crop_image, {})]
  with mock.patch.object(
      tf.image,
      'sample_distorted_bounding_box') as mock_sample_distorted_bounding_box:
    mock_sample_distorted_bounding_box.return_value = (
        tf.constant([6, 143, 0], dtype=tf.int32),
        tf.constant([190, 237, -1], dtype=tf.int32),
        tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
    distorted_tensor_dict = preprocessor.preprocess(
        tensor_dict, preprocessing_options,
        func_arg_map=preprocessor_arg_map)
    distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
    distorted_boxes = distorted_tensor_dict[
        fields.InputDataFields.groundtruth_boxes]
    distorted_labels = distorted_tensor_dict[
        fields.InputDataFields.groundtruth_classes]
    distorted_keypoints = distorted_tensor_dict[
        fields.InputDataFields.groundtruth_keypoints]
    with self.test_session() as sess:
      (distorted_image_, distorted_boxes_, distorted_labels_,
       distorted_keypoints_) = sess.run(
           [distorted_image, distorted_boxes, distorted_labels,
            distorted_keypoints])
      expected_boxes = np.array([
          [0.0, 0.0, 0.75789469, 1.0],
          [0.23157893, 0.24050637, 0.75789469, 1.0],
      ], dtype=np.float32)
      expected_keypoints = np.array([
          [[np.nan, np.nan],
           [np.nan, np.nan],
           [np.nan, np.nan]],
          [[np.nan, np.nan],
           [np.nan, np.nan],
           [np.nan, np.nan]],
      ])
      self.assertAllEqual(distorted_image_.shape, [1, 190, 237, 3])
      self.assertAllEqual(distorted_labels_, [1, 2])
      self.assertAllClose(distorted_boxes_.flatten(),
                          expected_boxes.flatten())
      self.assertAllClose(distorted_keypoints_.flatten(),
                          expected_keypoints.flatten())
def testRunRetainBoxesAboveThreshold(self):
  """retain_boxes_above_threshold keeps only boxes scoring above 0.6."""
  boxes = self.createTestBoxes()
  labels = self.createTestLabels()
  label_scores = self.createTestLabelScores()
  tensor_dict = {
      fields.InputDataFields.groundtruth_boxes: boxes,
      fields.InputDataFields.groundtruth_classes: labels,
      fields.InputDataFields.groundtruth_label_scores: label_scores
  }
  preprocessing_options = [
      (preprocessor.retain_boxes_above_threshold, {'threshold': 0.6})
  ]
  retained_tensor_dict = preprocessor.preprocess(tensor_dict,
                                                 preprocessing_options)
  retained_boxes = retained_tensor_dict[
      fields.InputDataFields.groundtruth_boxes]
  retained_labels = retained_tensor_dict[
      fields.InputDataFields.groundtruth_classes]
  retained_label_scores = retained_tensor_dict[
      fields.InputDataFields.groundtruth_label_scores]
  with self.test_session() as sess:
    (retained_boxes_, retained_labels_, retained_label_scores_,
     expected_retained_boxes_, expected_retained_labels_,
     expected_retained_label_scores_) = sess.run(
         [retained_boxes, retained_labels, retained_label_scores,
          self.expectedBoxesAfterThresholding(),
          self.expectedLabelsAfterThresholding(),
          self.expectedLabelScoresAfterThresholding()])
    self.assertAllClose(retained_boxes_, expected_retained_boxes_)
    self.assertAllClose(retained_labels_, expected_retained_labels_)
    self.assertAllClose(retained_label_scores_,
                        expected_retained_label_scores_)
def testRunRetainBoxesAboveThresholdWithMasks(self):
  """Score thresholding must drop the masks of removed boxes too."""
  boxes = self.createTestBoxes()
  labels = self.createTestLabels()
  label_scores = self.createTestLabelScores()
  masks = self.createTestMasks()
  tensor_dict = {
      fields.InputDataFields.groundtruth_boxes: boxes,
      fields.InputDataFields.groundtruth_classes: labels,
      fields.InputDataFields.groundtruth_label_scores: label_scores,
      fields.InputDataFields.groundtruth_instance_masks: masks
  }
  preprocessor_arg_map = preprocessor.get_default_func_arg_map(
      include_instance_masks=True)
  preprocessing_options = [
      (preprocessor.retain_boxes_above_threshold, {'threshold': 0.6})
  ]
  retained_tensor_dict = preprocessor.preprocess(
      tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
  retained_masks = retained_tensor_dict[
      fields.InputDataFields.groundtruth_instance_masks]
  with self.test_session() as sess:
    retained_masks_, expected_masks_ = sess.run(
        [retained_masks, self.expectedMasksAfterThresholding()])
    self.assertAllClose(retained_masks_, expected_masks_)
def testRunRetainBoxesAboveThresholdWithKeypoints(self):
  """Score thresholding must drop the keypoints of removed boxes too."""
  boxes = self.createTestBoxes()
  labels = self.createTestLabels()
  label_scores = self.createTestLabelScores()
  keypoints = self.createTestKeypoints()
  tensor_dict = {
      fields.InputDataFields.groundtruth_boxes: boxes,
      fields.InputDataFields.groundtruth_classes: labels,
      fields.InputDataFields.groundtruth_label_scores: label_scores,
      fields.InputDataFields.groundtruth_keypoints: keypoints
  }
  preprocessor_arg_map = preprocessor.get_default_func_arg_map(
      include_keypoints=True)
  preprocessing_options = [
      (preprocessor.retain_boxes_above_threshold, {'threshold': 0.6})
  ]
  retained_tensor_dict = preprocessor.preprocess(
      tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
  retained_keypoints = retained_tensor_dict[
      fields.InputDataFields.groundtruth_keypoints]
  with self.test_session() as sess:
    retained_keypoints_, expected_keypoints_ = sess.run(
        [retained_keypoints, self.expectedKeypointsAfterThresholding()])
    self.assertAllClose(retained_keypoints_, expected_keypoints_)
def testRunRandomCropToAspectRatioWithMasks(self):
  """random_crop_to_aspect_ratio must crop masks alongside the image."""
  image = self.createColorfulTestImage()
  boxes = self.createTestBoxes()
  labels = self.createTestLabels()
  masks = tf.random_uniform([2, 200, 400], dtype=tf.float32)
  tensor_dict = {
      fields.InputDataFields.image: image,
      fields.InputDataFields.groundtruth_boxes: boxes,
      fields.InputDataFields.groundtruth_classes: labels,
      fields.InputDataFields.groundtruth_instance_masks: masks
  }
  preprocessor_arg_map = preprocessor.get_default_func_arg_map(
      include_instance_masks=True)
  preprocessing_options = [(preprocessor.random_crop_to_aspect_ratio, {})]
  # Pin the crop offset so the expected values below are deterministic.
  with mock.patch.object(preprocessor,
                         '_random_integer') as mock_random_integer:
    mock_random_integer.return_value = tf.constant(0, dtype=tf.int32)
    distorted_tensor_dict = preprocessor.preprocess(
        tensor_dict, preprocessing_options,
        func_arg_map=preprocessor_arg_map)
    distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
    distorted_boxes = distorted_tensor_dict[
        fields.InputDataFields.groundtruth_boxes]
    distorted_labels = distorted_tensor_dict[
        fields.InputDataFields.groundtruth_classes]
    distorted_masks = distorted_tensor_dict[
        fields.InputDataFields.groundtruth_instance_masks]
    with self.test_session() as sess:
      (distorted_image_, distorted_boxes_, distorted_labels_,
       distorted_masks_) = sess.run([distorted_image, distorted_boxes,
                                     distorted_labels, distorted_masks])
      expected_boxes = np.array([0.0, 0.5, 0.75, 1.0], dtype=np.float32)
      self.assertAllEqual(distorted_image_.shape, [1, 200, 200, 3])
      self.assertAllEqual(distorted_labels_, [1])
      self.assertAllClose(distorted_boxes_.flatten(),
                          expected_boxes.flatten())
      self.assertAllEqual(distorted_masks_.shape, [1, 200, 200])
def testRunRandomCropToAspectRatioWithKeypoints(self):
  """random_crop_to_aspect_ratio must remap keypoints for kept boxes."""
  image = self.createColorfulTestImage()
  boxes = self.createTestBoxes()
  labels = self.createTestLabels()
  keypoints = self.createTestKeypoints()
  tensor_dict = {
      fields.InputDataFields.image: image,
      fields.InputDataFields.groundtruth_boxes: boxes,
      fields.InputDataFields.groundtruth_classes: labels,
      fields.InputDataFields.groundtruth_keypoints: keypoints
  }
  preprocessor_arg_map = preprocessor.get_default_func_arg_map(
      include_keypoints=True)
  preprocessing_options = [(preprocessor.random_crop_to_aspect_ratio, {})]
  with mock.patch.object(preprocessor,
                         '_random_integer') as mock_random_integer:
    mock_random_integer.return_value = tf.constant(0, dtype=tf.int32)
    distorted_tensor_dict = preprocessor.preprocess(
        tensor_dict, preprocessing_options,
        func_arg_map=preprocessor_arg_map)
    distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
    distorted_boxes = distorted_tensor_dict[
        fields.InputDataFields.groundtruth_boxes]
    distorted_labels = distorted_tensor_dict[
        fields.InputDataFields.groundtruth_classes]
    distorted_keypoints = distorted_tensor_dict[
        fields.InputDataFields.groundtruth_keypoints]
    with self.test_session() as sess:
      (distorted_image_, distorted_boxes_, distorted_labels_,
       distorted_keypoints_) = sess.run([distorted_image, distorted_boxes,
                                         distorted_labels,
                                         distorted_keypoints])
      expected_boxes = np.array([0.0, 0.5, 0.75, 1.0], dtype=np.float32)
      expected_keypoints = np.array(
          [[0.1, 0.2], [0.2, 0.4], [0.3, 0.6]], dtype=np.float32)
      self.assertAllEqual(distorted_image_.shape, [1, 200, 200, 3])
      self.assertAllEqual(distorted_labels_, [1])
      self.assertAllClose(distorted_boxes_.flatten(),
                          expected_boxes.flatten())
      self.assertAllClose(distorted_keypoints_.flatten(),
                          expected_keypoints.flatten())
def testRandomPadImage(self):
  """Tests random_pad_image: box count preserved, image grows by at most 2x,
  and normalized box extents never grow after padding.

  Bug fix: the original asserted on `(...).all` without calling it — a bound
  method is always truthy, so the four shape assertions could never fail.
  The comparisons below are on scalar shape entries, so they are asserted
  directly.
  """
  preprocessing_options = [(preprocessor.normalize_image, {
      'original_minval': 0,
      'original_maxval': 255,
      'target_minval': 0,
      'target_maxval': 1
  })]
  images = self.createTestImages()
  boxes = self.createTestBoxes()
  labels = self.createTestLabels()
  tensor_dict = {
      fields.InputDataFields.image: images,
      fields.InputDataFields.groundtruth_boxes: boxes,
      fields.InputDataFields.groundtruth_classes: labels
  }
  tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
  images = tensor_dict[fields.InputDataFields.image]

  preprocessing_options = [(preprocessor.random_pad_image, {})]
  padded_tensor_dict = preprocessor.preprocess(tensor_dict,
                                               preprocessing_options)
  padded_images = padded_tensor_dict[fields.InputDataFields.image]
  padded_boxes = padded_tensor_dict[
      fields.InputDataFields.groundtruth_boxes]
  boxes_shape = tf.shape(boxes)
  padded_boxes_shape = tf.shape(padded_boxes)
  images_shape = tf.shape(images)
  padded_images_shape = tf.shape(padded_images)

  with self.test_session() as sess:
    (boxes_shape_, padded_boxes_shape_, images_shape_,
     padded_images_shape_, boxes_, padded_boxes_) = sess.run(
         [boxes_shape, padded_boxes_shape, images_shape,
          padded_images_shape, boxes, padded_boxes])
    self.assertAllEqual(boxes_shape_, padded_boxes_shape_)
    # Each spatial dimension grows, but by at most a factor of two.
    self.assertTrue(images_shape_[1] >= padded_images_shape_[1] * 0.5)
    self.assertTrue(images_shape_[2] >= padded_images_shape_[2] * 0.5)
    self.assertTrue(images_shape_[1] <= padded_images_shape_[1])
    self.assertTrue(images_shape_[2] <= padded_images_shape_[2])
    # Normalized box extents can only shrink when the canvas grows.
    self.assertTrue(np.all((boxes_[:, 2] - boxes_[:, 0]) >= (
        padded_boxes_[:, 2] - padded_boxes_[:, 0])))
    self.assertTrue(np.all((boxes_[:, 3] - boxes_[:, 1]) >= (
        padded_boxes_[:, 3] - padded_boxes_[:, 1])))
def testRandomCropPadImageWithRandomCoefOne(self):
  """Tests random_crop_pad_image with random_coef=1.0 (crop disabled):
  the pad stage may still grow the image by up to 2x per dimension.

  Bug fix: the original asserted on `(...).all` without calling it — a bound
  method is always truthy, so the four shape assertions could never fail.
  The comparisons below are on scalar shape entries, so they are asserted
  directly.
  """
  preprocessing_options = [(preprocessor.normalize_image, {
      'original_minval': 0,
      'original_maxval': 255,
      'target_minval': 0,
      'target_maxval': 1
  })]
  images = self.createTestImages()
  boxes = self.createTestBoxes()
  labels = self.createTestLabels()
  tensor_dict = {
      fields.InputDataFields.image: images,
      fields.InputDataFields.groundtruth_boxes: boxes,
      fields.InputDataFields.groundtruth_classes: labels
  }
  tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
  images = tensor_dict[fields.InputDataFields.image]

  preprocessing_options = [(preprocessor.random_crop_pad_image,
                            {'random_coef': 1.0})]
  padded_tensor_dict = preprocessor.preprocess(tensor_dict,
                                               preprocessing_options)
  padded_images = padded_tensor_dict[fields.InputDataFields.image]
  padded_boxes = padded_tensor_dict[
      fields.InputDataFields.groundtruth_boxes]
  boxes_shape = tf.shape(boxes)
  padded_boxes_shape = tf.shape(padded_boxes)
  images_shape = tf.shape(images)
  padded_images_shape = tf.shape(padded_images)

  with self.test_session() as sess:
    (boxes_shape_, padded_boxes_shape_, images_shape_,
     padded_images_shape_, boxes_, padded_boxes_) = sess.run(
         [boxes_shape, padded_boxes_shape, images_shape,
          padded_images_shape, boxes, padded_boxes])
    self.assertAllEqual(boxes_shape_, padded_boxes_shape_)
    # Each spatial dimension grows, but by at most a factor of two.
    self.assertTrue(images_shape_[1] >= padded_images_shape_[1] * 0.5)
    self.assertTrue(images_shape_[2] >= padded_images_shape_[2] * 0.5)
    self.assertTrue(images_shape_[1] <= padded_images_shape_[1])
    self.assertTrue(images_shape_[2] <= padded_images_shape_[2])
    # Normalized box extents can only shrink when the canvas grows.
    self.assertTrue(np.all((boxes_[:, 2] - boxes_[:, 0]) >= (
        padded_boxes_[:, 2] - padded_boxes_[:, 0])))
    self.assertTrue(np.all((boxes_[:, 3] - boxes_[:, 1]) >= (
        padded_boxes_[:, 3] - padded_boxes_[:, 1])))
def testRandomCropToAspectRatio(self):
  """Cropping to aspect ratio 2.0 must halve the height, keep the width."""
  preprocessing_options = [(preprocessor.normalize_image, {
      'original_minval': 0,
      'original_maxval': 255,
      'target_minval': 0,
      'target_maxval': 1
  })]
  images = self.createTestImages()
  boxes = self.createTestBoxes()
  labels = self.createTestLabels()
  tensor_dict = {
      fields.InputDataFields.image: images,
      fields.InputDataFields.groundtruth_boxes: boxes,
      fields.InputDataFields.groundtruth_classes: labels
  }
  tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
  images = tensor_dict[fields.InputDataFields.image]

  preprocessing_options = [(preprocessor.random_crop_to_aspect_ratio,
                            {'aspect_ratio': 2.0})]
  cropped_tensor_dict = preprocessor.preprocess(tensor_dict,
                                                preprocessing_options)
  cropped_images = cropped_tensor_dict[fields.InputDataFields.image]
  cropped_boxes = cropped_tensor_dict[
      fields.InputDataFields.groundtruth_boxes]
  boxes_shape = tf.shape(boxes)
  cropped_boxes_shape = tf.shape(cropped_boxes)
  images_shape = tf.shape(images)
  cropped_images_shape = tf.shape(cropped_images)

  with self.test_session() as sess:
    (boxes_shape_, cropped_boxes_shape_, images_shape_,
     cropped_images_shape_) = sess.run([boxes_shape, cropped_boxes_shape,
                                        images_shape, cropped_images_shape])
    self.assertAllEqual(boxes_shape_, cropped_boxes_shape_)
    self.assertEqual(images_shape_[1], cropped_images_shape_[1] * 2)
    self.assertEqual(images_shape_[2], cropped_images_shape_[2])
def testRandomBlackPatches(self):
  """random_black_patches must leave the image shape unchanged."""
  preprocessing_options = [
      (preprocessor.normalize_image, {
          'original_minval': 0,
          'original_maxval': 255,
          'target_minval': 0,
          'target_maxval': 1
      }),
      (preprocessor.random_black_patches, {'size_to_image_ratio': 0.5}),
  ]
  images = self.createTestImages()
  tensor_dict = {fields.InputDataFields.image: images}
  blacked_tensor_dict = preprocessor.preprocess(tensor_dict,
                                                preprocessing_options)
  blacked_images = blacked_tensor_dict[fields.InputDataFields.image]
  images_shape = tf.shape(images)
  blacked_images_shape = tf.shape(blacked_images)

  with self.test_session() as sess:
    images_shape_, blacked_images_shape_ = sess.run(
        [images_shape, blacked_images_shape])
    self.assertAllEqual(images_shape_, blacked_images_shape_)
def testRandomResizeMethod(self):
  """random_resize_method must hit the requested target size exactly."""
  preprocessing_options = [
      (preprocessor.normalize_image, {
          'original_minval': 0,
          'original_maxval': 255,
          'target_minval': 0,
          'target_maxval': 1
      }),
      (preprocessor.random_resize_method, {'target_size': (75, 150)}),
  ]
  images = self.createTestImages()
  tensor_dict = {fields.InputDataFields.image: images}
  resized_tensor_dict = preprocessor.preprocess(tensor_dict,
                                                preprocessing_options)
  resized_images = resized_tensor_dict[fields.InputDataFields.image]
  resized_images_shape = tf.shape(resized_images)
  expected_images_shape = tf.constant([1, 75, 150, 3], dtype=tf.int32)

  with self.test_session() as sess:
    expected_images_shape_, resized_images_shape_ = sess.run(
        [expected_images_shape, resized_images_shape])
    self.assertAllEqual(expected_images_shape_, resized_images_shape_)
def testResizeToRange(self):
  """Tests image resizing, checking output sizes."""
  in_shape_list = [[60, 40, 3], [15, 30, 3], [15, 50, 3]]
  min_dim = 50
  max_dim = 100
  expected_shape_list = [[75, 50, 3], [50, 100, 3], [30, 100, 3]]

  for in_shape, expected_shape in zip(in_shape_list, expected_shape_list):
    in_image = tf.random_uniform(in_shape)
    out_image = preprocessor.resize_to_range(
        in_image, min_dimension=min_dim, max_dimension=max_dim)
    out_image_shape = tf.shape(out_image)
    with self.test_session() as sess:
      out_image_shape = sess.run(out_image_shape)
      self.assertAllEqual(out_image_shape, expected_shape)
def testResizeToRangeWithMasks(self):
  """Tests image resizing, checking output sizes."""
  in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
  in_masks_shape_list = [[15, 60, 40], [10, 15, 30]]
  min_dim = 50
  max_dim = 100
  expected_image_shape_list = [[75, 50, 3], [50, 100, 3]]
  expected_masks_shape_list = [[15, 75, 50], [10, 50, 100]]

  for (in_image_shape, expected_image_shape, in_masks_shape,
       expected_mask_shape) in zip(in_image_shape_list,
                                   expected_image_shape_list,
                                   in_masks_shape_list,
                                   expected_masks_shape_list):
    in_image = tf.random_uniform(in_image_shape)
    in_masks = tf.random_uniform(in_masks_shape)
    out_image, out_masks = preprocessor.resize_to_range(
        in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim)
    out_image_shape = tf.shape(out_image)
    out_masks_shape = tf.shape(out_masks)
    with self.test_session() as sess:
      out_image_shape, out_masks_shape = sess.run(
          [out_image_shape, out_masks_shape])
      self.assertAllEqual(out_image_shape, expected_image_shape)
      self.assertAllEqual(out_masks_shape, expected_mask_shape)
def testResizeToRangeWithNoInstanceMask(self):
  """Tests image resizing when the mask tensor has zero instances."""
  in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
  in_masks_shape_list = [[0, 60, 40], [0, 15, 30]]
  min_dim = 50
  max_dim = 100
  expected_image_shape_list = [[75, 50, 3], [50, 100, 3]]
  expected_masks_shape_list = [[0, 75, 50], [0, 50, 100]]

  for (in_image_shape, expected_image_shape, in_masks_shape,
       expected_mask_shape) in zip(in_image_shape_list,
                                   expected_image_shape_list,
                                   in_masks_shape_list,
                                   expected_masks_shape_list):
    in_image = tf.random_uniform(in_image_shape)
    in_masks = tf.random_uniform(in_masks_shape)
    out_image, out_masks = preprocessor.resize_to_range(
        in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim)
    out_image_shape = tf.shape(out_image)
    out_masks_shape = tf.shape(out_masks)
    with self.test_session() as sess:
      out_image_shape, out_masks_shape = sess.run(
          [out_image_shape, out_masks_shape])
      self.assertAllEqual(out_image_shape, expected_image_shape)
      self.assertAllEqual(out_masks_shape, expected_mask_shape)
def testResizeImageWithMasks(self):
  """Tests fixed-size image resizing, checking output sizes."""
  in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
  in_masks_shape_list = [[15, 60, 40], [10, 15, 30]]
  height = 50
  width = 100
  expected_image_shape_list = [[50, 100, 3], [50, 100, 3]]
  expected_masks_shape_list = [[15, 50, 100], [10, 50, 100]]

  for (in_image_shape, expected_image_shape, in_masks_shape,
       expected_mask_shape) in zip(in_image_shape_list,
                                   expected_image_shape_list,
                                   in_masks_shape_list,
                                   expected_masks_shape_list):
    in_image = tf.random_uniform(in_image_shape)
    in_masks = tf.random_uniform(in_masks_shape)
    out_image, out_masks = preprocessor.resize_image(
        in_image, in_masks, new_height=height, new_width=width)
    out_image_shape = tf.shape(out_image)
    out_masks_shape = tf.shape(out_masks)
    with self.test_session() as sess:
      out_image_shape, out_masks_shape = sess.run(
          [out_image_shape, out_masks_shape])
      self.assertAllEqual(out_image_shape, expected_image_shape)
      self.assertAllEqual(out_masks_shape, expected_mask_shape)
def testResizeImageWithNoInstanceMask(self):
  """Tests fixed-size resizing when the mask tensor has zero instances."""
  in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
  in_masks_shape_list = [[0, 60, 40], [0, 15, 30]]
  height = 50
  width = 100
  expected_image_shape_list = [[50, 100, 3], [50, 100, 3]]
  expected_masks_shape_list = [[0, 50, 100], [0, 50, 100]]

  for (in_image_shape, expected_image_shape, in_masks_shape,
       expected_mask_shape) in zip(in_image_shape_list,
                                   expected_image_shape_list,
                                   in_masks_shape_list,
                                   expected_masks_shape_list):
    in_image = tf.random_uniform(in_image_shape)
    in_masks = tf.random_uniform(in_masks_shape)
    out_image, out_masks = preprocessor.resize_image(
        in_image, in_masks, new_height=height, new_width=width)
    out_image_shape = tf.shape(out_image)
    out_masks_shape = tf.shape(out_masks)
    with self.test_session() as sess:
      out_image_shape, out_masks_shape = sess.run(
          [out_image_shape, out_masks_shape])
      self.assertAllEqual(out_image_shape, expected_image_shape)
      self.assertAllEqual(out_masks_shape, expected_mask_shape)
def
testResizeToRange4DImageTensor
(
self
):
image
=
tf
.
random_uniform
([
1
,
200
,
300
,
3
])
with
self
.
assertRaises
(
ValueError
):
preprocessor
.
resize_to_range
(
image
,
500
,
600
)
def
testResizeToRangeSameMinMax
(
self
):
"""Tests image resizing, checking output sizes."""
in_shape_list
=
[[
312
,
312
,
3
],
[
299
,
299
,
3
]]
min_dim
=
320
max_dim
=
320
expected_shape_list
=
[[
320
,
320
,
3
],
[
320
,
320
,
3
]]
for
in_shape
,
expected_shape
in
zip
(
in_shape_list
,
expected_shape_list
):
in_image
=
tf
.
random_uniform
(
in_shape
)
out_image
=
preprocessor
.
resize_to_range
(
in_image
,
min_dimension
=
min_dim
,
max_dimension
=
max_dim
)
out_image_shape
=
tf
.
shape
(
out_image
)
with
self
.
test_session
()
as
sess
:
out_image_shape
=
sess
.
run
(
out_image_shape
)
self
.
assertAllEqual
(
out_image_shape
,
expected_shape
)
def
testScaleBoxesToPixelCoordinates
(
self
):
"""Tests box scaling, checking scaled values."""
in_shape
=
[
60
,
40
,
3
]
in_boxes
=
[[
0.1
,
0.2
,
0.4
,
0.6
],
[
0.5
,
0.3
,
0.9
,
0.7
]]
expected_boxes
=
[[
6.
,
8.
,
24.
,
24.
],
[
30.
,
12.
,
54.
,
28.
]]
in_image
=
tf
.
random_uniform
(
in_shape
)
in_boxes
=
tf
.
constant
(
in_boxes
)
_
,
out_boxes
=
preprocessor
.
scale_boxes_to_pixel_coordinates
(
in_image
,
boxes
=
in_boxes
)
with
self
.
test_session
()
as
sess
:
out_boxes
=
sess
.
run
(
out_boxes
)
self
.
assertAllClose
(
out_boxes
,
expected_boxes
)
def
testScaleBoxesToPixelCoordinatesWithKeypoints
(
self
):
"""Tests box and keypoint scaling, checking scaled values."""
in_shape
=
[
60
,
40
,
3
]
in_boxes
=
self
.
createTestBoxes
()
in_keypoints
=
self
.
createTestKeypoints
()
expected_boxes
=
[[
0.
,
10.
,
45.
,
40.
],
[
15.
,
20.
,
45.
,
40.
]]
expected_keypoints
=
[
[[
6.
,
4.
],
[
12.
,
8.
],
[
18.
,
12.
]],
[[
24.
,
16.
],
[
30.
,
20.
],
[
36.
,
24.
]],
]
in_image
=
tf
.
random_uniform
(
in_shape
)
_
,
out_boxes
,
out_keypoints
=
preprocessor
.
scale_boxes_to_pixel_coordinates
(
in_image
,
boxes
=
in_boxes
,
keypoints
=
in_keypoints
)
with
self
.
test_session
()
as
sess
:
out_boxes_
,
out_keypoints_
=
sess
.
run
([
out_boxes
,
out_keypoints
])
self
.
assertAllClose
(
out_boxes_
,
expected_boxes
)
self
.
assertAllClose
(
out_keypoints_
,
expected_keypoints
)
def
testSubtractChannelMean
(
self
):
"""Tests whether channel means have been subtracted."""
with
self
.
test_session
():
image
=
tf
.
zeros
((
240
,
320
,
3
))
means
=
[
1
,
2
,
3
]
actual
=
preprocessor
.
subtract_channel_mean
(
image
,
means
=
means
)
actual
=
actual
.
eval
()
self
.
assertTrue
((
actual
[:,
:,
0
]
==
-
1
).
all
())
self
.
assertTrue
((
actual
[:,
:,
1
]
==
-
2
).
all
())
self
.
assertTrue
((
actual
[:,
:,
2
]
==
-
3
).
all
())
def
testOneHotEncoding
(
self
):
"""Tests one hot encoding of multiclass labels."""
with
self
.
test_session
():
labels
=
tf
.
constant
([
1
,
4
,
2
],
dtype
=
tf
.
int32
)
one_hot
=
preprocessor
.
one_hot_encoding
(
labels
,
num_classes
=
5
)
one_hot
=
one_hot
.
eval
()
self
.
assertAllEqual
([
0
,
1
,
1
,
0
,
1
],
one_hot
)
def
testSSDRandomCrop
(
self
):
preprocessing_options
=
[
(
preprocessor
.
normalize_image
,
{
'original_minval'
:
0
,
'original_maxval'
:
255
,
'target_minval'
:
0
,
'target_maxval'
:
1
}),
(
preprocessor
.
ssd_random_crop
,
{})]
images
=
self
.
createTestImages
()
boxes
=
self
.
createTestBoxes
()
labels
=
self
.
createTestLabels
()
tensor_dict
=
{
fields
.
InputDataFields
.
image
:
images
,
fields
.
InputDataFields
.
groundtruth_boxes
:
boxes
,
fields
.
InputDataFields
.
groundtruth_classes
:
labels
}
distorted_tensor_dict
=
preprocessor
.
preprocess
(
tensor_dict
,
preprocessing_options
)
distorted_images
=
distorted_tensor_dict
[
fields
.
InputDataFields
.
image
]
distorted_boxes
=
distorted_tensor_dict
[
fields
.
InputDataFields
.
groundtruth_boxes
]
images_rank
=
tf
.
rank
(
images
)
distorted_images_rank
=
tf
.
rank
(
distorted_images
)
boxes_rank
=
tf
.
rank
(
boxes
)
distorted_boxes_rank
=
tf
.
rank
(
distorted_boxes
)
with
self
.
test_session
()
as
sess
:
(
boxes_rank_
,
distorted_boxes_rank_
,
images_rank_
,
distorted_images_rank_
)
=
sess
.
run
(
[
boxes_rank
,
distorted_boxes_rank
,
images_rank
,
distorted_images_rank
])
self
.
assertAllEqual
(
boxes_rank_
,
distorted_boxes_rank_
)
self
.
assertAllEqual
(
images_rank_
,
distorted_images_rank_
)
def
testSSDRandomCropPad
(
self
):
images
=
self
.
createTestImages
()
boxes
=
self
.
createTestBoxes
()
labels
=
self
.
createTestLabels
()
preprocessing_options
=
[
(
preprocessor
.
normalize_image
,
{
'original_minval'
:
0
,
'original_maxval'
:
255
,
'target_minval'
:
0
,
'target_maxval'
:
1
}),
(
preprocessor
.
ssd_random_crop_pad
,
{})]
tensor_dict
=
{
fields
.
InputDataFields
.
image
:
images
,
fields
.
InputDataFields
.
groundtruth_boxes
:
boxes
,
fields
.
InputDataFields
.
groundtruth_classes
:
labels
}
distorted_tensor_dict
=
preprocessor
.
preprocess
(
tensor_dict
,
preprocessing_options
)
distorted_images
=
distorted_tensor_dict
[
fields
.
InputDataFields
.
image
]
distorted_boxes
=
distorted_tensor_dict
[
fields
.
InputDataFields
.
groundtruth_boxes
]
images_rank
=
tf
.
rank
(
images
)
distorted_images_rank
=
tf
.
rank
(
distorted_images
)
boxes_rank
=
tf
.
rank
(
boxes
)
distorted_boxes_rank
=
tf
.
rank
(
distorted_boxes
)
with
self
.
test_session
()
as
sess
:
(
boxes_rank_
,
distorted_boxes_rank_
,
images_rank_
,
distorted_images_rank_
)
=
sess
.
run
([
boxes_rank
,
distorted_boxes_rank
,
images_rank
,
distorted_images_rank
])
self
.
assertAllEqual
(
boxes_rank_
,
distorted_boxes_rank_
)
self
.
assertAllEqual
(
images_rank_
,
distorted_images_rank_
)
def
testSSDRandomCropFixedAspectRatio
(
self
):
images
=
self
.
createTestImages
()
boxes
=
self
.
createTestBoxes
()
labels
=
self
.
createTestLabels
()
preprocessing_options
=
[
(
preprocessor
.
normalize_image
,
{
'original_minval'
:
0
,
'original_maxval'
:
255
,
'target_minval'
:
0
,
'target_maxval'
:
1
}),
(
preprocessor
.
ssd_random_crop_fixed_aspect_ratio
,
{})]
tensor_dict
=
{
fields
.
InputDataFields
.
image
:
images
,
fields
.
InputDataFields
.
groundtruth_boxes
:
boxes
,
fields
.
InputDataFields
.
groundtruth_classes
:
labels
}
distorted_tensor_dict
=
preprocessor
.
preprocess
(
tensor_dict
,
preprocessing_options
)
distorted_images
=
distorted_tensor_dict
[
fields
.
InputDataFields
.
image
]
distorted_boxes
=
distorted_tensor_dict
[
fields
.
InputDataFields
.
groundtruth_boxes
]
images_rank
=
tf
.
rank
(
images
)
distorted_images_rank
=
tf
.
rank
(
distorted_images
)
boxes_rank
=
tf
.
rank
(
boxes
)
distorted_boxes_rank
=
tf
.
rank
(
distorted_boxes
)
with
self
.
test_session
()
as
sess
:
(
boxes_rank_
,
distorted_boxes_rank_
,
images_rank_
,
distorted_images_rank_
)
=
sess
.
run
(
[
boxes_rank
,
distorted_boxes_rank
,
images_rank
,
distorted_images_rank
])
self
.
assertAllEqual
(
boxes_rank_
,
distorted_boxes_rank_
)
self
.
assertAllEqual
(
images_rank_
,
distorted_images_rank_
)
def
testSSDRandomCropFixedAspectRatioWithMasksAndKeypoints
(
self
):
images
=
self
.
createTestImages
()
boxes
=
self
.
createTestBoxes
()
labels
=
self
.
createTestLabels
()
masks
=
self
.
createTestMasks
()
keypoints
=
self
.
createTestKeypoints
()
preprocessing_options
=
[
(
preprocessor
.
normalize_image
,
{
'original_minval'
:
0
,
'original_maxval'
:
255
,
'target_minval'
:
0
,
'target_maxval'
:
1
}),
(
preprocessor
.
ssd_random_crop_fixed_aspect_ratio
,
{})]
tensor_dict
=
{
fields
.
InputDataFields
.
image
:
images
,
fields
.
InputDataFields
.
groundtruth_boxes
:
boxes
,
fields
.
InputDataFields
.
groundtruth_classes
:
labels
,
fields
.
InputDataFields
.
groundtruth_instance_masks
:
masks
,
fields
.
InputDataFields
.
groundtruth_keypoints
:
keypoints
,
}
preprocessor_arg_map
=
preprocessor
.
get_default_func_arg_map
(
include_instance_masks
=
True
,
include_keypoints
=
True
)
distorted_tensor_dict
=
preprocessor
.
preprocess
(
tensor_dict
,
preprocessing_options
,
func_arg_map
=
preprocessor_arg_map
)
distorted_images
=
distorted_tensor_dict
[
fields
.
InputDataFields
.
image
]
distorted_boxes
=
distorted_tensor_dict
[
fields
.
InputDataFields
.
groundtruth_boxes
]
images_rank
=
tf
.
rank
(
images
)
distorted_images_rank
=
tf
.
rank
(
distorted_images
)
boxes_rank
=
tf
.
rank
(
boxes
)
distorted_boxes_rank
=
tf
.
rank
(
distorted_boxes
)
with
self
.
test_session
()
as
sess
:
(
boxes_rank_
,
distorted_boxes_rank_
,
images_rank_
,
distorted_images_rank_
)
=
sess
.
run
(
[
boxes_rank
,
distorted_boxes_rank
,
images_rank
,
distorted_images_rank
])
self
.
assertAllEqual
(
boxes_rank_
,
distorted_boxes_rank_
)
self
.
assertAllEqual
(
images_rank_
,
distorted_images_rank_
)
# Run all test cases in this module when executed as a script.
if __name__ == '__main__':
  tf.test.main()
object_detection/core/region_similarity_calculator.py
0 → 100644
View file @
44fa1d37
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Region Similarity Calculators for BoxLists.
Region Similarity Calculators compare a pairwise measure of similarity
between the boxes in two BoxLists.
"""
from
abc
import
ABCMeta
from
abc
import
abstractmethod
import
tensorflow
as
tf
from
object_detection.core
import
box_list_ops
class RegionSimilarityCalculator(object):
  """Abstract base class for pairwise region similarity calculators."""
  __metaclass__ = ABCMeta

  def compare(self, boxlist1, boxlist2, scope=None):
    """Computes a matrix of pairwise similarities between two BoxLists.

    Delegates the actual computation to the subclass's `_compare`; this
    method only wraps it in a name scope. Higher values indicate more
    similarity. Note that this measures similarity only — no matching is
    performed.

    Args:
      boxlist1: BoxList holding N boxes.
      boxlist2: BoxList holding M boxes.
      scope: Op scope name. Defaults to 'Compare' if None.

    Returns:
      a (float32) tensor of shape [N, M] with pairwise similarity score.
    """
    with tf.name_scope(scope, 'Compare', [boxlist1, boxlist2]):
      return self._compare(boxlist1, boxlist2)

  @abstractmethod
  def _compare(self, boxlist1, boxlist2):
    # Subclasses implement the actual [N, M] similarity computation.
    pass
class IouSimilarity(RegionSimilarityCalculator):
  """Similarity based on Intersection over Union (IOU).

  Produces pairwise IOU scores between the boxes of two BoxLists.
  """

  def _compare(self, boxlist1, boxlist2):
    """Computes pairwise IOU between the two BoxLists.

    Args:
      boxlist1: BoxList holding N boxes.
      boxlist2: BoxList holding M boxes.

    Returns:
      A tensor with shape [N, M] representing pairwise iou scores.
    """
    pairwise_iou = box_list_ops.iou(boxlist1, boxlist2)
    return pairwise_iou
class NegSqDistSimilarity(RegionSimilarityCalculator):
  """Similarity based on negated squared distance.

  Produces pairwise negative squared distances between the boxes of two
  BoxLists, so that closer boxes score higher.
  """

  def _compare(self, boxlist1, boxlist2):
    """Computes the matrix of (negated) squared distances.

    Args:
      boxlist1: BoxList holding N boxes.
      boxlist2: BoxList holding M boxes.

    Returns:
      A tensor with shape [N, M] representing negated pairwise squared distance.
    """
    return -box_list_ops.sq_dist(boxlist1, boxlist2)
class IoaSimilarity(RegionSimilarityCalculator):
  """Similarity based on Intersection over Area (IOA).

  Pairwise intersections are divided by the areas of the *second* BoxList,
  so the result is not symmetric in its arguments.
  """

  def _compare(self, boxlist1, boxlist2):
    """Computes pairwise IOA between the two BoxLists.

    Args:
      boxlist1: BoxList holding N boxes.
      boxlist2: BoxList holding M boxes.

    Returns:
      A tensor with shape [N, M] representing pairwise IOA scores.
    """
    pairwise_ioa = box_list_ops.ioa(boxlist1, boxlist2)
    return pairwise_ioa
object_detection/core/region_similarity_calculator_test.py
0 → 100644
View file @
44fa1d37
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for region_similarity_calculator."""
import
tensorflow
as
tf
from
object_detection.core
import
box_list
from
object_detection.core
import
region_similarity_calculator
class RegionSimilarityCalculatorTest(tf.test.TestCase):

  def test_get_correct_pairwise_similarity_based_on_iou(self):
    """IOU scores should match hand-computed intersection/union ratios."""
    boxes1 = box_list.BoxList(
        tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]]))
    boxes2 = box_list.BoxList(
        tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
                     [0.0, 0.0, 20.0, 20.0]]))
    expected = [[2.0 / 16.0, 0, 6.0 / 400.0],
                [1.0 / 16.0, 0.0, 5.0 / 400.0]]
    similarity_op = region_similarity_calculator.IouSimilarity().compare(
        boxes1, boxes2)
    with self.test_session() as sess:
      got = sess.run(similarity_op)
      self.assertAllClose(got, expected)

  def test_get_correct_pairwise_similarity_based_on_squared_distances(self):
    """Negated squared distances should match hand-computed values."""
    boxes1 = box_list.BoxList(
        tf.constant([[0.0, 0.0, 0.0, 0.0], [1.0, 1.0, 0.0, 2.0]]))
    boxes2 = box_list.BoxList(
        tf.constant([[3.0, 4.0, 1.0, 0.0], [-4.0, 0.0, 0.0, 3.0],
                     [0.0, 0.0, 0.0, 0.0]]))
    expected = [[-26, -25, 0], [-18, -27, -6]]
    similarity_op = region_similarity_calculator.NegSqDistSimilarity().compare(
        boxes1, boxes2)
    with self.test_session() as sess:
      got = sess.run(similarity_op)
      self.assertAllClose(got, expected)

  def test_get_correct_pairwise_similarity_based_on_ioa(self):
    """IOA scores in both argument orders should match hand-computed values."""
    boxes1 = box_list.BoxList(
        tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]]))
    boxes2 = box_list.BoxList(
        tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
                     [0.0, 0.0, 20.0, 20.0]]))
    # IOA is asymmetric: the denominator is the area of the second BoxList.
    expected_forward = [[2.0 / 12.0, 0, 6.0 / 400.0],
                        [1.0 / 12.0, 0.0, 5.0 / 400.0]]
    expected_reverse = [[2.0 / 6.0, 1.0 / 5.0],
                        [0, 0],
                        [6.0 / 6.0, 5.0 / 5.0]]
    calculator = region_similarity_calculator.IoaSimilarity()
    forward_op = calculator.compare(boxes1, boxes2)
    reverse_op = calculator.compare(boxes2, boxes1)
    with self.test_session() as sess:
      got_forward, got_reverse = sess.run([forward_op, reverse_op])
      self.assertAllClose(got_forward, expected_forward)
      self.assertAllClose(got_reverse, expected_reverse)
# Run all test cases in this module when executed as a script.
if __name__ == '__main__':
  tf.test.main()
object_detection/core/standard_fields.py
0 → 100644
View file @
44fa1d37
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains classes specifying naming conventions used for object detection.
Specifies:
InputDataFields: standard fields used by reader/preprocessor/batcher.
BoxListFields: standard field used by BoxList
TfExampleFields: standard fields for tf-example data format (go/tf-example).
"""
class InputDataFields(object):
  """Names for the input tensors.

  Holds the standard data field names to use for identifying input tensors.
  This should be used by the decoder to identify keys for the returned
  tensor_dict containing input tensors. And it should be used by the model
  to identify the tensors it needs.

  Attributes:
    image: image.
    original_image: image in the original input size.
    key: unique key corresponding to image.
    source_id: source of the original image.
    filename: original filename of the dataset (without common path).
    groundtruth_image_classes: image-level class labels.
    groundtruth_boxes: coordinates of the ground truth boxes in the image.
    groundtruth_classes: box-level class labels.
    groundtruth_label_types: box-level label types (e.g. explicit negative).
    groundtruth_is_crowd: is the groundtruth a single object or a crowd.
    groundtruth_area: area of a groundtruth segment.
    groundtruth_difficult: is a `difficult` object
    proposal_boxes: coordinates of object proposal boxes.
    proposal_objectness: objectness score of each proposal.
    groundtruth_instance_masks: ground truth instance masks.
    groundtruth_instance_classes: instance mask-level class labels.
    groundtruth_keypoints: ground truth keypoints.
    groundtruth_keypoint_visibilities: ground truth keypoint visibilities.
    groundtruth_label_scores: groundtruth label scores.
  """
  image = 'image'
  original_image = 'original_image'
  key = 'key'
  source_id = 'source_id'
  filename = 'filename'
  groundtruth_image_classes = 'groundtruth_image_classes'
  groundtruth_boxes = 'groundtruth_boxes'
  groundtruth_classes = 'groundtruth_classes'
  groundtruth_label_types = 'groundtruth_label_types'
  groundtruth_is_crowd = 'groundtruth_is_crowd'
  groundtruth_area = 'groundtruth_area'
  groundtruth_difficult = 'groundtruth_difficult'
  proposal_boxes = 'proposal_boxes'
  proposal_objectness = 'proposal_objectness'
  groundtruth_instance_masks = 'groundtruth_instance_masks'
  groundtruth_instance_classes = 'groundtruth_instance_classes'
  groundtruth_keypoints = 'groundtruth_keypoints'
  groundtruth_keypoint_visibilities = 'groundtruth_keypoint_visibilities'
  groundtruth_label_scores = 'groundtruth_label_scores'
class BoxListFields(object):
  """Naming conventions for BoxLists.

  Attributes:
    boxes: bounding box coordinates.
    classes: classes per bounding box.
    scores: scores per bounding box.
    weights: sample weights per bounding box.
    objectness: objectness score per bounding box.
    masks: masks per bounding box.
    keypoints: keypoints per bounding box.
    keypoint_heatmaps: keypoint heatmaps per bounding box.
  """
  boxes = 'boxes'
  classes = 'classes'
  scores = 'scores'
  weights = 'weights'
  objectness = 'objectness'
  masks = 'masks'
  keypoints = 'keypoints'
  keypoint_heatmaps = 'keypoint_heatmaps'
class TfExampleFields(object):
  """TF-example proto feature names for object detection.

  Holds the standard feature names to load from an Example proto for object
  detection.

  Attributes:
    image_encoded: JPEG encoded string
    image_format: image format, e.g. "JPEG"
    filename: filename
    channels: number of channels of image
    colorspace: colorspace, e.g. "RGB"
    height: height of image in pixels, e.g. 462
    width: width of image in pixels, e.g. 581
    source_id: original source of the image
    object_class_text: labels in text format, e.g. ["person", "cat"]
    object_class_label: labels in numbers, e.g. [16, 8]
    object_bbox_xmin: xmin coordinates of groundtruth box, e.g. 10, 30
    object_bbox_xmax: xmax coordinates of groundtruth box, e.g. 50, 40
    object_bbox_ymin: ymin coordinates of groundtruth box, e.g. 40, 50
    object_bbox_ymax: ymax coordinates of groundtruth box, e.g. 80, 70
    object_view: viewpoint of object, e.g. ["frontal", "left"]
    object_truncated: is object truncated, e.g. [true, false]
    object_occluded: is object occluded, e.g. [true, false]
    object_difficult: is object difficult, e.g. [true, false]
    object_is_crowd: is the object a single object or a crowd
    object_segment_area: the area of the segment.
    instance_masks: instance segmentation masks.
    instance_classes: Classes for each instance segmentation mask.
  """
  image_encoded = 'image/encoded'
  image_format = 'image/format'  # format is reserved keyword
  filename = 'image/filename'
  channels = 'image/channels'
  colorspace = 'image/colorspace'
  height = 'image/height'
  width = 'image/width'
  source_id = 'image/source_id'
  object_class_text = 'image/object/class/text'
  object_class_label = 'image/object/class/label'
  object_bbox_ymin = 'image/object/bbox/ymin'
  object_bbox_xmin = 'image/object/bbox/xmin'
  object_bbox_ymax = 'image/object/bbox/ymax'
  object_bbox_xmax = 'image/object/bbox/xmax'
  object_view = 'image/object/view'
  object_truncated = 'image/object/truncated'
  object_occluded = 'image/object/occluded'
  object_difficult = 'image/object/difficult'
  object_is_crowd = 'image/object/is_crowd'
  object_segment_area = 'image/object/segment/area'
  instance_masks = 'image/segmentation/object'
  instance_classes = 'image/segmentation/object/class'
object_detection/core/target_assigner.py
0 → 100644
View file @
44fa1d37
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Base target assigner module.
The job of a TargetAssigner is, for a given set of anchors (bounding boxes) and
groundtruth detections (bounding boxes), to assign classification and regression
targets to each anchor as well as weights to each anchor (specifying, e.g.,
which anchors should not contribute to training loss).
It assigns classification/regression targets by performing the following steps:
1) Computing pairwise similarity between anchors and groundtruth boxes using a
provided RegionSimilarity Calculator
2) Computing a matching based on the similarity matrix using a provided Matcher
3) Assigning regression targets based on the matching and a provided BoxCoder
4) Assigning classification targets based on the matching and groundtruth labels
Note that TargetAssigners only operate on detections from a single
image at a time, so any logic for applying a TargetAssigner to multiple
images must be handled externally.
"""
import
tensorflow
as
tf
from
object_detection.box_coders
import
faster_rcnn_box_coder
from
object_detection.box_coders
import
mean_stddev_box_coder
from
object_detection.core
import
box_coder
as
bcoder
from
object_detection.core
import
box_list
from
object_detection.core
import
box_list_ops
from
object_detection.core
import
matcher
as
mat
from
object_detection.core
import
region_similarity_calculator
as
sim_calc
from
object_detection.matchers
import
argmax_matcher
from
object_detection.matchers
import
bipartite_matcher
class
TargetAssigner
(
object
):
"""Target assigner to compute classification and regression targets."""
def
__init__
(
self
,
similarity_calc
,
matcher
,
box_coder
,
positive_class_weight
=
1.0
,
negative_class_weight
=
1.0
,
unmatched_cls_target
=
None
):
"""Construct Multibox Target Assigner.
Args:
similarity_calc: a RegionSimilarityCalculator
matcher: an object_detection.core.Matcher used to match groundtruth to
anchors.
box_coder: an object_detection.core.BoxCoder used to encode matching
groundtruth boxes with respect to anchors.
positive_class_weight: classification weight to be associated to positive
anchors (default: 1.0)
negative_class_weight: classification weight to be associated to negative
anchors (default: 1.0)
unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k]
which is consistent with the classification target for each
anchor (and can be empty for scalar targets). This shape must thus be
compatible with the groundtruth labels that are passed to the "assign"
function (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]).
If set to None, unmatched_cls_target is set to be [0] for each anchor.
Raises:
ValueError: if similarity_calc is not a RegionSimilarityCalculator or
if matcher is not a Matcher or if box_coder is not a BoxCoder
"""
if
not
isinstance
(
similarity_calc
,
sim_calc
.
RegionSimilarityCalculator
):
raise
ValueError
(
'similarity_calc must be a RegionSimilarityCalculator'
)
if
not
isinstance
(
matcher
,
mat
.
Matcher
):
raise
ValueError
(
'matcher must be a Matcher'
)
if
not
isinstance
(
box_coder
,
bcoder
.
BoxCoder
):
raise
ValueError
(
'box_coder must be a BoxCoder'
)
self
.
_similarity_calc
=
similarity_calc
self
.
_matcher
=
matcher
self
.
_box_coder
=
box_coder
self
.
_positive_class_weight
=
positive_class_weight
self
.
_negative_class_weight
=
negative_class_weight
if
unmatched_cls_target
is
None
:
self
.
_unmatched_cls_target
=
tf
.
constant
([
0
],
tf
.
float32
)
else
:
self
.
_unmatched_cls_target
=
unmatched_cls_target
  @property
  def box_coder(self):
    # Read-only access to the BoxCoder supplied at construction time.
    return self._box_coder
  def assign(self, anchors, groundtruth_boxes, groundtruth_labels=None,
             **params):
    """Assign classification and regression targets to each anchor.

    For a given set of anchors and groundtruth detections, match anchors
    to groundtruth_boxes and assign classification and regression targets to
    each anchor as well as weights based on the resulting match (specifying,
    e.g., which anchors should not contribute to training loss).

    Anchors that are not matched to anything are given a classification target
    of self._unmatched_cls_target which can be specified via the constructor.

    Args:
      anchors: a BoxList representing N anchors
      groundtruth_boxes: a BoxList representing M groundtruth boxes
      groundtruth_labels: a tensor of shape [num_gt_boxes, d_1, ... d_k]
        with labels for each of the ground_truth boxes. The subshape
        [d_1, ... d_k] can be empty (corresponding to scalar inputs). When set
        to None, groundtruth_labels assumes a binary problem where all
        ground_truth boxes get a positive label (of 1).
      **params: Additional keyword arguments for specific implementations of
        the Matcher.

    Returns:
      cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k],
        where the subshape [d_1, ..., d_k] is compatible with
        groundtruth_labels which has shape [num_gt_boxes, d_1, d_2, ... d_k].
      cls_weights: a float32 tensor with shape [num_anchors]
      reg_targets: a float32 tensor with shape
        [num_anchors, box_code_dimension]
      reg_weights: a float32 tensor with shape [num_anchors]
      match: a matcher.Match object encoding the match between anchors and
        groundtruth boxes, with rows corresponding to groundtruth boxes
        and columns corresponding to anchors.

    Raises:
      ValueError: if anchors or groundtruth_boxes are not of type
        box_list.BoxList
    """
    if not isinstance(anchors, box_list.BoxList):
      raise ValueError('anchors must be an BoxList')
    if not isinstance(groundtruth_boxes, box_list.BoxList):
      raise ValueError('groundtruth_boxes must be an BoxList')
    if groundtruth_labels is None:
      # Binary problem: every groundtruth box gets a positive label of 1,
      # expanded to shape [num_gt_boxes, 1].
      groundtruth_labels = tf.ones(tf.expand_dims(groundtruth_boxes.
                                                  num_boxes(), 0))
      groundtruth_labels = tf.expand_dims(groundtruth_labels, -1)
    # The per-box label subshape must match the unmatched target's shape,
    # otherwise dynamic_stitch in _create_classification_targets would fail.
    shape_assert = tf.assert_equal(tf.shape(groundtruth_labels)[1:],
                                   tf.shape(self._unmatched_cls_target))
    with tf.control_dependencies([shape_assert]):
      # Rows = groundtruth boxes, columns = anchors.
      match_quality_matrix = self._similarity_calc.compare(groundtruth_boxes,
                                                           anchors)
      match = self._matcher.match(match_quality_matrix, **params)
      reg_targets = self._create_regression_targets(anchors,
                                                    groundtruth_boxes,
                                                    match)
      cls_targets = self._create_classification_targets(groundtruth_labels,
                                                        match)
      reg_weights = self._create_regression_weights(match)
      cls_weights = self._create_classification_weights(
          match, self._positive_class_weight, self._negative_class_weight)

    # When the anchor count is statically known, bake it into the targets'
    # static shapes so downstream graph construction sees fixed dimensions.
    num_anchors = anchors.num_boxes_static()
    if num_anchors is not None:
      reg_targets = self._reset_target_shape(reg_targets, num_anchors)
      cls_targets = self._reset_target_shape(cls_targets, num_anchors)
      reg_weights = self._reset_target_shape(reg_weights, num_anchors)
      cls_weights = self._reset_target_shape(cls_weights, num_anchors)

    return cls_targets, cls_weights, reg_targets, reg_weights, match
def
_reset_target_shape
(
self
,
target
,
num_anchors
):
"""Sets the static shape of the target.
Args:
target: the target tensor. Its first dimension will be overwritten.
num_anchors: the number of anchors, which is used to override the target's
first dimension.
Returns:
A tensor with the shape info filled in.
"""
target_shape
=
target
.
get_shape
().
as_list
()
target_shape
[
0
]
=
num_anchors
target
.
set_shape
(
target_shape
)
return
target
  def _create_regression_targets(self, anchors, groundtruth_boxes, match):
    """Returns a regression target for each anchor.

    Matched anchors get their matched groundtruth box encoded by the box
    coder; unmatched/ignored anchors get the default (all-zero) target.

    Args:
      anchors: a BoxList representing N anchors
      groundtruth_boxes: a BoxList representing M groundtruth_boxes
      match: a matcher.Match object

    Returns:
      reg_targets: a float32 tensor with shape [N, box_code_dimension]
    """
    matched_anchor_indices = match.matched_column_indices()
    unmatched_ignored_anchor_indices = (match.
                                        unmatched_or_ignored_column_indices())
    matched_gt_indices = match.matched_row_indices()
    matched_anchors = box_list_ops.gather(anchors,
                                          matched_anchor_indices)
    matched_gt_boxes = box_list_ops.gather(groundtruth_boxes,
                                           matched_gt_indices)
    # Encode each matched groundtruth box relative to its anchor.
    matched_reg_targets = self._box_coder.encode(matched_gt_boxes,
                                                 matched_anchors)
    # Tile the default (zero) target for every unmatched/ignored anchor;
    # these entries carry zero regression weight so their value is moot.
    unmatched_ignored_reg_targets = tf.tile(
        self._default_regression_target(),
        tf.stack([tf.size(unmatched_ignored_anchor_indices), 1]))
    # Interleave matched and default targets back into anchor order.
    reg_targets = tf.dynamic_stitch(
        [matched_anchor_indices, unmatched_ignored_anchor_indices],
        [matched_reg_targets, unmatched_ignored_reg_targets])
    # TODO: summarize the number of matches on average.
    return reg_targets
def
_default_regression_target
(
self
):
"""Returns the default target for anchors to regress to.
Default regression targets are set to zero (though in
this implementation what these targets are set to should
not matter as the regression weight of any box set to
regress to the default target is zero).
Returns:
default_target: a float32 tensor with shape [1, box_code_dimension]
"""
return
tf
.
constant
([
self
.
_box_coder
.
code_size
*
[
0
]],
tf
.
float32
)
  def _create_classification_targets(self, groundtruth_labels, match):
    """Create classification targets for each anchor.

    Assign a classification target of for each anchor to the matching
    groundtruth label that is provided by match. Anchors that are not matched
    to anything are given the target self._unmatched_cls_target

    Args:
      groundtruth_labels: a tensor of shape [num_gt_boxes, d_1, ... d_k]
        with labels for each of the ground_truth boxes. The subshape
        [d_1, ... d_k] can be empty (corresponding to scalar labels).
      match: a matcher.Match object that provides a matching between anchors
        and groundtruth boxes.

    Returns:
      cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k],
        where the subshape [d_1, ..., d_k] is compatible with groundtruth_labels
        which has shape [num_gt_boxes, d_1, d_2, ... d_k].
    """
    matched_anchor_indices = match.matched_column_indices()
    unmatched_ignored_anchor_indices = (
        match.unmatched_or_ignored_column_indices())
    matched_gt_indices = match.matched_row_indices()
    # Each matched anchor copies the label of its matched groundtruth box.
    matched_cls_targets = tf.gather(groundtruth_labels, matched_gt_indices)
    # Tile multiples of 1 for every target dimension, so tiling only
    # replicates along the new leading (anchor) axis.
    ones = self._unmatched_cls_target.shape.ndims * [1]
    unmatched_ignored_cls_targets = tf.tile(
        tf.expand_dims(self._unmatched_cls_target, 0),
        tf.stack([tf.size(unmatched_ignored_anchor_indices)] + ones))
    # Scatter matched and unmatched/ignored targets back into anchor order;
    # the two index lists partition the anchor indices.
    cls_targets = tf.dynamic_stitch(
        [matched_anchor_indices, unmatched_ignored_anchor_indices],
        [matched_cls_targets, unmatched_ignored_cls_targets])
    return cls_targets
def
_create_regression_weights
(
self
,
match
):
"""Set regression weight for each anchor.
Only positive anchors are set to contribute to the regression loss, so this
method returns a weight of 1 for every positive anchor and 0 for every
negative anchor.
Args:
match: a matcher.Match object that provides a matching between anchors
and groundtruth boxes.
Returns:
reg_weights: a float32 tensor with shape [num_anchors] representing
regression weights
"""
reg_weights
=
tf
.
cast
(
match
.
matched_column_indicator
(),
tf
.
float32
)
return
reg_weights
def
_create_classification_weights
(
self
,
match
,
positive_class_weight
=
1.0
,
negative_class_weight
=
1.0
):
"""Create classification weights for each anchor.
Positive (matched) anchors are associated with a weight of
positive_class_weight and negative (unmatched) anchors are associated with
a weight of negative_class_weight. When anchors are ignored, weights are set
to zero. By default, both positive/negative weights are set to 1.0,
but they can be adjusted to handle class imbalance (which is almost always
the case in object detection).
Args:
match: a matcher.Match object that provides a matching between anchors
and groundtruth boxes.
positive_class_weight: weight to be associated to positive anchors
negative_class_weight: weight to be associated to negative anchors
Returns:
cls_weights: a float32 tensor with shape [num_anchors] representing
classification weights.
"""
matched_indicator
=
tf
.
cast
(
match
.
matched_column_indicator
(),
tf
.
float32
)
ignore_indicator
=
tf
.
cast
(
match
.
ignored_column_indicator
(),
tf
.
float32
)
unmatched_indicator
=
1.0
-
matched_indicator
-
ignore_indicator
cls_weights
=
(
positive_class_weight
*
matched_indicator
+
negative_class_weight
*
unmatched_indicator
)
return
cls_weights
def
get_box_coder
(
self
):
"""Get BoxCoder of this TargetAssigner.
Returns:
BoxCoder: BoxCoder object.
"""
return
self
.
_box_coder
# TODO: This method pulls in all the implementation dependencies into core.
# Therefore its best to have this factory method outside of core.
def create_target_assigner(reference, stage=None,
                           positive_class_weight=1.0,
                           negative_class_weight=1.0,
                           unmatched_cls_target=None):
  """Factory function for creating standard target assigners.

  Args:
    reference: string referencing the type of TargetAssigner.
    stage: string denoting stage: {proposal, detection}.
    positive_class_weight: classification weight to be associated to positive
      anchors (default: 1.0)
    negative_class_weight: classification weight to be associated to negative
      anchors (default: 1.0)
    unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k]
      which is consistent with the classification target for each
      anchor (and can be empty for scalar targets). This shape must thus be
      compatible with the groundtruth labels that are passed to the Assign
      function (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]).
      If set to None, unmatched_cls_target is set to be 0 for each anchor.

  Returns:
    TargetAssigner: desired target assigner.

  Raises:
    ValueError: if combination reference+stage is invalid.
  """
  config = (reference, stage)
  if config == ('Multibox', 'proposal'):
    # Multibox proposals: greedy bipartite matching on negative squared
    # distance with mean/stddev box encoding.
    similarity_calc = sim_calc.NegSqDistSimilarity()
    matcher = bipartite_matcher.GreedyBipartiteMatcher()
    box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
  elif config == ('FasterRCNN', 'proposal'):
    similarity_calc = sim_calc.IouSimilarity()
    matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.7,
                                           unmatched_threshold=0.3,
                                           force_match_for_each_row=True)
    box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
        scale_factors=[10.0, 10.0, 5.0, 5.0])
  elif config == ('FasterRCNN', 'detection'):
    similarity_calc = sim_calc.IouSimilarity()
    # Uses all proposals with IOU < 0.5 as candidate negatives.
    matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
                                           negatives_lower_than_unmatched=True)
    box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
        scale_factors=[10.0, 10.0, 5.0, 5.0])
  elif reference == 'FastRCNN':
    # FastRCNN accepts any stage value.
    similarity_calc = sim_calc.IouSimilarity()
    matcher = argmax_matcher.ArgMaxMatcher(
        matched_threshold=0.5,
        unmatched_threshold=0.1,
        force_match_for_each_row=False,
        negatives_lower_than_unmatched=False)
    box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
  else:
    raise ValueError('No valid combination of reference and stage.')

  return TargetAssigner(similarity_calc, matcher, box_coder,
                        positive_class_weight=positive_class_weight,
                        negative_class_weight=negative_class_weight,
                        unmatched_cls_target=unmatched_cls_target)
def batch_assign_targets(target_assigner, anchors_batch, gt_box_batch,
                         gt_class_targets_batch):
  """Batched assignment of classification and regression targets.

  Args:
    target_assigner: a target assigner.
    anchors_batch: BoxList representing N box anchors or list of BoxList objects
      with length batch_size representing anchor sets.
    gt_box_batch: a list of BoxList objects with length batch_size
      representing groundtruth boxes for each image in the batch
    gt_class_targets_batch: a list of tensors with length batch_size, where
      each tensor has shape [num_gt_boxes_i, classification_target_size] and
      num_gt_boxes_i is the number of boxes in the ith boxlist of
      gt_box_batch.

  Returns:
    batch_cls_targets: a tensor with shape [batch_size, num_anchors,
      num_classes],
    batch_cls_weights: a tensor with shape [batch_size, num_anchors],
    batch_reg_targets: a tensor with shape [batch_size, num_anchors,
      box_code_dimension]
    batch_reg_weights: a tensor with shape [batch_size, num_anchors],
    match_list: a list of matcher.Match objects encoding the match between
      anchors and groundtruth boxes for each image of the batch,
      with rows of the Match objects corresponding to groundtruth boxes
      and columns corresponding to anchors.

  Raises:
    ValueError: if input list lengths are inconsistent, i.e.,
      batch_size == len(gt_box_batch) == len(gt_class_targets_batch)
      and batch_size == len(anchors_batch) unless anchors_batch is a single
      BoxList.
  """
  # A single shared BoxList of anchors is replicated once per image.
  if not isinstance(anchors_batch, list):
    anchors_batch = len(gt_box_batch) * [anchors_batch]
  if not all(
      isinstance(anchors, box_list.BoxList) for anchors in anchors_batch):
    raise ValueError('anchors_batch must be a BoxList or list of BoxLists.')
  if not (len(anchors_batch)
          == len(gt_box_batch)
          == len(gt_class_targets_batch)):
    raise ValueError('batch size incompatible with lengths of anchors_batch, '
                     'gt_box_batch and gt_class_targets_batch.')

  # Run per-image assignment and collect the 5-tuples it produces.
  assignments = []
  for anchors, gt_boxes, gt_class_targets in zip(
      anchors_batch, gt_box_batch, gt_class_targets_batch):
    assignments.append(
        target_assigner.assign(anchors, gt_boxes, gt_class_targets))

  # Project out each component of the per-image results.
  cls_targets_list = [result[0] for result in assignments]
  cls_weights_list = [result[1] for result in assignments]
  reg_targets_list = [result[2] for result in assignments]
  reg_weights_list = [result[3] for result in assignments]
  match_list = [result[4] for result in assignments]

  # Stack per-image tensors along a new leading batch dimension.
  batch_cls_targets = tf.stack(cls_targets_list)
  batch_cls_weights = tf.stack(cls_weights_list)
  batch_reg_targets = tf.stack(reg_targets_list)
  batch_reg_weights = tf.stack(reg_weights_list)
  return (batch_cls_targets, batch_cls_weights, batch_reg_targets,
          batch_reg_weights, match_list)
object_detection/core/target_assigner_test.py
0 → 100644
View file @
44fa1d37
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.core.target_assigner."""
import
numpy
as
np
import
tensorflow
as
tf
from
object_detection.box_coders
import
mean_stddev_box_coder
from
object_detection.core
import
box_list
from
object_detection.core
import
region_similarity_calculator
from
object_detection.core
import
target_assigner
as
targetassigner
from
object_detection.matchers
import
argmax_matcher
from
object_detection.matchers
import
bipartite_matcher
class TargetAssignerTest(tf.test.TestCase):
  """Unit tests for TargetAssigner.assign with various configurations."""

  def test_assign_agnostic(self):
    """Class-agnostic assignment: matched anchors get scalar target [1]."""
    similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
    matcher = bipartite_matcher.GreedyBipartiteMatcher()
    box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
    target_assigner = targetassigner.TargetAssigner(
        similarity_calc, matcher, box_coder, unmatched_cls_target=None)
    prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
                               [0.5, 0.5, 1.0, 0.8],
                               [0, 0.5, .5, 1.0]])
    prior_stddevs = tf.constant(3 * [4 * [.1]])
    priors = box_list.BoxList(prior_means)
    priors.add_field('stddev', prior_stddevs)
    box_corners = [[0.0, 0.0, 0.5, 0.5],
                   [0.5, 0.5, 0.9, 0.9]]
    boxes = box_list.BoxList(tf.constant(box_corners))
    # Anchors 0 and 1 match the two groundtruth boxes; anchor 2 is negative.
    exp_cls_targets = [[1], [1], [0]]
    exp_cls_weights = [1, 1, 1]
    exp_reg_targets = [[0, 0, 0, 0],
                       [0, 0, -1, 1],
                       [0, 0, 0, 0]]
    exp_reg_weights = [1, 1, 0]
    exp_matching_anchors = [0, 1]
    result = target_assigner.assign(priors, boxes, num_valid_rows=2)
    (cls_targets, cls_weights, reg_targets, reg_weights, match) = result
    with self.test_session() as sess:
      (cls_targets_out, cls_weights_out, reg_targets_out, reg_weights_out,
       matching_anchors_out) = sess.run(
           [cls_targets, cls_weights, reg_targets, reg_weights,
            match.matched_column_indices()])
      self.assertAllClose(cls_targets_out, exp_cls_targets)
      self.assertAllClose(cls_weights_out, exp_cls_weights)
      self.assertAllClose(reg_targets_out, exp_reg_targets)
      self.assertAllClose(reg_weights_out, exp_reg_weights)
      self.assertAllClose(matching_anchors_out, exp_matching_anchors)
      self.assertEquals(cls_targets_out.dtype, np.float32)
      self.assertEquals(cls_weights_out.dtype, np.float32)
      self.assertEquals(reg_targets_out.dtype, np.float32)
      self.assertEquals(reg_weights_out.dtype, np.float32)
      self.assertEquals(matching_anchors_out.dtype, np.int32)

  def test_assign_with_ignored_matches(self):
    # Note: test is very similar to above. The third anchor matches with an
    # IOU between the unmatched (0.3) and matched (0.5) thresholds, so it is
    # ignored. Like above, the expected classification targets are
    # [[1], [1], [0]], but the third anchor's classification weight is zero:
    # expected classification weights are [1, 1, 0].
    similarity_calc = region_similarity_calculator.IouSimilarity()
    matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
                                           unmatched_threshold=0.3)
    box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
    target_assigner = targetassigner.TargetAssigner(
        similarity_calc, matcher, box_coder)
    prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
                               [0.5, 0.5, 1.0, 0.8],
                               [0.0, 0.5, .9, 1.0]])
    prior_stddevs = tf.constant(3 * [4 * [.1]])
    priors = box_list.BoxList(prior_means)
    priors.add_field('stddev', prior_stddevs)
    box_corners = [[0.0, 0.0, 0.5, 0.5],
                   [0.5, 0.5, 0.9, 0.9]]
    boxes = box_list.BoxList(tf.constant(box_corners))
    exp_cls_targets = [[1], [1], [0]]
    exp_cls_weights = [1, 1, 0]
    exp_reg_targets = [[0, 0, 0, 0],
                       [0, 0, -1, 1],
                       [0, 0, 0, 0]]
    exp_reg_weights = [1, 1, 0]
    exp_matching_anchors = [0, 1]
    result = target_assigner.assign(priors, boxes)
    (cls_targets, cls_weights, reg_targets, reg_weights, match) = result
    with self.test_session() as sess:
      (cls_targets_out, cls_weights_out, reg_targets_out, reg_weights_out,
       matching_anchors_out) = sess.run(
           [cls_targets, cls_weights, reg_targets, reg_weights,
            match.matched_column_indices()])
      self.assertAllClose(cls_targets_out, exp_cls_targets)
      self.assertAllClose(cls_weights_out, exp_cls_weights)
      self.assertAllClose(reg_targets_out, exp_reg_targets)
      self.assertAllClose(reg_weights_out, exp_reg_weights)
      self.assertAllClose(matching_anchors_out, exp_matching_anchors)
      self.assertEquals(cls_targets_out.dtype, np.float32)
      self.assertEquals(cls_weights_out.dtype, np.float32)
      self.assertEquals(reg_targets_out.dtype, np.float32)
      self.assertEquals(reg_weights_out.dtype, np.float32)
      self.assertEquals(matching_anchors_out.dtype, np.int32)

  def test_assign_multiclass(self):
    """Multiclass one-hot targets; unmatched anchors get the background row."""
    similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
    matcher = bipartite_matcher.GreedyBipartiteMatcher()
    box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
    unmatched_cls_target = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32)
    target_assigner = targetassigner.TargetAssigner(
        similarity_calc, matcher, box_coder,
        unmatched_cls_target=unmatched_cls_target)
    prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
                               [0.5, 0.5, 1.0, 0.8],
                               [0, 0.5, .5, 1.0],
                               [.75, 0, 1.0, .25]])
    prior_stddevs = tf.constant(4 * [4 * [.1]])
    priors = box_list.BoxList(prior_means)
    priors.add_field('stddev', prior_stddevs)
    box_corners = [[0.0, 0.0, 0.5, 0.5],
                   [0.5, 0.5, 0.9, 0.9],
                   [.75, 0, .95, .27]]
    boxes = box_list.BoxList(tf.constant(box_corners))
    groundtruth_labels = tf.constant([[0, 1, 0, 0, 0, 0, 0],
                                      [0, 0, 0, 0, 0, 1, 0],
                                      [0, 0, 0, 1, 0, 0, 0]], tf.float32)
    # Anchor 2 is unmatched and receives the background one-hot row.
    exp_cls_targets = [[0, 1, 0, 0, 0, 0, 0],
                       [0, 0, 0, 0, 0, 1, 0],
                       [1, 0, 0, 0, 0, 0, 0],
                       [0, 0, 0, 1, 0, 0, 0]]
    exp_cls_weights = [1, 1, 1, 1]
    exp_reg_targets = [[0, 0, 0, 0],
                       [0, 0, -1, 1],
                       [0, 0, 0, 0],
                       [0, 0, -.5, .2]]
    exp_reg_weights = [1, 1, 0, 1]
    exp_matching_anchors = [0, 1, 3]
    result = target_assigner.assign(priors, boxes, groundtruth_labels,
                                    num_valid_rows=3)
    (cls_targets, cls_weights, reg_targets, reg_weights, match) = result
    with self.test_session() as sess:
      (cls_targets_out, cls_weights_out, reg_targets_out, reg_weights_out,
       matching_anchors_out) = sess.run(
           [cls_targets, cls_weights, reg_targets, reg_weights,
            match.matched_column_indices()])
      self.assertAllClose(cls_targets_out, exp_cls_targets)
      self.assertAllClose(cls_weights_out, exp_cls_weights)
      self.assertAllClose(reg_targets_out, exp_reg_targets)
      self.assertAllClose(reg_weights_out, exp_reg_weights)
      self.assertAllClose(matching_anchors_out, exp_matching_anchors)
      self.assertEquals(cls_targets_out.dtype, np.float32)
      self.assertEquals(cls_weights_out.dtype, np.float32)
      self.assertEquals(reg_targets_out.dtype, np.float32)
      self.assertEquals(reg_weights_out.dtype, np.float32)
      self.assertEquals(matching_anchors_out.dtype, np.int32)

  def test_assign_multiclass_unequal_class_weights(self):
    """Negative anchors should receive negative_class_weight (0.5 here)."""
    similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
    matcher = bipartite_matcher.GreedyBipartiteMatcher()
    box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
    unmatched_cls_target = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32)
    target_assigner = targetassigner.TargetAssigner(
        similarity_calc, matcher, box_coder,
        positive_class_weight=1.0, negative_class_weight=0.5,
        unmatched_cls_target=unmatched_cls_target)
    prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
                               [0.5, 0.5, 1.0, 0.8],
                               [0, 0.5, .5, 1.0],
                               [.75, 0, 1.0, .25]])
    prior_stddevs = tf.constant(4 * [4 * [.1]])
    priors = box_list.BoxList(prior_means)
    priors.add_field('stddev', prior_stddevs)
    box_corners = [[0.0, 0.0, 0.5, 0.5],
                   [0.5, 0.5, 0.9, 0.9],
                   [.75, 0, .95, .27]]
    boxes = box_list.BoxList(tf.constant(box_corners))
    groundtruth_labels = tf.constant([[0, 1, 0, 0, 0, 0, 0],
                                      [0, 0, 0, 0, 0, 1, 0],
                                      [0, 0, 0, 1, 0, 0, 0]], tf.float32)
    # Anchor 2 is the only negative, so only it gets weight 0.5.
    exp_cls_weights = [1, 1, .5, 1]
    result = target_assigner.assign(priors, boxes, groundtruth_labels,
                                    num_valid_rows=3)
    (_, cls_weights, _, _, _) = result
    with self.test_session() as sess:
      cls_weights_out = sess.run(cls_weights)
      self.assertAllClose(cls_weights_out, exp_cls_weights)

  def test_assign_multidimensional_class_targets(self):
    """Classification targets with rank > 1 (2x2 per anchor) are supported."""
    similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
    matcher = bipartite_matcher.GreedyBipartiteMatcher()
    box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
    unmatched_cls_target = tf.constant([[0, 0], [0, 0]], tf.float32)
    target_assigner = targetassigner.TargetAssigner(
        similarity_calc, matcher, box_coder,
        unmatched_cls_target=unmatched_cls_target)
    prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
                               [0.5, 0.5, 1.0, 0.8],
                               [0, 0.5, .5, 1.0],
                               [.75, 0, 1.0, .25]])
    prior_stddevs = tf.constant(4 * [4 * [.1]])
    priors = box_list.BoxList(prior_means)
    priors.add_field('stddev', prior_stddevs)
    box_corners = [[0.0, 0.0, 0.5, 0.5],
                   [0.5, 0.5, 0.9, 0.9],
                   [.75, 0, .95, .27]]
    boxes = box_list.BoxList(tf.constant(box_corners))
    groundtruth_labels = tf.constant([[[0, 1], [1, 0]],
                                      [[1, 0], [0, 1]],
                                      [[0, 1], [1, .5]]], tf.float32)
    # Anchor 2 is unmatched and receives the all-zero 2x2 target.
    exp_cls_targets = [[[0, 1], [1, 0]],
                       [[1, 0], [0, 1]],
                       [[0, 0], [0, 0]],
                       [[0, 1], [1, .5]]]
    exp_cls_weights = [1, 1, 1, 1]
    exp_reg_targets = [[0, 0, 0, 0],
                       [0, 0, -1, 1],
                       [0, 0, 0, 0],
                       [0, 0, -.5, .2]]
    exp_reg_weights = [1, 1, 0, 1]
    exp_matching_anchors = [0, 1, 3]
    result = target_assigner.assign(priors, boxes, groundtruth_labels,
                                    num_valid_rows=3)
    (cls_targets, cls_weights, reg_targets, reg_weights, match) = result
    with self.test_session() as sess:
      (cls_targets_out, cls_weights_out, reg_targets_out, reg_weights_out,
       matching_anchors_out) = sess.run(
           [cls_targets, cls_weights, reg_targets, reg_weights,
            match.matched_column_indices()])
      self.assertAllClose(cls_targets_out, exp_cls_targets)
      self.assertAllClose(cls_weights_out, exp_cls_weights)
      self.assertAllClose(reg_targets_out, exp_reg_targets)
      self.assertAllClose(reg_weights_out, exp_reg_weights)
      self.assertAllClose(matching_anchors_out, exp_matching_anchors)
      self.assertEquals(cls_targets_out.dtype, np.float32)
      self.assertEquals(cls_weights_out.dtype, np.float32)
      self.assertEquals(reg_targets_out.dtype, np.float32)
      self.assertEquals(reg_weights_out.dtype, np.float32)
      self.assertEquals(matching_anchors_out.dtype, np.int32)

  def test_assign_empty_groundtruth(self):
    """With zero groundtruth boxes every anchor is unmatched."""
    similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
    matcher = bipartite_matcher.GreedyBipartiteMatcher()
    box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
    unmatched_cls_target = tf.constant([0, 0, 0], tf.float32)
    target_assigner = targetassigner.TargetAssigner(
        similarity_calc, matcher, box_coder,
        unmatched_cls_target=unmatched_cls_target)
    prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
                               [0.5, 0.5, 1.0, 0.8],
                               [0, 0.5, .5, 1.0],
                               [.75, 0, 1.0, .25]])
    prior_stddevs = tf.constant(4 * [4 * [.1]])
    priors = box_list.BoxList(prior_means)
    priors.add_field('stddev', prior_stddevs)
    # Slice to zero rows to build an empty (0-box) groundtruth BoxList.
    box_corners_expanded = tf.constant([[0.0, 0.0, 0.0, 0.0]])
    box_corners = tf.slice(box_corners_expanded, [0, 0], [0, 4])
    boxes = box_list.BoxList(box_corners)
    groundtruth_labels_expanded = tf.constant([[0, 0, 0]], tf.float32)
    groundtruth_labels = tf.slice(groundtruth_labels_expanded,
                                  [0, 0], [0, 3])
    exp_cls_targets = [[0, 0, 0],
                       [0, 0, 0],
                       [0, 0, 0],
                       [0, 0, 0]]
    exp_cls_weights = [1, 1, 1, 1]
    exp_reg_targets = [[0, 0, 0, 0],
                       [0, 0, 0, 0],
                       [0, 0, 0, 0],
                       [0, 0, 0, 0]]
    exp_reg_weights = [0, 0, 0, 0]
    exp_matching_anchors = []
    result = target_assigner.assign(priors, boxes, groundtruth_labels)
    (cls_targets, cls_weights, reg_targets, reg_weights, match) = result
    with self.test_session() as sess:
      (cls_targets_out, cls_weights_out, reg_targets_out, reg_weights_out,
       matching_anchors_out) = sess.run(
           [cls_targets, cls_weights, reg_targets, reg_weights,
            match.matched_column_indices()])
      self.assertAllClose(cls_targets_out, exp_cls_targets)
      self.assertAllClose(cls_weights_out, exp_cls_weights)
      self.assertAllClose(reg_targets_out, exp_reg_targets)
      self.assertAllClose(reg_weights_out, exp_reg_weights)
      self.assertAllClose(matching_anchors_out, exp_matching_anchors)
      self.assertEquals(cls_targets_out.dtype, np.float32)
      self.assertEquals(cls_weights_out.dtype, np.float32)
      self.assertEquals(reg_targets_out.dtype, np.float32)
      self.assertEquals(reg_weights_out.dtype, np.float32)
      self.assertEquals(matching_anchors_out.dtype, np.int32)

  def test_raises_error_on_invalid_groundtruth_labels(self):
    """Label shape incompatible with unmatched_cls_target raises ValueError."""
    similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
    matcher = bipartite_matcher.GreedyBipartiteMatcher()
    box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
    unmatched_cls_target = tf.constant([[0, 0], [0, 0], [0, 0]], tf.float32)
    target_assigner = targetassigner.TargetAssigner(
        similarity_calc, matcher, box_coder,
        unmatched_cls_target=unmatched_cls_target)
    prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5]])
    prior_stddevs = tf.constant([[1.0, 1.0, 1.0, 1.0]])
    priors = box_list.BoxList(prior_means)
    priors.add_field('stddev', prior_stddevs)
    box_corners = [[0.0, 0.0, 0.5, 0.5],
                   [0.5, 0.5, 0.9, 0.9],
                   [.75, 0, .95, .27]]
    boxes = box_list.BoxList(tf.constant(box_corners))
    # Labels are [1, 2, 2] but unmatched_cls_target is [3, 2]: mismatch.
    groundtruth_labels = tf.constant([[[0, 1], [1, 0]]], tf.float32)
    with self.assertRaises(ValueError):
      target_assigner.assign(priors, boxes, groundtruth_labels,
                             num_valid_rows=3)
class
BatchTargetAssignerTest
(
tf
.
test
.
TestCase
):
def
_get_agnostic_target_assigner
(
self
):
similarity_calc
=
region_similarity_calculator
.
NegSqDistSimilarity
()
matcher
=
bipartite_matcher
.
GreedyBipartiteMatcher
()
box_coder
=
mean_stddev_box_coder
.
MeanStddevBoxCoder
()
return
targetassigner
.
TargetAssigner
(
similarity_calc
,
matcher
,
box_coder
,
positive_class_weight
=
1.0
,
negative_class_weight
=
1.0
,
unmatched_cls_target
=
None
)
def
_get_multi_class_target_assigner
(
self
,
num_classes
):
similarity_calc
=
region_similarity_calculator
.
NegSqDistSimilarity
()
matcher
=
bipartite_matcher
.
GreedyBipartiteMatcher
()
box_coder
=
mean_stddev_box_coder
.
MeanStddevBoxCoder
()
unmatched_cls_target
=
tf
.
constant
([
1
]
+
num_classes
*
[
0
],
tf
.
float32
)
return
targetassigner
.
TargetAssigner
(
similarity_calc
,
matcher
,
box_coder
,
positive_class_weight
=
1.0
,
negative_class_weight
=
1.0
,
unmatched_cls_target
=
unmatched_cls_target
)
def
_get_multi_dimensional_target_assigner
(
self
,
target_dimensions
):
similarity_calc
=
region_similarity_calculator
.
NegSqDistSimilarity
()
matcher
=
bipartite_matcher
.
GreedyBipartiteMatcher
()
box_coder
=
mean_stddev_box_coder
.
MeanStddevBoxCoder
()
unmatched_cls_target
=
tf
.
constant
(
np
.
zeros
(
target_dimensions
),
tf
.
float32
)
return
targetassigner
.
TargetAssigner
(
similarity_calc
,
matcher
,
box_coder
,
positive_class_weight
=
1.0
,
negative_class_weight
=
1.0
,
unmatched_cls_target
=
unmatched_cls_target
)
  def test_batch_assign_targets(self):
    """Batched class-agnostic assignment with a shared anchor BoxList."""
    box_list1 = box_list.BoxList(tf.constant([[0., 0., 0.2, 0.2]]))
    box_list2 = box_list.BoxList(tf.constant(
        [[0, 0.25123152, 1, 1],
         [0.015789, 0.0985, 0.55789, 0.3842]]))
    gt_box_batch = [box_list1, box_list2]
    # Class-agnostic mode: no per-image class targets.
    gt_class_targets = [None, None]
    prior_means = tf.constant([[0, 0, .25, .25],
                               [0, .25, 1, 1],
                               [0, .1, .5, .5],
                               [.75, .75, 1, 1]])
    prior_stddevs = tf.constant([[.1, .1, .1, .1],
                                 [.1, .1, .1, .1],
                                 [.1, .1, .1, .1],
                                 [.1, .1, .1, .1]])
    priors = box_list.BoxList(prior_means)
    priors.add_field('stddev', prior_stddevs)
    exp_reg_targets = [[[0, 0, -0.5, -0.5],
                        [0, 0, 0, 0],
                        [0, 0, 0, 0,],
                        [0, 0, 0, 0,],],
                       [[0, 0, 0, 0,],
                        [0, 0.01231521, 0, 0],
                        [0.15789001, -0.01500003, 0.57889998, -1.15799987],
                        [0, 0, 0, 0]]]
    exp_cls_weights = [[1, 1, 1, 1],
                       [1, 1, 1, 1]]
    exp_cls_targets = [[[1], [0], [0], [0]],
                       [[0], [1], [1], [0]]]
    exp_reg_weights = [[1, 0, 0, 0],
                       [0, 1, 1, 0]]
    # Matched anchor indices per image of the batch.
    exp_match_0 = [0]
    exp_match_1 = [1, 2]
    agnostic_target_assigner = self._get_agnostic_target_assigner()
    (cls_targets, cls_weights, reg_targets, reg_weights,
     match_list) = targetassigner.batch_assign_targets(
         agnostic_target_assigner, priors, gt_box_batch, gt_class_targets)
    self.assertTrue(isinstance(match_list, list) and len(match_list) == 2)
    with self.test_session() as sess:
      (cls_targets_out, cls_weights_out, reg_targets_out, reg_weights_out,
       match_out_0, match_out_1) = sess.run(
           [cls_targets, cls_weights, reg_targets, reg_weights] +
           [match.matched_column_indices() for match in match_list])
      self.assertAllClose(cls_targets_out, exp_cls_targets)
      self.assertAllClose(cls_weights_out, exp_cls_weights)
      self.assertAllClose(reg_targets_out, exp_reg_targets)
      self.assertAllClose(reg_weights_out, exp_reg_weights)
      self.assertAllClose(match_out_0, exp_match_0)
      self.assertAllClose(match_out_1, exp_match_1)
  def test_batch_assign_multiclass_targets(self):
    """Batched assignment with one-hot multiclass targets."""
    box_list1 = box_list.BoxList(tf.constant([[0., 0., 0.2, 0.2]]))
    box_list2 = box_list.BoxList(tf.constant(
        [[0, 0.25123152, 1, 1],
         [0.015789, 0.0985, 0.55789, 0.3842]]))
    gt_box_batch = [box_list1, box_list2]
    class_targets1 = tf.constant([[0, 1, 0, 0]], tf.float32)
    class_targets2 = tf.constant([[0, 0, 0, 1],
                                  [0, 0, 1, 0]], tf.float32)
    gt_class_targets = [class_targets1, class_targets2]
    prior_means = tf.constant([[0, 0, .25, .25],
                               [0, .25, 1, 1],
                               [0, .1, .5, .5],
                               [.75, .75, 1, 1]])
    prior_stddevs = tf.constant([[.1, .1, .1, .1],
                                 [.1, .1, .1, .1],
                                 [.1, .1, .1, .1],
                                 [.1, .1, .1, .1]])
    priors = box_list.BoxList(prior_means)
    priors.add_field('stddev', prior_stddevs)
    exp_reg_targets = [[[0, 0, -0.5, -0.5],
                        [0, 0, 0, 0],
                        [0, 0, 0, 0],
                        [0, 0, 0, 0]],
                       [[0, 0, 0, 0],
                        [0, 0.01231521, 0, 0],
                        [0.15789001, -0.01500003, 0.57889998, -1.15799987],
                        [0, 0, 0, 0]]]
    exp_cls_weights = [[1, 1, 1, 1],
                       [1, 1, 1, 1]]
    # Unmatched anchors receive the background row [1, 0, 0, 0].
    exp_cls_targets = [[[0, 1, 0, 0],
                        [1, 0, 0, 0],
                        [1, 0, 0, 0],
                        [1, 0, 0, 0]],
                       [[1, 0, 0, 0],
                        [0, 0, 0, 1],
                        [0, 0, 1, 0],
                        [1, 0, 0, 0]]]
    exp_reg_weights = [[1, 0, 0, 0],
                       [0, 1, 1, 0]]
    exp_match_0 = [0]
    exp_match_1 = [1, 2]
    multiclass_target_assigner = self._get_multi_class_target_assigner(
        num_classes=3)
    (cls_targets, cls_weights, reg_targets, reg_weights,
     match_list) = targetassigner.batch_assign_targets(
         multiclass_target_assigner, priors, gt_box_batch, gt_class_targets)
    self.assertTrue(isinstance(match_list, list) and len(match_list) == 2)
    with self.test_session() as sess:
      (cls_targets_out, cls_weights_out, reg_targets_out, reg_weights_out,
       match_out_0, match_out_1) = sess.run(
           [cls_targets, cls_weights, reg_targets, reg_weights] +
           [match.matched_column_indices() for match in match_list])
      self.assertAllClose(cls_targets_out, exp_cls_targets)
      self.assertAllClose(cls_weights_out, exp_cls_weights)
      self.assertAllClose(reg_targets_out, exp_reg_targets)
      self.assertAllClose(reg_weights_out, exp_reg_weights)
      self.assertAllClose(match_out_0, exp_match_0)
      self.assertAllClose(match_out_1, exp_match_1)
  def test_batch_assign_multidimensional_targets(self):
    """Batched assignment with rank-2 (2x3) classification targets."""
    box_list1 = box_list.BoxList(tf.constant([[0., 0., 0.2, 0.2]]))
    box_list2 = box_list.BoxList(tf.constant(
        [[0, 0.25123152, 1, 1],
         [0.015789, 0.0985, 0.55789, 0.3842]]))
    gt_box_batch = [box_list1, box_list2]
    class_targets1 = tf.constant([[[0, 1, 1],
                                   [1, 1, 0]]], tf.float32)
    class_targets2 = tf.constant([[[0, 1, 1],
                                   [1, 1, 0]],
                                  [[0, 0, 1],
                                   [0, 0, 1]]], tf.float32)
    gt_class_targets = [class_targets1, class_targets2]
    prior_means = tf.constant([[0, 0, .25, .25],
                               [0, .25, 1, 1],
                               [0, .1, .5, .5],
                               [.75, .75, 1, 1]])
    prior_stddevs = tf.constant([[.1, .1, .1, .1],
                                 [.1, .1, .1, .1],
                                 [.1, .1, .1, .1],
                                 [.1, .1, .1, .1]])
    priors = box_list.BoxList(prior_means)
    priors.add_field('stddev', prior_stddevs)
    exp_reg_targets = [[[0, 0, -0.5, -0.5],
                        [0, 0, 0, 0],
                        [0, 0, 0, 0],
                        [0, 0, 0, 0]],
                       [[0, 0, 0, 0],
                        [0, 0.01231521, 0, 0],
                        [0.15789001, -0.01500003, 0.57889998, -1.15799987],
                        [0, 0, 0, 0]]]
    exp_cls_weights = [[1, 1, 1, 1],
                       [1, 1, 1, 1]]
    # Unmatched anchors receive the all-zero 2x3 default target.
    exp_cls_targets = [[[[0., 1., 1.],
                         [1., 1., 0.]],
                        [[0., 0., 0.],
                         [0., 0., 0.]],
                        [[0., 0., 0.],
                         [0., 0., 0.]],
                        [[0., 0., 0.],
                         [0., 0., 0.]]],
                       [[[0., 0., 0.],
                         [0., 0., 0.]],
                        [[0., 1., 1.],
                         [1., 1., 0.]],
                        [[0., 0., 1.],
                         [0., 0., 1.]],
                        [[0., 0., 0.],
                         [0., 0., 0.]]]]
    exp_reg_weights = [[1, 0, 0, 0],
                       [0, 1, 1, 0]]
    exp_match_0 = [0]
    exp_match_1 = [1, 2]
    multiclass_target_assigner = self._get_multi_dimensional_target_assigner(
        target_dimensions=(2, 3))
    (cls_targets, cls_weights, reg_targets, reg_weights,
     match_list) = targetassigner.batch_assign_targets(
         multiclass_target_assigner, priors, gt_box_batch, gt_class_targets)
    self.assertTrue(isinstance(match_list, list) and len(match_list) == 2)
    with self.test_session() as sess:
      (cls_targets_out, cls_weights_out, reg_targets_out, reg_weights_out,
       match_out_0, match_out_1) = sess.run(
           [cls_targets, cls_weights, reg_targets, reg_weights] +
           [match.matched_column_indices() for match in match_list])
      self.assertAllClose(cls_targets_out, exp_cls_targets)
      self.assertAllClose(cls_weights_out, exp_cls_weights)
      self.assertAllClose(reg_targets_out, exp_reg_targets)
      self.assertAllClose(reg_weights_out, exp_reg_weights)
      self.assertAllClose(match_out_0, exp_match_0)
      self.assertAllClose(match_out_1, exp_match_1)
def test_batch_assign_empty_groundtruth(self):
  """Batch target assignment for an image with zero groundtruth boxes.

  All anchors should be unmatched: background class targets, zero
  regression targets/weights, and an empty match-index list.
  """
  # Build an empty (0, 4) box tensor by slicing zero rows out of a
  # (1, 4) zeros tensor.
  box_coords_expanded = tf.zeros((1, 4), tf.float32)
  box_coords = tf.slice(box_coords_expanded, [0, 0], [0, 4])
  box_list1 = box_list.BoxList(box_coords)
  gt_box_batch = [box_list1]
  # Two priors with the 'stddev' field expected by the box coder.
  prior_means = tf.constant([[0, 0, .25, .25],
                             [0, .25, 1, 1]])
  prior_stddevs = tf.constant([[.1, .1, .1, .1],
                               [.1, .1, .1, .1]])
  priors = box_list.BoxList(prior_means)
  priors.add_field('stddev', prior_stddevs)
  # With no groundtruth, regression targets are all zero...
  exp_reg_targets = [[[0, 0, 0, 0],
                      [0, 0, 0, 0]]]
  exp_cls_weights = [[1, 1]]
  # ...class targets are the background one-hot (index 0)...
  exp_cls_targets = [[[1, 0, 0, 0],
                      [1, 0, 0, 0]]]
  # ...regression weights are zero, and no anchor is matched.
  exp_reg_weights = [[0, 0]]
  exp_match_0 = []

  num_classes = 3
  pad = 1  # one extra column for the background class
  # Empty (0, num_classes + pad) class-target tensor to pair with the
  # empty groundtruth box list.
  gt_class_targets = tf.zeros((0, num_classes + pad))
  gt_class_targets_batch = [gt_class_targets]

  multiclass_target_assigner = self._get_multi_class_target_assigner(
      num_classes=3)

  (cls_targets, cls_weights, reg_targets, reg_weights,
   match_list) = targetassigner.batch_assign_targets(
       multiclass_target_assigner, priors, gt_box_batch,
       gt_class_targets_batch)
  # One Match object for the single image in the batch.
  self.assertTrue(isinstance(match_list, list) and len(match_list) == 1)
  with self.test_session() as sess:
    (cls_targets_out, cls_weights_out, reg_targets_out, reg_weights_out,
     match_out_0) = sess.run(
         [cls_targets, cls_weights, reg_targets, reg_weights] +
         [match.matched_column_indices() for match in match_list])
    self.assertAllClose(cls_targets_out, exp_cls_targets)
    self.assertAllClose(cls_weights_out, exp_cls_weights)
    self.assertAllClose(reg_targets_out, exp_reg_targets)
    self.assertAllClose(reg_weights_out, exp_reg_weights)
    self.assertAllClose(match_out_0, exp_match_0)
class CreateTargetAssignerTest(tf.test.TestCase):

  def test_create_target_assigner(self):
    """Tests that named constructor gives working target assigners.

    TODO: Make this test more general.
    """
    unit_box = [[0.0, 0.0, 1.0, 1.0]]
    gt_boxes = box_list.BoxList(tf.constant(unit_box))

    # The Multibox assigner requires priors carrying a per-coordinate
    # 'stddev' field.
    priors_with_stddev = box_list.BoxList(tf.constant(unit_box))
    priors_with_stddev.add_field('stddev',
                                 tf.constant([[1.0, 1.0, 1.0, 1.0]]))
    multibox_assigner = targetassigner.create_target_assigner(
        'Multibox', stage='proposal')
    multibox_assigner.assign(priors_with_stddev, gt_boxes)

    # No tests on output, as that may vary arbitrarily as new target assigners
    # are added. As long as it is constructed correctly and runs without
    # errors, tests on the individual assigners cover correctness of the
    # assignments. The remaining assigners take plain anchors (no stddev).
    plain_anchors = box_list.BoxList(tf.constant(unit_box))
    for detector, stage in (('FasterRCNN', 'proposal'),
                            ('FastRCNN', None),
                            ('FasterRCNN', 'detection')):
      if stage is None:
        assigner = targetassigner.create_target_assigner(detector)
      else:
        assigner = targetassigner.create_target_assigner(detector, stage=stage)
      assigner.assign(plain_anchors, gt_boxes)

    # Unknown detector / stage names must be rejected.
    with self.assertRaises(ValueError):
      targetassigner.create_target_assigner('InvalidDetector',
                                            stage='invalid_stage')
# Run the tests in this module when executed as a script.
if __name__ == '__main__':
  tf.test.main()
object_detection/create_pascal_tf_record.py
0 → 100644
View file @
44fa1d37
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Convert raw PASCAL dataset to TFRecord for object_detection.

Example usage:
    ./create_pascal_tf_record --data_dir=/home/user/VOCdevkit \
        --year=VOC2012 \
        --output_path=/home/user/pascal.record
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import hashlib
import io
import logging
import os

from lxml import etree
import PIL.Image
import tensorflow as tf

from object_detection.utils import dataset_util
from object_detection.utils import label_map_util


# Command-line configuration for the conversion script.
flags = tf.app.flags
flags.DEFINE_string('data_dir', '', 'Root directory to raw PASCAL VOC dataset.')
flags.DEFINE_string('set', 'train', 'Convert training set, validation set or '
                    'merged set.')
flags.DEFINE_string('annotations_dir', 'Annotations',
                    '(Relative) path to annotations directory.')
flags.DEFINE_string('year', 'VOC2007', 'Desired challenge year.')
flags.DEFINE_string('output_path', '', 'Path to output TFRecord')
flags.DEFINE_string('label_map_path', 'data/pascal_label_map.pbtxt',
                    'Path to label map proto')
flags.DEFINE_boolean('ignore_difficult_instances', False, 'Whether to ignore '
                     'difficult instances')
FLAGS = flags.FLAGS

# Accepted values for the --set and --year flags (validated in main()).
SETS = ['train', 'val', 'trainval', 'test']
YEARS = ['VOC2007', 'VOC2012', 'merged']
def dict_to_tf_example(data,
                       dataset_directory,
                       label_map_dict,
                       ignore_difficult_instances=False,
                       image_subdirectory='JPEGImages'):
  """Convert XML derived dict to tf.Example proto.

  Notice that this function normalizes the bounding box coordinates provided
  by the raw data.

  Args:
    data: dict holding PASCAL XML fields for a single image (obtained by
      running dataset_util.recursive_parse_xml_to_dict)
    dataset_directory: Path to root directory holding PASCAL dataset
    label_map_dict: A map from string label names to integers ids.
    ignore_difficult_instances: Whether to skip difficult instances in the
      dataset (default: False).
    image_subdirectory: String specifying subdirectory within the
      PASCAL dataset directory holding the actual image data.

  Returns:
    example: The converted tf.Example.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG
  """
  # Resolve the image path relative to the dataset root and read its bytes.
  img_path = os.path.join(data['folder'], image_subdirectory, data['filename'])
  full_path = os.path.join(dataset_directory, img_path)
  with tf.gfile.GFile(full_path, 'rb') as fid:
    encoded_jpg = fid.read()
  # Decode only to validate the format; the raw JPEG bytes go into the record.
  encoded_jpg_io = io.BytesIO(encoded_jpg)
  image = PIL.Image.open(encoded_jpg_io)
  if image.format != 'JPEG':
    raise ValueError('Image format not JPEG')
  key = hashlib.sha256(encoded_jpg).hexdigest()

  width = int(data['size']['width'])
  height = int(data['size']['height'])

  xmin = []
  ymin = []
  xmax = []
  ymax = []
  classes = []
  classes_text = []
  truncated = []
  poses = []
  difficult_obj = []
  # Images with no annotated objects have no 'object' key in the parsed XML;
  # fall back to an empty list so such images still produce a (box-free)
  # example instead of raising KeyError.
  for obj in data.get('object', []):
    difficult = bool(int(obj['difficult']))
    if ignore_difficult_instances and difficult:
      continue

    difficult_obj.append(int(difficult))

    # Normalize box coordinates to [0, 1] by the image dimensions.
    xmin.append(float(obj['bndbox']['xmin']) / width)
    ymin.append(float(obj['bndbox']['ymin']) / height)
    xmax.append(float(obj['bndbox']['xmax']) / width)
    ymax.append(float(obj['bndbox']['ymax']) / height)
    classes_text.append(obj['name'].encode('utf8'))
    classes.append(label_map_dict[obj['name']])
    truncated.append(int(obj['truncated']))
    poses.append(obj['pose'].encode('utf8'))

  example = tf.train.Example(features=tf.train.Features(feature={
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(
          data['filename'].encode('utf8')),
      'image/source_id': dataset_util.bytes_feature(
          data['filename'].encode('utf8')),
      'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded': dataset_util.bytes_feature(encoded_jpg),
      'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
      'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
      'image/object/class/label': dataset_util.int64_list_feature(classes),
      'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
      'image/object/truncated': dataset_util.int64_list_feature(truncated),
      'image/object/view': dataset_util.bytes_list_feature(poses),
  }))
  return example
def main(_):
  """Converts the selected PASCAL VOC split(s) into a single TFRecord file.

  Validates the --set and --year flags, then for each chosen year reads the
  example ids from the ImageSets/Main aeroplane list, parses each per-image
  annotation XML, and writes the resulting tf.Examples to --output_path.
  """
  if FLAGS.set not in SETS:
    raise ValueError('set must be in : {}'.format(SETS))
  if FLAGS.year not in YEARS:
    raise ValueError('year must be in : {}'.format(YEARS))

  data_dir = FLAGS.data_dir
  # 'merged' processes both challenge years into one output file.
  years = ['VOC2007', 'VOC2012']
  if FLAGS.year != 'merged':
    years = [FLAGS.year]

  writer = tf.python_io.TFRecordWriter(FLAGS.output_path)

  label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

  for year in years:
    logging.info('Reading from PASCAL %s dataset.', year)
    # The aeroplane_<set>.txt list enumerates every image id in the split
    # (all class lists contain the same ids).
    examples_path = os.path.join(data_dir, year, 'ImageSets', 'Main',
                                 'aeroplane_' + FLAGS.set + '.txt')
    annotations_dir = os.path.join(data_dir, year, FLAGS.annotations_dir)
    examples_list = dataset_util.read_examples_list(examples_path)
    for idx, example in enumerate(examples_list):
      if idx % 100 == 0:
        logging.info('On image %d of %d', idx, len(examples_list))
      # Parse the per-image annotation XML into a nested dict.
      path = os.path.join(annotations_dir, example + '.xml')
      with tf.gfile.GFile(path, 'r') as fid:
        xml_str = fid.read()
      xml = etree.fromstring(xml_str)
      data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

      tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
                                      FLAGS.ignore_difficult_instances)
      writer.write(tf_example.SerializeToString())

  writer.close()
# Parse flags and dispatch to main() when executed as a script.
if __name__ == '__main__':
  tf.app.run()
Prev
1
…
9
10
11
12
13
14
15
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment