dcuai / dlexamples · Commits

Commit d0d91e12, authored Apr 15, 2022 by huchen

Merge branch 'tf2' into 'main'

tf2 detection

See merge request dcutoolkit/deeplearing/dlexamples_new!2

Parents: 2795dc1f, c320b6ef
Showing 20 changed files with 4590 additions and 0 deletions (+4590 -0)
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/object_detection/argmax_matcher.py  +202 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/object_detection/balanced_positive_negative_sampler.py  +269 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/object_detection/box_coder.py  +157 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/object_detection/box_list.py  +213 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/object_detection/faster_rcnn_box_coder.py  +125 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/object_detection/matcher.py  +244 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/object_detection/minibatch_sampler.py  +95 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/object_detection/ops.py  +84 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/object_detection/preprocessor.py  +444 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/object_detection/region_similarity_calculator.py  +138 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/object_detection/shape_utils.py  +86 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/object_detection/target_assigner.py  +308 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/object_detection/tf_example_decoder.py  +153 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/object_detection/visualization_utils.py  +417 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/ops/__init__.py  +0 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/ops/box_utils.py  +503 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/ops/nms_ops.py  +209 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/ops/postprocess_ops.py  +297 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/ops/preprocess_ops.py  +206 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/ops/roi_ops.py  +440 -0
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/object_detection/argmax_matcher.py (new file, mode 100644)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Argmax matcher implementation.
This class takes a similarity matrix and matches columns to rows based on the
maximum value per column. One can specify matched_thresholds and
to prevent columns from matching to rows (generally resulting in a negative
training example) and unmatched_theshold to ignore the match (generally
resulting in neither a positive or negative training example).
This matcher is used in Fast(er)-RCNN.
Note: matchers are used in TargetAssigners. There is a create_target_assigner
factory function for popular implementations.
"""
import tensorflow as tf

from mask_rcnn.object_detection import matcher
from mask_rcnn.object_detection import shape_utils


class ArgMaxMatcher(matcher.Matcher):
  """Matcher based on highest value.

  This class computes matches from a similarity matrix. Each column is matched
  to a single row.

  To support object detection target assignment this class enables setting both
  matched_threshold (upper threshold) and unmatched_threshold (lower threshold)
  defining three categories of similarity which define whether examples are
  positive, negative, or ignored:
  (1) similarity >= matched_threshold: Highest similarity. Matched/Positive!
  (2) matched_threshold > similarity >= unmatched_threshold: Medium similarity.
      Depending on negatives_lower_than_unmatched, this is either
      Unmatched/Negative OR Ignore.
  (3) unmatched_threshold > similarity: Lowest similarity. Depending on flag
      negatives_lower_than_unmatched, either Unmatched/Negative OR Ignore.
  For ignored matches this class sets the values in the Match object to -2.
  """

  def __init__(self,
               matched_threshold,
               unmatched_threshold=None,
               negatives_lower_than_unmatched=True,
               force_match_for_each_row=False):
    """Construct ArgMaxMatcher.

    Args:
      matched_threshold: Threshold for positive matches. Positive if
        sim >= matched_threshold, where sim is the maximum value of the
        similarity matrix for a given column. Set to None for no threshold.
      unmatched_threshold: Threshold for negative matches. Negative if
        sim < unmatched_threshold. Defaults to matched_threshold
        when set to None.
      negatives_lower_than_unmatched: Boolean which defaults to True. If True
        then negative matches are the ones below the unmatched_threshold,
        whereas ignored matches are in between the matched and unmatched
        threshold. If False, then negative matches are in between the matched
        and unmatched threshold, and everything lower than unmatched is
        ignored.
      force_match_for_each_row: If True, ensures that each row is matched to
        at least one column (which is not guaranteed otherwise if the
        matched_threshold is high). Defaults to False. See
        argmax_matcher_test.testMatcherForceMatch() for an example.

    Raises:
      ValueError: if unmatched_threshold is set but matched_threshold is not
        set or if unmatched_threshold > matched_threshold.
    """
    if (matched_threshold is None) and (unmatched_threshold is not None):
      raise ValueError('Need to also define matched_threshold when '
                       'unmatched_threshold is defined')
    self._matched_threshold = matched_threshold

    if unmatched_threshold is None:
      self._unmatched_threshold = matched_threshold
    else:
      if unmatched_threshold > matched_threshold:
        raise ValueError('unmatched_threshold needs to be smaller or equal '
                         'to matched_threshold')
      self._unmatched_threshold = unmatched_threshold

    if not negatives_lower_than_unmatched:
      if self._unmatched_threshold == self._matched_threshold:
        raise ValueError('When negatives are in between matched and '
                         'unmatched thresholds, these cannot be of equal '
                         'value. matched: %s, unmatched: %s' %
                         (self._matched_threshold, self._unmatched_threshold))

    self._force_match_for_each_row = force_match_for_each_row
    self._negatives_lower_than_unmatched = negatives_lower_than_unmatched
  def _match(self, similarity_matrix):
    """Tries to match each column of the similarity matrix to a row.

    Args:
      similarity_matrix: tensor of shape [N, M] representing any similarity
        metric.

    Returns:
      Match object with corresponding matches for each of M columns.
    """

    def _match_when_rows_are_empty():
      """Performs matching when the rows of similarity matrix are empty.

      When the rows are empty, all detections are false positives. So we return
      a tensor of -1's to indicate that the columns do not match to any rows.

      Returns:
        matches: int32 tensor indicating the row each column matches to.
      """
      similarity_matrix_shape = shape_utils.combined_static_and_dynamic_shape(
          similarity_matrix)
      return -1 * tf.ones([similarity_matrix_shape[1]], dtype=tf.int32)

    def _match_when_rows_are_non_empty():
      """Performs matching when the rows of similarity matrix are non empty.

      Returns:
        matches: int32 tensor indicating the row each column matches to.
      """
      # Matches for each column
      matches = tf.argmax(input=similarity_matrix, axis=0, output_type=tf.int32)

      # Deal with matched and unmatched threshold
      if self._matched_threshold is not None:
        # Get logical indices of ignored and unmatched columns as tf.int64
        matched_vals = tf.reduce_max(input_tensor=similarity_matrix, axis=0)
        below_unmatched_threshold = tf.greater(self._unmatched_threshold,
                                               matched_vals)
        between_thresholds = tf.logical_and(
            tf.greater_equal(matched_vals, self._unmatched_threshold),
            tf.greater(self._matched_threshold, matched_vals))

        if self._negatives_lower_than_unmatched:
          matches = self._set_values_using_indicator(matches,
                                                     below_unmatched_threshold,
                                                     -1)
          matches = self._set_values_using_indicator(matches,
                                                     between_thresholds,
                                                     -2)
        else:
          matches = self._set_values_using_indicator(matches,
                                                     below_unmatched_threshold,
                                                     -2)
          matches = self._set_values_using_indicator(matches,
                                                     between_thresholds,
                                                     -1)

      if self._force_match_for_each_row:
        similarity_matrix_shape = shape_utils.combined_static_and_dynamic_shape(
            similarity_matrix)
        force_match_column_ids = tf.argmax(input=similarity_matrix, axis=1,
                                           output_type=tf.int32)
        force_match_column_indicators = tf.one_hot(
            force_match_column_ids, depth=similarity_matrix_shape[1])
        force_match_row_ids = tf.argmax(input=force_match_column_indicators,
                                        axis=0, output_type=tf.int32)
        force_match_column_mask = tf.cast(
            tf.reduce_max(input_tensor=force_match_column_indicators, axis=0),
            tf.bool)
        final_matches = tf.where(force_match_column_mask,
                                 force_match_row_ids, matches)
        return final_matches
      else:
        return matches

    if similarity_matrix.shape.is_fully_defined():
      if similarity_matrix.shape[0].value == 0:
        return _match_when_rows_are_empty()
      else:
        return _match_when_rows_are_non_empty()
    else:
      return tf.cond(
          pred=tf.greater(tf.shape(input=similarity_matrix)[0], 0),
          true_fn=_match_when_rows_are_non_empty,
          false_fn=_match_when_rows_are_empty)

  def _set_values_using_indicator(self, x, indicator, val):
    """Set the indicated fields of x to val.

    Args:
      x: tensor.
      indicator: boolean with same shape as x.
      val: scalar with value to set.

    Returns:
      modified tensor.
    """
    indicator = tf.cast(indicator, x.dtype)
    return tf.add(tf.multiply(x, 1 - indicator), val * indicator)
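A minimal usage sketch, assuming eager TF2 and the package layout above (values are illustrative; the dynamic input signature is chosen so _match takes the tf.cond branch rather than the TF1-style shape[0].value access):

import tensorflow as tf
from mask_rcnn.object_detection.argmax_matcher import ArgMaxMatcher

arg_matcher = ArgMaxMatcher(matched_threshold=0.5, unmatched_threshold=0.3)

@tf.function(input_signature=[tf.TensorSpec([None, None], tf.float32)])
def run_match(similarity):
  # Rows are ground-truth boxes, columns are anchors.
  return arg_matcher.match(similarity).match_results

sim = tf.constant([[0.9, 0.4, 0.1],
                   [0.2, 0.6, 0.3]])
print(run_match(sim).numpy())  # [0, 1, -2]: column 2 falls between thresholds -> ignored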
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/object_detection/balanced_positive_negative_sampler.py (new file, mode 100644)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Class to subsample minibatches by balancing positives and negatives.
Subsamples minibatches based on a pre-specified positive fraction in range
[0,1]. The class presumes there are many more negatives than positive examples:
if the desired batch_size cannot be achieved with the pre-specified positive
fraction, it fills the rest with negative examples. If this is not sufficient
for obtaining the desired batch_size, it returns fewer examples.
The main function to call is Subsample(self, indicator, labels). For convenience
one can also call SubsampleWeights(self, weights, labels) which is defined in
the minibatch_sampler base class.
When is_static is True, it implements a method that guarantees static shapes.
It also ensures the length of output of the subsample is always batch_size, even
when number of examples set to True in indicator is less than batch_size.
This is originally implemented in TensorFlow Object Detection API.
"""
import tensorflow as tf

from mask_rcnn.object_detection import minibatch_sampler
from mask_rcnn.object_detection import ops


class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler):
  """Subsamples minibatches to a desired balance of positives and negatives."""

  def __init__(self, positive_fraction=0.5, is_static=False):
    """Constructs a minibatch sampler.

    Args:
      positive_fraction: desired fraction of positive examples (scalar in [0,1])
        in the batch.
      is_static: If True, uses an implementation with static shape guarantees.

    Raises:
      ValueError: if positive_fraction < 0, or positive_fraction > 1
    """
    if positive_fraction < 0 or positive_fraction > 1:
      raise ValueError('positive_fraction should be in range [0,1]. '
                       'Received: %s.' % positive_fraction)
    self._positive_fraction = positive_fraction
    self._is_static = is_static
  def _get_num_pos_neg_samples(self, sorted_indices_tensor, sample_size):
    """Counts the number of positive and negative examples to be sampled.

    Args:
      sorted_indices_tensor: A sorted int32 tensor of shape [N] which contains
        the signed indices of the examples where the sign is based on the label
        value. The examples that cannot be sampled are set to 0. It samples
        at most sample_size*positive_fraction positive examples and the
        remainder from negative examples.
      sample_size: Size of subsamples.

    Returns:
      A tuple containing the number of positive and negative labels in the
      subsample.
    """
    input_length = tf.shape(input=sorted_indices_tensor)[0]
    valid_positive_index = tf.greater(sorted_indices_tensor,
                                      tf.zeros(input_length, tf.int32))
    num_sampled_pos = tf.reduce_sum(
        input_tensor=tf.cast(valid_positive_index, tf.int32))
    max_num_positive_samples = tf.constant(
        int(sample_size * self._positive_fraction), tf.int32)
    num_positive_samples = tf.minimum(max_num_positive_samples, num_sampled_pos)
    num_negative_samples = tf.constant(sample_size,
                                       tf.int32) - num_positive_samples

    return num_positive_samples, num_negative_samples

  def _get_values_from_start_and_end(self, input_tensor, num_start_samples,
                                     num_end_samples, total_num_samples):
    """Slices the first num_start_samples and last num_end_samples from input_tensor.

    Args:
      input_tensor: An int32 tensor of shape [N] to be sliced.
      num_start_samples: Number of examples to be sliced from the beginning
        of the input tensor.
      num_end_samples: Number of examples to be sliced from the end of the
        input tensor.
      total_num_samples: Sum of num_start_samples and num_end_samples. This
        should be a scalar.

    Returns:
      A tensor containing the first num_start_samples and last num_end_samples
      from input_tensor.
    """
    input_length = tf.shape(input=input_tensor)[0]
    start_positions = tf.less(tf.range(input_length), num_start_samples)
    end_positions = tf.greater_equal(
        tf.range(input_length), input_length - num_end_samples)
    selected_positions = tf.logical_or(start_positions, end_positions)
    selected_positions = tf.cast(selected_positions, tf.float32)
    indexed_positions = tf.multiply(tf.cumsum(selected_positions),
                                    selected_positions)
    one_hot_selector = tf.one_hot(tf.cast(indexed_positions, tf.int32) - 1,
                                  total_num_samples,
                                  dtype=tf.float32)
    return tf.cast(tf.tensordot(tf.cast(input_tensor, tf.float32),
                                one_hot_selector, axes=[0, 0]), tf.int32)
  def _static_subsample(self, indicator, batch_size, labels):
    """Returns subsampled minibatch.

    Args:
      indicator: boolean tensor of shape [N] whose True entries can be sampled.
        N should be a compile time constant.
      batch_size: desired batch size. This scalar cannot be None.
      labels: boolean tensor of shape [N] denoting positive(=True) and negative
        (=False) examples. N should be a compile time constant.

    Returns:
      sampled_idx_indicator: boolean tensor of shape [N], True for entries which
        are sampled. It ensures the length of output of the subsample is always
        batch_size, even when number of examples set to True in indicator is
        less than batch_size.

    Raises:
      ValueError: if labels and indicator are not 1D boolean tensors.
    """
    # Check if indicator and labels have a static size.
    if not indicator.shape.is_fully_defined():
      raise ValueError('indicator must be static in shape when is_static is '
                       'True')
    if not labels.shape.is_fully_defined():
      raise ValueError('labels must be static in shape when is_static is '
                       'True')
    if not isinstance(batch_size, int):
      raise ValueError('batch_size has to be an integer when is_static is '
                       'True.')

    input_length = tf.shape(input=indicator)[0]

    # Set the number of examples set True in indicator to be at least
    # batch_size.
    num_true_sampled = tf.reduce_sum(
        input_tensor=tf.cast(indicator, tf.float32))
    additional_false_sample = tf.less_equal(
        tf.cumsum(tf.cast(tf.logical_not(indicator), tf.float32)),
        batch_size - num_true_sampled)
    indicator = tf.logical_or(indicator, additional_false_sample)

    # Shuffle indicator and label. Need to store the permutation to restore the
    # order post sampling.
    permutation = tf.random.shuffle(tf.range(input_length))
    indicator = ops.matmul_gather_on_zeroth_axis(
        tf.cast(indicator, tf.float32), permutation)
    labels = ops.matmul_gather_on_zeroth_axis(
        tf.cast(labels, tf.float32), permutation)

    # index (starting from 1) when indicator is True, 0 when False
    indicator_idx = tf.where(
        tf.cast(indicator, tf.bool), tf.range(1, input_length + 1),
        tf.zeros(input_length, tf.int32))

    # Replace -1 for negative, +1 for positive labels
    signed_label = tf.where(
        tf.cast(labels, tf.bool), tf.ones(input_length, tf.int32),
        tf.scalar_mul(-1, tf.ones(input_length, tf.int32)))

    # negative of index for negative label, positive index for positive label,
    # 0 when indicator is False.
    signed_indicator_idx = tf.multiply(indicator_idx, signed_label)
    sorted_signed_indicator_idx = tf.nn.top_k(
        signed_indicator_idx, input_length, sorted=True).values

    [num_positive_samples,
     num_negative_samples] = self._get_num_pos_neg_samples(
         sorted_signed_indicator_idx, batch_size)

    sampled_idx = self._get_values_from_start_and_end(
        sorted_signed_indicator_idx, num_positive_samples,
        num_negative_samples, batch_size)

    # Shift the indices to start from 0 and remove any samples that are set as
    # False.
    sampled_idx = tf.abs(sampled_idx) - tf.ones(batch_size, tf.int32)
    sampled_idx = tf.multiply(
        tf.cast(tf.greater_equal(sampled_idx, tf.constant(0)), tf.int32),
        sampled_idx)

    sampled_idx_indicator = tf.cast(
        tf.reduce_sum(
            input_tensor=tf.one_hot(sampled_idx, depth=input_length), axis=0),
        tf.bool)

    # project back the order based on stored permutations
    reprojections = tf.one_hot(permutation, depth=input_length,
                               dtype=tf.float32)
    return tf.cast(tf.tensordot(
        tf.cast(sampled_idx_indicator, tf.float32),
        reprojections, axes=[0, 0]), tf.bool)
  def subsample(self, indicator, batch_size, labels, scope=None):
    """Returns subsampled minibatch.

    Args:
      indicator: boolean tensor of shape [N] whose True entries can be sampled.
      batch_size: desired batch size. If None, keeps all positive samples and
        randomly selects negative samples so that the positive sample fraction
        matches self._positive_fraction. It cannot be None if is_static is
        True.
      labels: boolean tensor of shape [N] denoting positive(=True) and negative
        (=False) examples.
      scope: name scope.

    Returns:
      sampled_idx_indicator: boolean tensor of shape [N], True for entries which
        are sampled.

    Raises:
      ValueError: if labels and indicator are not 1D boolean tensors.
    """
    if len(indicator.get_shape().as_list()) != 1:
      raise ValueError('indicator must be 1 dimensional, got a tensor of '
                       'shape %s' % indicator.get_shape())
    if len(labels.get_shape().as_list()) != 1:
      raise ValueError('labels must be 1 dimensional, got a tensor of '
                       'shape %s' % labels.get_shape())
    if labels.dtype != tf.bool:
      raise ValueError('labels should be of type bool. Received: %s' %
                       labels.dtype)
    if indicator.dtype != tf.bool:
      raise ValueError('indicator should be of type bool. Received: %s' %
                       indicator.dtype)

    if self._is_static:
      return self._static_subsample(indicator, batch_size, labels)

    else:
      # Only sample from indicated samples
      negative_idx = tf.logical_not(labels)
      positive_idx = tf.logical_and(labels, indicator)
      negative_idx = tf.logical_and(negative_idx, indicator)

      # Sample positive and negative samples separately
      if batch_size is None:
        max_num_pos = tf.reduce_sum(
            input_tensor=tf.cast(positive_idx, dtype=tf.int32))
      else:
        max_num_pos = int(self._positive_fraction * batch_size)
      sampled_pos_idx = self.subsample_indicator(positive_idx, max_num_pos)
      num_sampled_pos = tf.reduce_sum(
          input_tensor=tf.cast(sampled_pos_idx, tf.int32))
      if batch_size is None:
        negative_positive_ratio = (
            1 - self._positive_fraction) / self._positive_fraction
        max_num_neg = tf.cast(
            negative_positive_ratio *
            tf.cast(num_sampled_pos, dtype=tf.float32),
            dtype=tf.int32)
      else:
        max_num_neg = batch_size - num_sampled_pos
      sampled_neg_idx = self.subsample_indicator(negative_idx, max_num_neg)

      return tf.logical_or(sampled_pos_idx, sampled_neg_idx)
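A minimal usage sketch of the dynamic (is_static=False) path, assuming eager TF2 and the package layout above (values are illustrative):

import tensorflow as tf
from mask_rcnn.object_detection.balanced_positive_negative_sampler import (
    BalancedPositiveNegativeSampler)

labels = tf.constant([True, False, False, True, False, False])
indicator = tf.ones([6], tf.bool)  # every entry may be sampled
sampler = BalancedPositiveNegativeSampler(positive_fraction=0.5)
sampled = sampler.subsample(indicator, batch_size=4, labels=labels)
print(sampled.numpy())  # four True entries: up to 2 positives, remainder negatives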
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/object_detection/box_coder.py (new file, mode 100644)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Base box coder.
Box coders convert between coordinate frames, namely image-centric
(with (0,0) on the top left of image) and anchor-centric (with (0,0) being
defined by a specific anchor).
Users of a BoxCoder can call two methods:
encode: which encodes a box with respect to a given anchor
(or rather, a tensor of boxes wrt a corresponding tensor of anchors) and
decode: which inverts this encoding with a decode operation.
In both cases, the arguments are assumed to be in 1-1 correspondence already;
it is not the job of a BoxCoder to perform matching.
"""
from abc import ABCMeta
from abc import abstractmethod
from abc import abstractproperty

import tensorflow as tf


# Box coder types.
FASTER_RCNN = 'faster_rcnn'
KEYPOINT = 'keypoint'
MEAN_STDDEV = 'mean_stddev'
SQUARE = 'square'


class BoxCoder(object):
  """Abstract base class for box coder."""
  __metaclass__ = ABCMeta

  @abstractproperty
  def code_size(self):
    """Return the size of each code.

    This number is a constant and should agree with the output of the `encode`
    op (e.g. if rel_codes is the output of self.encode(...), then it should
    have shape [N, code_size()]). This abstractproperty should be overridden by
    implementations.

    Returns:
      an integer constant
    """
    pass

  def encode(self, boxes, anchors):
    """Encode a box list relative to an anchor collection.

    Args:
      boxes: BoxList holding N boxes to be encoded
      anchors: BoxList of N anchors

    Returns:
      a tensor representing N relative-encoded boxes
    """
    return self._encode(boxes, anchors)

  def decode(self, rel_codes, anchors):
    """Decode boxes that are encoded relative to an anchor collection.

    Args:
      rel_codes: a tensor representing N relative-encoded boxes
      anchors: BoxList of anchors

    Returns:
      boxlist: BoxList holding N boxes encoded in the ordinary way (i.e.,
        with corners y_min, x_min, y_max, x_max)
    """
    return self._decode(rel_codes, anchors)

  @abstractmethod
  def _encode(self, boxes, anchors):
    """Method to be overridden by implementations.

    Args:
      boxes: BoxList holding N boxes to be encoded
      anchors: BoxList of N anchors

    Returns:
      a tensor representing N relative-encoded boxes
    """
    pass

  @abstractmethod
  def _decode(self, rel_codes, anchors):
    """Method to be overridden by implementations.

    Args:
      rel_codes: a tensor representing N relative-encoded boxes
      anchors: BoxList of anchors

    Returns:
      boxlist: BoxList holding N boxes encoded in the ordinary way (i.e.,
        with corners y_min, x_min, y_max, x_max)
    """
    pass
def batch_decode(encoded_boxes, box_coder, anchors):
  """Decode a batch of encoded boxes.

  This op takes a batch of encoded bounding boxes and transforms
  them to a batch of bounding boxes specified by their corners in
  the order of [y_min, x_min, y_max, x_max].

  Args:
    encoded_boxes: a float32 tensor of shape [batch_size, num_anchors,
      code_size] representing the location of the objects.
    box_coder: a BoxCoder object.
    anchors: a BoxList of anchors used to encode `encoded_boxes`.

  Returns:
    decoded_boxes: a float32 tensor of shape [batch_size, num_anchors,
      code_size] representing the corners of the objects in the order
      of [y_min, x_min, y_max, x_max].

  Raises:
    ValueError: if batch sizes of the inputs are inconsistent, or if
      the number of anchors inferred from encoded_boxes and anchors are
      inconsistent.
  """
  if encoded_boxes.get_shape()[1].value != anchors.num_boxes_static():
    raise ValueError(
        'The number of anchors inferred from encoded_boxes'
        ' and anchors are inconsistent: shape[1] of encoded_boxes'
        ' %s should be equal to the number of anchors: %s.' %
        (encoded_boxes.get_shape()[1].value, anchors.num_boxes_static()))

  decoded_boxes = tf.stack([
      box_coder.decode(boxes, anchors).get()
      for boxes in tf.unstack(encoded_boxes)
  ])
  return decoded_boxes
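Since BoxCoder is abstract, a concrete subclass only needs to supply code_size, _encode and _decode. A minimal sketch of the pattern (editor's illustration, not part of the commit; the FasterRcnnBoxCoder file below is the real implementation used here):

from mask_rcnn.object_detection import box_coder, box_list

class CornerOffsetCoder(box_coder.BoxCoder):
  """Toy coder: codes are raw corner offsets from the anchor corners."""

  @property
  def code_size(self):
    return 4

  def _encode(self, boxes, anchors):
    # [N, 4] offsets between box corners and anchor corners.
    return boxes.get() - anchors.get()

  def _decode(self, rel_codes, anchors):
    # Adding the offsets back recovers the original corners.
    return box_list.BoxList(rel_codes + anchors.get())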
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/object_detection/box_list.py (new file, mode 100644)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Bounding Box List definition.
BoxList represents a list of bounding boxes as tensorflow
tensors, where each bounding box is represented as a row of 4 numbers,
[y_min, x_min, y_max, x_max]. It is assumed that all bounding boxes
within a given list correspond to a single image. See also
box_list_ops.py for common box related operations (such as area, iou, etc).
Optionally, users can add additional related fields (such as weights).
We assume the following things to be true about fields:
* they correspond to boxes in the box_list along the 0th dimension
* they have inferrable rank at graph construction time
* all dimensions except for possibly the 0th can be inferred
(i.e., not None) at graph construction time.
Some other notes:
* Following tensorflow conventions, we use height, width ordering,
and correspondingly, y,x (or ymin, xmin, ymax, xmax) ordering
* Tensors are always provided as (flat) [N, 4] tensors.
"""
import tensorflow as tf


class BoxList(object):
  """Box collection."""

  def __init__(self, boxes):
    """Constructs box collection.

    Args:
      boxes: a tensor of shape [N, 4] representing box corners

    Raises:
      ValueError: if invalid dimensions for bbox data or if bbox data is not in
        float32 format.
    """
    if len(boxes.get_shape()) != 2 or boxes.get_shape()[-1] != 4:
      raise ValueError('Invalid dimensions for box data.')
    if boxes.dtype != tf.float32:
      raise ValueError('Invalid tensor type: should be tf.float32')
    self.data = {'boxes': boxes}

  def num_boxes(self):
    """Returns number of boxes held in collection.

    Returns:
      a tensor representing the number of boxes held in the collection.
    """
    return tf.shape(input=self.data['boxes'])[0]

  def num_boxes_static(self):
    """Returns number of boxes held in collection.

    This number is inferred at graph construction time rather than run-time.

    Returns:
      Number of boxes held in collection (integer) or None if this is not
      inferrable at graph construction time.
    """
    try:
      return self.data['boxes'].get_shape()[0].value
    except AttributeError:
      return self.data['boxes'].get_shape()[0]

  def get_all_fields(self):
    """Returns all fields."""
    return self.data.keys()

  def get_extra_fields(self):
    """Returns all non-box fields (i.e., everything not named 'boxes')."""
    return [k for k in self.data.keys() if k != 'boxes']

  def add_field(self, field, field_data):
    """Add field to box list.

    This method can be used to add related box data such as
    weights/labels, etc.

    Args:
      field: a string key to access the data via `get`
      field_data: a tensor containing the data to store in the BoxList
    """
    self.data[field] = field_data

  def has_field(self, field):
    return field in self.data

  def get(self):
    """Convenience function for accessing box coordinates.

    Returns:
      a tensor with shape [N, 4] representing box coordinates.
    """
    return self.get_field('boxes')

  def set(self, boxes):
    """Convenience function for setting box coordinates.

    Args:
      boxes: a tensor of shape [N, 4] representing box corners

    Raises:
      ValueError: if invalid dimensions for bbox data
    """
    if len(boxes.get_shape()) != 2 or boxes.get_shape()[-1] != 4:
      raise ValueError('Invalid dimensions for box data.')
    self.data['boxes'] = boxes

  def get_field(self, field):
    """Accesses the data associated with the specified field.

    Args:
      field: a string parameter specifying the related field to be accessed.

    Returns:
      a tensor representing the data of the associated field.

    Raises:
      ValueError: if invalid field
    """
    if not self.has_field(field):
      raise ValueError('field ' + str(field) + ' does not exist')
    return self.data[field]

  def set_field(self, field, value):
    """Sets the value of a field.

    Updates the field of a box_list with a given value.

    Args:
      field: (string) name of the field to set value.
      value: the value to assign to the field.

    Raises:
      ValueError: if the box_list does not have specified field.
    """
    if not self.has_field(field):
      raise ValueError('field %s does not exist' % field)
    self.data[field] = value

  def get_center_coordinates_and_sizes(self, scope=None):
    """Computes the center coordinates, height and width of the boxes.

    Args:
      scope: name scope of the function.

    Returns:
      a list of 4 1-D tensors [ycenter, xcenter, height, width].
    """
    box_corners = self.get()
    ymin, xmin, ymax, xmax = tf.unstack(tf.transpose(a=box_corners))
    width = xmax - xmin
    height = ymax - ymin
    ycenter = ymin + height / 2.
    xcenter = xmin + width / 2.
    return [ycenter, xcenter, height, width]

  def transpose_coordinates(self, scope=None):
    """Transpose the coordinate representation in a boxlist.

    Args:
      scope: name scope of the function.
    """
    y_min, x_min, y_max, x_max = tf.split(
        value=self.get(), num_or_size_splits=4, axis=1)
    self.set(tf.concat([x_min, y_min, x_max, y_max], 1))

  def as_tensor_dict(self, fields=None):
    """Retrieves specified fields as a dictionary of tensors.

    Args:
      fields: (optional) list of fields to return in the dictionary.
        If None (default), all fields are returned.

    Returns:
      tensor_dict: A dictionary of tensors specified by fields.

    Raises:
      ValueError: if specified field is not contained in boxlist.
    """
    tensor_dict = {}
    if fields is None:
      fields = self.get_all_fields()
    for field in fields:
      if not self.has_field(field):
        raise ValueError('boxlist must contain all specified fields')
      tensor_dict[field] = self.get_field(field)
    return tensor_dict
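A minimal usage sketch, assuming eager TF2 (values are illustrative):

import tensorflow as tf
from mask_rcnn.object_detection.box_list import BoxList

boxes = BoxList(tf.constant([[0.1, 0.2, 0.5, 0.6],
                             [0.0, 0.0, 1.0, 1.0]], tf.float32))
boxes.add_field('scores', tf.constant([0.9, 0.4]))
ycenter, xcenter, height, width = boxes.get_center_coordinates_and_sizes()
print(height.numpy())                     # [0.4 1.0]
print(boxes.get_field('scores').numpy())  # [0.9 0.4]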
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/object_detection/faster_rcnn_box_coder.py (new file, mode 100644)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Faster RCNN box coder.
Faster RCNN box coder follows the coding schema described below:
ty = (y - ya) / ha
tx = (x - xa) / wa
th = log(h / ha)
tw = log(w / wa)
where x, y, w, h denote the box's center coordinates, width and height
respectively. Similarly, xa, ya, wa, ha denote the anchor's center
coordinates, width and height. tx, ty, tw and th denote the anchor-encoded
center, width and height respectively.
See http://arxiv.org/abs/1506.01497 for details.
"""
import tensorflow as tf

from mask_rcnn.object_detection import box_coder
from mask_rcnn.object_detection import box_list

EPSILON = 1e-8


class FasterRcnnBoxCoder(box_coder.BoxCoder):
  """Faster RCNN box coder."""

  def __init__(self, scale_factors=None):
    """Constructor for FasterRcnnBoxCoder.

    Args:
      scale_factors: List of 4 positive scalars to scale ty, tx, th and tw.
        If set to None, does not perform scaling. For Faster RCNN,
        the open-source implementation recommends using [10.0, 10.0, 5.0, 5.0].
    """
    if scale_factors is not None:
      assert len(scale_factors) == 4
      assert all([scalar > 0 for scalar in scale_factors])
    self._scale_factors = scale_factors

  @property
  def code_size(self):
    return 4

  def _encode(self, boxes, anchors):
    """Encode a box collection with respect to anchor collection.

    Args:
      boxes: BoxList holding N boxes to be encoded.
      anchors: BoxList of anchors.

    Returns:
      a tensor representing N anchor-encoded boxes of the format
      [ty, tx, th, tw].
    """
    # Convert anchors to the center coordinate representation.
    ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
    ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes()
    # Avoid NaN in division and log below.
    ha += EPSILON
    wa += EPSILON
    h += EPSILON
    w += EPSILON

    tx = (xcenter - xcenter_a) / wa
    ty = (ycenter - ycenter_a) / ha
    tw = tf.math.log(w / wa)
    th = tf.math.log(h / ha)
    # Scales location targets as used in paper for joint training.
    if self._scale_factors:
      ty *= self._scale_factors[0]
      tx *= self._scale_factors[1]
      th *= self._scale_factors[2]
      tw *= self._scale_factors[3]
    return tf.transpose(a=tf.stack([ty, tx, th, tw]))

  def _decode(self, rel_codes, anchors):
    """Decode relative codes to boxes.

    Args:
      rel_codes: a tensor representing N anchor-encoded boxes.
      anchors: BoxList of anchors.

    Returns:
      boxes: BoxList holding N bounding boxes.
    """
    ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()

    ty, tx, th, tw = tf.unstack(tf.transpose(a=rel_codes))
    if self._scale_factors:
      ty /= self._scale_factors[0]
      tx /= self._scale_factors[1]
      th /= self._scale_factors[2]
      tw /= self._scale_factors[3]
    w = tf.exp(tw) * wa
    h = tf.exp(th) * ha
    ycenter = ty * ha + ycenter_a
    xcenter = tx * wa + xcenter_a
    ymin = ycenter - h / 2.
    xmin = xcenter - w / 2.
    ymax = ycenter + h / 2.
    xmax = xcenter + w / 2.
    return box_list.BoxList(tf.transpose(a=tf.stack([ymin, xmin, ymax, xmax])))
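A round-trip sketch, assuming eager TF2 (values are illustrative): encoding a box against an anchor and decoding the codes should recover the box up to EPSILON-sized error.

import tensorflow as tf
from mask_rcnn.object_detection import box_list
from mask_rcnn.object_detection.faster_rcnn_box_coder import FasterRcnnBoxCoder

coder = FasterRcnnBoxCoder(scale_factors=[10.0, 10.0, 5.0, 5.0])
boxes = box_list.BoxList(tf.constant([[0.2, 0.2, 0.6, 0.8]], tf.float32))
anchors = box_list.BoxList(tf.constant([[0.1, 0.1, 0.7, 0.7]], tf.float32))

codes = coder.encode(boxes, anchors)          # [[ty, tx, th, tw]], scaled
decoded = coder.decode(codes, anchors).get()  # ~[[0.2, 0.2, 0.6, 0.8]]
print(codes.numpy(), decoded.numpy())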
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/object_detection/matcher.py (new file, mode 100644)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Matcher interface and Match class.
This module defines the Matcher interface and the Match object. The job of the
matcher is to match row and column indices based on the similarity matrix and
other optional parameters. Each column is matched to at most one row. There
are three possibilities for the matching:
1) match: A column matches a row.
2) no_match: A column does not match any row.
3) ignore: A column that is neither 'match' nor no_match.
The ignore case is regularly encountered in object detection: when an anchor has
a relatively small overlap with a ground-truth box, one neither wants to
consider this box a positive example (match) nor a negative example (no match).
The Match class is used to store the match results and it provides simple apis
to query the results.
"""
from abc import ABCMeta
from abc import abstractmethod

import tensorflow as tf


class Match(object):
  """Class to store results from the matcher.

  This class is used to store the results from the matcher. It provides
  convenient methods to query the matching results.
  """

  def __init__(self, match_results):
    """Constructs a Match object.

    Args:
      match_results: Integer tensor of shape [N] with (1) match_results[i]>=0,
        meaning that column i is matched with row match_results[i].
        (2) match_results[i]=-1, meaning that column i is not matched.
        (3) match_results[i]=-2, meaning that column i is ignored.

    Raises:
      ValueError: if match_results does not have rank 1 or is not an
        int32 tensor.
    """
    if match_results.shape.ndims != 1:
      raise ValueError('match_results should have rank 1')
    if match_results.dtype != tf.int32:
      raise ValueError('match_results should be an int32 tensor')
    self._match_results = match_results

  @property
  def match_results(self):
    """The accessor for match results.

    Returns:
      the tensor which encodes the match results.
    """
    return self._match_results

  def matched_column_indices(self):
    """Returns column indices that match to some row.

    The indices returned by this op are always sorted in increasing order.

    Returns:
      column_indices: int32 tensor of shape [K] with column indices.
    """
    return self._reshape_and_cast(tf.where(tf.greater(self._match_results, -1)))

  def matched_column_indicator(self):
    """Returns a boolean column indicator for matched columns.

    Returns:
      column_indicator: boolean vector which is True for all matched column
        indices.
    """
    return tf.greater_equal(self._match_results, 0)

  def num_matched_columns(self):
    """Returns number (int32 scalar tensor) of matched columns."""
    return tf.size(input=self.matched_column_indices())

  def unmatched_column_indices(self):
    """Returns column indices that do not match any row.

    The indices returned by this op are always sorted in increasing order.

    Returns:
      column_indices: int32 tensor of shape [K] with column indices.
    """
    return self._reshape_and_cast(tf.where(tf.equal(self._match_results, -1)))

  def unmatched_column_indicator(self):
    """Returns a boolean column indicator for unmatched columns.

    Returns:
      column_indicator: boolean vector which is True for all unmatched column
        indices.
    """
    return tf.equal(self._match_results, -1)

  def num_unmatched_columns(self):
    """Returns number (int32 scalar tensor) of unmatched columns."""
    return tf.size(input=self.unmatched_column_indices())

  def ignored_column_indices(self):
    """Returns column indices that are ignored (neither Matched nor Unmatched).

    The indices returned by this op are always sorted in increasing order.

    Returns:
      column_indices: int32 tensor of shape [K] with column indices.
    """
    return self._reshape_and_cast(tf.where(self.ignored_column_indicator()))

  def ignored_column_indicator(self):
    """Returns boolean column indicator where True means the column is ignored.

    Returns:
      column_indicator: boolean vector which is True for all ignored column
        indices.
    """
    return tf.equal(self._match_results, -2)

  def num_ignored_columns(self):
    """Returns number (int32 scalar tensor) of ignored columns."""
    return tf.size(input=self.ignored_column_indices())

  def unmatched_or_ignored_column_indices(self):
    """Returns column indices that are unmatched or ignored.

    The indices returned by this op are always sorted in increasing order.

    Returns:
      column_indices: int32 tensor of shape [K] with column indices.
    """
    return self._reshape_and_cast(tf.where(tf.greater(0, self._match_results)))

  def matched_row_indices(self):
    """Returns row indices that match some column.

    The indices returned by this op are ordered so as to be in correspondence
    with the output of matched_column_indices(). For example if
    self.matched_column_indices() is [0,2], and self.matched_row_indices() is
    [7, 3], then we know that column 0 was matched to row 7 and column 2 was
    matched to row 3.

    Returns:
      row_indices: int32 tensor of shape [K] with row indices.
    """
    return self._reshape_and_cast(
        tf.gather(self._match_results, self.matched_column_indices()))

  def _reshape_and_cast(self, t):
    return tf.cast(tf.reshape(t, [-1]), tf.int32)

  def gather_based_on_match(self, input_tensor, unmatched_value,
                            ignored_value):
    """Gathers elements from `input_tensor` based on match results.

    For columns that are matched to a row, gathered_tensor[col] is set to
    input_tensor[match_results[col]]. For columns that are unmatched,
    gathered_tensor[col] is set to unmatched_value. Finally, for columns that
    are ignored gathered_tensor[col] is set to ignored_value.

    Note that the input_tensor.shape[1:] must match with unmatched_value.shape
    and ignored_value.shape.

    Args:
      input_tensor: Tensor to gather values from.
      unmatched_value: Constant tensor value for unmatched columns.
      ignored_value: Constant tensor value for ignored columns.

    Returns:
      gathered_tensor: A tensor containing values gathered from input_tensor.
        The shape of the gathered tensor is [match_results.shape[0]] +
        input_tensor.shape[1:].
    """
    input_tensor = tf.concat(
        [tf.stack([ignored_value, unmatched_value]), input_tensor], axis=0)
    gather_indices = tf.maximum(self.match_results + 2, 0)
    gathered_tensor = tf.gather(input_tensor, gather_indices)
    return gathered_tensor


class Matcher(object):
  """Abstract base class for matcher."""
  __metaclass__ = ABCMeta

  def match(self, similarity_matrix, scope=None, **params):
    """Computes matches among row and column indices and returns the result.

    Computes matches among the row and column indices based on the similarity
    matrix and optional arguments.

    Args:
      similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
        where higher value means more similar.
      scope: Op scope name. Defaults to 'Match' if None.
      **params: Additional keyword arguments for specific implementations of
        the Matcher.

    Returns:
      A Match object with the results of matching.
    """
    return Match(self._match(similarity_matrix, **params))

  @abstractmethod
  def _match(self, similarity_matrix, **params):
    """Method to be overridden by implementations.

    Args:
      similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
        where higher value means more similar.
      **params: Additional keyword arguments for specific implementations of
        the Matcher.

    Returns:
      match_results: Integer tensor of shape [M]: match_results[i]>=0 means
        that column i is matched to row match_results[i], match_results[i]=-1
        means that the column is not matched. match_results[i]=-2 means that
        the column is ignored (usually this happens when there is a very weak
        match which one neither wants as positive nor negative example).
    """
    pass
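A minimal sketch of gather_based_on_match, assuming eager TF2 (values are illustrative): building per-anchor class targets with distinct fill values for unmatched and ignored columns.

import tensorflow as tf
from mask_rcnn.object_detection.matcher import Match

match = Match(tf.constant([0, 1, -1, -2], tf.int32))  # 4 columns (anchors)
gt_classes = tf.constant([5.0, 7.0])                  # 2 rows (ground truth)
targets = match.gather_based_on_match(gt_classes,
                                      unmatched_value=tf.constant(0.0),
                                      ignored_value=tf.constant(-1.0))
print(targets.numpy())  # [ 5.  7.  0. -1.]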
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/object_detection/minibatch_sampler.py (new file, mode 100644)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Base minibatch sampler module.
The job of the minibatch_sampler is to subsample a minibatch based on some
criterion.
The main function call is:
subsample(indicator, batch_size, **params).
Indicator is a 1d boolean tensor where True denotes which examples can be
sampled. It returns a boolean indicator where True denotes an example has been
sampled..
Subclasses should implement the Subsample function and can make use of the
@staticmethod SubsampleIndicator.
This is originally implemented in TensorFlow Object Detection API.
"""
from abc import ABCMeta
from abc import abstractmethod

import tensorflow as tf

from mask_rcnn.object_detection import ops


class MinibatchSampler(object):
  """Abstract base class for subsampling minibatches."""
  __metaclass__ = ABCMeta

  def __init__(self):
    """Constructs a minibatch sampler."""
    pass

  @abstractmethod
  def subsample(self, indicator, batch_size, **params):
    """Returns subsample of entries in indicator.

    Args:
      indicator: boolean tensor of shape [N] whose True entries can be sampled.
      batch_size: desired batch size.
      **params: additional keyword arguments for specific implementations of
        the MinibatchSampler.

    Returns:
      sample_indicator: boolean tensor of shape [N] whose True entries have been
        sampled. If sum(indicator) >= batch_size, sum(is_sampled) = batch_size
    """
    pass

  @staticmethod
  def subsample_indicator(indicator, num_samples):
    """Subsample indicator vector.

    Given a boolean indicator vector with M elements set to `True`, the function
    assigns all but `num_samples` of these previously `True` elements to
    `False`. If `num_samples` is greater than M, the original indicator vector
    is returned.

    Args:
      indicator: a 1-dimensional boolean tensor indicating which elements
        are allowed to be sampled and which are not.
      num_samples: int32 scalar tensor

    Returns:
      a boolean tensor with the same shape as input (indicator) tensor
    """
    indices = tf.where(indicator)
    indices = tf.random.shuffle(indices)
    indices = tf.reshape(indices, [-1])

    num_samples = tf.minimum(tf.size(input=indices), num_samples)
    selected_indices = tf.slice(indices, [0], tf.reshape(num_samples, [1]))

    selected_indicator = ops.indices_to_dense_vector(
        selected_indices, tf.shape(input=indicator)[0])

    return tf.equal(selected_indicator, 1)
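A minimal sketch of the static helper, assuming eager TF2: keep at most two of the True entries, chosen at random.

import tensorflow as tf
from mask_rcnn.object_detection.minibatch_sampler import MinibatchSampler

indicator = tf.constant([True, True, False, True, True])
sampled = MinibatchSampler.subsample_indicator(indicator, 2)
print(sampled.numpy())  # exactly two True entries, e.g. [True False False True False]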
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/object_detection/ops.py (new file, mode 100644)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A module for helper tensorflow ops.
This is originally implemented in TensorFlow Object Detection API.
"""
import tensorflow as tf

from mask_rcnn.object_detection import shape_utils


def indices_to_dense_vector(indices,
                            size,
                            indices_value=1.,
                            default_value=0,
                            dtype=tf.float32):
  """Creates dense vector with indices set to specific value and rest to zeros.

  This function exists because it is unclear if it is safe to use
  tf.sparse_to_dense(indices, [size], 1, validate_indices=False)
  with indices which are not ordered.
  This function accepts a dynamic size (e.g. tf.shape(tensor)[0])

  Args:
    indices: 1d Tensor with integer indices which are to be set to
      indices_values.
    size: scalar with size (integer) of output Tensor.
    indices_value: values of elements specified by indices in the output vector
    default_value: values of other elements in the output vector.
    dtype: data type.

  Returns:
    dense 1D Tensor of shape [size] with indices set to indices_values and the
    rest set to default_value.
  """
  size = tf.cast(size, dtype=tf.int32)
  zeros = tf.ones([size], dtype=dtype) * default_value
  values = tf.ones_like(indices, dtype=dtype) * indices_value

  return tf.dynamic_stitch(
      [tf.range(size), tf.cast(indices, dtype=tf.int32)], [zeros, values])


def matmul_gather_on_zeroth_axis(params, indices, scope=None):
  """Matrix multiplication based implementation of tf.gather on zeroth axis.

  TODO(rathodv, jonathanhuang): enable sparse matmul option.

  Args:
    params: A float32 Tensor. The tensor from which to gather values.
      Must be at least rank 1.
    indices: A Tensor. Must be one of the following types: int32, int64.
      Must be in range [0, params.shape[0])
    scope: A name for the operation (optional).

  Returns:
    A Tensor. Has the same type as params. Values from params gathered
    from indices given by indices, with shape indices.shape + params.shape[1:].
  """
  params_shape = shape_utils.combined_static_and_dynamic_shape(params)
  indices_shape = shape_utils.combined_static_and_dynamic_shape(indices)
  params2d = tf.reshape(params, [params_shape[0], -1])
  indicator_matrix = tf.one_hot(indices, params_shape[0])
  gathered_result_flattened = tf.matmul(indicator_matrix, params2d)
  return tf.reshape(gathered_result_flattened,
                    tf.stack(indices_shape + params_shape[1:]))
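A minimal usage sketch, assuming eager TF2 and that shape_utils.combined_static_and_dynamic_shape (added elsewhere in this commit) behaves as in the upstream TF Object Detection API:

import tensorflow as tf
from mask_rcnn.object_detection import ops

print(ops.indices_to_dense_vector(tf.constant([1, 3]), 5).numpy())
# [0. 1. 0. 1. 0.]

params = tf.constant([[1., 2.], [3., 4.], [5., 6.]])
print(ops.matmul_gather_on_zeroth_axis(params, tf.constant([2, 0])).numpy())
# [[5. 6.]
#  [1. 2.]]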
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/object_detection/preprocessor.py (new file, mode 100644)
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Preprocess images and bounding boxes for detection.
We perform two sets of operations in preprocessing stage:
(a) operations that are applied to both training and testing data,
(b) operations that are applied only to training data for the purpose of
data augmentation.
A preprocessing function receives a set of inputs,
e.g. an image and bounding boxes,
performs an operation on them, and returns them.
Some examples are: randomly cropping the image, randomly mirroring the image,
randomly changing the brightness, contrast, hue and
randomly jittering the bounding boxes.
The image is a rank 4 tensor: [1, height, width, channels] with
dtype=tf.float32. The groundtruth_boxes is a rank 2 tensor: [N, 4] where
in each row there is a box with [ymin xmin ymax xmax].
Boxes are in normalized coordinates meaning
their coordinate values range in [0, 1]
Important Note: In tensor_dict, images is a rank 4 tensor, but preprocessing
functions receive a rank 3 tensor for processing the image. Thus, inside the
preprocess function we squeeze the image to become a rank 3 tensor and then
we pass it to the functions. At the end of the preprocess we expand the image
back to rank 4.
"""
import
tensorflow
as
tf
from
mask_rcnn.object_detection
import
box_list
def
_flip_boxes_left_right
(
boxes
):
"""Left-right flip the boxes.
Args:
boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
Boxes are in normalized form meaning their coordinates vary
between [0, 1].
Each row is in the form of [ymin, xmin, ymax, xmax].
Returns:
Flipped boxes.
"""
ymin
,
xmin
,
ymax
,
xmax
=
tf
.
split
(
value
=
boxes
,
num_or_size_splits
=
4
,
axis
=
1
)
flipped_xmin
=
tf
.
subtract
(
1.0
,
xmax
)
flipped_xmax
=
tf
.
subtract
(
1.0
,
xmin
)
flipped_boxes
=
tf
.
concat
([
ymin
,
flipped_xmin
,
ymax
,
flipped_xmax
],
1
)
return
flipped_boxes
def
_flip_masks_left_right
(
masks
):
"""Left-right flip masks.
Args:
masks: rank 3 float32 tensor with shape
[num_instances, height, width] representing instance masks.
Returns:
flipped masks: rank 3 float32 tensor with shape
[num_instances, height, width] representing instance masks.
"""
return
masks
[:,
:,
::
-
1
]
def keypoint_flip_horizontal(keypoints, flip_point, flip_permutation, scope=None):
    """Flips the keypoints horizontally around the flip_point.

    This operation flips the x coordinate for each keypoint around the flip_point
    and also permutes the keypoints in a manner specified by flip_permutation.

    Args:
      keypoints: a tensor of shape [num_instances, num_keypoints, 2]
      flip_point: (float) scalar tensor representing the x coordinate to flip the
        keypoints around.
      flip_permutation: rank 1 int32 tensor containing the keypoint flip
        permutation. This specifies the mapping from original keypoint indices
        to the flipped keypoint indices. This is used primarily for keypoints
        that are not reflection invariant. E.g. Suppose there are 3 keypoints
        representing ['head', 'right_eye', 'left_eye'], then a logical choice for
        flip_permutation might be [0, 2, 1] since we want to swap the 'left_eye'
        and 'right_eye' after a horizontal flip.
      scope: name scope.

    Returns:
      new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
    """
    keypoints = tf.transpose(a=keypoints, perm=[1, 0, 2])
    keypoints = tf.gather(keypoints, flip_permutation)
    v, u = tf.split(value=keypoints, num_or_size_splits=2, axis=2)
    u = flip_point * 2.0 - u
    new_keypoints = tf.concat([v, u], 2)
    new_keypoints = tf.transpose(a=new_keypoints, perm=[1, 0, 2])
    return new_keypoints
def random_horizontal_flip(image,
                           boxes=None,
                           masks=None,
                           keypoints=None,
                           keypoint_flip_permutation=None,
                           seed=None):
    """Randomly flips the image and detections horizontally.

    The probability of flipping the image is 50%.

    Args:
      image: rank 3 float32 tensor with shape [height, width, channels].
      boxes: (optional) rank 2 float32 tensor with shape [N, 4]
        containing the bounding boxes.
        Boxes are in normalized form meaning their coordinates vary
        between [0, 1]. Each row is in the form of [ymin, xmin, ymax, xmax].
      masks: (optional) rank 3 float32 tensor with shape
        [num_instances, height, width] containing instance masks. The masks
        are of the same height, width as the input `image`.
      keypoints: (optional) rank 3 float32 tensor with shape
        [num_instances, num_keypoints, 2]. The keypoints are in y-x
        normalized coordinates.
      keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip
        permutation.
      seed: random seed

    Returns:
      image: image which is the same shape as input image.

      If boxes, masks, keypoints, and keypoint_flip_permutation are not None,
      the function also returns the following tensors.

      boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
        Boxes are in normalized form meaning their coordinates vary
        between [0, 1].
      masks: rank 3 float32 tensor with shape [num_instances, height, width]
        containing instance masks.
      keypoints: rank 3 float32 tensor with shape
        [num_instances, num_keypoints, 2]

    Raises:
      ValueError: if keypoints are provided but keypoint_flip_permutation is not.
    """

    def _flip_image(image):
        # flip image
        image_flipped = tf.image.flip_left_right(image)
        return image_flipped

    if keypoints is not None and keypoint_flip_permutation is None:
        raise ValueError(
            'keypoints are provided but keypoint_flip_permutation is not provided')

    result = []
    # random variable defining whether to do flip or not
    do_a_flip_random = tf.greater(tf.random.uniform([], seed=seed), 0.5)

    # flip image
    image = tf.cond(pred=do_a_flip_random,
                    true_fn=lambda: _flip_image(image),
                    false_fn=lambda: image)
    result.append(image)

    # flip boxes
    if boxes is not None:
        boxes = tf.cond(pred=do_a_flip_random,
                        true_fn=lambda: _flip_boxes_left_right(boxes),
                        false_fn=lambda: boxes)
        result.append(boxes)

    # flip masks
    if masks is not None:
        masks = tf.cond(pred=do_a_flip_random,
                        true_fn=lambda: _flip_masks_left_right(masks),
                        false_fn=lambda: masks)
        result.append(masks)

    # flip keypoints
    if keypoints is not None and keypoint_flip_permutation is not None:
        permutation = keypoint_flip_permutation
        keypoints = tf.cond(
            pred=do_a_flip_random,
            true_fn=lambda: keypoint_flip_horizontal(keypoints, 0.5, permutation),
            false_fn=lambda: keypoints)
        result.append(keypoints)

    return tuple(result)
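A minimal usage sketch for the flip augmentation above (illustrative, not part of the file; shapes are arbitrary):

import tensorflow as tf

image = tf.random.uniform([480, 640, 3])                 # rank 3, HWC
boxes = tf.constant([[0.1, 0.2, 0.4, 0.6]], tf.float32)  # normalized boxes
masks = tf.zeros([1, 480, 640], tf.float32)

# With a fixed seed the same flip decision is applied to image, boxes and masks.
image, boxes, masks = random_horizontal_flip(image, boxes=boxes,
                                             masks=masks, seed=42)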
def _compute_new_static_size(image, min_dimension, max_dimension):
    """Compute new static shape for resize_to_range method."""
    image_shape = image.get_shape().as_list()
    orig_height = image_shape[0]
    orig_width = image_shape[1]
    num_channels = image_shape[2]
    orig_min_dim = min(orig_height, orig_width)

    # Calculates the larger of the possible sizes
    large_scale_factor = min_dimension / float(orig_min_dim)
    # Scaling orig_(height|width) by large_scale_factor will make the smaller
    # dimension equal to min_dimension, save for floating point rounding errors.
    # For reasonably-sized images, taking the nearest integer will reliably
    # eliminate this error.
    large_height = int(round(orig_height * large_scale_factor))
    large_width = int(round(orig_width * large_scale_factor))
    large_size = [large_height, large_width]

    if max_dimension:
        # Calculates the smaller of the possible sizes, use that if the larger
        # is too big.
        orig_max_dim = max(orig_height, orig_width)
        small_scale_factor = max_dimension / float(orig_max_dim)
        # Scaling orig_(height|width) by small_scale_factor will make the larger
        # dimension equal to max_dimension, save for floating point rounding
        # errors. For reasonably-sized images, taking the nearest integer will
        # reliably eliminate this error.
        small_height = int(round(orig_height * small_scale_factor))
        small_width = int(round(orig_width * small_scale_factor))
        small_size = [small_height, small_width]

        new_size = large_size
        if max(large_size) > max_dimension:
            new_size = small_size
    else:
        new_size = large_size

    return tf.constant(new_size + [num_channels])
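A worked example of the static-size computation (illustrative, not part of the file): for a 600x800 image with min_dimension=800 and max_dimension=1333, the large scale factor is 800/600 ≈ 1.333, the candidate size is [800, 1067], and since 1067 <= 1333 the large size is kept.

import tensorflow as tf

image = tf.zeros([600, 800, 3])  # statically known shape
print(_compute_new_static_size(image, 800, 1333))
# tf.Tensor([ 800 1067    3], shape=(3,), dtype=int32)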
def _compute_new_dynamic_size(image, min_dimension, max_dimension):
    """Compute new dynamic shape for resize_to_range method."""
    image_shape = tf.shape(input=image)
    orig_height = tf.cast(image_shape[0], dtype=tf.float32)
    orig_width = tf.cast(image_shape[1], dtype=tf.float32)
    num_channels = image_shape[2]
    orig_min_dim = tf.minimum(orig_height, orig_width)

    # Calculates the larger of the possible sizes
    min_dimension = tf.constant(min_dimension, dtype=tf.float32)
    large_scale_factor = min_dimension / orig_min_dim
    # Scaling orig_(height|width) by large_scale_factor will make the smaller
    # dimension equal to min_dimension, save for floating point rounding errors.
    # For reasonably-sized images, taking the nearest integer will reliably
    # eliminate this error.
    large_height = tf.cast(tf.round(orig_height * large_scale_factor),
                           dtype=tf.int32)
    large_width = tf.cast(tf.round(orig_width * large_scale_factor),
                          dtype=tf.int32)
    large_size = tf.stack([large_height, large_width])

    if max_dimension:
        # Calculates the smaller of the possible sizes, use that if the larger
        # is too big.
        orig_max_dim = tf.maximum(orig_height, orig_width)
        max_dimension = tf.constant(max_dimension, dtype=tf.float32)
        small_scale_factor = max_dimension / orig_max_dim
        # Scaling orig_(height|width) by small_scale_factor will make the larger
        # dimension equal to max_dimension, save for floating point rounding
        # errors. For reasonably-sized images, taking the nearest integer will
        # reliably eliminate this error.
        small_height = tf.cast(tf.round(orig_height * small_scale_factor),
                               dtype=tf.int32)
        small_width = tf.cast(tf.round(orig_width * small_scale_factor),
                              dtype=tf.int32)
        small_size = tf.stack([small_height, small_width])

        new_size = tf.cond(
            pred=tf.cast(tf.reduce_max(input_tensor=large_size),
                         dtype=tf.float32) > max_dimension,
            true_fn=lambda: small_size,
            false_fn=lambda: large_size)
    else:
        new_size = large_size

    return tf.stack(tf.unstack(new_size) + [num_channels])
def resize_to_range(image,
                    masks=None,
                    min_dimension=None,
                    max_dimension=None,
                    method=tf.image.ResizeMethod.BILINEAR,
                    align_corners=False,
                    pad_to_max_dimension=False):
    """Resizes an image so its dimensions are within the provided value.

    The output size can be described by two cases:
    1. If the image can be rescaled so its minimum dimension is equal to the
       provided value without the other dimension exceeding max_dimension,
       then do so.
    2. Otherwise, resize so the largest dimension is equal to max_dimension.

    Args:
      image: A 3D tensor of shape [height, width, channels]
      masks: (optional) rank 3 float32 tensor with shape
        [num_instances, height, width] containing instance masks.
      min_dimension: (optional) (scalar) desired size of the smaller image
        dimension.
      max_dimension: (optional) (scalar) maximum allowed size
        of the larger image dimension.
      method: (optional) interpolation method used in resizing. Defaults to
        BILINEAR.
      align_corners: bool. If true, exactly align all 4 corners of the input
        and output. Defaults to False.
      pad_to_max_dimension: Whether to resize the image and pad it with zeros
        so the resulting image is of the spatial size
        [max_dimension, max_dimension]. If masks are included they are padded
        similarly.

    Returns:
      Note that the position of the resized_image_shape changes based on whether
      masks are present.
      resized_image: A 3D tensor of shape [new_height, new_width, channels],
        where the image has been resized (with bilinear interpolation) so that
        min(new_height, new_width) == min_dimension or
        max(new_height, new_width) == max_dimension.
      resized_masks: If masks is not None, also outputs masks. A 3D tensor of
        shape [num_instances, new_height, new_width].
      resized_image_shape: A 1D tensor of shape [3] containing shape of the
        resized image.

    Raises:
      ValueError: if the image is not a 3D tensor.
    """
    if len(image.get_shape()) != 3:
        raise ValueError('Image should be 3D tensor')

    if image.get_shape().is_fully_defined():
        new_size = _compute_new_static_size(image, min_dimension, max_dimension)
    else:
        new_size = _compute_new_dynamic_size(image, min_dimension, max_dimension)

    new_image = tf.image.resize(image, new_size[:-1], method=method)

    if pad_to_max_dimension:
        new_image = tf.image.pad_to_bounding_box(new_image, 0, 0,
                                                 max_dimension, max_dimension)

    result = [new_image]

    if masks is not None:
        new_masks = tf.expand_dims(masks, 3)
        new_masks = tf.image.resize(
            new_masks, new_size[:-1],
            method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
        new_masks = tf.squeeze(new_masks, 3)
        if pad_to_max_dimension:
            new_masks = tf.image.pad_to_bounding_box(new_masks, 0, 0,
                                                     max_dimension, max_dimension)
        result.append(new_masks)

    result.append(new_size)
    return result
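A minimal usage sketch for resize_to_range (illustrative, not part of the file):

import tensorflow as tf

image = tf.random.uniform([600, 800, 3])
masks = tf.zeros([2, 600, 800])
new_image, new_masks, new_shape = resize_to_range(
    image, masks=masks, min_dimension=800, max_dimension=1333)
# new_image: [800, 1067, 3]; new_masks: [2, 800, 1067]; new_shape: [800, 1067, 3]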
def _copy_extra_fields(boxlist_to_copy_to, boxlist_to_copy_from):
    """Copies the extra fields of boxlist_to_copy_from to boxlist_to_copy_to.

    Args:
      boxlist_to_copy_to: BoxList to which extra fields are copied.
      boxlist_to_copy_from: BoxList from which fields are copied.

    Returns:
      boxlist_to_copy_to with extra fields.
    """
    for field in boxlist_to_copy_from.get_extra_fields():
        boxlist_to_copy_to.add_field(field, boxlist_to_copy_from.get_field(field))
    return boxlist_to_copy_to
def box_list_scale(boxlist, y_scale, x_scale, scope=None):
    """Scale box coordinates in x and y dimensions.

    Args:
      boxlist: BoxList holding N boxes
      y_scale: (float) scalar tensor
      x_scale: (float) scalar tensor
      scope: name scope.

    Returns:
      boxlist: BoxList holding N boxes
    """
    y_scale = tf.cast(y_scale, tf.float32)
    x_scale = tf.cast(x_scale, tf.float32)
    y_min, x_min, y_max, x_max = tf.split(value=boxlist.get(),
                                          num_or_size_splits=4, axis=1)
    y_min = y_scale * y_min
    y_max = y_scale * y_max
    x_min = x_scale * x_min
    x_max = x_scale * x_max
    scaled_boxlist = box_list.BoxList(tf.concat([y_min, x_min, y_max, x_max], 1))
    return _copy_extra_fields(scaled_boxlist, boxlist)
def keypoint_scale(keypoints, y_scale, x_scale, scope=None):
    """Scales keypoint coordinates in x and y dimensions.

    Args:
      keypoints: a tensor of shape [num_instances, num_keypoints, 2]
      y_scale: (float) scalar tensor
      x_scale: (float) scalar tensor
      scope: name scope.

    Returns:
      new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
    """
    y_scale = tf.cast(y_scale, tf.float32)
    x_scale = tf.cast(x_scale, tf.float32)
    new_keypoints = keypoints * [[[y_scale, x_scale]]]
    return new_keypoints
def scale_boxes_to_pixel_coordinates(image, boxes, keypoints=None):
    """Scales boxes from normalized to pixel coordinates.

    Args:
      image: A 3D float32 tensor of shape [height, width, channels].
      boxes: A 2D float32 tensor of shape [num_boxes, 4] containing the bounding
        boxes in normalized coordinates. Each row is of the form
        [ymin, xmin, ymax, xmax].
      keypoints: (optional) rank 3 float32 tensor with shape
        [num_instances, num_keypoints, 2]. The keypoints are in y-x normalized
        coordinates.

    Returns:
      image: unchanged input image.
      scaled_boxes: a 2D float32 tensor of shape [num_boxes, 4] containing the
        bounding boxes in pixel coordinates.
      scaled_keypoints: a 3D float32 tensor with shape
        [num_instances, num_keypoints, 2] containing the keypoints in pixel
        coordinates.
    """
    boxlist = box_list.BoxList(boxes)
    image_height = tf.shape(input=image)[0]
    image_width = tf.shape(input=image)[1]
    scaled_boxes = box_list_scale(boxlist, image_height, image_width).get()
    result = [image, scaled_boxes]
    if keypoints is not None:
        scaled_keypoints = keypoint_scale(keypoints, image_height, image_width)
        result.append(scaled_keypoints)
    return tuple(result)
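A quick illustrative example of the normalized-to-pixel conversion (not part of the file):

import tensorflow as tf

image = tf.zeros([100, 200, 3])
boxes = tf.constant([[0.1, 0.25, 0.5, 0.75]])
_, pixel_boxes = scale_boxes_to_pixel_coordinates(image, boxes)
print(pixel_boxes.numpy())   # [[ 10.  50.  50. 150.]]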
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/object_detection/region_similarity_calculator.py
0 → 100644
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Region Similarity Calculators for BoxLists.
Region Similarity Calculators compare a pairwise measure of similarity
between the boxes in two BoxLists.
"""
from abc import ABCMeta
from abc import abstractmethod

import tensorflow as tf
def area(boxlist, scope=None):
    """Computes area of boxes.

    Args:
      boxlist: BoxList holding N boxes
      scope: name scope.

    Returns:
      a tensor with shape [N] representing box areas.
    """
    y_min, x_min, y_max, x_max = tf.split(value=boxlist.get(),
                                          num_or_size_splits=4, axis=1)
    return tf.squeeze((y_max - y_min) * (x_max - x_min), [1])
def intersection(boxlist1, boxlist2, scope=None):
    """Compute pairwise intersection areas between boxes.

    Args:
      boxlist1: BoxList holding N boxes
      boxlist2: BoxList holding M boxes
      scope: name scope.

    Returns:
      a tensor with shape [N, M] representing pairwise intersections
    """
    y_min1, x_min1, y_max1, x_max1 = tf.split(value=boxlist1.get(),
                                              num_or_size_splits=4, axis=1)
    y_min2, x_min2, y_max2, x_max2 = tf.split(value=boxlist2.get(),
                                              num_or_size_splits=4, axis=1)
    all_pairs_min_ymax = tf.minimum(y_max1, tf.transpose(a=y_max2))
    all_pairs_max_ymin = tf.maximum(y_min1, tf.transpose(a=y_min2))
    intersect_heights = tf.maximum(0.0, all_pairs_min_ymax - all_pairs_max_ymin)
    all_pairs_min_xmax = tf.minimum(x_max1, tf.transpose(a=x_max2))
    all_pairs_max_xmin = tf.maximum(x_min1, tf.transpose(a=x_min2))
    intersect_widths = tf.maximum(0.0, all_pairs_min_xmax - all_pairs_max_xmin)
    return intersect_heights * intersect_widths
def iou(boxlist1, boxlist2, scope=None):
    """Computes pairwise intersection-over-union between box collections.

    Args:
      boxlist1: BoxList holding N boxes
      boxlist2: BoxList holding M boxes
      scope: name scope.

    Returns:
      a tensor with shape [N, M] representing pairwise iou scores.
    """
    intersections = intersection(boxlist1, boxlist2)
    areas1 = area(boxlist1)
    areas2 = area(boxlist2)
    unions = (tf.expand_dims(areas1, 1) + tf.expand_dims(areas2, 0)
              - intersections)
    return tf.where(tf.equal(intersections, 0.0),
                    tf.zeros_like(intersections),
                    tf.truediv(intersections, unions))
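A small worked IoU example (illustrative, not part of the file): two unit-area boxes that overlap by 0.5 have union 1 + 1 - 0.5 = 1.5, so their IoU is 1/3.

import tensorflow as tf
from mask_rcnn.object_detection import box_list

b1 = box_list.BoxList(tf.constant([[0.0, 0.0, 1.0, 1.0]]))
b2 = box_list.BoxList(tf.constant([[0.0, 0.5, 1.0, 1.5]]))
print(iou(b1, b2).numpy())   # [[0.33333334]]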
class RegionSimilarityCalculator(object):
    """Abstract base class for region similarity calculator."""
    __metaclass__ = ABCMeta

    def compare(self, boxlist1, boxlist2, scope=None):
        """Computes matrix of pairwise similarity between BoxLists.

        This op (to be overridden) computes a measure of pairwise similarity
        between the boxes in the given BoxLists. Higher values indicate more
        similarity.

        Note that this method simply measures similarity and does not explicitly
        perform a matching.

        Args:
          boxlist1: BoxList holding N boxes.
          boxlist2: BoxList holding M boxes.
          scope: Op scope name. Defaults to 'Compare' if None.

        Returns:
          a (float32) tensor of shape [N, M] with pairwise similarity score.
        """
        return self._compare(boxlist1, boxlist2)

    @abstractmethod
    def _compare(self, boxlist1, boxlist2):
        pass
class IouSimilarity(RegionSimilarityCalculator):
    """Class to compute similarity based on Intersection over Union (IOU) metric.

    This class computes pairwise similarity between two BoxLists based on IOU.
    """

    def _compare(self, boxlist1, boxlist2):
        """Compute pairwise IOU similarity between the two BoxLists.

        Args:
          boxlist1: BoxList holding N boxes.
          boxlist2: BoxList holding M boxes.

        Returns:
          A tensor with shape [N, M] representing pairwise iou scores.
        """
        return iou(boxlist1, boxlist2)
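And the same computation through the calculator interface (an illustrative sketch, not part of the file):

import tensorflow as tf
from mask_rcnn.object_detection import box_list

anchors = box_list.BoxList(tf.constant([[0.0, 0.0, 0.5, 0.5],
                                        [0.5, 0.5, 1.0, 1.0]]))
gt = box_list.BoxList(tf.constant([[0.0, 0.0, 0.5, 0.5]]))
# Row per groundtruth box, column per anchor.
print(IouSimilarity().compare(gt, anchors).numpy())  # [[1. 0.]]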
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/object_detection/shape_utils.py
0 → 100644
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utils used to manipulate tensor shapes."""
import tensorflow as tf
def combined_static_and_dynamic_shape(tensor):
    """Returns a list containing static and dynamic values for the dimensions.

    Returns a list of static and dynamic values for shape dimensions. This is
    useful to preserve static shapes when available in reshape operation.

    Args:
      tensor: A tensor of any type.

    Returns:
      A list of size tensor.shape.ndims containing integers or a scalar tensor.
    """
    static_tensor_shape = tensor.shape.as_list()
    dynamic_tensor_shape = tf.shape(input=tensor)
    combined_shape = []
    for index, dim in enumerate(static_tensor_shape):
        if dim is not None:
            combined_shape.append(dim)
        else:
            combined_shape.append(dynamic_tensor_shape[index])
    return combined_shape
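A minimal sketch of why the combined shape is useful inside a tf.function with a partially unknown input signature (illustrative, not part of the file):

import tensorflow as tf

@tf.function(input_signature=[tf.TensorSpec([None, 7], tf.float32)])
def flatten(x):
    shape = combined_static_and_dynamic_shape(x)
    # shape[0] is a scalar tensor (unknown batch); shape[1] is the Python int 7.
    return tf.reshape(x, [shape[0] * shape[1]])

print(flatten(tf.zeros([3, 7])).shape)   # (21,)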
def pad_or_clip_nd(tensor, output_shape):
    """Pad or clip the given tensor to the output shape.

    Args:
      tensor: Input tensor to pad or clip.
      output_shape: A list of integers / scalar tensors (or None for dynamic dim)
        representing the size to pad or clip each dimension of the input tensor.

    Returns:
      Input tensor padded and clipped to the output shape.
    """
    tensor_shape = tf.shape(input=tensor)
    clip_size = [
        tf.where(tensor_shape[i] - shape > 0, shape, -1)
        if shape is not None else -1
        for i, shape in enumerate(output_shape)
    ]
    clipped_tensor = tf.slice(tensor,
                              begin=tf.zeros(len(clip_size), dtype=tf.int32),
                              size=clip_size)

    # Pad tensor if the shape of clipped tensor is smaller than the expected
    # shape.
    clipped_tensor_shape = tf.shape(input=clipped_tensor)
    trailing_paddings = [
        shape - clipped_tensor_shape[i] if shape is not None else 0
        for i, shape in enumerate(output_shape)
    ]
    paddings = tf.stack(
        [tf.zeros(len(trailing_paddings), dtype=tf.int32), trailing_paddings],
        axis=1)
    padded_tensor = tf.pad(tensor=clipped_tensor, paddings=paddings)
    output_static_shape = [
        dim if not isinstance(dim, tf.Tensor) else None for dim in output_shape
    ]
    padded_tensor.set_shape(output_static_shape)
    return padded_tensor
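An illustrative example of pad_or_clip_nd (not part of the file): padding the first dimension while clipping the second.

import tensorflow as tf

t = tf.reshape(tf.range(12), [3, 4])
out = pad_or_clip_nd(t, [5, 2])   # pad rows 3 -> 5 with zeros, clip cols 4 -> 2
print(out.shape)                  # (5, 2)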
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/object_detection/target_assigner.py
0 → 100644
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Base target assigner module.
The job of a TargetAssigner is, for a given set of anchors (bounding boxes) and
groundtruth detections (bounding boxes), to assign classification and regression
targets to each anchor as well as weights to each anchor (specifying, e.g.,
which anchors should not contribute to training loss).
It assigns classification/regression targets by performing the following steps:
1) Computing pairwise similarity between anchors and groundtruth boxes using a
provided RegionSimilarityCalculator
2) Computing a matching based on the similarity matrix using a provided Matcher
3) Assigning regression targets based on the matching and a provided BoxCoder
4) Assigning classification targets based on the matching and groundtruth labels
Note that TargetAssigners only operate on detections from a single
image at a time, so any logic for applying a TargetAssigner to multiple
images must be handled externally.
"""
import tensorflow as tf

from mask_rcnn.object_detection import box_list
from mask_rcnn.object_detection import shape_utils

KEYPOINTS_FIELD_NAME = 'keypoints'
class TargetAssigner(object):
    """Target assigner to compute classification and regression targets."""

    def __init__(self,
                 similarity_calc,
                 matcher,
                 box_coder,
                 negative_class_weight=1.0,
                 unmatched_cls_target=None):
        """Construct Object Detection Target Assigner.

        Args:
          similarity_calc: a RegionSimilarityCalculator
          matcher: Matcher used to match groundtruth to anchors.
          box_coder: BoxCoder used to encode matching groundtruth boxes with
            respect to anchors.
          negative_class_weight: classification weight to be associated to negative
            anchors (default: 1.0). The weight must be in [0., 1.].
          unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k]
            which is consistent with the classification target for each
            anchor (and can be empty for scalar targets). This shape must thus be
            compatible with the groundtruth labels that are passed to the "assign"
            function (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]).
            If set to None, unmatched_cls_target is set to be [0] for each anchor.

        Raises:
          ValueError: if similarity_calc is not a RegionSimilarityCalculator or
            if matcher is not a Matcher or if box_coder is not a BoxCoder
        """
        self._similarity_calc = similarity_calc
        self._matcher = matcher
        self._box_coder = box_coder
        self._negative_class_weight = negative_class_weight
        if unmatched_cls_target is None:
            self._unmatched_cls_target = tf.constant([0], tf.float32)
        else:
            self._unmatched_cls_target = unmatched_cls_target
    @property
    def box_coder(self):
        return self._box_coder
    def assign(self,
               anchors,
               groundtruth_boxes,
               groundtruth_labels=None,
               groundtruth_weights=None,
               **params):
        """Assign classification and regression targets to each anchor.

        For a given set of anchors and groundtruth detections, match anchors
        to groundtruth_boxes and assign classification and regression targets to
        each anchor as well as weights based on the resulting match (specifying,
        e.g., which anchors should not contribute to training loss).

        Anchors that are not matched to anything are given a classification target
        of self._unmatched_cls_target which can be specified via the constructor.

        Args:
          anchors: a BoxList representing N anchors
          groundtruth_boxes: a BoxList representing M groundtruth boxes
          groundtruth_labels: a tensor of shape [M, d_1, ... d_k]
            with labels for each of the ground_truth boxes. The subshape
            [d_1, ... d_k] can be empty (corresponding to scalar inputs). When set
            to None, groundtruth_labels assumes a binary problem where all
            ground_truth boxes get a positive label (of 1).
          groundtruth_weights: a float tensor of shape [M] indicating the weight to
            assign to all anchors matched to a particular groundtruth box. The
            weights must be in [0., 1.]. If None, all weights are set to 1.
          **params: Additional keyword arguments for specific implementations of
            the Matcher.

        Returns:
          cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k],
            where the subshape [d_1, ..., d_k] is compatible with
            groundtruth_labels which has shape [num_gt_boxes, d_1, d_2, ... d_k].
          cls_weights: a float32 tensor with shape [num_anchors]
          reg_targets: a float32 tensor with shape
            [num_anchors, box_code_dimension]
          reg_weights: a float32 tensor with shape [num_anchors]
          match: a matcher.Match object encoding the match between anchors and
            groundtruth boxes, with rows corresponding to groundtruth boxes
            and columns corresponding to anchors.

        Raises:
          ValueError: if anchors or groundtruth_boxes are not of type
            box_list.BoxList
        """
        if not isinstance(anchors, box_list.BoxList):
            raise ValueError('anchors must be a BoxList')

        if not isinstance(groundtruth_boxes, box_list.BoxList):
            raise ValueError('groundtruth_boxes must be a BoxList')

        if groundtruth_labels is None:
            groundtruth_labels = tf.ones(
                tf.expand_dims(groundtruth_boxes.num_boxes(), 0))
            groundtruth_labels = tf.expand_dims(groundtruth_labels, -1)

        if groundtruth_weights is None:
            num_gt_boxes = groundtruth_boxes.num_boxes_static()
            if not num_gt_boxes:
                num_gt_boxes = groundtruth_boxes.num_boxes()
            groundtruth_weights = tf.ones([num_gt_boxes], dtype=tf.float32)

        match_quality_matrix = self._similarity_calc.compare(groundtruth_boxes,
                                                             anchors)
        match = self._matcher.match(match_quality_matrix, **params)
        reg_targets = self._create_regression_targets(anchors,
                                                      groundtruth_boxes,
                                                      match)
        cls_targets = self._create_classification_targets(groundtruth_labels,
                                                          match)
        reg_weights = self._create_regression_weights(match, groundtruth_weights)
        cls_weights = self._create_classification_weights(match,
                                                          groundtruth_weights)

        num_anchors = anchors.num_boxes_static()
        if num_anchors is not None:
            reg_targets = self._reset_target_shape(reg_targets, num_anchors)
            cls_targets = self._reset_target_shape(cls_targets, num_anchors)
            reg_weights = self._reset_target_shape(reg_weights, num_anchors)
            cls_weights = self._reset_target_shape(cls_weights, num_anchors)

        return cls_targets, cls_weights, reg_targets, reg_weights, match
    def _reset_target_shape(self, target, num_anchors):
        """Sets the static shape of the target.

        Args:
          target: the target tensor. Its first dimension will be overwritten.
          num_anchors: the number of anchors, which is used to override the
            target's first dimension.

        Returns:
          A tensor with the shape info filled in.
        """
        target_shape = target.get_shape().as_list()
        target_shape[0] = num_anchors
        target.set_shape(target_shape)
        return target
    def _create_regression_targets(self, anchors, groundtruth_boxes, match):
        """Returns a regression target for each anchor.

        Args:
          anchors: a BoxList representing N anchors
          groundtruth_boxes: a BoxList representing M groundtruth_boxes
          match: a matcher.Match object

        Returns:
          reg_targets: a float32 tensor with shape [N, box_code_dimension]
        """
        matched_gt_boxes = match.gather_based_on_match(
            groundtruth_boxes.get(),
            unmatched_value=tf.zeros(4),
            ignored_value=tf.zeros(4))
        matched_gt_boxlist = box_list.BoxList(matched_gt_boxes)

        if groundtruth_boxes.has_field(KEYPOINTS_FIELD_NAME):
            groundtruth_keypoints = groundtruth_boxes.get_field(
                KEYPOINTS_FIELD_NAME)
            matched_keypoints = match.gather_based_on_match(
                groundtruth_keypoints,
                unmatched_value=tf.zeros(groundtruth_keypoints.get_shape()[1:]),
                ignored_value=tf.zeros(groundtruth_keypoints.get_shape()[1:]))
            matched_gt_boxlist.add_field(KEYPOINTS_FIELD_NAME, matched_keypoints)

        matched_reg_targets = self._box_coder.encode(matched_gt_boxlist, anchors)
        match_results_shape = shape_utils.combined_static_and_dynamic_shape(
            match.match_results)

        # Zero out the unmatched and ignored regression targets.
        unmatched_ignored_reg_targets = tf.tile(
            self._default_regression_target(), [match_results_shape[0], 1])
        matched_anchors_mask = match.matched_column_indicator()
        matched_anchors_mask = tf.expand_dims(matched_anchors_mask, axis=1)
        matched_anchors_mask = tf.broadcast_to(
            matched_anchors_mask, shape=matched_reg_targets.get_shape())

        reg_targets = tf.where(matched_anchors_mask,
                               matched_reg_targets,
                               unmatched_ignored_reg_targets)
        return reg_targets
    def _default_regression_target(self):
        """Returns the default target for anchors to regress to.

        Default regression targets are set to zero (though in
        this implementation what these targets are set to should
        not matter as the regression weight of any box set to
        regress to the default target is zero).

        Returns:
          default_target: a float32 tensor with shape [1, box_code_dimension]
        """
        return tf.constant([self._box_coder.code_size * [0]], tf.float32)
    def _create_classification_targets(self, groundtruth_labels, match):
        """Create classification targets for each anchor.

        Assign a classification target for each anchor to the matching
        groundtruth label that is provided by match. Anchors that are not matched
        to anything are given the target self._unmatched_cls_target.

        Args:
          groundtruth_labels: a tensor of shape [num_gt_boxes, d_1, ... d_k]
            with labels for each of the ground_truth boxes. The subshape
            [d_1, ... d_k] can be empty (corresponding to scalar labels).
          match: a matcher.Match object that provides a matching between anchors
            and groundtruth boxes.

        Returns:
          a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k], where the
          subshape [d_1, ..., d_k] is compatible with groundtruth_labels which has
          shape [num_gt_boxes, d_1, d_2, ... d_k].
        """
        return match.gather_based_on_match(
            groundtruth_labels,
            unmatched_value=self._unmatched_cls_target,
            ignored_value=self._unmatched_cls_target)
    def _create_regression_weights(self, match, groundtruth_weights):
        """Set regression weight for each anchor.

        Only positive anchors are set to contribute to the regression loss, so
        this method returns a weight of 1 for every positive anchor and 0 for
        every negative anchor.

        Args:
          match: a matcher.Match object that provides a matching between anchors
            and groundtruth boxes.
          groundtruth_weights: a float tensor of shape [M] indicating the weight
            to assign to all anchors matched to a particular groundtruth box.

        Returns:
          a float32 tensor with shape [num_anchors] representing regression
          weights.
        """
        return match.gather_based_on_match(groundtruth_weights,
                                           ignored_value=0.,
                                           unmatched_value=0.)
    def _create_classification_weights(self, match, groundtruth_weights):
        """Create classification weights for each anchor.

        Positive (matched) anchors are associated with a weight of
        positive_class_weight and negative (unmatched) anchors are associated
        with a weight of negative_class_weight. When anchors are ignored,
        weights are set to zero. By default, both positive/negative weights are
        set to 1.0, but they can be adjusted to handle class imbalance (which is
        almost always the case in object detection).

        Args:
          match: a matcher.Match object that provides a matching between anchors
            and groundtruth boxes.
          groundtruth_weights: a float tensor of shape [M] indicating the weight
            to assign to all anchors matched to a particular groundtruth box.

        Returns:
          a float32 tensor with shape [num_anchors] representing classification
          weights.
        """
        return match.gather_based_on_match(
            groundtruth_weights,
            ignored_value=0.,
            unmatched_value=self._negative_class_weight)
    def get_box_coder(self):
        """Get BoxCoder of this TargetAssigner.

        Returns:
          BoxCoder object.
        """
        return self._box_coder
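A sketch of how a TargetAssigner is typically wired together, assuming the sibling argmax_matcher and faster_rcnn_box_coder modules added earlier in this commit; the constructor arguments shown for ArgMaxMatcher are assumptions for illustration, not code from this file:

import tensorflow as tf
from mask_rcnn.object_detection import argmax_matcher
from mask_rcnn.object_detection import box_list
from mask_rcnn.object_detection import faster_rcnn_box_coder
from mask_rcnn.object_detection import region_similarity_calculator

assigner = TargetAssigner(
    similarity_calc=region_similarity_calculator.IouSimilarity(),
    matcher=argmax_matcher.ArgMaxMatcher(matched_threshold=0.7,
                                         unmatched_threshold=0.3),
    box_coder=faster_rcnn_box_coder.FasterRcnnBoxCoder())

anchors = box_list.BoxList(tf.constant([[0.0, 0.0, 0.5, 0.5]]))
gt = box_list.BoxList(tf.constant([[0.1, 0.1, 0.6, 0.6]]))
cls_t, cls_w, reg_t, reg_w, match = assigner.assign(anchors, gt)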
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/object_detection/tf_example_decoder.py
0 → 100644
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tensorflow Example proto decoder for object detection.
A decoder to decode string tensors containing serialized tensorflow.Example
protos for object detection.
"""
import tensorflow as tf


def _get_source_id_from_encoded_image(parsed_tensors):
    return tf.strings.as_string(
        tf.strings.to_hash_bucket_fast(parsed_tensors['image/encoded'],
                                       2 ** 63 - 1))
class TfExampleDecoder(object):
    """Tensorflow Example proto decoder."""

    def __init__(self, use_instance_mask=False, regenerate_source_id=False):
        self._include_mask = use_instance_mask
        self._regenerate_source_id = regenerate_source_id
        self._keys_to_features = {
            'image/encoded': tf.io.FixedLenFeature((), tf.string),
            'image/source_id': tf.io.FixedLenFeature((), tf.string),
            'image/height': tf.io.FixedLenFeature((), tf.int64),
            'image/width': tf.io.FixedLenFeature((), tf.int64),
            'image/object/bbox/xmin': tf.io.VarLenFeature(tf.float32),
            'image/object/bbox/xmax': tf.io.VarLenFeature(tf.float32),
            'image/object/bbox/ymin': tf.io.VarLenFeature(tf.float32),
            'image/object/bbox/ymax': tf.io.VarLenFeature(tf.float32),
            'image/object/class/label': tf.io.VarLenFeature(tf.int64),
            'image/object/area': tf.io.VarLenFeature(tf.float32),
            'image/object/is_crowd': tf.io.VarLenFeature(tf.int64),
        }
        if use_instance_mask:
            self._keys_to_features.update({
                'image/object/mask': tf.io.VarLenFeature(tf.string),
            })
    def _decode_image(self, parsed_tensors):
        """Decodes the image and sets its static shape."""
        image = tf.io.decode_image(parsed_tensors['image/encoded'], channels=3)
        image.set_shape([None, None, 3])
        return image
    def _decode_boxes(self, parsed_tensors):
        """Concatenates box coordinates in the format of [ymin, xmin, ymax, xmax]."""
        xmin = parsed_tensors['image/object/bbox/xmin']
        xmax = parsed_tensors['image/object/bbox/xmax']
        ymin = parsed_tensors['image/object/bbox/ymin']
        ymax = parsed_tensors['image/object/bbox/ymax']
        return tf.stack([ymin, xmin, ymax, xmax], axis=-1)
    def _decode_masks(self, parsed_tensors):
        """Decodes a set of PNG masks to tf.float32 tensors."""

        def _decode_png_mask(png_bytes):
            mask = tf.squeeze(
                tf.io.decode_png(png_bytes, channels=1, dtype=tf.uint8), axis=-1)
            mask = tf.cast(mask, dtype=tf.float32)
            mask.set_shape([None, None])
            return mask

        height = parsed_tensors['image/height']
        width = parsed_tensors['image/width']
        masks = parsed_tensors['image/object/mask']
        return tf.cond(
            tf.greater(tf.size(masks), 0),
            lambda: tf.map_fn(_decode_png_mask, masks, dtype=tf.float32),
            lambda: tf.zeros([0, height, width], dtype=tf.float32))
    def decode(self, serialized_example):
        """Decode the serialized example.

        Args:
          serialized_example: a single serialized tf.Example string.

        Returns:
          decoded_tensors: a dictionary of tensors with the following fields:
            - image: a uint8 tensor of shape [None, None, 3].
            - source_id: a string scalar tensor.
            - height: an integer scalar tensor.
            - width: an integer scalar tensor.
            - groundtruth_classes: an int64 tensor of shape [None].
            - groundtruth_is_crowd: a bool tensor of shape [None].
            - groundtruth_area: a float32 tensor of shape [None].
            - groundtruth_boxes: a float32 tensor of shape [None, 4].
            - groundtruth_instance_masks: a float32 tensor of shape
                [None, None, None].
            - groundtruth_instance_masks_png: a string tensor of shape [None].
        """
        parsed_tensors = tf.io.parse_single_example(serialized_example,
                                                    self._keys_to_features)
        for k in parsed_tensors:
            if isinstance(parsed_tensors[k], tf.SparseTensor):
                if parsed_tensors[k].dtype == tf.string:
                    parsed_tensors[k] = tf.sparse.to_dense(parsed_tensors[k],
                                                           default_value='')
                else:
                    parsed_tensors[k] = tf.sparse.to_dense(parsed_tensors[k],
                                                           default_value=0)

        image = self._decode_image(parsed_tensors)
        boxes = self._decode_boxes(parsed_tensors)
        is_crowd = tf.cast(parsed_tensors['image/object/is_crowd'],
                           dtype=tf.bool)
        if self._include_mask:
            masks = self._decode_masks(parsed_tensors)

        if self._regenerate_source_id:
            source_id = _get_source_id_from_encoded_image(parsed_tensors)
        else:
            source_id = tf.cond(
                tf.greater(
                    tf.strings.length(parsed_tensors['image/source_id']), 0),
                lambda: parsed_tensors['image/source_id'],
                lambda: _get_source_id_from_encoded_image(parsed_tensors))

        decoded_tensors = {
            'image': image,
            'source_id': source_id,
            'height': parsed_tensors['image/height'],
            'width': parsed_tensors['image/width'],
            'groundtruth_classes': parsed_tensors['image/object/class/label'],
            'groundtruth_is_crowd': is_crowd,
            'groundtruth_area': parsed_tensors['image/object/area'],
            'groundtruth_boxes': boxes,
        }
        if self._include_mask:
            decoded_tensors.update({
                'groundtruth_instance_masks': masks,
                'groundtruth_instance_masks_png':
                    parsed_tensors['image/object/mask'],
            })
        return decoded_tensors
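A typical way to plug the decoder into a tf.data pipeline (an illustrative sketch, not part of the file; the TFRecord path is a placeholder):

import tensorflow as tf

decoder = TfExampleDecoder(use_instance_mask=True)

dataset = (tf.data.TFRecordDataset('/path/to/train.tfrecord')  # placeholder path
           .map(decoder.decode,
                num_parallel_calls=tf.data.experimental.AUTOTUNE))
for example in dataset.take(1):
    print(example['source_id'], example['groundtruth_boxes'].shape)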
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/object_detection/visualization_utils.py
0 → 100644
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A set of functions that are used for visualization.
These functions often receive an image and perform some visualization on it.
The functions do not return a value; instead, they modify the image itself.
"""
import collections
import functools

import six
import matplotlib
import matplotlib.pyplot as plt
import PIL.Image as Image
import PIL.ImageColor as ImageColor
import PIL.ImageDraw as ImageDraw
import PIL.ImageFont as ImageFont
import numpy as np
import tensorflow as tf

from mask_rcnn.object_detection import shape_utils

# Set headless-friendly backend.
matplotlib.use('Agg')
_TITLE_LEFT_MARGIN = 10
_TITLE_TOP_MARGIN = 10

STANDARD_COLORS = [
    'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige',
    'Bisque', 'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue',
    'AntiqueWhite', 'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk',
    'Crimson', 'Cyan', 'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki',
    'DarkOrange', 'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise',
    'DarkViolet', 'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick',
    'FloralWhite', 'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite',
    'Gold', 'GoldenRod', 'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed',
    'Ivory', 'Khaki', 'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon',
    'LightBlue', 'LightCoral', 'LightCyan', 'LightGoldenRodYellow',
    'LightGray', 'LightGrey', 'LightGreen', 'LightPink', 'LightSalmon',
    'LightSeaGreen', 'LightSkyBlue', 'LightSlateGray', 'LightSlateGrey',
    'LightSteelBlue', 'LightYellow', 'Lime', 'LimeGreen', 'Linen', 'Magenta',
    'MediumAquaMarine', 'MediumOrchid', 'MediumPurple', 'MediumSeaGreen',
    'MediumSlateBlue', 'MediumSpringGreen', 'MediumTurquoise',
    'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin', 'NavajoWhite',
    'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed', 'Orchid',
    'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed',
    'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple',
    'Red', 'RosyBrown', 'RoyalBlue', 'SaddleBrown', 'Green', 'SandyBrown',
    'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue',
    'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue',
    'GreenYellow', 'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet',
    'Wheat', 'White', 'WhiteSmoke', 'Yellow', 'YellowGreen'
]
def draw_bounding_box_on_image_array(image,
                                     ymin,
                                     xmin,
                                     ymax,
                                     xmax,
                                     color='red',
                                     thickness=4,
                                     display_str_list=(),
                                     use_normalized_coordinates=True):
    """Adds a bounding box to an image (numpy array).

    Bounding box coordinates can be specified in either absolute (pixel) or
    normalized coordinates by setting the use_normalized_coordinates argument.

    Args:
      image: a numpy array with shape [height, width, 3].
      ymin: ymin of bounding box.
      xmin: xmin of bounding box.
      ymax: ymax of bounding box.
      xmax: xmax of bounding box.
      color: color to draw bounding box. Default is red.
      thickness: line thickness. Default value is 4.
      display_str_list: list of strings to display in box
        (each to be shown on its own line).
      use_normalized_coordinates: If True (default), treat coordinates
        ymin, xmin, ymax, xmax as relative to the image. Otherwise treat
        coordinates as absolute.
    """
    image_pil = Image.fromarray(np.uint8(image)).convert('RGB')
    draw_bounding_box_on_image(image_pil, ymin, xmin, ymax, xmax, color,
                               thickness, display_str_list,
                               use_normalized_coordinates)
    np.copyto(image, np.array(image_pil))
def draw_bounding_box_on_image(image,
                               ymin,
                               xmin,
                               ymax,
                               xmax,
                               color='red',
                               thickness=4,
                               display_str_list=(),
                               use_normalized_coordinates=True):
    """Adds a bounding box to an image.

    Bounding box coordinates can be specified in either absolute (pixel) or
    normalized coordinates by setting the use_normalized_coordinates argument.

    Each string in display_str_list is displayed on a separate line above the
    bounding box in black text on a rectangle filled with the input 'color'.
    If the top of the bounding box extends to the edge of the image, the strings
    are displayed below the bounding box.

    Args:
      image: a PIL.Image object.
      ymin: ymin of bounding box.
      xmin: xmin of bounding box.
      ymax: ymax of bounding box.
      xmax: xmax of bounding box.
      color: color to draw bounding box. Default is red.
      thickness: line thickness. Default value is 4.
      display_str_list: list of strings to display in box
        (each to be shown on its own line).
      use_normalized_coordinates: If True (default), treat coordinates
        ymin, xmin, ymax, xmax as relative to the image. Otherwise treat
        coordinates as absolute.
    """
    draw = ImageDraw.Draw(image)
    im_width, im_height = image.size

    if use_normalized_coordinates:
        (left, right, top, bottom) = (xmin * im_width, xmax * im_width,
                                      ymin * im_height, ymax * im_height)
    else:
        (left, right, top, bottom) = (xmin, xmax, ymin, ymax)

    draw.line([(left, top), (left, bottom), (right, bottom), (right, top),
               (left, top)],
              width=thickness,
              fill=color)

    try:
        font = ImageFont.truetype('arial.ttf', 24)
    except IOError:
        font = ImageFont.load_default()

    # If the total height of the display strings added to the top of the bounding
    # box exceeds the top of the image, stack the strings below the bounding box
    # instead of above.
    display_str_heights = [font.getsize(ds)[1] for ds in display_str_list]
    # Each display_str has a top and bottom margin of 0.05x.
    total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights)

    if top > total_display_str_height:
        text_bottom = top
    else:
        text_bottom = bottom + total_display_str_height

    # Reverse list and print from bottom to top.
    for display_str in display_str_list[::-1]:
        text_width, text_height = font.getsize(display_str)
        margin = np.ceil(0.05 * text_height)
        draw.rectangle([(left, text_bottom - text_height - 2 * margin),
                        (left + text_width, text_bottom)],
                       fill=color)
        draw.text((left + margin, text_bottom - text_height - margin),
                  display_str,
                  fill='black',
                  font=font)
        text_bottom -= text_height - 2 * margin
def draw_keypoints_on_image_array(image,
                                  keypoints,
                                  color='red',
                                  radius=2,
                                  use_normalized_coordinates=True):
    """Draws keypoints on an image (numpy array).

    Args:
      image: a numpy array with shape [height, width, 3].
      keypoints: a numpy array with shape [num_keypoints, 2].
      color: color to draw the keypoints with. Default is red.
      radius: keypoint radius. Default value is 2.
      use_normalized_coordinates: if True (default), treat keypoint values as
        relative to the image. Otherwise treat them as absolute.
    """
    image_pil = Image.fromarray(np.uint8(image)).convert('RGB')
    draw_keypoints_on_image(image_pil, keypoints, color, radius,
                            use_normalized_coordinates)
    np.copyto(image, np.array(image_pil))
def draw_keypoints_on_image(image,
                            keypoints,
                            color='red',
                            radius=2,
                            use_normalized_coordinates=True):
    """Draws keypoints on an image.

    Args:
      image: a PIL.Image object.
      keypoints: a numpy array with shape [num_keypoints, 2].
      color: color to draw the keypoints with. Default is red.
      radius: keypoint radius. Default value is 2.
      use_normalized_coordinates: if True (default), treat keypoint values as
        relative to the image. Otherwise treat them as absolute.
    """
    draw = ImageDraw.Draw(image)
    im_width, im_height = image.size
    keypoints_x = [k[1] for k in keypoints]
    keypoints_y = [k[0] for k in keypoints]
    if use_normalized_coordinates:
        keypoints_x = tuple([im_width * x for x in keypoints_x])
        keypoints_y = tuple([im_height * y for y in keypoints_y])
    for keypoint_x, keypoint_y in zip(keypoints_x, keypoints_y):
        draw.ellipse([(keypoint_x - radius, keypoint_y - radius),
                      (keypoint_x + radius, keypoint_y + radius)],
                     outline=color,
                     fill=color)
def draw_mask_on_image_array(image, mask, color='red', alpha=0.4):
    """Draws a mask on an image.

    Args:
      image: uint8 numpy array with shape (img_height, img_width, 3)
      mask: a uint8 numpy array of shape (img_height, img_width) with
        values of either 0 or 1.
      color: color to draw the mask with. Default is red.
      alpha: transparency value between 0 and 1. (default: 0.4)

    Raises:
      ValueError: On incorrect data type for image or masks.
    """
    if image.dtype != np.uint8:
        raise ValueError('`image` not of type np.uint8')

    if mask.dtype != np.uint8:
        raise ValueError('`mask` not of type np.uint8')

    if np.any(np.logical_and(mask != 1, mask != 0)):
        raise ValueError('`mask` elements should be in [0, 1]')

    if image.shape[:2] != mask.shape:
        raise ValueError('The image has spatial dimensions %s but the mask has '
                         'dimensions %s' % (image.shape[:2], mask.shape))

    rgb = ImageColor.getrgb(color)
    pil_image = Image.fromarray(image)

    solid_color = (np.expand_dims(np.ones_like(mask), axis=2) *
                   np.reshape(list(rgb), [1, 1, 3]))
    pil_solid_color = Image.fromarray(np.uint8(solid_color)).convert('RGBA')
    pil_mask = Image.fromarray(np.uint8(255.0 * alpha * mask)).convert('L')
    pil_image = Image.composite(pil_solid_color, pil_image, pil_mask)
    np.copyto(image, np.array(pil_image.convert('RGB')))
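A minimal illustrative example of the mask overlay (not part of the file):

import numpy as np

image = np.zeros((4, 4, 3), dtype=np.uint8)
mask = np.zeros((4, 4), dtype=np.uint8)
mask[1:3, 1:3] = 1
draw_mask_on_image_array(image, mask, color='red', alpha=0.4)
# image now contains red pixels, blended at 40% alpha, where mask == 1.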
def visualize_boxes_and_labels_on_image_array(
        image,
        boxes,
        classes,
        scores,
        category_index,
        instance_masks=None,
        instance_boundaries=None,
        keypoints=None,
        use_normalized_coordinates=False,
        max_boxes_to_draw=20,
        min_score_thresh=0.5,
        agnostic_mode=False,
        line_thickness=4,
        groundtruth_box_visualization_color='black',
        skip_scores=False,
        skip_labels=False):
    """Overlay labeled boxes on an image with formatted scores and label names.

    This function groups boxes that correspond to the same location
    and creates a display string for each detection and overlays these
    on the image. Note that this function modifies the image in place, and
    returns that same image.

    Args:
      image: uint8 numpy array with shape (img_height, img_width, 3)
      boxes: a numpy array of shape [N, 4]
      classes: a numpy array of shape [N]. Note that class indices are 1-based,
        and match the keys in the label map.
      scores: a numpy array of shape [N] or None. If scores=None, then
        this function assumes that the boxes to be plotted are groundtruth
        boxes and plots all boxes as black with no classes or scores.
      category_index: a dict containing category dictionaries (each holding
        category index `id` and category name `name`) keyed by category indices.
      instance_masks: a numpy array of shape [N, image_height, image_width] with
        values ranging between 0 and 1, can be None.
      instance_boundaries: a numpy array of shape [N, image_height, image_width]
        with values ranging between 0 and 1, can be None.
      keypoints: a numpy array of shape [N, num_keypoints, 2], can
        be None
      use_normalized_coordinates: whether boxes is to be interpreted as
        normalized coordinates or not.
      max_boxes_to_draw: maximum number of boxes to visualize. If None, draw
        all boxes.
      min_score_thresh: minimum score threshold for a box to be visualized
      agnostic_mode: boolean (default: False) controlling whether to evaluate in
        class-agnostic mode or not. This mode will display scores but ignore
        classes.
      line_thickness: integer (default: 4) controlling line width of the boxes.
      groundtruth_box_visualization_color: box color for visualizing groundtruth
        boxes
      skip_scores: whether to skip score when drawing a single detection
      skip_labels: whether to skip label when drawing a single detection

    Returns:
      uint8 numpy array with shape (img_height, img_width, 3) with overlaid
      boxes.
    """
    # Create a display string (and color) for every box location, group any boxes
    # that correspond to the same location.
    box_to_display_str_map = collections.defaultdict(list)
    box_to_color_map = collections.defaultdict(str)
    box_to_instance_masks_map = {}
    box_to_instance_boundaries_map = {}
    box_to_keypoints_map = collections.defaultdict(list)

    if not max_boxes_to_draw:
        max_boxes_to_draw = boxes.shape[0]

    for i in range(min(max_boxes_to_draw, boxes.shape[0])):
        if scores is None or scores[i] > min_score_thresh:
            box = tuple(boxes[i].tolist())
            if instance_masks is not None:
                box_to_instance_masks_map[box] = instance_masks[i]
            if instance_boundaries is not None:
                box_to_instance_boundaries_map[box] = instance_boundaries[i]
            if keypoints is not None:
                box_to_keypoints_map[box].extend(keypoints[i])
            if scores is None:
                box_to_color_map[box] = groundtruth_box_visualization_color
            else:
                display_str = ''
                if not skip_labels:
                    if not agnostic_mode:
                        if classes[i] in category_index.keys():
                            class_name = category_index[classes[i]]['name']
                        else:
                            class_name = 'N/A'
                        display_str = str(class_name)
                if not skip_scores:
                    if not display_str:
                        display_str = '{}%'.format(int(100 * scores[i]))
                    else:
                        display_str = '{}: {}%'.format(display_str,
                                                       int(100 * scores[i]))
                box_to_display_str_map[box].append(display_str)
                if agnostic_mode:
                    box_to_color_map[box] = 'DarkOrange'
                else:
                    box_to_color_map[box] = STANDARD_COLORS[
                        classes[i] % len(STANDARD_COLORS)]

    # Draw all boxes onto image.
    for box, color in box_to_color_map.items():
        ymin, xmin, ymax, xmax = box
        if instance_masks is not None:
            draw_mask_on_image_array(image,
                                     box_to_instance_masks_map[box],
                                     color=color)
        if instance_boundaries is not None:
            draw_mask_on_image_array(image,
                                     box_to_instance_boundaries_map[box],
                                     color='red',
                                     alpha=1.0)
        draw_bounding_box_on_image_array(
            image,
            ymin,
            xmin,
            ymax,
            xmax,
            color=color,
            thickness=line_thickness,
            display_str_list=box_to_display_str_map[box],
            use_normalized_coordinates=use_normalized_coordinates)
        if keypoints is not None:
            draw_keypoints_on_image_array(
                image,
                box_to_keypoints_map[box],
                color=color,
                radius=line_thickness / 2,
                use_normalized_coordinates=use_normalized_coordinates)

    return image
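A minimal usage sketch (illustrative, not part of the file; the category_index contents are an assumption):

import numpy as np

image = np.zeros((480, 640, 3), dtype=np.uint8)
boxes = np.array([[0.1, 0.1, 0.6, 0.5]])
classes = np.array([1])
scores = np.array([0.9])
category_index = {1: {'id': 1, 'name': 'person'}}

visualize_boxes_and_labels_on_image_array(
    image, boxes, classes, scores, category_index,
    use_normalized_coordinates=True)
# image now has a labeled box drawn in place.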
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/ops/__init__.py
0 → 100644
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/ops/box_utils.py
0 → 100644
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utility functions for bounding box processing."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf

EPSILON = 1e-8
BBOX_XFORM_CLIP = np.log(1000. / 16.)
def jitter_boxes(boxes, noise_scale=0.025):
    """Jitter the box coordinates by some noise distribution.

    Args:
      boxes: a tensor whose last dimension is 4 representing the coordinates
        of boxes in ymin, xmin, ymax, xmax order.
      noise_scale: a python float which specifies the magnitude of noise. The
        rule of thumb is to set this between (0, 0.1]. The default value is
        found to mimic the noisy detections best empirically.

    Returns:
      jittered_boxes: a tensor whose shape is the same as `boxes` representing
        the jittered boxes.

    Raises:
      ValueError: If the last dimension of boxes is not 4.
    """
    if boxes.shape[-1] != 4:
        raise ValueError('boxes.shape[-1] is {:d}, but must be 4.'.format(
            boxes.shape[-1]))

    with tf.name_scope('jitter_boxes'):
        # tf.random.normal is the TF2 name of the TF1 tf.random_normal used in
        # the original code.
        bbox_jitters = tf.random.normal(boxes.get_shape(), stddev=noise_scale)
        ymin = boxes[..., 0:1]
        xmin = boxes[..., 1:2]
        ymax = boxes[..., 2:3]
        xmax = boxes[..., 3:4]
        width = xmax - xmin
        height = ymax - ymin
        new_center_x = (xmin + xmax) / 2.0 + bbox_jitters[..., 0:1] * width
        new_center_y = (ymin + ymax) / 2.0 + bbox_jitters[..., 1:2] * height
        new_width = width * tf.exp(bbox_jitters[..., 2:3])
        new_height = height * tf.exp(bbox_jitters[..., 3:4])
        jittered_boxes = tf.concat([
            new_center_y - new_height * 0.5, new_center_x - new_width * 0.5,
            new_center_y + new_height * 0.5, new_center_x + new_width * 0.5
        ], axis=-1)

    return jittered_boxes
def normalize_boxes(boxes, image_shape):
    """Converts boxes to the normalized coordinates.

    Args:
      boxes: a tensor whose last dimension is 4 representing the coordinates
        of boxes in ymin, xmin, ymax, xmax order.
      image_shape: a list of two integers, a two-element vector or a tensor such
        that all but the last dimensions are `broadcastable` to `boxes`. The
        last dimension is 2, which represents [height, width].

    Returns:
      normalized_boxes: a tensor whose shape is the same as `boxes` representing
        the normalized boxes.

    Raises:
      ValueError: If the last dimension of boxes is not 4.
    """
    if boxes.shape[-1] != 4:
        raise ValueError('boxes.shape[-1] is {:d}, but must be 4.'.format(
            boxes.shape[-1]))

    with tf.name_scope('normalize_boxes'):
        if isinstance(image_shape, (list, tuple)):
            height, width = image_shape
        else:
            image_shape = tf.cast(image_shape, dtype=boxes.dtype)
            height = image_shape[..., 0:1]
            width = image_shape[..., 1:2]

        ymin = boxes[..., 0:1] / height
        xmin = boxes[..., 1:2] / width
        ymax = boxes[..., 2:3] / height
        xmax = boxes[..., 3:4] / width

        normalized_boxes = tf.concat([ymin, xmin, ymax, xmax], axis=-1)

    return normalized_boxes
def denormalize_boxes(boxes, image_shape):
  """Converts boxes normalized by [height, width] to pixel coordinates.

  Args:
    boxes: a tensor whose last dimension is 4 representing the coordinates
      of boxes in ymin, xmin, ymax, xmax order.
    image_shape: a list of two integers, a two-element vector or a tensor
      such that all but the last dimensions are `broadcastable` to `boxes`.
      The last dimension is 2, which represents [height, width].

  Returns:
    denormalized_boxes: a tensor whose shape is the same as `boxes`
      representing the denormalized boxes.

  Raises:
    ValueError: If the last dimension of boxes is not 4.
  """
  with tf.name_scope('denormalize_boxes'):
    if isinstance(image_shape, list) or isinstance(image_shape, tuple):
      height, width = image_shape
    else:
      image_shape = tf.cast(image_shape, dtype=boxes.dtype)
      height, width = tf.split(image_shape, 2, axis=-1)

    ymin, xmin, ymax, xmax = tf.split(boxes, 4, axis=-1)
    ymin = ymin * height
    xmin = xmin * width
    ymax = ymax * height
    xmax = xmax * width

    denormalized_boxes = tf.concat([ymin, xmin, ymax, xmax], axis=-1)
    return denormalized_boxes
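# Illustrative round-trip sketch (not part of the original source): for an
# assumed 640x480 image, normalize_boxes and denormalize_boxes invert each
# other up to floating-point error.
#
#   boxes = tf.constant([[64.0, 48.0, 320.0, 240.0]])
#   normalized = normalize_boxes(boxes, [640.0, 480.0])
#   # -> approximately [[0.1, 0.1, 0.5, 0.5]]
#   restored = denormalize_boxes(normalized, [640.0, 480.0])  # -> `boxes`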
def clip_boxes(boxes, image_shape):
  """Clips boxes to image boundaries.

  Args:
    boxes: a tensor whose last dimension is 4 representing the coordinates
      of boxes in ymin, xmin, ymax, xmax order.
    image_shape: a list of two integers, a two-element vector or a tensor
      such that all but the last dimensions are `broadcastable` to `boxes`.
      The last dimension is 2, which represents [height, width].

  Returns:
    clipped_boxes: a tensor whose shape is the same as `boxes` representing
      the clipped boxes.

  Raises:
    ValueError: If the last dimension of boxes is not 4.
  """
  if boxes.shape[-1] != 4:
    raise ValueError(
        'boxes.shape[-1] is {:d}, but must be 4.'.format(boxes.shape[-1]))

  with tf.name_scope('clip_boxes'):
    if isinstance(image_shape, list) or isinstance(image_shape, tuple):
      height, width = image_shape
    else:
      image_shape = tf.cast(image_shape, dtype=boxes.dtype)
      height = image_shape[..., 0:1]
      width = image_shape[..., 1:2]

    ymin = boxes[..., 0:1]
    xmin = boxes[..., 1:2]
    ymax = boxes[..., 2:3]
    xmax = boxes[..., 3:4]

    clipped_ymin = tf.maximum(tf.minimum(ymin, height - 1.0), 0.0)
    clipped_ymax = tf.maximum(tf.minimum(ymax, height - 1.0), 0.0)
    clipped_xmin = tf.maximum(tf.minimum(xmin, width - 1.0), 0.0)
    clipped_xmax = tf.maximum(tf.minimum(xmax, width - 1.0), 0.0)

    clipped_boxes = tf.concat(
        [clipped_ymin, clipped_xmin, clipped_ymax, clipped_xmax], axis=-1)
    return clipped_boxes
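# Illustrative sketch (not part of the original source): clip_boxes limits
# each coordinate to [0, side - 1], so a box spilling past an assumed
# 100x100 image is pulled back to the boundary.
#
#   boxes = tf.constant([[-10.0, 5.0, 120.0, 50.0]])
#   clipped = clip_boxes(boxes, [100.0, 100.0])
#   # -> [[0.0, 5.0, 99.0, 50.0]]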
def compute_outer_boxes(boxes, image_shape, scale=1.0):
  """Computes outer boxes that enclose an object with a margin.

  Args:
    boxes: a tensor whose last dimension is 4 representing the coordinates
      of boxes in ymin, xmin, ymax, xmax order.
    image_shape: a list of two integers, a two-element vector or a tensor
      such that all but the last dimensions are `broadcastable` to `boxes`.
      The last dimension is 2, which represents [height, width].
    scale: a float number specifying the scale of output outer boxes to
      input `boxes`.

  Returns:
    outer_boxes: a tensor whose shape is the same as `boxes` representing
      the outer boxes.
  """
  if scale < 1.0:
    raise ValueError(
        'scale is {}, but outer box scale must be no less than 1.0.'.format(
            scale))
  centers_y = (boxes[..., 0] + boxes[..., 2]) / 2.0
  centers_x = (boxes[..., 1] + boxes[..., 3]) / 2.0
  box_height = (boxes[..., 2] - boxes[..., 0]) * scale
  box_width = (boxes[..., 3] - boxes[..., 1]) * scale
  outer_boxes = tf.stack([
      centers_y - box_height / 2.0,
      centers_x - box_width / 2.0,
      centers_y + box_height / 2.0,
      centers_x + box_width / 2.0
  ], axis=1)
  outer_boxes = clip_boxes(outer_boxes, image_shape)
  return outer_boxes
def encode_boxes(boxes, anchors, weights=None):
  """Encode boxes to targets.

  Args:
    boxes: a tensor whose last dimension is 4 representing the coordinates
      of boxes in ymin, xmin, ymax, xmax order.
    anchors: a tensor whose shape is the same as, or `broadcastable` to
      `boxes`, representing the coordinates of anchors in ymin, xmin, ymax,
      xmax order.
    weights: None or a list of four float numbers used to scale coordinates.

  Returns:
    encoded_boxes: a tensor whose shape is the same as `boxes` representing
      the encoded box targets.

  Raises:
    ValueError: If the last dimension of boxes is not 4.
  """
  if boxes.shape[-1] != 4:
    raise ValueError(
        'boxes.shape[-1] is {:d}, but must be 4.'.format(boxes.shape[-1]))

  with tf.name_scope('encode_boxes'):
    boxes = tf.cast(boxes, dtype=anchors.dtype)
    ymin = boxes[..., 0:1]
    xmin = boxes[..., 1:2]
    ymax = boxes[..., 2:3]
    xmax = boxes[..., 3:4]
    box_h = ymax - ymin + 1.0
    box_w = xmax - xmin + 1.0
    box_yc = ymin + 0.5 * box_h
    box_xc = xmin + 0.5 * box_w

    anchor_ymin = anchors[..., 0:1]
    anchor_xmin = anchors[..., 1:2]
    anchor_ymax = anchors[..., 2:3]
    anchor_xmax = anchors[..., 3:4]
    anchor_h = anchor_ymax - anchor_ymin + 1.0
    anchor_w = anchor_xmax - anchor_xmin + 1.0
    anchor_yc = anchor_ymin + 0.5 * anchor_h
    anchor_xc = anchor_xmin + 0.5 * anchor_w

    encoded_dy = (box_yc - anchor_yc) / anchor_h
    encoded_dx = (box_xc - anchor_xc) / anchor_w
    # tf.math.log is the TF2 spelling of the removed tf.log.
    encoded_dh = tf.math.log(box_h / anchor_h)
    encoded_dw = tf.math.log(box_w / anchor_w)

    if weights:
      encoded_dy *= weights[0]
      encoded_dx *= weights[1]
      encoded_dh *= weights[2]
      encoded_dw *= weights[3]

    encoded_boxes = tf.concat(
        [encoded_dy, encoded_dx, encoded_dh, encoded_dw], axis=-1)
    return encoded_boxes
def decode_boxes(encoded_boxes, anchors, weights=None):
  """Decode boxes.

  Args:
    encoded_boxes: a tensor whose last dimension is 4 representing the
      coordinates of encoded boxes in ymin, xmin, ymax, xmax order.
    anchors: a tensor whose shape is the same as, or `broadcastable` to
      `boxes`, representing the coordinates of anchors in ymin, xmin, ymax,
      xmax order.
    weights: None or a list of four float numbers used to scale coordinates.

  Returns:
    decoded_boxes: a tensor whose shape is the same as `encoded_boxes`
      representing the decoded box targets.
  """
  if encoded_boxes.shape[-1] != 4:
    raise ValueError(
        'encoded_boxes.shape[-1] is {:d}, but must be 4.'.format(
            encoded_boxes.shape[-1]))

  with tf.name_scope('decode_boxes'):
    encoded_boxes = tf.cast(encoded_boxes, dtype=anchors.dtype)
    dy = encoded_boxes[..., 0:1]
    dx = encoded_boxes[..., 1:2]
    dh = encoded_boxes[..., 2:3]
    dw = encoded_boxes[..., 3:4]
    if weights:
      dy /= weights[0]
      dx /= weights[1]
      dh /= weights[2]
      dw /= weights[3]
    dh = tf.minimum(dh, BBOX_XFORM_CLIP)
    dw = tf.minimum(dw, BBOX_XFORM_CLIP)

    anchor_ymin = anchors[..., 0:1]
    anchor_xmin = anchors[..., 1:2]
    anchor_ymax = anchors[..., 2:3]
    anchor_xmax = anchors[..., 3:4]
    anchor_h = anchor_ymax - anchor_ymin + 1.0
    anchor_w = anchor_xmax - anchor_xmin + 1.0
    anchor_yc = anchor_ymin + 0.5 * anchor_h
    anchor_xc = anchor_xmin + 0.5 * anchor_w

    decoded_boxes_yc = dy * anchor_h + anchor_yc
    decoded_boxes_xc = dx * anchor_w + anchor_xc
    decoded_boxes_h = tf.exp(dh) * anchor_h
    decoded_boxes_w = tf.exp(dw) * anchor_w

    decoded_boxes_ymin = decoded_boxes_yc - 0.5 * decoded_boxes_h
    decoded_boxes_xmin = decoded_boxes_xc - 0.5 * decoded_boxes_w
    decoded_boxes_ymax = decoded_boxes_ymin + decoded_boxes_h - 1.0
    decoded_boxes_xmax = decoded_boxes_xmin + decoded_boxes_w - 1.0

    decoded_boxes = tf.concat(
        [decoded_boxes_ymin, decoded_boxes_xmin,
         decoded_boxes_ymax, decoded_boxes_xmax],
        axis=-1)
    return decoded_boxes
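# Illustrative round-trip sketch (not part of the original source): with the
# same `weights`, decode_boxes inverts encode_boxes up to floating-point
# error (both use the +1.0 height/width convention above).
#
#   anchors = tf.constant([[0.0, 0.0, 15.0, 15.0]])
#   boxes = tf.constant([[2.0, 2.0, 13.0, 13.0]])
#   targets = encode_boxes(boxes, anchors, weights=[10.0, 10.0, 5.0, 5.0])
#   restored = decode_boxes(targets, anchors, weights=[10.0, 10.0, 5.0, 5.0])
#   # `restored` approximately equals `boxes`.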
def filter_boxes(boxes, scores, image_shape, min_size_threshold):
  """Filter and remove boxes that are too small or fall outside the image.

  Args:
    boxes: a tensor whose last dimension is 4 representing the coordinates
      of boxes in ymin, xmin, ymax, xmax order.
    scores: a tensor whose shape is the same as tf.shape(boxes)[:-1]
      representing the original scores of the boxes.
    image_shape: a tensor whose shape is the same as, or `broadcastable` to
      `boxes` except the last dimension, which is 2, representing
      [height, width] of the scaled image.
    min_size_threshold: a float representing the minimal box size on each
      side (w.r.t. the scaled image). Boxes whose sides are smaller than
      this will be filtered out.

  Returns:
    filtered_boxes: a tensor whose shape is the same as `boxes` but with
      the positions of the filtered boxes filled with 0.
    filtered_scores: a tensor whose shape is the same as `scores` but with
      the positions of the filtered boxes filled with 0.
  """
  if boxes.shape[-1] != 4:
    raise ValueError(
        'boxes.shape[-1] is {:d}, but must be 4.'.format(boxes.shape[-1]))

  with tf.name_scope('filter_boxes'):
    if isinstance(image_shape, list) or isinstance(image_shape, tuple):
      height, width = image_shape
    else:
      image_shape = tf.cast(image_shape, dtype=boxes.dtype)
      height = image_shape[..., 0]
      width = image_shape[..., 1]

    ymin = boxes[..., 0]
    xmin = boxes[..., 1]
    ymax = boxes[..., 2]
    xmax = boxes[..., 3]

    h = ymax - ymin + 1.0
    w = xmax - xmin + 1.0
    yc = ymin + 0.5 * h
    xc = xmin + 0.5 * w

    min_size = tf.cast(
        tf.maximum(min_size_threshold, 1.0), dtype=boxes.dtype)

    filtered_size_mask = tf.logical_and(
        tf.greater(h, min_size), tf.greater(w, min_size))
    filtered_center_mask = tf.logical_and(
        tf.logical_and(tf.greater(yc, 0.0), tf.less(yc, height)),
        tf.logical_and(tf.greater(xc, 0.0), tf.less(xc, width)))
    filtered_mask = tf.logical_and(filtered_size_mask, filtered_center_mask)

    filtered_scores = tf.where(filtered_mask, scores, tf.zeros_like(scores))
    filtered_boxes = tf.cast(
        tf.expand_dims(filtered_mask, axis=-1), dtype=boxes.dtype) * boxes

    return filtered_boxes, filtered_scores
def filter_boxes_by_scores(boxes, scores, min_score_threshold):
  """Filter and remove boxes whose scores are smaller than the threshold.

  Args:
    boxes: a tensor whose last dimension is 4 representing the coordinates
      of boxes in ymin, xmin, ymax, xmax order.
    scores: a tensor whose shape is the same as tf.shape(boxes)[:-1]
      representing the original scores of the boxes.
    min_score_threshold: a float representing the minimal box score
      threshold. Boxes whose scores are smaller than it will be filtered
      out.

  Returns:
    filtered_boxes: a tensor whose shape is the same as `boxes` but with
      the positions of the filtered boxes filled with 0.
    filtered_scores: a tensor whose shape is the same as `scores` but with
      the positions of the filtered boxes filled with 0.
  """
  if boxes.shape[-1] != 4:
    raise ValueError(
        'boxes.shape[-1] is {:d}, but must be 4.'.format(boxes.shape[-1]))

  with tf.name_scope('filter_boxes_by_scores'):
    filtered_mask = tf.greater(scores, min_score_threshold)
    filtered_scores = tf.where(filtered_mask, scores, tf.zeros_like(scores))
    filtered_boxes = tf.cast(
        tf.expand_dims(filtered_mask, axis=-1), dtype=boxes.dtype) * boxes

    return filtered_boxes, filtered_scores
def top_k_boxes(boxes, scores, k):
  """Sort and select top k boxes according to the scores.

  Args:
    boxes: a tensor of shape [batch_size, N, 4] representing the coordinates
      of the boxes. N is the number of boxes per image.
    scores: a tensor of shape [batch_size, N] representing the score of the
      boxes.
    k: an integer or a tensor indicating the top k number.

  Returns:
    selected_boxes: a tensor of shape [batch_size, k, 4] representing the
      selected top k box coordinates.
    selected_scores: a tensor of shape [batch_size, k] representing the
      selected top k box scores.
  """
  with tf.name_scope('top_k_boxes'):
    selected_scores, top_k_indices = tf.nn.top_k(scores, k=k, sorted=True)

    batch_size, _ = scores.get_shape().as_list()
    if batch_size == 1:
      selected_boxes = tf.squeeze(
          tf.gather(boxes, top_k_indices, axis=1), axis=1)
    else:
      top_k_indices_shape = tf.shape(top_k_indices)
      batch_indices = (
          tf.expand_dims(tf.range(top_k_indices_shape[0]), axis=-1) *
          tf.ones([1, top_k_indices_shape[-1]], dtype=tf.int32))
      gather_nd_indices = tf.stack([batch_indices, top_k_indices], axis=-1)
      selected_boxes = tf.gather_nd(boxes, gather_nd_indices)

    return selected_boxes, selected_scores
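# Illustrative sketch (not part of the original source): select the two
# highest-scoring boxes from an assumed batch of one image.
#
#   boxes = tf.constant(
#       [[[0., 0., 1., 1.], [0., 0., 2., 2.], [0., 0., 3., 3.]]])
#   scores = tf.constant([[0.3, 0.9, 0.5]])
#   top_boxes, top_scores = top_k_boxes(boxes, scores, k=2)
#   # top_scores -> [[0.9, 0.5]]; top_boxes -> the two matching boxes.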
def bbox_overlap(boxes, gt_boxes):
  """Calculates the overlap between proposal and ground truth boxes.

  Some `gt_boxes` may have been padded. The returned `iou` tensor for these
  boxes will be -1.

  Args:
    boxes: a tensor with a shape of [batch_size, N, 4]. N is the number of
      proposals before groundtruth assignment (e.g., rpn_post_nms_topn). The
      last dimension is the pixel coordinates in [ymin, xmin, ymax, xmax]
      form.
    gt_boxes: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES, 4].
      This tensor might have paddings with a negative value.

  Returns:
    iou: a tensor with a shape of [batch_size, N, MAX_NUM_INSTANCES].
  """
  with tf.name_scope('bbox_overlap'):
    bb_y_min, bb_x_min, bb_y_max, bb_x_max = tf.split(
        value=boxes, num_or_size_splits=4, axis=2)
    gt_y_min, gt_x_min, gt_y_max, gt_x_max = tf.split(
        value=gt_boxes, num_or_size_splits=4, axis=2)

    # Calculates the intersection area.
    i_xmin = tf.maximum(bb_x_min, tf.transpose(gt_x_min, [0, 2, 1]))
    i_xmax = tf.minimum(bb_x_max, tf.transpose(gt_x_max, [0, 2, 1]))
    i_ymin = tf.maximum(bb_y_min, tf.transpose(gt_y_min, [0, 2, 1]))
    i_ymax = tf.minimum(bb_y_max, tf.transpose(gt_y_max, [0, 2, 1]))
    i_area = (tf.maximum((i_xmax - i_xmin), 0) *
              tf.maximum((i_ymax - i_ymin), 0))

    # Calculates the union area.
    bb_area = (bb_y_max - bb_y_min) * (bb_x_max - bb_x_min)
    gt_area = (gt_y_max - gt_y_min) * (gt_x_max - gt_x_min)
    # Adds a small epsilon to avoid divide-by-zero.
    u_area = bb_area + tf.transpose(gt_area, [0, 2, 1]) - i_area + 1e-8

    # Calculates IoU.
    iou = i_area / u_area

    # Fills -1 for padded ground truth boxes.
    padding_mask = tf.less(i_xmin, tf.zeros_like(i_xmin))
    iou = tf.where(padding_mask, -tf.ones_like(iou), iou)

    return iou
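# Illustrative sketch (not part of the original source): IoU of two proposals
# against a single ground truth box; values are made up for demonstration.
#
#   boxes = tf.constant([[[0.0, 0.0, 10.0, 10.0], [0.0, 0.0, 5.0, 10.0]]])
#   gt_boxes = tf.constant([[[0.0, 0.0, 10.0, 10.0]]])
#   iou = bbox_overlap(boxes, gt_boxes)
#   # iou[0, 0, 0] ~ 1.0 (identical boxes); iou[0, 1, 0] ~ 0.5 (half overlap).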
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/ops/nms_ops.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tensorflow implementation of non max suppression."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# Standard Imports
import tensorflow as tf

from mask_rcnn.ops import box_utils

NMS_TILE_SIZE = 512
def _self_suppression(iou, _, iou_sum):
  # One step of iterative self-suppression within a tile: boxes whose
  # maximum incoming IoU is <= 0.5 survive and may suppress others; the IoU
  # rows of suppressed boxes are cleared so they cannot suppress further.
  batch_size = tf.shape(iou)[0]
  can_suppress_others = tf.cast(
      tf.reshape(tf.reduce_max(iou, 1) <= 0.5, [batch_size, -1, 1]),
      iou.dtype)
  iou_suppressed = tf.reshape(
      tf.cast(tf.reduce_max(can_suppress_others * iou, 1) <= 0.5, iou.dtype),
      [batch_size, -1, 1]) * iou
  iou_sum_new = tf.reduce_sum(iou_suppressed, [1, 2])
  # The enclosing while_loop continues while the total IoU mass changes.
  return [
      iou_suppressed,
      tf.reduce_any(iou_sum - iou_sum_new > 0.5),
      iou_sum_new
  ]
def _cross_suppression(boxes, box_slice, iou_threshold, inner_idx):
  # Suppresses boxes in `box_slice` against an earlier (higher-scoring) tile
  # of `boxes`; suppressed boxes are cleared to all-zero "dots".
  batch_size = tf.shape(boxes)[0]
  new_slice = tf.slice(boxes, [0, inner_idx * NMS_TILE_SIZE, 0],
                       [batch_size, NMS_TILE_SIZE, 4])
  iou = box_utils.bbox_overlap(new_slice, box_slice)
  ret_slice = tf.expand_dims(
      tf.cast(tf.reduce_all(iou < iou_threshold, [1]), box_slice.dtype),
      2) * box_slice
  return boxes, ret_slice, iou_threshold, inner_idx + 1
def _suppression_loop_body(boxes, iou_threshold, output_size, idx):
  """Process boxes in the range [idx*NMS_TILE_SIZE, (idx+1)*NMS_TILE_SIZE).

  Args:
    boxes: a tensor with a shape of [batch_size, anchors, 4].
    iou_threshold: a float representing the threshold for deciding whether
      boxes overlap too much with respect to IOU.
    output_size: an int32 tensor of size [batch_size] representing the
      number of selected boxes for each batch.
    idx: an integer scalar representing the induction variable.

  Returns:
    boxes: updated boxes.
    iou_threshold: pass down iou_threshold to the next iteration.
    output_size: the updated output_size.
    idx: the updated induction variable.
  """
  num_tiles = tf.shape(boxes)[1] // NMS_TILE_SIZE
  batch_size = tf.shape(boxes)[0]

  # Iterates over tiles that can possibly suppress the current tile.
  box_slice = tf.slice(boxes, [0, idx * NMS_TILE_SIZE, 0],
                       [batch_size, NMS_TILE_SIZE, 4])
  _, box_slice, _, _ = tf.while_loop(
      lambda _boxes, _box_slice, _threshold, inner_idx: inner_idx < idx,
      _cross_suppression,
      [boxes, box_slice, iou_threshold, tf.constant(0)])

  # Iterates over the current tile to compute self-suppression.
  iou = box_utils.bbox_overlap(box_slice, box_slice)
  mask = tf.expand_dims(
      tf.reshape(tf.range(NMS_TILE_SIZE), [1, -1]) > tf.reshape(
          tf.range(NMS_TILE_SIZE), [-1, 1]), 0)
  iou *= tf.cast(tf.logical_and(mask, iou >= iou_threshold), iou.dtype)
  suppressed_iou, _, _ = tf.while_loop(
      lambda _iou, loop_condition, _iou_sum: loop_condition,
      _self_suppression,
      [iou, tf.constant(True), tf.reduce_sum(iou, [1, 2])])
  suppressed_box = tf.reduce_sum(suppressed_iou, 1) > 0
  box_slice *= tf.expand_dims(
      1.0 - tf.cast(suppressed_box, box_slice.dtype), 2)

  # Uses box_slice to update the input boxes.
  mask = tf.reshape(
      tf.cast(tf.equal(tf.range(num_tiles), idx), boxes.dtype),
      [1, -1, 1, 1])
  boxes = tf.tile(tf.expand_dims(box_slice, 1),
                  [1, num_tiles, 1, 1]) * mask + tf.reshape(
                      boxes, [batch_size, num_tiles, NMS_TILE_SIZE, 4]) * (
                          1 - mask)
  boxes = tf.reshape(boxes, [batch_size, -1, 4])

  # Updates output_size.
  output_size += tf.reduce_sum(
      tf.cast(tf.reduce_any(box_slice > 0, [2]), tf.int32), [1])
  return boxes, iou_threshold, output_size, idx + 1
def sorted_non_max_suppression_padded(scores, boxes, max_output_size,
                                      iou_threshold):
  """A wrapper that handles non-maximum suppression.

  Assumption:
    * The boxes are sorted by scores unless the box is a dot (all
      coordinates are zero).
    * Boxes with higher scores can be used to suppress boxes with lower
      scores.

  The overall design of the algorithm is to handle boxes tile-by-tile:

    boxes = boxes.pad_to_multiple_of(tile_size)
    num_tiles = len(boxes) // tile_size
    output_boxes = []
    for i in range(num_tiles):
      box_tile = boxes[i*tile_size : (i+1)*tile_size]
      for j in range(i - 1):
        suppressing_tile = boxes[j*tile_size : (j+1)*tile_size]
        iou = bbox_overlap(box_tile, suppressing_tile)
        # if the box is suppressed in iou, clear it to a dot
        box_tile *= _update_boxes(iou)
      # Iteratively handle the diagonal tile.
      iou = _box_overlap(box_tile, box_tile)
      iou_changed = True
      while iou_changed:
        # boxes that are not suppressed by anything else
        suppressing_boxes = _get_suppressing_boxes(iou)
        # boxes that are suppressed by suppressing_boxes
        suppressed_boxes = _get_suppressed_boxes(iou, suppressing_boxes)
        # clear iou to 0 for boxes that are suppressed, as they cannot be
        # used to suppress other boxes any more
        new_iou = _clear_iou(iou, suppressed_boxes)
        iou_changed = (new_iou != iou)
        iou = new_iou
      # remaining boxes that can still suppress others are selected boxes.
      output_boxes.append(_get_suppressing_boxes(iou))
      if len(output_boxes) >= max_output_size:
        break

  Args:
    scores: a tensor with a shape of [batch_size, anchors].
    boxes: a tensor with a shape of [batch_size, anchors, 4].
    max_output_size: a scalar integer `Tensor` representing the maximum
      number of boxes to be selected by non max suppression.
    iou_threshold: a float representing the threshold for deciding whether
      boxes overlap too much with respect to IOU.

  Returns:
    nms_scores: a tensor with a shape of [batch_size, anchors]. It has same
      dtype as input scores.
    nms_proposals: a tensor with a shape of [batch_size, anchors, 4]. It has
      same dtype as input boxes.
  """
  batch_size = tf.shape(boxes)[0]
  num_boxes = tf.shape(boxes)[1]
  # tf.math.ceil is the TF2 spelling of the removed tf.ceil.
  pad = tf.cast(
      tf.math.ceil(tf.cast(num_boxes, tf.float32) / NMS_TILE_SIZE),
      tf.int32) * NMS_TILE_SIZE - num_boxes
  boxes = tf.pad(tf.cast(boxes, tf.float32), [[0, 0], [0, pad], [0, 0]])
  scores = tf.pad(tf.cast(scores, tf.float32), [[0, 0], [0, pad]])
  num_boxes += pad

  def _loop_cond(unused_boxes, unused_threshold, output_size, idx):
    return tf.logical_and(
        tf.reduce_min(output_size) < max_output_size,
        idx < num_boxes // NMS_TILE_SIZE)

  selected_boxes, _, output_size, _ = tf.while_loop(
      _loop_cond, _suppression_loop_body,
      [boxes, iou_threshold, tf.zeros([batch_size], tf.int32),
       tf.constant(0)])

  idx = num_boxes - tf.cast(
      tf.nn.top_k(
          tf.cast(tf.reduce_any(selected_boxes > 0, [2]), tf.int32) *
          tf.expand_dims(tf.range(num_boxes, 0, -1), 0),
          max_output_size)[0],
      tf.int32)
  idx = tf.minimum(idx, num_boxes - 1)
  idx = tf.reshape(
      idx + tf.reshape(tf.range(batch_size) * num_boxes, [-1, 1]), [-1])

  boxes = tf.reshape(
      tf.gather(tf.reshape(boxes, [-1, 4]), idx),
      [batch_size, max_output_size, 4])
  boxes = boxes * tf.cast(
      tf.reshape(tf.range(max_output_size), [1, -1, 1]) < tf.reshape(
          output_size, [-1, 1, 1]), boxes.dtype)
  scores = tf.reshape(
      tf.gather(tf.reshape(scores, [-1, 1]), idx),
      [batch_size, max_output_size])
  scores = scores * tf.cast(
      tf.reshape(tf.range(max_output_size), [1, -1]) < tf.reshape(
          output_size, [-1, 1]), scores.dtype)
  return scores, boxes
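# Illustrative usage sketch (not part of the original source): scores must
# already be sorted in descending order per image, as the function name
# implies. The tensors below are made up for demonstration.
#
#   scores = tf.constant([[0.9, 0.8, 0.1]])
#   boxes = tf.constant([[[0., 0., 10., 10.],
#                         [0., 0., 10., 10.],
#                         [20., 20., 30., 30.]]])
#   nms_scores, nms_boxes = sorted_non_max_suppression_padded(
#       scores, boxes, max_output_size=2, iou_threshold=0.5)
#   # The duplicate second box is suppressed, so the first and third boxes
#   # survive; outputs are zero-padded up to max_output_size.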
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/ops/postprocess_ops.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Ops used to post-process raw detections."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from mask_rcnn.utils import box_utils
def generate_detections_per_image_tpu(cls_outputs,
                                      box_outputs,
                                      anchor_boxes,
                                      image_info,
                                      pre_nms_num_detections=1000,
                                      post_nms_num_detections=100,
                                      nms_threshold=0.3,
                                      bbox_reg_weights=(10., 10., 5., 5.)):
  """Generate the final detections per image given the model outputs.

  Args:
    cls_outputs: a tensor with shape [N, num_classes], which stacks class
      logit outputs on all feature levels. The N is the number of total
      anchors on all levels. The num_classes is the number of classes
      predicted by the model. Note that the cls_outputs should be the output
      of softmax().
    box_outputs: a tensor with shape [N, num_classes*4], which stacks box
      regression outputs on all feature levels. The N is the number of total
      anchors on all levels.
    anchor_boxes: a tensor with shape [N, 4], which stacks anchors on all
      feature levels. The N is the number of total anchors on all levels.
    image_info: a tensor of shape [5] which encodes the input image's
      [height, width, scale, original_height, original_width].
    pre_nms_num_detections: an integer that specifies the number of
      candidates before NMS.
    post_nms_num_detections: an integer that specifies the number of
      candidates after NMS.
    nms_threshold: a float number to specify the IOU threshold of NMS.
    bbox_reg_weights: a list of 4 float scalars, which are default weights
      on (dx, dy, dw, dh) for normalizing bbox regression targets.

  Returns:
    detections: a tuple of tensors corresponding to the number of valid
      boxes, box coordinates, object categories for each box, and box
      scores -- respectively.
  """
  num_boxes, num_classes = cls_outputs.get_shape().as_list()

  # Remove background class scores.
  cls_outputs = cls_outputs[:, 1:num_classes]
  top_k_scores, top_k_indices_with_classes = tf.nn.top_k(
      tf.reshape(cls_outputs, [-1]), k=pre_nms_num_detections, sorted=False)
  classes = tf.math.mod(top_k_indices_with_classes, num_classes - 1)
  top_k_indices = tf.math.floordiv(top_k_indices_with_classes,
                                   num_classes - 1)

  anchor_boxes = tf.gather(anchor_boxes, top_k_indices)
  box_outputs = tf.reshape(box_outputs,
                           [num_boxes, num_classes, 4])[:, 1:num_classes, :]
  class_indices = classes
  box_outputs = tf.gather_nd(
      box_outputs, tf.stack([top_k_indices, class_indices], axis=1))

  # apply bounding box regression to anchors
  boxes = box_utils.decode_boxes(box_outputs, anchor_boxes, bbox_reg_weights)
  boxes = box_utils.clip_boxes(boxes, image_info[0], image_info[1])

  list_of_all_boxes = []
  list_of_all_scores = []
  list_of_all_classes = []
  # Skip background class.
  for class_i in range(num_classes):
    # Compute bitmask for the given classes.
    class_i_bitmask = tf.cast(tf.equal(classes, class_i),
                              top_k_scores.dtype)
    # This works because score is in [0, 1].
    class_i_scores = top_k_scores * class_i_bitmask
    # The TPU and CPU have different behaviors for
    # tf.image.non_max_suppression_padded (b/116754376).
    class_i_post_nms_indices, class_i_nms_num_valid = (
        tf.image.non_max_suppression_padded(
            tf.cast(boxes, dtype=tf.float32),
            tf.cast(class_i_scores, dtype=tf.float32),
            post_nms_num_detections,
            iou_threshold=nms_threshold,
            score_threshold=0.05,
            pad_to_max_output_size=True,
            name='nms_detections_' + str(class_i)))
    class_i_post_nms_boxes = tf.gather(boxes, class_i_post_nms_indices)
    class_i_post_nms_scores = tf.gather(class_i_scores,
                                        class_i_post_nms_indices)
    mask = tf.less(tf.range(post_nms_num_detections),
                   [class_i_nms_num_valid])
    class_i_post_nms_scores = tf.where(
        mask, class_i_post_nms_scores,
        tf.zeros_like(class_i_post_nms_scores))
    class_i_classes = tf.fill(
        tf.shape(input=class_i_post_nms_scores), class_i + 1)
    list_of_all_boxes.append(class_i_post_nms_boxes)
    list_of_all_scores.append(class_i_post_nms_scores)
    list_of_all_classes.append(class_i_classes)

  post_nms_boxes = tf.concat(list_of_all_boxes, axis=0)
  post_nms_scores = tf.concat(list_of_all_scores, axis=0)
  post_nms_classes = tf.concat(list_of_all_classes, axis=0)

  # sort all results.
  post_nms_scores, sorted_indices = tf.nn.top_k(
      tf.cast(post_nms_scores, dtype=tf.float32),
      k=post_nms_num_detections,
      sorted=True)

  post_nms_boxes = tf.gather(post_nms_boxes, sorted_indices)
  post_nms_classes = tf.gather(post_nms_classes, sorted_indices)

  valid_mask = tf.where(
      tf.greater(post_nms_scores, 0), tf.ones_like(post_nms_scores),
      tf.zeros_like(post_nms_scores))
  num_valid_boxes = tf.reduce_sum(input_tensor=valid_mask, axis=-1)
  box_classes = tf.cast(post_nms_classes, dtype=tf.float32)
  return num_valid_boxes, post_nms_boxes, box_classes, post_nms_scores
def generate_detections_tpu(class_outputs,
                            box_outputs,
                            anchor_boxes,
                            image_info,
                            pre_nms_num_detections=1000,
                            post_nms_num_detections=100,
                            nms_threshold=0.3,
                            bbox_reg_weights=(10., 10., 5., 5.)):
  """Generate the final detections given the model outputs (TPU version).

  Args:
    class_outputs: a tensor with shape [batch_size, N, num_classes], which
      stacks class logit outputs on all feature levels. The N is the number
      of total anchors on all levels. The num_classes is the number of
      classes predicted by the model. Note that the class_outputs here is
      the raw score.
    box_outputs: a tensor with shape [batch_size, N, num_classes*4], which
      stacks box regression outputs on all feature levels. The N is the
      number of total anchors on all levels.
    anchor_boxes: a tensor with shape [batch_size, N, 4], which stacks
      anchors on all feature levels. The N is the number of total anchors on
      all levels.
    image_info: a tensor of shape [batch_size, 5] which encodes each image's
      [height, width, scale, original_height, original_width].
    pre_nms_num_detections: an integer that specifies the number of
      candidates before NMS.
    post_nms_num_detections: an integer that specifies the number of
      candidates after NMS.
    nms_threshold: a float number to specify the IOU threshold of NMS.
    bbox_reg_weights: a list of 4 float scalars, which are default weights
      on (dx, dy, dw, dh) for normalizing bbox regression targets.

  Returns:
    a tuple of tensors corresponding to the number of valid boxes, box
    coordinates, object categories for each box, and box scores, stacked
    along the batch dimension.
  """
  with tf.name_scope('generate_detections'):
    batch_size, _, _ = class_outputs.get_shape().as_list()
    softmax_class_outputs = tf.nn.softmax(class_outputs)

    num_valid_boxes, box_coordinates, box_classes, box_scores = ([], [], [],
                                                                 [])
    for i in range(batch_size):
      result = generate_detections_per_image_tpu(
          softmax_class_outputs[i], box_outputs[i], anchor_boxes[i],
          image_info[i], pre_nms_num_detections, post_nms_num_detections,
          nms_threshold, bbox_reg_weights)

      num_valid_boxes.append(result[0])
      box_coordinates.append(result[1])
      box_classes.append(result[2])
      box_scores.append(result[3])

    num_valid_boxes = tf.stack(num_valid_boxes)
    box_coordinates = tf.stack(box_coordinates)
    box_classes = tf.stack(box_classes)
    box_scores = tf.stack(box_scores)

    return num_valid_boxes, box_coordinates, box_classes, box_scores
def generate_detections_gpu(class_outputs,
                            box_outputs,
                            anchor_boxes,
                            image_info,
                            pre_nms_num_detections=1000,
                            post_nms_num_detections=100,
                            nms_threshold=0.3,
                            bbox_reg_weights=(10., 10., 5., 5.)):
  """Generate the final detections given the model outputs (GPU version).

  Args:
    class_outputs: a tensor with shape [batch_size, N, num_classes], which
      stacks class logit outputs on all feature levels. The N is the number
      of total anchors on all levels. The num_classes is the number of
      classes predicted by the model. Note that the class_outputs here is
      the raw score.
    box_outputs: a tensor with shape [batch_size, N, num_classes*4], which
      stacks box regression outputs on all feature levels. The N is the
      number of total anchors on all levels.
    anchor_boxes: a tensor with shape [batch_size, N, 4], which stacks
      anchors on all feature levels. The N is the number of total anchors on
      all levels.
    image_info: a tensor of shape [batch_size, 5] which encodes each image's
      [height, width, scale, original_height, original_width].
    pre_nms_num_detections: an integer that specifies the number of
      candidates before NMS.
    post_nms_num_detections: an integer that specifies the number of
      candidates after NMS.
    nms_threshold: a float number to specify the IOU threshold of NMS.
    bbox_reg_weights: a list of 4 float scalars, which are default weights
      on (dx, dy, dw, dh) for normalizing bbox regression targets.

  Returns:
    a tuple of tensors corresponding to the number of valid boxes, box
    coordinates, object categories for each box, and box scores, stacked
    along the batch dimension.
  """
  with tf.name_scope('generate_detections'):
    batch_size, num_boxes, num_classes = class_outputs.get_shape().as_list()
    softmax_class_outputs = tf.nn.softmax(class_outputs)

    # Remove background
    scores = tf.slice(softmax_class_outputs, [0, 0, 1], [-1, -1, -1])
    boxes = tf.slice(
        tf.reshape(box_outputs, [batch_size, num_boxes, num_classes, 4]),
        [0, 0, 1, 0], [-1, -1, -1, -1])

    anchor_boxes = tf.expand_dims(anchor_boxes, axis=2) * tf.ones(
        [1, 1, num_classes - 1, 1])

    num_detections = num_boxes * (num_classes - 1)

    boxes = tf.reshape(boxes, [batch_size, num_detections, 4])
    scores = tf.reshape(scores, [batch_size, num_detections, 1])
    anchor_boxes = tf.reshape(anchor_boxes, [batch_size, num_detections, 4])

    # Decode
    boxes = box_utils.decode_boxes(boxes, anchor_boxes, bbox_reg_weights)

    # Clip boxes
    height = tf.expand_dims(image_info[:, 0:1], axis=-1)
    width = tf.expand_dims(image_info[:, 1:2], axis=-1)
    boxes = box_utils.clip_boxes(boxes, height, width)

    # NMS
    pre_nms_boxes = box_utils.to_normalized_coordinates(boxes, height, width)
    pre_nms_boxes = tf.reshape(pre_nms_boxes,
                               [batch_size, num_boxes, num_classes - 1, 4])
    pre_nms_scores = tf.reshape(scores,
                                [batch_size, num_boxes, num_classes - 1])

    (post_nms_boxes, post_nms_scores, post_nms_classes,
     post_nms_num_valid_boxes) = tf.image.combined_non_max_suppression(
         pre_nms_boxes,
         pre_nms_scores,
         max_output_size_per_class=pre_nms_num_detections,
         max_total_size=post_nms_num_detections,
         iou_threshold=nms_threshold,
         score_threshold=0.0,
         pad_per_class=False)

    post_nms_classes = post_nms_classes + 1
    post_nms_boxes = box_utils.to_absolute_coordinates(
        post_nms_boxes, height, width)

    return (post_nms_num_valid_boxes, post_nms_boxes,
            tf.cast(post_nms_classes, dtype=tf.float32), post_nms_scores)
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/ops/preprocess_ops.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Preprocessing ops."""
import math

import tensorflow as tf

from mask_rcnn.object_detection import preprocessor
def normalize_image(image):
  """Normalize the image.

  Args:
    image: a tensor of shape [height, width, 3] in dtype=tf.float32.

  Returns:
    normalized_image: a tensor which has the same shape and dtype as image,
      with pixel values normalized.
  """
  offset = tf.constant([0.485, 0.456, 0.406])
  offset = tf.reshape(offset, shape=(1, 1, 3))

  scale = tf.constant([0.229, 0.224, 0.225])
  scale = tf.reshape(scale, shape=(1, 1, 3))

  normalized_image = (image - offset) / scale
  return normalized_image
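# Illustrative sketch (not part of the original source): the offset/scale
# constants above are the standard ImageNet per-channel mean and standard
# deviation, so the input is expected to hold pixel values in [0, 1].
#
#   image = tf.random.uniform([480, 640, 3])  # values in [0, 1]
#   normalized = normalize_image(image)       # roughly zero-mean per channel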
def random_horizontal_flip(image, boxes=None, masks=None, seed=None):
  """Randomly flips the image, boxes, and masks horizontally.

  Args:
    image: a tensor of shape [height, width, 3] representing the image.
    boxes: (Optional) a tensor of shape [num_boxes, 4] representing the box
      corners in normalized coordinates.
    masks: (Optional) a tensor of shape [num_masks, height, width]
      representing the object masks. Note that the size of the mask is the
      same as the image.
    seed: (Optional) an integer seed for the random flip decision.

  Returns:
    image: the processed image tensor after being randomly flipped.
    boxes: None or the processed box tensor after being randomly flipped.
    masks: None or the processed mask tensor after being randomly flipped.
  """
  return preprocessor.random_horizontal_flip(image, boxes, masks, seed=seed)
def resize_and_pad(image, target_size, stride, boxes=None, masks=None):
  """Resize and pad images, boxes and masks.

  Resize and pad images (optionally boxes and masks) given the desired
  output size of the image and the stride size.

  Here are the preprocessing steps.
  1. For a given image, keep its aspect ratio and rescale the image to make
     it the largest rectangle to be bounded by the rectangle specified by
     the `target_size`.
  2. Pad the rescaled image such that the height and width of the image
     become the smallest multiple of the stride that is larger or equal to
     the desired output dimension.

  Args:
    image: an image tensor of shape [original_height, original_width, 3].
    target_size: a tuple of two integers indicating the desired output
      image size. Note that the actual output size could be different from
      this.
    stride: the stride of the backbone network. Each of the output image
      sides must be a multiple of this.
    boxes: (Optional) a tensor of shape [num_boxes, 4] representing the box
      corners in normalized coordinates.
    masks: (Optional) a tensor of shape [num_masks, height, width]
      representing the object masks. Note that the size of the mask is the
      same as the image.

  Returns:
    image: the processed image tensor after being resized and padded.
    image_info: a tensor of shape [5] which encodes the height, width before
      and after resizing and the scaling factor.
    boxes: None or the processed box tensor after being resized and padded.
      After the processing, boxes will be in the absolute coordinates w.r.t.
      the scaled image.
    masks: None or the processed mask tensor after being resized and padded.
  """
  input_height, input_width, _ = tf.unstack(
      tf.cast(tf.shape(input=image), dtype=tf.float32), axis=0)

  target_height, target_width = target_size

  scale_if_resize_height = target_height / input_height
  scale_if_resize_width = target_width / input_width
  scale = tf.minimum(scale_if_resize_height, scale_if_resize_width)

  scaled_height = tf.cast(scale * input_height, dtype=tf.int32)
  scaled_width = tf.cast(scale * input_width, dtype=tf.int32)

  image = tf.image.resize(image, [scaled_height, scaled_width],
                          method=tf.image.ResizeMethod.BILINEAR)

  padded_height = int(math.ceil(target_height * 1.0 / stride) * stride)
  padded_width = int(math.ceil(target_width * 1.0 / stride) * stride)

  image = tf.image.pad_to_bounding_box(image, 0, 0, padded_height,
                                       padded_width)
  image.set_shape([padded_height, padded_width, 3])

  image_info = tf.stack([
      tf.cast(scaled_height, dtype=tf.float32),
      tf.cast(scaled_width, dtype=tf.float32),
      1.0 / scale,
      input_height,
      input_width
  ])

  if boxes is not None:
    normalized_box_list = preprocessor.box_list.BoxList(boxes)
    scaled_boxes = preprocessor.box_list_scale(
        normalized_box_list, scaled_height, scaled_width).get()
  else:
    scaled_boxes = None

  if masks is not None:
    scaled_masks = tf.image.resize(
        tf.expand_dims(masks, -1), [scaled_height, scaled_width],
        method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)

    # Check if there is any instance in this image or not.
    num_masks = tf.shape(input=scaled_masks)[0]
    scaled_masks = tf.cond(
        pred=tf.greater(num_masks, 0),
        true_fn=lambda: tf.image.pad_to_bounding_box(
            scaled_masks, 0, 0, padded_height, padded_width),
        false_fn=lambda: tf.zeros([0, padded_height, padded_width, 1]))
  else:
    scaled_masks = None

  return image, image_info, scaled_boxes, scaled_masks
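# Illustrative numeric sketch (not part of the original source): for an
# assumed 256x512 input with target_size=(1024, 1024) and stride=32, the
# scale is min(1024/256, 1024/512) = 2.0, giving a 512x1024 resized image
# padded to 1024x1024 (the smallest stride multiple covering the target).
#
#   image = tf.zeros([256, 512, 3])
#   image, image_info, _, _ = resize_and_pad(image, (1024, 1024), 32)
#   # image.shape -> [1024, 1024, 3]
#   # image_info  -> [512.0, 1024.0, 0.5, 256.0, 512.0]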
def crop_gt_masks(instance_masks, boxes, gt_mask_size, image_size):
  """Crops the ground truth binary masks and resizes to fixed-size masks."""
  num_masks = tf.shape(input=instance_masks)[0]
  scale_sizes = tf.convert_to_tensor(
      value=[image_size[0], image_size[1]] * 2, dtype=tf.float32)
  boxes = boxes / scale_sizes
  cropped_gt_masks = tf.image.crop_and_resize(
      image=instance_masks,
      boxes=boxes,
      box_indices=tf.range(num_masks, dtype=tf.int32),
      crop_size=[gt_mask_size, gt_mask_size],
      method='bilinear')[:, :, :, 0]
  cropped_gt_masks = tf.pad(
      tensor=cropped_gt_masks,
      paddings=tf.constant([[0, 0], [2, 2], [2, 2]]),
      mode='CONSTANT',
      constant_values=0.)
  return cropped_gt_masks
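# Illustrative sketch (not part of the original source): boxes are given in
# absolute pixel coordinates of the assumed image_size, and each cropped
# gt_mask_size x gt_mask_size mask gains a 2-pixel zero border.
#
#   masks = tf.ones([2, 512, 512, 1])
#   boxes = tf.constant([[10., 10., 100., 100.], [50., 50., 200., 200.]])
#   crops = crop_gt_masks(masks, boxes, gt_mask_size=28, image_size=(512, 512))
#   # crops.shape -> [2, 32, 32]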
def pad_to_fixed_size(data, pad_value, output_shape):
  """Pad data to a fixed length at the first dimension.

  Args:
    data: Tensor to be padded to output_shape.
    pad_value: A constant value assigned to the paddings.
    output_shape: The output shape of a 2D tensor.

  Returns:
    The padded tensor with output_shape [max_num_instances, dimension].
  """
  max_num_instances = output_shape[0]
  dimension = output_shape[1]
  data = tf.reshape(data, [-1, dimension])
  num_instances = tf.shape(input=data)[0]
  pad_length = max_num_instances - num_instances
  paddings = pad_value * tf.ones([pad_length, dimension])
  padded_data = tf.reshape(
      tf.concat([data, paddings], axis=0), output_shape)
  return padded_data
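# Illustrative sketch (not part of the original source): pad three boxes up
# to an assumed maximum of five instances, using -1 as the padding value.
#
#   boxes = tf.ones([3, 4])
#   padded = pad_to_fixed_size(boxes, pad_value=-1, output_shape=[5, 4])
#   # padded.shape -> [5, 4]; the last two rows are filled with -1.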
TensorFlow2x/ComputeVision/Detection/MaskRCNN/mask_rcnn/ops/roi_ops.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""ROI-related ops."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from mask_rcnn.utils.logging_formatter import logging
# from absl import logging

from mask_rcnn.utils import box_utils
from mask_rcnn.ops import nms_ops


# TODO: Remove when Batched NMS stops leading to eval metrics being all 0.
def _propose_rois_tpu(scores,
                      boxes,
                      anchor_boxes,
                      height,
                      width,
                      scale,
                      rpn_pre_nms_topn,
                      rpn_post_nms_topn,
                      rpn_nms_threshold,
                      rpn_min_size,
                      bbox_reg_weights):
  """Proposes RoIs given a group of candidates (TPU version).

  Args:
    scores: a tensor with a shape of [batch_size, num_boxes].
    boxes: a tensor with a shape of [batch_size, num_boxes, 4],
      in the encoded form.
    anchor_boxes: a tensor with a shape of [batch_size, num_boxes, 4]
      containing the anchor boxes.
    height: a tensor of shape [batch_size, 1, 1] representing the image
      height.
    width: a tensor of shape [batch_size, 1, 1] representing the image
      width.
    scale: a tensor of shape [batch_size, 1, 1] representing the image
      scale.
    rpn_pre_nms_topn: an integer number of top scoring RPN proposals to keep
      before applying NMS. This is *per FPN level* (not total).
    rpn_post_nms_topn: an integer number of top scoring RPN proposals to
      keep after applying NMS. This is the total number of RPN proposals
      produced.
    rpn_nms_threshold: a float number between 0 and 1 as the NMS threshold
      used on RPN proposals.
    rpn_min_size: an integer number as the minimum proposal height and
      width; both need to be greater than this number. (Note that this
      number is at the original image scale, not the scale used during
      training or inference.)
    bbox_reg_weights: None or a list of four numbers specifying the weights
      used when decoding the box.

  Returns:
    scores: a tensor with a shape of [batch_size, rpn_post_nms_topn, 1]
      representing the scores of the proposals. It has same dtype as input
      scores.
    boxes: a tensor with a shape of [batch_size, rpn_post_nms_topn, 4]
      representing the boxes of the proposals. The boxes are in normalized
      coordinates with a form of [ymin, xmin, ymax, xmax]. It has same dtype
      as input boxes.
  """
  _, num_boxes = scores.get_shape().as_list()

  topk_limit = num_boxes if num_boxes < rpn_pre_nms_topn else rpn_pre_nms_topn
  scores, boxes_list = box_utils.top_k(
      scores, k=topk_limit, boxes_list=[boxes, anchor_boxes])
  boxes = boxes_list[0]
  anchor_boxes = boxes_list[1]

  # Decode boxes w.r.t. anchors and transform to the absolute coordinates.
  boxes = box_utils.decode_boxes(boxes, anchor_boxes, bbox_reg_weights)

  # Clip boxes that exceed the boundary.
  boxes = box_utils.clip_boxes(boxes, height, width)

  # Filter boxes where one side is less than the rpn_min_size threshold.
  boxes, scores = box_utils.filter_boxes(
      boxes, tf.expand_dims(scores, axis=-1),
      rpn_min_size, height, width, scale)
  scores = tf.squeeze(scores, axis=-1)

  post_nms_topk_limit = (
      topk_limit if topk_limit < rpn_post_nms_topn else rpn_post_nms_topn)

  # NMS.
  if rpn_nms_threshold > 0:
    scores, boxes = box_utils.sorted_non_max_suppression_padded(
        scores, boxes,
        max_output_size=post_nms_topk_limit,
        iou_threshold=rpn_nms_threshold)

  # Pick top-K post NMS'ed boxes.
  scores, boxes = box_utils.top_k(
      scores, k=post_nms_topk_limit, boxes_list=[boxes])
  boxes = boxes[0]
  return scores, boxes
def _propose_rois_gpu(scores,
                      boxes,
                      anchor_boxes,
                      height,
                      width,
                      scale,
                      rpn_pre_nms_topn,
                      rpn_post_nms_topn,
                      rpn_nms_threshold,
                      rpn_min_size,
                      bbox_reg_weights):
  """Proposes RoIs given a group of candidates (GPU version).

  Args:
    scores: a tensor with a shape of [batch_size, num_boxes].
    boxes: a tensor with a shape of [batch_size, num_boxes, 4],
      in the encoded form.
    anchor_boxes: a tensor with a shape of [batch_size, num_boxes, 4]
      containing the anchor boxes.
    height: a tensor of shape [batch_size, 1, 1] representing the image
      height.
    width: a tensor of shape [batch_size, 1, 1] representing the image
      width.
    scale: a tensor of shape [batch_size, 1, 1] representing the image
      scale.
    rpn_pre_nms_topn: an integer number of top scoring RPN proposals to keep
      before applying NMS. This is *per FPN level* (not total).
    rpn_post_nms_topn: an integer number of top scoring RPN proposals to
      keep after applying NMS. This is the total number of RPN proposals
      produced.
    rpn_nms_threshold: a float number between 0 and 1 as the NMS threshold
      used on RPN proposals.
    rpn_min_size: an integer number as the minimum proposal height and
      width; both need to be greater than this number. (Note that this
      number is at the original image scale, not the scale used during
      training or inference.)
    bbox_reg_weights: None or a list of four numbers specifying the weights
      used when decoding the box.

  Returns:
    scores: a tensor with a shape of [batch_size, rpn_post_nms_topn, 1]
      representing the scores of the proposals. It has same dtype as input
      scores.
    boxes: a tensor with a shape of [batch_size, rpn_post_nms_topn, 4]
      representing the boxes of the proposals. The boxes are in normalized
      coordinates with a form of [ymin, xmin, ymax, xmax]. It has same dtype
      as input boxes.
  """
  batch_size, num_boxes = scores.get_shape().as_list()

  topk_limit = min(num_boxes, rpn_pre_nms_topn)

  boxes = box_utils.decode_boxes(boxes, anchor_boxes, bbox_reg_weights)
  boxes = box_utils.clip_boxes(boxes, height, width)

  if rpn_min_size > 0.0:
    boxes, scores = box_utils.filter_boxes(
        boxes, tf.expand_dims(scores, axis=-1),
        rpn_min_size, height, width, scale)
    scores = tf.squeeze(scores, axis=-1)

  post_nms_topk_limit = (
      topk_limit if topk_limit < rpn_post_nms_topn else rpn_post_nms_topn)

  if rpn_nms_threshold > 0:
    # Normalize coordinates as combined_non_max_suppression currently
    # only supports normalized coordinates.
    pre_nms_boxes = box_utils.to_normalized_coordinates(boxes, height, width)
    pre_nms_boxes = tf.reshape(pre_nms_boxes, [batch_size, num_boxes, 1, 4])
    pre_nms_scores = tf.reshape(scores, [batch_size, num_boxes, 1])

    with tf.device('CPU:0'):
      boxes, scores, _, _ = tf.image.combined_non_max_suppression(
          pre_nms_boxes,
          pre_nms_scores,
          max_output_size_per_class=topk_limit,
          max_total_size=post_nms_topk_limit,
          iou_threshold=rpn_nms_threshold,
          score_threshold=0.0,
          pad_per_class=False)

    boxes = box_utils.to_absolute_coordinates(boxes, height, width)
  else:
    scores, boxes = box_utils.top_k(
        scores, k=post_nms_topk_limit, boxes_list=[boxes])
    boxes = boxes[0]

  return scores, boxes
def multilevel_propose_rois(scores_outputs,
                            box_outputs,
                            all_anchors,
                            image_info,
                            rpn_pre_nms_topn,
                            rpn_post_nms_topn,
                            rpn_nms_threshold,
                            rpn_min_size,
                            bbox_reg_weights,
                            use_batched_nms=False):
  """Proposes RoIs given a group of candidates from different FPN levels.

  Args:
    scores_outputs: an OrderedDict with keys representing levels and values
      representing logits in [batch_size, height, width, num_anchors].
    box_outputs: an OrderedDict with keys representing levels and values
      representing box regression targets in
      [batch_size, height, width, num_anchors * 4].
    all_anchors: an Anchors object that contains all the anchors.
    image_info: a tensor of shape [batch_size, 5] whose columns encode the
      input image's [height, width, scale, original_height, original_width].
      Height and width are for the input to the network, not the original
      image; scale is the scale factor used to scale the network input size
      to the original image size. See dataloader.DetectionInputProcessor for
      details.
    rpn_pre_nms_topn: an integer number of top scoring RPN proposals to keep
      before applying NMS. This is *per FPN level* (not total).
    rpn_post_nms_topn: an integer number of top scoring RPN proposals to
      keep after applying NMS. This is the total number of RPN proposals
      produced.
    rpn_nms_threshold: a float number between 0 and 1 as the NMS threshold
      used on RPN proposals.
    rpn_min_size: an integer number as the minimum proposal height and
      width; both need to be greater than this number. (Note that this
      number is at the original image scale, not the scale used during
      training or inference.)
    bbox_reg_weights: None or a list of four numbers specifying the weights
      used when decoding the box.
    use_batched_nms: whether to use batched NMS. The batched NMS uses
      tf.image.combined_non_max_suppression, which is only available for
      CPU/GPU.

  Returns:
    scores: a tensor with a shape of [batch_size, rpn_post_nms_topn, 1]
      representing the scores of the proposals.
    rois: a tensor with a shape of [batch_size, rpn_post_nms_topn, 4]
      representing the boxes of the proposals. The boxes are in normalized
      coordinates with a form of [ymin, xmin, ymax, xmax].
  """
  with tf.name_scope('multilevel_propose_rois'):
    levels = scores_outputs.keys()
    scores = []
    rois = []
    anchor_boxes = all_anchors.get_unpacked_boxes()

    height = tf.expand_dims(image_info[:, 0:1], axis=-1)
    width = tf.expand_dims(image_info[:, 1:2], axis=-1)
    scale = tf.expand_dims(image_info[:, 2:3], axis=-1)

    for level in levels:
      with tf.name_scope('level_%d' % level) as scope:
        batch_size, feature_h, feature_w, num_anchors_per_location = (
            scores_outputs[level].get_shape().as_list())
        num_boxes = feature_h * feature_w * num_anchors_per_location
        this_level_scores = tf.reshape(scores_outputs[level],
                                       [batch_size, num_boxes])
        this_level_scores = tf.sigmoid(this_level_scores)
        this_level_boxes = tf.reshape(box_outputs[level],
                                      [batch_size, num_boxes, 4])
        this_level_anchors = tf.cast(
            tf.reshape(
                tf.expand_dims(anchor_boxes[level], axis=0) *
                tf.ones([batch_size, 1, 1, 1]),
                [batch_size, num_boxes, 4]),
            dtype=this_level_scores.dtype)

        # TODO: Remove when Batched NMS stops leading to eval metrics being
        # all 0. (Commented out because scope no longer exists.)
        if use_batched_nms:
          logging.info("[ROI OPs] Using Batched NMS... Scope: %s" % scope)
          propose_rois_fn = _propose_rois_gpu
        else:
          logging.debug(
              "[ROI OPs] Not Using Batched NMS... Scope: %s" % scope)
          propose_rois_fn = _propose_rois_tpu

        this_level_scores, this_level_boxes = propose_rois_fn(
            this_level_scores, this_level_boxes, this_level_anchors,
            height, width, scale,
            rpn_pre_nms_topn, rpn_post_nms_topn, rpn_nms_threshold,
            rpn_min_size, bbox_reg_weights)

        scores.append(this_level_scores)
        rois.append(this_level_boxes)

    scores = tf.concat(scores, axis=1)
    rois = tf.concat(rois, axis=1)

    with tf.name_scope('roi_post_nms_topk'):
      post_nms_num_anchors = scores.shape[1]
      post_nms_topk_limit = min(post_nms_num_anchors, rpn_post_nms_topn)

      top_k_scores, top_k_rois = box_utils.top_k(
          scores, k=post_nms_topk_limit, boxes_list=[rois])
      top_k_rois = top_k_rois[0]

    return top_k_scores, top_k_rois
def custom_multilevel_propose_rois(scores_outputs,
                                   box_outputs,
                                   all_anchors,
                                   image_info,
                                   rpn_pre_nms_topn,
                                   rpn_post_nms_topn,
                                   rpn_nms_threshold,
                                   rpn_min_size):
  """Proposes RoIs for the second stage nets.

  This proposal op performs the following operations.
  1. propose rois at each level.
  2. collect all proposals.
  3. keep rpn_post_nms_topn proposals by their sorted scores from the
     highest to the lowest.

  Reference:
  https://github.com/facebookresearch/Detectron/blob/master/detectron/ops/collect_and_distribute_fpn_rpn_proposals.py

  Args:
    scores_outputs: an OrderedDict with keys representing levels and values
      representing logits in [batch_size, height, width, num_anchors].
    box_outputs: an OrderedDict with keys representing levels and values
      representing box regression targets in
      [batch_size, height, width, num_anchors * 4].
    all_anchors: an Anchors object that contains all the anchors.
    image_info: a tensor of shape [batch_size, 5] whose columns encode the
      input image's [height, width, scale, original_height, original_width].
      Height and width are for the input to the network, not the original
      image; scale is the scale factor used to scale the network input size
      to the original image size. See dataloader.DetectionInputProcessor for
      details.
    rpn_pre_nms_topn: an integer number of top scoring RPN proposals to keep
      before applying NMS. This is *per FPN level* (not total).
    rpn_post_nms_topn: an integer number of top scoring RPN proposals to
      keep after applying NMS. This is the total number of RPN proposals
      produced.
    rpn_nms_threshold: a float number between 0 and 1 as the NMS threshold
      used on RPN proposals.
    rpn_min_size: an integer number as the minimum proposal height and
      width; both need to be greater than this number. (Note that this
      number is at the original image scale, not the scale used during
      training or inference.)

  Returns:
    scores: a tensor with a shape of [batch_size, rpn_post_nms_topn, 1]
      representing the scores of the proposals.
    rois: a tensor with a shape of [batch_size, rpn_post_nms_topn, 4]
      representing the boxes of the proposals. The boxes are in normalized
      coordinates with a form of [ymin, xmin, ymax, xmax].
  """
  with tf.name_scope('proposal'):
    levels = scores_outputs.keys()
    scores = []
    rois = []
    anchor_boxes = all_anchors.get_unpacked_boxes()

    for level in levels:
      # Expands the batch dimension for anchors as anchors do not have a
      # batch dimension. Note that batch_size is invariant across levels.
      # batch_size = scores_outputs[level].shape[0]
      # anchor_boxes_batch = tf.cast(
      #     tf.tile(tf.expand_dims(anchor_boxes[level], axis=0),
      #             [batch_size, 1, 1, 1]),
      #     dtype=scores_outputs[level].dtype)
      logging.debug(
          "[ROI OPs] Using GenerateBoxProposals op... Scope: proposal_%s"
          % level)
      boxes_per_level, scores_per_level = tf.generate_bounding_box_proposals(
          scores=tf.reshape(tf.sigmoid(scores_outputs[level]),
                            scores_outputs[level].shape),
          bbox_deltas=box_outputs[level],
          image_info=image_info,
          anchors=anchor_boxes[level],
          pre_nms_topn=rpn_pre_nms_topn,
          post_nms_topn=rpn_post_nms_topn,
          nms_threshold=rpn_nms_threshold,
          min_size=rpn_min_size,
          name="proposal_%s" % level)

      scores.append(scores_per_level)
      rois.append(boxes_per_level)

      # a, b = _proposal_op_per_level(
      #     scores_outputs[level], box_outputs[level], anchor_boxes_batch,
      #     image_info, rpn_pre_nms_topn, rpn_post_nms_topn,
      #     rpn_nms_threshold, rpn_min_size, level)

    scores = tf.concat(scores, axis=1)
    rois = tf.concat(rois, axis=1)

    with tf.name_scope('post_nms_topk'):
      # Selects the top-k rois, k being rpn_post_nms_topn or the number of
      # total anchors after non-max suppression.
      post_nms_num_anchors = scores.shape[1]
      post_nms_topk_limit = (post_nms_num_anchors
                             if post_nms_num_anchors < rpn_post_nms_topn
                             else rpn_post_nms_topn)

      top_k_scores, top_k_rois = box_utils.top_k(
          scores, k=post_nms_topk_limit, boxes_list=[rois])
      top_k_rois = top_k_rois[0]

    top_k_scores = tf.stop_gradient(top_k_scores)
    top_k_rois = tf.stop_gradient(top_k_rois)

    return top_k_scores, top_k_rois