Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
0ff25f6b
Commit
0ff25f6b
authored
Nov 24, 2020
by
A. Unique TensorFlower
Browse files
Internal change
PiperOrigin-RevId: 344134923
parent
5460577d
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
882 additions
and
0 deletions
+882
-0
official/vision/detection/dataloader/anchor.py
official/vision/detection/dataloader/anchor.py
+166
-0
official/vision/detection/dataloader/factory.py
official/vision/detection/dataloader/factory.py
+33
-0
official/vision/detection/dataloader/olnmask_parser.py
official/vision/detection/dataloader/olnmask_parser.py
+327
-0
official/vision/detection/utils/box_utils.py
official/vision/detection/utils/box_utils.py
+150
-0
official/vision/detection/utils/object_detection/target_assigner.py
...ision/detection/utils/object_detection/target_assigner.py
+206
-0
No files found.
official/vision/detection/dataloader/anchor.py
View file @
0ff25f6b
...
...
@@ -22,6 +22,7 @@ import collections
import
tensorflow
as
tf
from
official.vision
import
keras_cv
from
official.vision.detection.utils
import
box_utils
from
official.vision.detection.utils.object_detection
import
argmax_matcher
from
official.vision.detection.utils.object_detection
import
balanced_positive_negative_sampler
from
official.vision.detection.utils.object_detection
import
box_list
...
...
@@ -290,3 +291,168 @@ class RpnAnchorLabeler(AnchorLabeler):
box_targets_dict
=
self
.
_anchor
.
unpack_labels
(
box_targets
)
return
score_targets_dict
,
box_targets_dict
class OlnAnchorLabeler(RpnAnchorLabeler):
  """RPN anchor labeler extended with OLN (Object Localization Network) targets.

  In addition to the standard RPN score/box targets produced by
  `RpnAnchorLabeler`, this labeler can produce lrtb (left, right, top, bottom)
  box-regression targets and per-anchor centerness targets, as used by OLN.
  """

  def __init__(self, anchor, match_threshold=0.7, unmatched_threshold=0.3,
               rpn_batch_size_per_im=256, rpn_fg_fraction=0.5,
               has_centerness=False, center_match_iou_threshold=0.3,
               center_unmatched_iou_threshold=0.1,
               num_center_samples_per_im=256):
    """Constructs rpn anchor labeler to assign labels and centerness to anchors.

    Args:
      anchor: an instance of class Anchors.
      match_threshold: a float number between 0 and 1 representing the
        lower-bound threshold to assign positive labels for anchors. An anchor
        with a score over the threshold is labeled positive.
      unmatched_threshold: a float number between 0 and 1 representing the
        upper-bound threshold to assign negative labels for anchors. An anchor
        with a score below the threshold is labeled negative.
      rpn_batch_size_per_im: number of anchors that are sampled per image.
      rpn_fg_fraction: a float number between 0 and 1 representing the fraction
        of the sampled anchors per image that should be foreground (positive).
      has_centerness: whether to include centerness target creation. An anchor
        is paired with one centerness score.
      center_match_iou_threshold: a float number between 0 and 1 representing
        the lower-bound threshold to sample foreground anchors for centerness
        regression. An anchor with a score over the threshold is sampled as
        foreground sample for centerness regression. We sample mostly from the
        foreground region (255 out of 256 samples). That is, we sample 255 vs 1
        (foreground vs background) anchor points to learn centerness regression.
      center_unmatched_iou_threshold: a float number between 0 and 1
        representing the lower-bound threshold to sample background anchors for
        centerness regression. An anchor with a score over the threshold is
        sampled as foreground sample for centerness regression. We sample very
        sparsely from the background region (1 out of 256 samples). That is, we
        sample 255 vs 1 (foreground vs background) anchor points to learn
        centerness regression.
      num_center_samples_per_im: number of anchor points per image that are
        sampled as centerness targets.
    """
    super(OlnAnchorLabeler, self).__init__(
        anchor, match_threshold=match_threshold,
        unmatched_threshold=unmatched_threshold,
        rpn_batch_size_per_im=rpn_batch_size_per_im,
        rpn_fg_fraction=rpn_fg_fraction)
    similarity_calc = keras_cv.ops.IouSimilarity()
    matcher = argmax_matcher.ArgMaxMatcher(
        match_threshold, unmatched_threshold=unmatched_threshold,
        negatives_lower_than_unmatched=True,
        force_match_for_each_row=True)
    # NOTE(review): `faster_rcnn_box_coder` and `target_assigner` are not among
    # the imports visible in this diff hunk — confirm they are imported at
    # module level in the full file.
    box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
    if has_centerness:
      # Separate matcher so centerness sampling can use its own IoU band.
      center_matcher = argmax_matcher.ArgMaxMatcher(
          center_match_iou_threshold,
          unmatched_threshold=center_match_iou_threshold,
          negatives_lower_than_unmatched=True,
          force_match_for_each_row=True,)
    else:
      center_matcher = None
    self._target_assigner = target_assigner.OlnTargetAssigner(
        similarity_calc, matcher, box_coder,
        center_matcher=center_matcher)
    self._num_center_samples_per_im = num_center_samples_per_im
    self._center_unmatched_iou_threshold = center_unmatched_iou_threshold
    self._rpn_batch_size_per_im = rpn_batch_size_per_im
    self._rpn_fg_fraction = rpn_fg_fraction

  def label_anchors_lrtb(self, gt_boxes, gt_labels):
    """Labels anchors with ground truth inputs.

    Args:
      gt_boxes: A float tensor with shape [N, 4] representing groundtruth boxes.
        For each row, it stores [y0, x0, y1, x1] for four corners of a box.
      gt_labels: A integer tensor with shape [N, 1] representing groundtruth
        classes.

    Returns:
      score_targets_dict: ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensor with
        shape [height_l, width_l, num_anchors]. The height_l and width_l
        represent the dimension of class logits at l-th level.
      box_targets_dict: ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensor with
        shape [height_l, width_l, num_anchors * 4]. The height_l and
        width_l represent the dimension of bounding box regression output at
        l-th level.
      lrtb_targets_dict: Same structure as box_targets_dict, except the
        regression targets are converted from xyhw to lrtb format. Ordered
        dictionary with keys [min_level, min_level+1, ..., max_level]. The
        values are tensor with shape [height_l, width_l, num_anchors * 4]. The
        height_l and width_l represent the dimension of bounding box regression
        output at l-th level.
      center_targets_dict: Same structure as score_targets_dict, except the
        scores are centerness values ranging from 0 to 1. Ordered dictionary
        with keys [min_level, min_level+1, ..., max_level]. The values are
        tensor with shape [height_l, width_l, num_anchors]. The height_l and
        width_l represent the dimension of class logits at l-th level.
    """
    gt_box_list = box_list.BoxList(gt_boxes)
    anchor_box_list = box_list.BoxList(self._anchor.boxes)
    # cls_targets, cls_weights, box_weights are not used.
    (_, _, box_targets, _, matches, matched_gt_box_list, matched_anchors_mask,
     center_matched_gt_box_list, center_matched_anchors_mask,
     matched_ious) = self._target_assigner.assign(
         anchor_box_list, gt_box_list, gt_labels)
    # Box lrtb_targets.
    lrtb_targets, _ = box_utils.encode_boxes_lrtb(
        matched_gt_box_list.data['boxes'],
        anchor_box_list.data['boxes'],
        weights=[1.0, 1.0, 1.0, 1.0])
    # An lrtb target is sane only if all four distances are positive (anchor
    # center lies inside the matched box) and the anchor actually matched.
    lrtb_sanity = tf.logical_and(
        tf.greater(tf.reduce_min(lrtb_targets, -1), 0.),
        matched_anchors_mask)
    # To broadcast lrtb_sanity to the same shape as lrtb_targets.
    lrtb_sanity = tf.tile(tf.expand_dims(lrtb_sanity, 1),
                          [1, tf.shape(lrtb_targets)[1]])
    lrtb_targets = tf.where(lrtb_sanity, lrtb_targets,
                            tf.zeros_like(lrtb_targets))
    # RPN anchor-gtbox iou values; negative match values are clamped to 0.
    iou_targets = tf.where(tf.greater(matched_ious, 0.0),
                           matched_ious,
                           tf.zeros_like(matched_ious))
    # Centerness targets.
    _, center_targets = box_utils.encode_boxes_lrtb(
        center_matched_gt_box_list.data['boxes'],
        anchor_box_list.data['boxes'],
        weights=[1.0, 1.0, 1.0, 1.0])
    # Positive-negative centerness sampler: almost all samples are foreground
    # (num_center_samples_per_im - 1 positives vs 1 negative).
    num_center_samples_per_im = self._num_center_samples_per_im
    center_pos_neg_sampler = (
        balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
            positive_fraction=(1. - 1. / num_center_samples_per_im),
            is_static=False))
    center_pos_neg_indicator = tf.logical_or(
        center_matched_anchors_mask,
        tf.less(iou_targets, self._center_unmatched_iou_threshold))
    center_pos_labels = center_matched_anchors_mask
    center_samples = center_pos_neg_sampler.subsample(
        center_pos_neg_indicator, num_center_samples_per_im,
        center_pos_labels)
    is_valid = center_samples
    # Non-sampled anchors get -1 so the loss can ignore them.
    center_targets = tf.where(is_valid, center_targets,
                              (-1) * tf.ones_like(center_targets))
    # score_targets contains the subsampled positive and negative anchors.
    score_targets, _, _ = self._get_rpn_samples(matches.match_results)
    # Unpacks labels into per-pyramid-level dictionaries.
    score_targets_dict = self._anchor.unpack_labels(score_targets)
    box_targets_dict = self._anchor.unpack_labels(box_targets)
    lrtb_targets_dict = self._anchor.unpack_labels(lrtb_targets)
    center_targets_dict = self._anchor.unpack_labels(center_targets)
    return (score_targets_dict, box_targets_dict, lrtb_targets_dict,
            center_targets_dict)
official/vision/detection/dataloader/factory.py
View file @
0ff25f6b
...
...
@@ -19,6 +19,7 @@ from __future__ import division
from
__future__
import
print_function
from
official.vision.detection.dataloader
import
maskrcnn_parser
from
official.vision.detection.dataloader
import
olnmask_parser
from
official.vision.detection.dataloader
import
retinanet_parser
from
official.vision.detection.dataloader
import
shapemask_parser
...
...
@@ -69,6 +70,38 @@ def parser_generator(params, mode):
mask_crop_size
=
parser_params
.
mask_crop_size
,
use_bfloat16
=
params
.
architecture
.
use_bfloat16
,
mode
=
mode
)
elif
params
.
architecture
.
parser
==
'olnmask_parser'
:
anchor_params
=
params
.
anchor
parser_params
=
params
.
olnmask_parser
parser_fn
=
olnmask_parser
.
Parser
(
output_size
=
parser_params
.
output_size
,
min_level
=
params
.
architecture
.
min_level
,
max_level
=
params
.
architecture
.
max_level
,
num_scales
=
anchor_params
.
num_scales
,
aspect_ratios
=
anchor_params
.
aspect_ratios
,
anchor_size
=
anchor_params
.
anchor_size
,
rpn_match_threshold
=
parser_params
.
rpn_match_threshold
,
rpn_unmatched_threshold
=
parser_params
.
rpn_unmatched_threshold
,
rpn_batch_size_per_im
=
parser_params
.
rpn_batch_size_per_im
,
rpn_fg_fraction
=
parser_params
.
rpn_fg_fraction
,
aug_rand_hflip
=
parser_params
.
aug_rand_hflip
,
aug_scale_min
=
parser_params
.
aug_scale_min
,
aug_scale_max
=
parser_params
.
aug_scale_max
,
skip_crowd_during_training
=
parser_params
.
skip_crowd_during_training
,
max_num_instances
=
parser_params
.
max_num_instances
,
include_mask
=
params
.
architecture
.
include_mask
,
mask_crop_size
=
parser_params
.
mask_crop_size
,
use_bfloat16
=
params
.
architecture
.
use_bfloat16
,
mode
=
mode
,
has_centerness
=
parser_params
.
has_centerness
,
rpn_center_match_iou_threshold
=
(
parser_params
.
rpn_center_match_iou_threshold
),
rpn_center_unmatched_iou_threshold
=
(
parser_params
.
rpn_center_unmatched_iou_threshold
),
rpn_num_center_samples_per_im
=
(
parser_params
.
rpn_num_center_samples_per_im
),
class_agnostic
=
parser_params
.
class_agnostic
,
train_class
=
parser_params
.
train_class
,)
elif
params
.
architecture
.
parser
==
'shapemask_parser'
:
anchor_params
=
params
.
anchor
parser_params
=
params
.
shapemask_parser
...
...
official/vision/detection/dataloader/olnmask_parser.py
0 → 100644
View file @
0ff25f6b
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Data parser and processing for Mask R-CNN."""
import
tensorflow
as
tf
from
official.vision.detection.dataloader
import
anchor
from
official.vision.detection.dataloader.maskrcnn_parser
import
Parser
as
MaskrcnnParser
from
official.vision.detection.utils
import
box_utils
from
official.vision.detection.utils
import
class_utils
from
official.vision.detection.utils
import
input_utils
class Parser(MaskrcnnParser):
  """Parser to parse an image and its annotations into a dictionary of tensors."""

  def __init__(self,
               output_size,
               min_level,
               max_level,
               num_scales,
               aspect_ratios,
               anchor_size,
               rpn_match_threshold=0.7,
               rpn_unmatched_threshold=0.3,
               rpn_batch_size_per_im=256,
               rpn_fg_fraction=0.5,
               aug_rand_hflip=False,
               aug_scale_min=1.0,
               aug_scale_max=1.0,
               skip_crowd_during_training=True,
               max_num_instances=100,
               include_mask=False,
               mask_crop_size=112,
               use_bfloat16=True,
               mode=None,
               # for centerness learning.
               has_centerness=False,
               rpn_center_match_iou_threshold=0.3,
               rpn_center_unmatched_iou_threshold=0.1,
               rpn_num_center_samples_per_im=256,
               # for class manipulation.
               class_agnostic=False,
               train_class='all',
              ):
    """Initializes parameters for parsing annotations in the dataset.

    Args:
      output_size: `Tensor` or `list` for [height, width] of output image. The
        output_size should be divided by the largest feature stride 2^max_level.
      min_level: `int` number of minimum level of the output feature pyramid.
      max_level: `int` number of maximum level of the output feature pyramid.
      num_scales: `int` number representing intermediate scales added
        on each level. For instances, num_scales=2 adds one additional
        intermediate anchor scales [2^0, 2^0.5] on each level.
      aspect_ratios: `list` of float numbers representing the aspect ratio
        anchors added on each level. The number indicates the ratio of width to
        height. For instances, aspect_ratios=[1.0, 2.0, 0.5] adds three anchors
        on each scale level.
      anchor_size: `float` number representing the scale of size of the base
        anchor to the feature stride 2^level.
      rpn_match_threshold: IoU lower bound for labeling an anchor positive,
        0.7 by default.
      rpn_unmatched_threshold: IoU upper bound for labeling an anchor negative,
        0.3 by default.
      rpn_batch_size_per_im: number of anchors sampled per image, 256 by
        default.
      rpn_fg_fraction: fraction of sampled anchors that should be foreground,
        0.5 by default.
      aug_rand_hflip: `bool`, if True, augment training with random
        horizontal flip.
      aug_scale_min: `float`, the minimum scale applied to `output_size` for
        data augmentation during training.
      aug_scale_max: `float`, the maximum scale applied to `output_size` for
        data augmentation during training.
      skip_crowd_during_training: `bool`, if True, skip annotations labeled with
        `is_crowd` equals to 1.
      max_num_instances: `int` number of maximum number of instances in an
        image. The groundtruth data will be padded to `max_num_instances`.
      include_mask: a bool to indicate whether parse mask groundtruth.
      mask_crop_size: the size which groundtruth mask is cropped to.
      use_bfloat16: `bool`, if True, cast output image to tf.bfloat16.
      mode: a ModeKeys. Specifies if this is training, evaluation, prediction
        or prediction with groundtruths in the outputs.
      has_centerness: whether to create centerness targets.
      rpn_center_match_iou_threshold: iou threshold for valid centerness
        samples, set to 0.3 by default.
      rpn_center_unmatched_iou_threshold: iou threshold for invalid centerness
        samples, set to 0.1 by default.
      rpn_num_center_samples_per_im: number of centerness samples per image,
        256 by default.
      class_agnostic: whether to merge class ids into one foreground(=1) class,
        False by default.
      train_class: 'all' or 'voc' or 'nonvoc', 'all' by default.
    """
    super(Parser, self).__init__(
        output_size=output_size,
        min_level=min_level,
        max_level=max_level,
        num_scales=num_scales,
        aspect_ratios=aspect_ratios,
        anchor_size=anchor_size,
        rpn_match_threshold=rpn_match_threshold,
        rpn_unmatched_threshold=rpn_unmatched_threshold,
        rpn_batch_size_per_im=rpn_batch_size_per_im,
        rpn_fg_fraction=rpn_fg_fraction,
        aug_rand_hflip=aug_rand_hflip,
        aug_scale_min=aug_scale_min,
        aug_scale_max=aug_scale_max,
        skip_crowd_during_training=skip_crowd_during_training,
        max_num_instances=max_num_instances,
        include_mask=include_mask,
        mask_crop_size=mask_crop_size,
        use_bfloat16=use_bfloat16,
        mode=mode,)
    # Centerness target assigning.
    self._has_centerness = has_centerness
    self._rpn_center_match_iou_threshold = rpn_center_match_iou_threshold
    self._rpn_center_unmatched_iou_threshold = (
        rpn_center_unmatched_iou_threshold)
    self._rpn_num_center_samples_per_im = rpn_num_center_samples_per_im
    # Class manipulation.
    self._class_agnostic = class_agnostic
    self._train_class = train_class

  def _parse_train_data(self, data):
    """Parses data for training.

    Args:
      data: the decoded tensor dictionary from TfExampleDecoder.

    Returns:
      image: image tensor that is preprocessed to have normalized value and
        dimension [output_size[0], output_size[1], 3]
      labels: a dictionary of tensors used for training. The following describes
        {key: value} pairs in the dictionary.
        image_info: a 2D `Tensor` that encodes the information of the image and
          the applied preprocessing, e.g. [[original_height, original_width],
          [scaled_height, scaled_width], ...].
        anchor_boxes: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, 4] representing anchor boxes at each level.
        rpn_score_targets: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, anchors_per_location]. The height_l and
          width_l represent the dimension of class logits at l-th level.
        rpn_box_targets: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, anchors_per_location * 4]. The height_l and
          width_l represent the dimension of bounding box regression output at
          l-th level.
        gt_boxes: Groundtruth bounding box annotations. The box is represented
          in [y1, x1, y2, x2] format. The coordinates are w.r.t the scaled
          image that is fed to the network. The tensor is padded with -1 to
          the fixed dimension [self._max_num_instances, 4].
        gt_classes: Groundtruth classes annotations. The tensor is padded
          with -1 to the fixed dimension [self._max_num_instances].
        gt_masks: groundtruth masks cropped by the bounding box and
          resized to a fixed size determined by mask_crop_size.
    """
    classes = data['groundtruth_classes']
    boxes = data['groundtruth_boxes']
    if self._include_mask:
      masks = data['groundtruth_instance_masks']

    is_crowds = data['groundtruth_is_crowd']
    # Skips annotations with `is_crowd` = True.
    if self._skip_crowd_during_training and self._is_training:
      num_groundtruths = tf.shape(classes)[0]
      with tf.control_dependencies([num_groundtruths, is_crowds]):
        indices = tf.cond(
            tf.greater(tf.size(is_crowds), 0),
            lambda: tf.where(tf.logical_not(is_crowds))[:, 0],
            lambda: tf.cast(tf.range(num_groundtruths), tf.int64))
      classes = tf.gather(classes, indices)
      boxes = tf.gather(boxes, indices)
      if self._include_mask:
        masks = tf.gather(masks, indices)

    # Gets original image and its size.
    image = data['image']
    image_shape = tf.shape(image)[0:2]

    # Normalizes image with mean and std pixel values.
    image = input_utils.normalize_image(image)

    # Flips image randomly during training.
    if self._aug_rand_hflip:
      if self._include_mask:
        image, boxes, masks = input_utils.random_horizontal_flip(
            image, boxes, masks)
      else:
        image, boxes = input_utils.random_horizontal_flip(
            image, boxes)

    # Converts boxes from normalized coordinates to pixel coordinates.
    # Now the coordinates of boxes are w.r.t. the original image.
    boxes = box_utils.denormalize_boxes(boxes, image_shape)

    # Resizes and crops image.
    image, image_info = input_utils.resize_and_crop_image(
        image,
        self._output_size,
        padded_size=input_utils.compute_padded_size(
            self._output_size, 2 ** self._max_level),
        aug_scale_min=self._aug_scale_min,
        aug_scale_max=self._aug_scale_max)
    image_height, image_width, _ = image.get_shape().as_list()

    # Resizes and crops boxes.
    # Now the coordinates of boxes are w.r.t the scaled image.
    image_scale = image_info[2, :]
    offset = image_info[3, :]
    boxes = input_utils.resize_and_crop_boxes(
        boxes, image_scale, image_info[1, :], offset)

    # Filters out ground truth boxes that are all zeros.
    indices = box_utils.get_non_empty_box_indices(boxes)
    boxes = tf.gather(boxes, indices)
    classes = tf.gather(classes, indices)
    if self._include_mask:
      masks = tf.gather(masks, indices)
      # Transfer boxes to the original image space and do normalization.
      cropped_boxes = boxes + tf.tile(tf.expand_dims(offset, axis=0), [1, 2])
      cropped_boxes /= tf.tile(tf.expand_dims(image_scale, axis=0), [1, 2])
      cropped_boxes = box_utils.normalize_boxes(cropped_boxes, image_shape)
      num_masks = tf.shape(masks)[0]
      masks = tf.image.crop_and_resize(
          tf.expand_dims(masks, axis=-1),
          cropped_boxes,
          box_indices=tf.range(num_masks, dtype=tf.int32),
          crop_size=[self._mask_crop_size, self._mask_crop_size],
          method='bilinear')
      masks = tf.squeeze(masks, axis=-1)

    # Class manipulation.
    # Filter out novel split classes from training.
    if self._train_class != 'all':
      valid_classes = tf.cast(
          class_utils.coco_split_class_ids(self._train_class),
          dtype=classes.dtype)
      match = tf.reduce_any(tf.equal(
          tf.expand_dims(valid_classes, 1),
          tf.expand_dims(classes, 0)), 0)
      # kill novel split classes and boxes.
      boxes = tf.gather(boxes, tf.where(match)[:, 0])
      classes = tf.gather(classes, tf.where(match)[:, 0])
      if self._include_mask:
        masks = tf.gather(masks, tf.where(match)[:, 0])

    # Assigns anchor targets.
    # Note that after the target assignment, box targets are absolute pixel
    # offsets w.r.t. the scaled image.
    input_anchor = anchor.Anchor(
        self._min_level, self._max_level, self._num_scales,
        self._aspect_ratios, self._anchor_size, (image_height, image_width))
    anchor_labeler = anchor.OlnAnchorLabeler(
        input_anchor,
        self._rpn_match_threshold,
        self._rpn_unmatched_threshold,
        self._rpn_batch_size_per_im,
        self._rpn_fg_fraction,
        # for centerness target.
        self._has_centerness,
        self._rpn_center_match_iou_threshold,
        self._rpn_center_unmatched_iou_threshold,
        self._rpn_num_center_samples_per_im,)

    if self._has_centerness:
      rpn_score_targets, _, rpn_lrtb_targets, rpn_center_targets = (
          anchor_labeler.label_anchors_lrtb(
              gt_boxes=boxes,
              gt_labels=tf.cast(tf.expand_dims(classes, axis=-1),
                                dtype=tf.float32)))
    else:
      rpn_score_targets, rpn_box_targets = anchor_labeler.label_anchors(
          boxes, tf.cast(tf.expand_dims(classes, axis=-1), dtype=tf.float32))
      # For base rpn, dummy placeholder for centerness target.
      rpn_center_targets = rpn_score_targets.copy()

    # If bfloat16 is used, casts input image to tf.bfloat16.
    if self._use_bfloat16:
      image = tf.cast(image, dtype=tf.bfloat16)

    inputs = {
        'image': image,
        'image_info': image_info,
    }
    # Packs labels for model_fn outputs.
    labels = {
        'anchor_boxes': input_anchor.multilevel_boxes,
        'image_info': image_info,
        'rpn_score_targets': rpn_score_targets,
        'rpn_box_targets': (rpn_lrtb_targets if self._has_centerness
                            else rpn_box_targets),
        'rpn_center_targets': rpn_center_targets,
    }
    # If class_agnostic, convert to binary classes.
    if self._class_agnostic:
      classes = tf.where(tf.greater(classes, 0),
                         tf.ones_like(classes),
                         tf.zeros_like(classes))

    inputs['gt_boxes'] = input_utils.pad_to_fixed_size(
        boxes, self._max_num_instances, -1)
    inputs['gt_classes'] = input_utils.pad_to_fixed_size(
        classes, self._max_num_instances, -1)
    if self._include_mask:
      inputs['gt_masks'] = input_utils.pad_to_fixed_size(
          masks, self._max_num_instances, -1)

    return inputs, labels
official/vision/detection/utils/box_utils.py
View file @
0ff25f6b
...
...
@@ -366,6 +366,156 @@ def decode_boxes(encoded_boxes, anchors, weights=None):
return
decoded_boxes
def encode_boxes_lrtb(boxes, anchors, weights=None):
  """Encode boxes to targets on lrtb (=left,right,top,bottom) format.

  Args:
    boxes: a tensor whose last dimension is 4 representing the coordinates
      of boxes in ymin, xmin, ymax, xmax order.
    anchors: a tensor whose shape is the same as, or `broadcastable` to `boxes`,
      representing the coordinates of anchors in ymin, xmin, ymax, xmax order.
    weights: None or a list of four float numbers used to scale coordinates.

  Returns:
    encoded_boxes_lrtb: a tensor whose shape is the same as `boxes` representing
      the encoded box targets. The box targets encode the left, right, top,
      bottom distances from an anchor location to the four borders of the
      matched groundtruth bounding box.
    center_targets: centerness targets defined by the left, right, top, and
      bottom distance targets. The centerness is defined as the deviation of the
      anchor location from the groundtruth object center. Formally, centerness =
      sqrt(min(left, right)/max(left, right)*min(top, bottom)/max(top, bottom)).

  Raises:
    ValueError: If the last dimension of boxes is not 4.
  """
  if boxes.shape[-1] != 4:
    raise ValueError(
        'boxes.shape[-1] is {:d}, but must be 4.'.format(boxes.shape[-1]))

  with tf.name_scope('encode_boxes_lrtb'):
    boxes = tf.cast(boxes, dtype=anchors.dtype)
    ymin = boxes[..., 0:1]
    xmin = boxes[..., 1:2]
    ymax = boxes[..., 2:3]
    xmax = boxes[..., 3:4]

    anchor_ymin = anchors[..., 0:1]
    anchor_xmin = anchors[..., 1:2]
    anchor_ymax = anchors[..., 2:3]
    anchor_xmax = anchors[..., 3:4]
    anchor_h = anchor_ymax - anchor_ymin
    anchor_w = anchor_xmax - anchor_xmin
    anchor_yc = anchor_ymin + 0.5 * anchor_h
    anchor_xc = anchor_xmin + 0.5 * anchor_w
    # Pad with EPSILON to avoid division by zero for degenerate anchors.
    anchor_h += EPSILON
    anchor_w += EPSILON

    # Distances from the anchor center to the four box borders, normalized by
    # the anchor size.
    left = (anchor_xc - xmin) / anchor_w
    right = (xmax - anchor_xc) / anchor_w
    top = (anchor_yc - ymin) / anchor_h
    bottom = (ymax - anchor_yc) / anchor_h

    lrtb_targets = tf.concat([left, right, top, bottom], axis=-1)
    # Valid only when the anchor center lies strictly inside the box (all four
    # distances positive).
    valid_match = tf.greater(tf.reduce_min(lrtb_targets, -1), 0.0)

    # Centerness score.
    left_right = tf.concat([left, right], axis=-1)
    left_right = tf.where(tf.stack([valid_match, valid_match], -1),
                          left_right, tf.zeros_like(left_right))
    top_bottom = tf.concat([top, bottom], axis=-1)
    top_bottom = tf.where(tf.stack([valid_match, valid_match], -1),
                          top_bottom, tf.zeros_like(top_bottom))
    center_targets = tf.sqrt(
        (tf.reduce_min(left_right, -1) /
         (tf.reduce_max(left_right, -1) + EPSILON)) *
        (tf.reduce_min(top_bottom, -1) /
         (tf.reduce_max(top_bottom, -1) + EPSILON)))
    center_targets = tf.where(valid_match,
                              center_targets,
                              tf.zeros_like(center_targets))

    # Weights scale the encoded distances only, after centerness is computed.
    if weights:
      left *= weights[0]
      right *= weights[1]
      top *= weights[2]
      bottom *= weights[3]

    encoded_boxes_lrtb = tf.concat(
        [left, right, top, bottom],
        axis=-1)
  return encoded_boxes_lrtb, center_targets
def decode_boxes_lrtb(encoded_boxes_lrtb, anchors, weights=None):
  """Decode lrtb-encoded boxes back to corner coordinates.

  Args:
    encoded_boxes_lrtb: a tensor whose last dimension is 4 representing the
      coordinates of encoded boxes in left, right, top, bottom order.
    anchors: a tensor whose shape is the same as, or `broadcastable` to `boxes`,
      representing the coordinates of anchors in ymin, xmin, ymax, xmax order.
    weights: None or a list of four float numbers used to scale coordinates.

  Returns:
    decoded_boxes_lrtb: a tensor whose shape is the same as `boxes` representing
      the decoded box targets in lrtb (=left,right,top,bottom) format. The box
      decoded box coordinates represent the left, right, top, and bottom
      distances from an anchor location to the four borders of the matched
      groundtruth bounding box.
  """
  if encoded_boxes_lrtb.shape[-1] != 4:
    raise ValueError(
        'encoded_boxes_lrtb.shape[-1] is {:d}, but must be 4.'
        .format(encoded_boxes_lrtb.shape[-1]))

  with tf.name_scope('decode_boxes_lrtb'):
    encoded_boxes_lrtb = tf.cast(encoded_boxes_lrtb, dtype=anchors.dtype)
    # Split the four encoded distances.
    left, right, top, bottom = (
        encoded_boxes_lrtb[..., 0:1],
        encoded_boxes_lrtb[..., 1:2],
        encoded_boxes_lrtb[..., 2:3],
        encoded_boxes_lrtb[..., 3:4],
    )
    # Undo the per-coordinate scaling applied at encoding time.
    if weights:
      left /= weights[0]
      right /= weights[1]
      top /= weights[2]
      bottom /= weights[3]

    # Anchor geometry: corners -> size -> center.
    a_ymin, a_xmin = anchors[..., 0:1], anchors[..., 1:2]
    a_ymax, a_xmax = anchors[..., 2:3], anchors[..., 3:4]
    a_height = a_ymax - a_ymin
    a_width = a_xmax - a_xmin
    a_center_y = a_ymin + 0.5 * a_height
    a_center_x = a_xmin + 0.5 * a_width
    # Same EPSILON padding as the encoder, keeping encode/decode symmetric.
    a_height += EPSILON
    a_width += EPSILON

    # Distances are fractions of the anchor size, measured from its center.
    out_ymin = a_center_y - top * a_height
    out_xmin = a_center_x - left * a_width
    out_ymax = a_center_y + bottom * a_height
    out_xmax = a_center_x + right * a_width

    decoded_boxes_lrtb = tf.concat(
        [out_ymin, out_xmin, out_ymax, out_xmax],
        axis=-1)
  return decoded_boxes_lrtb
def
filter_boxes
(
boxes
,
scores
,
image_shape
,
min_size_threshold
):
"""Filter and remove boxes that are too small or fall outside the image.
...
...
official/vision/detection/utils/object_detection/target_assigner.py
View file @
0ff25f6b
...
...
@@ -315,3 +315,209 @@ class TargetAssigner(object):
BoxCoder object.
"""
return
self
.
_box_coder
class OlnTargetAssigner(TargetAssigner):
  """Target assigner to compute classification and regression targets.

  Extends TargetAssigner for the Object Localization Network (OLN) setting:
  in addition to the standard matcher, an optional second matcher
  (`center_matcher`, typically with a different IoU threshold) selects the
  anchors used for centerness regression targets.
  """

  def __init__(self,
               similarity_calc,
               matcher,
               box_coder,
               negative_class_weight=1.0,
               unmatched_cls_target=None,
               center_matcher=None):
    """Construct Object Detection Target Assigner.

    Args:
      similarity_calc: a RegionSimilarityCalculator
      matcher: Matcher used to match groundtruth to anchors.
      box_coder: BoxCoder used to encode matching groundtruth boxes with
        respect to anchors.
      negative_class_weight: classification weight to be associated to negative
        anchors (default: 1.0). The weight must be in [0., 1.].
      unmatched_cls_target: a float32 tensor with shape [d_1, d_2, ..., d_k]
        which is consistent with the classification target for each anchor (and
        can be empty for scalar targets). This shape must thus be compatible
        with the groundtruth labels that are passed to the "assign" function
        (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]). If set to None,
        unmatched_cls_target is set to be [0] for each anchor.
      center_matcher: Matcher used to match groundtruth to anchors to sample
        and assign the regression targets of centerness to each anchor.

    Raises:
      ValueError: if similarity_calc is not a RegionSimilarityCalculator or
        if matcher is not a Matcher or if box_coder is not a BoxCoder
    """
    super(OlnTargetAssigner, self).__init__(
        similarity_calc=similarity_calc,
        matcher=matcher,
        box_coder=box_coder,
        negative_class_weight=negative_class_weight,
        unmatched_cls_target=unmatched_cls_target)
    # centerness-matcher with independent sampling IoU threshold.
    self._center_matcher = center_matcher

  def assign(self,
             anchors,
             groundtruth_boxes,
             groundtruth_labels=None,
             groundtruth_weights=None,
             **params):
    """Assign classification and regression targets to each anchor.

    For a given set of anchors and groundtruth detections, match anchors
    to groundtruth_boxes and assign classification and regression targets to
    each anchor as well as weights based on the resulting match (specifying,
    e.g., which anchors should not contribute to training loss).

    Anchors that are not matched to anything are given a classification target
    of self._unmatched_cls_target which can be specified via the constructor.

    Args:
      anchors: a BoxList representing N anchors
      groundtruth_boxes: a BoxList representing M groundtruth boxes
      groundtruth_labels: a tensor of shape [M, d_1, ... d_k] with labels for
        each of the ground_truth boxes. The subshape [d_1, ... d_k] can be
        empty (corresponding to scalar inputs). When set to None,
        groundtruth_labels assumes a binary problem where all ground_truth
        boxes get a positive label (of 1).
      groundtruth_weights: a float tensor of shape [M] indicating the weight to
        assign to all anchors match to a particular groundtruth box. The
        weights must be in [0., 1.]. If None, all weights are set to 1.
      **params: Additional keyword arguments for specific implementations of
        the Matcher.

    Returns:
      cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k],
        where the subshape [d_1, ..., d_k] is compatible with
        groundtruth_labels which has shape [num_gt_boxes, d_1, d_2, ... d_k].
      cls_weights: a float32 tensor with shape [num_anchors]
      reg_targets: a float32 tensor with shape [num_anchors,
        box_code_dimension]
      reg_weights: a float32 tensor with shape [num_anchors]
      match: a matcher.Match object encoding the match between anchors and
        groundtruth boxes, with rows corresponding to groundtruth boxes
        and columns corresponding to anchors.
      matched_gt_boxlist: a BoxList object with data of float32 tensor with
        shape [num_anchors, box_dimension] which encodes the coordinates of
        the matched groundtruth boxes.
      matched_anchors_mask: a Bool tensor with shape [num_anchors] which
        indicates whether an anchor is matched or not.
      center_matched_gt_boxlist: a BoxList object with data of float32 tensor
        with shape [num_anchors, box_dimension] which encodes the coordinates
        of the groundtruth boxes matched for centerness target assignment.
      center_matched_anchors_mask: a Boolean tensor with shape [num_anchors]
        which indicates whether an anchor is matched or not for centerness
        target assignment.
      matched_ious: a float32 tensor with shape [num_anchors] which encodes
        the ious between each anchor and the matched groundtruth boxes.

    Raises:
      ValueError: if anchors or groundtruth_boxes are not of type
        box_list.BoxList
    """
    if not isinstance(anchors, box_list.BoxList):
      raise ValueError('anchors must be an BoxList')
    if not isinstance(groundtruth_boxes, box_list.BoxList):
      raise ValueError('groundtruth_boxes must be an BoxList')
    if groundtruth_labels is None:
      # Binary problem: every groundtruth box gets a scalar positive label,
      # expanded to shape [M, 1] so it is compatible with the shape asserts.
      groundtruth_labels = tf.ones(
          tf.expand_dims(groundtruth_boxes.num_boxes(), 0))
      groundtruth_labels = tf.expand_dims(groundtruth_labels, -1)
    # Validate that the label subshape matches the unmatched target and that
    # labels and boxes agree on the number of groundtruth entries.
    unmatched_shape_assert = shape_utils.assert_shape_equal(
        shape_utils.combined_static_and_dynamic_shape(groundtruth_labels)[1:],
        shape_utils.combined_static_and_dynamic_shape(
            self._unmatched_cls_target))
    labels_and_box_shapes_assert = shape_utils.assert_shape_equal(
        shape_utils.combined_static_and_dynamic_shape(groundtruth_labels)[:1],
        shape_utils.combined_static_and_dynamic_shape(
            groundtruth_boxes.get())[:1])
    if groundtruth_weights is None:
      num_gt_boxes = groundtruth_boxes.num_boxes_static()
      if not num_gt_boxes:
        # Static count unavailable; fall back to the dynamic box count.
        num_gt_boxes = groundtruth_boxes.num_boxes()
      groundtruth_weights = tf.ones([num_gt_boxes], dtype=tf.float32)
    # All target/weight creation runs after the shape asserts have passed.
    with tf.control_dependencies(
        [unmatched_shape_assert, labels_and_box_shapes_assert]):
      match_quality_matrix = self._similarity_calc(groundtruth_boxes.get(),
                                                   anchors.get())
      match = self._matcher.match(match_quality_matrix, **params)
      reg_targets, matched_gt_boxlist, matched_anchors_mask = (
          self._create_regression_targets(anchors, groundtruth_boxes, match))
      cls_targets = self._create_classification_targets(groundtruth_labels,
                                                        match)
      reg_weights = self._create_regression_weights(match, groundtruth_weights)
      cls_weights = self._create_classification_weights(match,
                                                        groundtruth_weights)
      # Match for creation of centerness regression targets.
      if self._center_matcher is not None:
        # Re-match the same quality matrix with the centerness matcher, which
        # may use a different IoU threshold than the main matcher.
        center_match = self._center_matcher.match(match_quality_matrix,
                                                  **params)
        center_matched_gt_boxes = center_match.gather_based_on_match(
            groundtruth_boxes.get(),
            unmatched_value=tf.zeros(4),
            ignored_value=tf.zeros(4))
        center_matched_gt_boxlist = box_list.BoxList(center_matched_gt_boxes)
        center_matched_anchors_mask = (
            center_match.matched_column_indicator())

    num_anchors = anchors.num_boxes_static()
    if num_anchors is not None:
      # Restore static shapes on the per-anchor outputs when N is known.
      reg_targets = self._reset_target_shape(reg_targets, num_anchors)
      cls_targets = self._reset_target_shape(cls_targets, num_anchors)
      reg_weights = self._reset_target_shape(reg_weights, num_anchors)
      cls_weights = self._reset_target_shape(cls_weights, num_anchors)
    if self._center_matcher is not None:
      # Per-anchor best IoU over all groundtruth boxes (reduce over rows).
      matched_ious = tf.reduce_max(match_quality_matrix, 0)
      return (cls_targets, cls_weights, reg_targets, reg_weights, match,
              matched_gt_boxlist, matched_anchors_mask,
              center_matched_gt_boxlist, center_matched_anchors_mask,
              matched_ious)
    else:
      return (cls_targets, cls_weights, reg_targets, reg_weights, match)

  def _create_regression_targets(self, anchors, groundtruth_boxes, match):
    """Returns a regression target for each anchor.

    Args:
      anchors: a BoxList representing N anchors
      groundtruth_boxes: a BoxList representing M groundtruth_boxes
      match: a matcher.Match object

    Returns:
      reg_targets: a float32 tensor with shape [N, box_code_dimension]
      matched_gt_boxlist: a BoxList of the groundtruth box matched to each
        anchor (zeros for unmatched/ignored anchors).
      matched_anchors_mask: a Bool tensor with shape [N] indicating whether
        each anchor was matched.
    """
    # Per-anchor matched groundtruth box; unmatched/ignored anchors get zeros.
    matched_gt_boxes = match.gather_based_on_match(
        groundtruth_boxes.get(),
        unmatched_value=tf.zeros(4),
        ignored_value=tf.zeros(4))
    matched_gt_boxlist = box_list.BoxList(matched_gt_boxes)
    if groundtruth_boxes.has_field(KEYPOINTS_FIELD_NAME):
      # Carry matched keypoints along so the box coder can encode them too.
      groundtruth_keypoints = groundtruth_boxes.get_field(KEYPOINTS_FIELD_NAME)
      matched_keypoints = match.gather_based_on_match(
          groundtruth_keypoints,
          unmatched_value=tf.zeros(groundtruth_keypoints.get_shape()[1:]),
          ignored_value=tf.zeros(groundtruth_keypoints.get_shape()[1:]))
      matched_gt_boxlist.add_field(KEYPOINTS_FIELD_NAME, matched_keypoints)
    matched_reg_targets = self._box_coder.encode(matched_gt_boxlist, anchors)
    match_results_shape = shape_utils.combined_static_and_dynamic_shape(
        match.match_results)
    # Zero out the unmatched and ignored regression targets.
    unmatched_ignored_reg_targets = tf.tile(self._default_regression_target(),
                                            [match_results_shape[0], 1])
    matched_anchors_mask = match.matched_column_indicator()
    # To broadcast matched_anchors_mask to the same shape as
    # matched_reg_targets.
    matched_anchors_mask_tiled = tf.tile(
        tf.expand_dims(matched_anchors_mask, 1),
        [1, tf.shape(matched_reg_targets)[1]])
    reg_targets = tf.where(matched_anchors_mask_tiled,
                           matched_reg_targets,
                           unmatched_ignored_reg_targets)
    return reg_targets, matched_gt_boxlist, matched_anchors_mask
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment