Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
d4f37e87
Commit
d4f37e87
authored
Nov 25, 2020
by
A. Unique TensorFlower
Browse files
Internal change
PiperOrigin-RevId: 344335367
parent
51f4ecad
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
641 additions
and
0 deletions
+641
-0
official/vision/detection/configs/factory.py
official/vision/detection/configs/factory.py
+4
-0
official/vision/detection/configs/olnmask_config.py
official/vision/detection/configs/olnmask_config.py
+143
-0
official/vision/detection/ops/postprocess_ops.py
official/vision/detection/ops/postprocess_ops.py
+86
-0
official/vision/detection/ops/roi_ops.py
official/vision/detection/ops/roi_ops.py
+234
-0
official/vision/detection/ops/target_ops.py
official/vision/detection/ops/target_ops.py
+174
-0
No files found.
official/vision/detection/configs/factory.py
View file @
d4f37e87
...
...
@@ -16,6 +16,7 @@
from
official.modeling.hyperparams
import
params_dict
from
official.vision.detection.configs
import
maskrcnn_config
from
official.vision.detection.configs
import
olnmask_config
from
official.vision.detection.configs
import
retinanet_config
from
official.vision.detection.configs
import
shapemask_config
...
...
@@ -28,6 +29,9 @@ def config_generator(model):
elif
model
==
'mask_rcnn'
:
default_config
=
maskrcnn_config
.
MASKRCNN_CFG
restrictions
=
maskrcnn_config
.
MASKRCNN_RESTRICTIONS
elif
model
==
'olnmask'
:
default_config
=
olnmask_config
.
OLNMASK_CFG
restrictions
=
olnmask_config
.
OLNMASK_RESTRICTIONS
elif
model
==
'shapemask'
:
default_config
=
shapemask_config
.
SHAPEMASK_CFG
restrictions
=
shapemask_config
.
SHAPEMASK_RESTRICTIONS
...
...
official/vision/detection/configs/olnmask_config.py
0 → 100644
View file @
d4f37e87
# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Config template to train Object Localization Network (OLN)."""
from
official.modeling.hyperparams
import
params_dict
from
official.vision.detection.configs
import
base_config
# pylint: disable=line-too-long
# OLN-Mask config: starts from the shared detection base config and overrides
# the fields that differ for class-agnostic object-proposal learning.
OLNMASK_CFG = params_dict.ParamsDict(base_config.BASE_CFG)
OLNMASK_CFG.override({
    'type': 'olnmask',
    'eval': {
        'type': 'oln_xclass_box',
        # Class-agnostic evaluation: category labels are not used.
        'use_category': False,
        # 'voc' here names the split of classes seen during training —
        # presumably the cross-category (VOC-seen / non-VOC-unseen) protocol;
        # confirm against the evaluator.
        'seen_class': 'voc',
        'num_images_to_visualize': 0,
    },
    'architecture': {
        'parser': 'olnmask_parser',
        'min_level': 2,
        'max_level': 6,
        # Classification heads are disabled; only box (and optionally mask)
        # branches are trained.
        'include_rpn_class': False,
        'include_frcnn_class': False,
        'include_frcnn_box': True,
        'include_mask': False,
        'mask_target_size': 28,
        # Binary object-vs-background setting.
        'num_classes': 2,
    },
    'olnmask_parser': {
        'output_size': [640, 640],
        'num_channels': 3,
        'rpn_match_threshold': 0.7,
        'rpn_unmatched_threshold': 0.3,
        'rpn_batch_size_per_im': 256,
        'rpn_fg_fraction': 0.5,
        'aug_rand_hflip': True,
        'aug_scale_min': 0.5,
        'aug_scale_max': 2.0,
        'skip_crowd_during_training': True,
        'max_num_instances': 100,
        'mask_crop_size': 112,
        # centerness targets.
        'has_centerness': True,
        'rpn_center_match_iou_threshold': 0.3,
        'rpn_center_unmatched_iou_threshold': 0.1,
        'rpn_num_center_samples_per_im': 256,
        # class manipulation.
        'class_agnostic': True,
        'train_class': 'voc',
    },
    'anchor': {
        # Single square anchor per location (one scale, aspect ratio 1.0).
        'num_scales': 1,
        'aspect_ratios': [1.0],
        'anchor_size': 8,
    },
    'rpn_head': {
        'num_convs': 2,
        'num_filters': 256,
        'use_separable_conv': False,
        'use_batch_norm': False,
        # RPN-Centerness learning {
        'has_centerness': True,
        # }
    },
    'frcnn_head': {
        'num_convs': 0,
        'num_filters': 256,
        'use_separable_conv': False,
        'num_fcs': 2,
        'fc_dims': 1024,
        'use_batch_norm': False,
        # Box-scoring (e.g. IoU/localization-quality) branch enabled.
        'has_scoring': True,
    },
    'mrcnn_head': {
        'num_convs': 4,
        'num_filters': 256,
        'use_separable_conv': False,
        'use_batch_norm': False,
        'has_scoring': False,
    },
    'rpn_score_loss': {
        'rpn_batch_size_per_im': 256,
    },
    'rpn_box_loss': {
        'huber_loss_delta': 1.0 / 9.0,
    },
    'frcnn_box_loss': {
        'huber_loss_delta': 1.0,
    },
    'frcnn_box_score_loss': {
        'ignore_threshold': 0.3,
    },
    'roi_proposal': {
        'rpn_pre_nms_top_k': 2000,
        'rpn_post_nms_top_k': 2000,
        'rpn_nms_threshold': 0.7,
        'rpn_score_threshold': 0.0,
        'rpn_min_size_threshold': 0.0,
        'test_rpn_pre_nms_top_k': 2000,
        'test_rpn_post_nms_top_k': 2000,
        'test_rpn_nms_threshold': 0.7,
        'test_rpn_score_threshold': 0.0,
        'test_rpn_min_size_threshold': 0.0,
        'use_batched_nms': False,
    },
    'roi_sampling': {
        'num_samples_per_image': 512,
        'fg_fraction': 0.25,
        'fg_iou_thresh': 0.5,
        'bg_iou_thresh_hi': 0.5,
        'bg_iou_thresh_lo': 0.0,
        'mix_gt_boxes': True,
    },
    'mask_sampling': {
        'num_mask_samples_per_image': 128,
        # Typically = `num_samples_per_image` * `fg_fraction`.
    },
    'postprocess': {
        'use_batched_nms': False,
        'max_total_size': 100,
        'nms_iou_threshold': 0.5,
        'score_threshold': 0.00,
        'pre_nms_num_boxes': 2000,
    },
}, is_strict=False)

# Consistency restrictions applied on top of the config; currently all
# commented out (no restrictions enforced).
OLNMASK_RESTRICTIONS = [
    # 'anchor.aspect_ratios == [1.0]',
    # 'anchor.scales == 1',
]
# pylint: enable=line-too-long
official/vision/detection/ops/postprocess_ops.py
View file @
d4f37e87
...
...
@@ -407,3 +407,89 @@ class GenericDetectionGenerator(object):
nmsed_classes
+=
1
return
nmsed_boxes
,
nmsed_scores
,
nmsed_classes
,
valid_detections
class OlnDetectionGenerator(GenericDetectionGenerator):
  """Generates the final detected boxes with scores and classes."""

  def __call__(self, box_outputs, class_outputs, anchor_boxes, image_shape,
               is_single_fg_score=False, keep_nms=True):
    """Generate final detections for Object Localization Network (OLN).

    Args:
      box_outputs: a tensor of shape of [batch_size, K, num_classes * 4]
        representing the class-specific box coordinates relative to anchors.
      class_outputs: a tensor of shape of [batch_size, K, num_classes]
        representing the class logits before applying score activation.
      anchor_boxes: a tensor of shape of [batch_size, K, 4] representing the
        corresponding anchor boxes w.r.t `box_outputs`.
      image_shape: a tensor of shape of [batch_size, 2] storing the image
        height and width w.r.t. the scaled image, i.e. the same image space as
        `box_outputs` and `anchor_boxes`.
      is_single_fg_score: a Bool indicator of whether class_outputs includes
        the background scores concatenated or not. By default, class_outputs
        is a concatenation of both scores for the foreground and background.
        That is, scores_without_bg=False.
      keep_nms: a Bool indicator of whether to perform NMS or not.

    Returns:
      nms_boxes: `float` Tensor of shape [batch_size, max_total_size, 4]
        representing top detected boxes in [y1, x1, y2, x2].
      nms_scores: `float` Tensor of shape [batch_size, max_total_size]
        representing sorted confidence scores for detected boxes. The values
        are between [0, 1].
      nms_classes: `int` Tensor of shape [batch_size, max_total_size]
        representing classes for detected boxes.
      valid_detections: `int` Tensor of shape [batch_size] only the top
        `valid_detections` boxes are valid detections.
    """
    if is_single_fg_score:
      # Concatenates dummy background scores.
      # NOTE(review): with a rank-3 `class_outputs` this stack produces a
      # rank-4 tensor — presumably the single-fg-score input is rank 2 here;
      # confirm against the caller.
      dummy_bg_scores = tf.zeros_like(class_outputs)
      class_outputs = tf.stack([dummy_bg_scores, class_outputs], -1)
    else:
      # Convert logits to per-class probabilities.
      class_outputs = tf.nn.softmax(class_outputs, axis=-1)

    # Removes the background class.
    class_outputs_shape = tf.shape(class_outputs)
    batch_size = class_outputs_shape[0]
    num_locations = class_outputs_shape[1]
    num_classes = class_outputs_shape[-1]
    # One candidate detection per location per foreground class.
    num_detections = num_locations * (num_classes - 1)

    # Drop index 0 (background) along the class axis of both scores and boxes.
    class_outputs = tf.slice(class_outputs, [0, 0, 1], [-1, -1, -1])
    box_outputs = tf.reshape(
        box_outputs,
        tf.stack([batch_size, num_locations, num_classes, 4], axis=-1))
    box_outputs = tf.slice(box_outputs, [0, 0, 1, 0], [-1, -1, -1, -1])
    # Replicate each anchor once per foreground class so anchors align
    # one-to-one with the per-class box predictions.
    anchor_boxes = tf.tile(
        tf.expand_dims(anchor_boxes, axis=2), [1, 1, num_classes - 1, 1])
    # Flatten (location, class) into a single detection axis.
    box_outputs = tf.reshape(
        box_outputs, tf.stack([batch_size, num_detections, 4], axis=-1))
    anchor_boxes = tf.reshape(
        anchor_boxes, tf.stack([batch_size, num_detections, 4], axis=-1))

    # Box decoding. For RPN outputs, box_outputs are all zeros.
    decoded_boxes = box_utils.decode_boxes(
        box_outputs, anchor_boxes, weights=[10.0, 10.0, 5.0, 5.0])

    # Box clipping
    decoded_boxes = box_utils.clip_boxes(decoded_boxes, image_shape)
    # Restore the per-class axis for the NMS routine.
    decoded_boxes = tf.reshape(
        decoded_boxes,
        tf.stack([batch_size, num_locations, num_classes - 1, 4], axis=-1))

    if keep_nms:
      nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections = (
          self._generate_detections(decoded_boxes, class_outputs))
      # Adds 1 to offset the background class which has index 0.
      nmsed_classes += 1
    else:
      # No NMS: return the first foreground class's boxes/scores as-is and
      # mark every location as a valid detection of class 1.
      nmsed_boxes = decoded_boxes[:, :, 0, :]
      nmsed_scores = class_outputs[:, :, 0]
      nmsed_classes = tf.cast(tf.ones_like(nmsed_scores), tf.int32)
      valid_detections = tf.cast(
          tf.reduce_sum(tf.ones_like(nmsed_scores), axis=-1), tf.int32)

    return nmsed_boxes, nmsed_scores, nmsed_classes, valid_detections
official/vision/detection/ops/roi_ops.py
View file @
d4f37e87
...
...
@@ -231,3 +231,237 @@ class ROIGenerator(object):
use_batched_nms
=
self
.
_use_batched_nms
,
apply_sigmoid_to_score
=
True
)
return
proposed_rois
,
proposed_roi_scores
class OlnROIGenerator(ROIGenerator):
  """Proposes RoIs for the second stage processing."""

  def __call__(self, boxes, scores, anchor_boxes, image_shape, is_training,
               is_box_lrtb=False, object_scores=None):
    """Generates RoI proposals.

    Args:
      boxes: a dict with keys representing FPN levels and values representing
        box tensors of shape [batch_size, feature_h, feature_w,
        num_anchors * 4].
      scores: a dict with keys representing FPN levels and values representing
        logit tensors of shape [batch_size, feature_h, feature_w, num_anchors].
      anchor_boxes: a dict with keys representing FPN levels and values
        representing anchor box tensors of shape [batch_size, feature_h,
        feature_w, num_anchors * 4].
      image_shape: a tensor of shape [batch_size, 2] where the last dimension
        are [height, width] of the scaled image.
      is_training: a bool indicating whether it is in training or inference
        mode.
      is_box_lrtb: a bool indicating whether boxes are in lrtb (=left,right,
        top,bottom) format.
      object_scores: another objectness score (e.g., centerness). In OLN, we
        use object_scores=centerness as a replacement of the scores at each
        level. A dict with keys representing FPN levels and values
        representing logit tensors of shape [batch_size, feature_h, feature_w,
        num_anchors].

    Returns:
      proposed_rois: a tensor of shape [batch_size, rpn_post_nms_top_k, 4],
        representing the box coordinates of the proposed RoIs w.r.t. the
        scaled image.
      proposed_roi_scores: a tensor of shape
        [batch_size, rpn_post_nms_top_k, 1], representing the scores of the
        proposed RoIs.
    """
    # Train vs. test thresholds are configured separately on the base class.
    proposed_rois, proposed_roi_scores = self.oln_multilevel_propose_rois(
        boxes,
        scores,
        anchor_boxes,
        image_shape,
        rpn_pre_nms_top_k=(self._rpn_pre_nms_top_k
                           if is_training else self._test_rpn_pre_nms_top_k),
        rpn_post_nms_top_k=(self._rpn_post_nms_top_k
                            if is_training else self._test_rpn_post_nms_top_k),
        rpn_nms_threshold=(self._rpn_nms_threshold
                           if is_training else self._test_rpn_nms_threshold),
        rpn_score_threshold=(self._rpn_score_threshold if is_training else
                             self._test_rpn_score_threshold),
        rpn_min_size_threshold=(self._rpn_min_size_threshold if is_training
                                else self._test_rpn_min_size_threshold),
        decode_boxes=True,
        clip_boxes=True,
        use_batched_nms=self._use_batched_nms,
        apply_sigmoid_to_score=True,
        is_box_lrtb=is_box_lrtb,
        rpn_object_scores=object_scores,)
    return proposed_rois, proposed_roi_scores

  def oln_multilevel_propose_rois(self,
                                  rpn_boxes,
                                  rpn_scores,
                                  anchor_boxes,
                                  image_shape,
                                  rpn_pre_nms_top_k=2000,
                                  rpn_post_nms_top_k=1000,
                                  rpn_nms_threshold=0.7,
                                  rpn_score_threshold=0.0,
                                  rpn_min_size_threshold=0.0,
                                  decode_boxes=True,
                                  clip_boxes=True,
                                  use_batched_nms=False,
                                  apply_sigmoid_to_score=True,
                                  is_box_lrtb=False,
                                  rpn_object_scores=None,):
    """Proposes RoIs given a group of candidates from different FPN levels.

    The following describes the steps:
      1. For each individual level:
        a. Adjust scores for each level if specified by rpn_object_scores.
        b. Apply sigmoid transform if specified.
        c. Decode boxes (either of xyhw or left-right-top-bottom format) if
           specified.
        d. Clip boxes if specified.
        e. Filter small boxes and those fall outside image if specified.
        f. Apply pre-NMS filtering including pre-NMS top k and score
           thresholding.
        g. Apply NMS.
      2. Aggregate post-NMS boxes from each level.
      3. Apply an overall top k to generate the final selected RoIs.

    Args:
      rpn_boxes: a dict with keys representing FPN levels and values
        representing box tensors of shape [batch_size, feature_h, feature_w,
        num_anchors * 4].
      rpn_scores: a dict with keys representing FPN levels and values
        representing logit tensors of shape [batch_size, feature_h, feature_w,
        num_anchors].
      anchor_boxes: a dict with keys representing FPN levels and values
        representing anchor box tensors of shape [batch_size, feature_h,
        feature_w, num_anchors * 4].
      image_shape: a tensor of shape [batch_size, 2] where the last dimension
        are [height, width] of the scaled image.
      rpn_pre_nms_top_k: an integer of top scoring RPN proposals *per level*
        to keep before applying NMS. Default: 2000.
      rpn_post_nms_top_k: an integer of top scoring RPN proposals *in total*
        to keep after applying NMS. Default: 1000.
      rpn_nms_threshold: a float between 0 and 1 representing the IoU
        threshold used for NMS. If 0.0, no NMS is applied. Default: 0.7.
      rpn_score_threshold: a float between 0 and 1 representing the minimal
        box score to keep before applying NMS. This is often used as a
        pre-filtering step for better performance. If 0, no filtering is
        applied. Default: 0.
      rpn_min_size_threshold: a float representing the minimal box size in
        each side (w.r.t. the scaled image) to keep before applying NMS. This
        is often used as a pre-filtering step for better performance. If 0, no
        filtering is applied. Default: 0.
      decode_boxes: a boolean indicating whether `rpn_boxes` needs to be
        decoded using `anchor_boxes`. If False, use `rpn_boxes` directly and
        ignore `anchor_boxes`. Default: True.
      clip_boxes: a boolean indicating whether boxes are first clipped to the
        scaled image size before applying NMS. If False, no clipping is
        applied and `image_shape` is ignored. Default: True.
      use_batched_nms: a boolean indicating whether NMS is applied in batch
        using `tf.image.combined_non_max_suppression`. Currently only
        available in CPU/GPU. Default: False.
      apply_sigmoid_to_score: a boolean indicating whether apply sigmoid to
        `rpn_scores` before applying NMS. Default: True.
      is_box_lrtb: a bool indicating whether boxes are in lrtb (=left,right,
        top,bottom) format.
      rpn_object_scores: a predicted objectness score (e.g., centerness). In
        OLN, we use object_scores=centerness as a replacement of the scores
        at each level. A dict with keys representing FPN levels and values
        representing logit tensors of shape [batch_size, feature_h,
        feature_w, num_anchors].

    Returns:
      selected_rois: a tensor of shape [batch_size, rpn_post_nms_top_k, 4],
        representing the box coordinates of the selected proposals w.r.t. the
        scaled image.
      selected_roi_scores: a tensor of shape [batch_size, rpn_post_nms_top_k,
        1], representing the scores of the selected proposals.
    """
    with tf.name_scope('multilevel_propose_rois'):
      rois = []
      roi_scores = []
      image_shape = tf.expand_dims(image_shape, axis=1)
      for level in sorted(rpn_scores.keys()):
        with tf.name_scope('level_%d' % level):
          _, feature_h, feature_w, num_anchors_per_location = (
              rpn_scores[level].get_shape().as_list())

          num_boxes = feature_h * feature_w * num_anchors_per_location
          this_level_scores = tf.reshape(rpn_scores[level], [-1, num_boxes])
          this_level_boxes = tf.reshape(rpn_boxes[level], [-1, num_boxes, 4])
          this_level_anchors = tf.cast(
              tf.reshape(anchor_boxes[level], [-1, num_boxes, 4]),
              dtype=this_level_scores.dtype)

          if rpn_object_scores:
            # OLN: replace the classification logits with the objectness
            # (e.g. centerness) logits for this level.
            this_level_object_scores = rpn_object_scores[level]
            this_level_object_scores = tf.reshape(this_level_object_scores,
                                                  [-1, num_boxes])
            this_level_object_scores = tf.cast(this_level_object_scores,
                                               this_level_scores.dtype)
            this_level_scores = this_level_object_scores

          if apply_sigmoid_to_score:
            this_level_scores = tf.sigmoid(this_level_scores)

          if decode_boxes:
            if is_box_lrtb:
              # Box in left-right-top-bottom format.
              this_level_boxes = box_utils.decode_boxes_lrtb(
                  this_level_boxes, this_level_anchors)
            else:
              # Box in standard x-y-h-w format.
              this_level_boxes = box_utils.decode_boxes(
                  this_level_boxes, this_level_anchors)
          if clip_boxes:
            this_level_boxes = box_utils.clip_boxes(this_level_boxes,
                                                    image_shape)

          if rpn_min_size_threshold > 0.0:
            this_level_boxes, this_level_scores = box_utils.filter_boxes(
                this_level_boxes, this_level_scores, image_shape,
                rpn_min_size_threshold)

          this_level_pre_nms_top_k = min(num_boxes, rpn_pre_nms_top_k)
          this_level_post_nms_top_k = min(num_boxes, rpn_post_nms_top_k)
          if rpn_nms_threshold > 0.0:
            if use_batched_nms:
              this_level_rois, this_level_roi_scores, _, _ = (
                  tf.image.combined_non_max_suppression(
                      tf.expand_dims(this_level_boxes, axis=2),
                      tf.expand_dims(this_level_scores, axis=-1),
                      max_output_size_per_class=this_level_pre_nms_top_k,
                      max_total_size=this_level_post_nms_top_k,
                      iou_threshold=rpn_nms_threshold,
                      score_threshold=rpn_score_threshold,
                      pad_per_class=False,
                      clip_boxes=False))
            else:
              if rpn_score_threshold > 0.0:
                this_level_boxes, this_level_scores = (
                    box_utils.filter_boxes_by_scores(this_level_boxes,
                                                     this_level_scores,
                                                     rpn_score_threshold))
              this_level_boxes, this_level_scores = box_utils.top_k_boxes(
                  this_level_boxes, this_level_scores,
                  k=this_level_pre_nms_top_k)
              this_level_roi_scores, this_level_rois = (
                  nms.sorted_non_max_suppression_padded(
                      this_level_scores,
                      this_level_boxes,
                      max_output_size=this_level_post_nms_top_k,
                      iou_threshold=rpn_nms_threshold))
          else:
            # NMS disabled: keep the top-k boxes by score.
            # Bug fix: the original passed the not-yet-assigned
            # `this_level_rois` into top_k_boxes, raising UnboundLocalError
            # whenever rpn_nms_threshold == 0.0; it must operate on
            # `this_level_boxes` like the NMS path above.
            this_level_rois, this_level_roi_scores = box_utils.top_k_boxes(
                this_level_boxes, this_level_scores,
                k=this_level_post_nms_top_k)

          rois.append(this_level_rois)
          roi_scores.append(this_level_roi_scores)

      # Aggregate candidates across FPN levels.
      all_rois = tf.concat(rois, axis=1)
      all_roi_scores = tf.concat(roi_scores, axis=1)

      with tf.name_scope('top_k_rois'):
        _, num_valid_rois = all_roi_scores.get_shape().as_list()
        overall_top_k = min(num_valid_rois, rpn_post_nms_top_k)

        selected_rois, selected_roi_scores = box_utils.top_k_boxes(
            all_rois, all_roi_scores, k=overall_top_k)

    return selected_rois, selected_roi_scores
official/vision/detection/ops/target_ops.py
View file @
d4f37e87
...
...
@@ -342,6 +342,180 @@ class ROISampler(object):
sampled_gt_indices
)
class ROIScoreSampler(ROISampler):
  """Samples RoIs, RoI-scores and creates training targets."""

  def __call__(self, rois, roi_scores, gt_boxes, gt_classes):
    """Sample and assign RoIs for training.

    Args:
      rois: a tensor of shape of [batch_size, N, 4]. N is the number of
        proposals before groundtruth assignment. The last dimension is the
        box coordinates w.r.t. the scaled images in [ymin, xmin, ymax, xmax]
        format.
      roi_scores: a tensor of shape of [batch_size, N], the objectness score
        of each proposal in `rois` (sampled alongside the boxes).
      gt_boxes: a tensor of shape of [batch_size, MAX_NUM_INSTANCES, 4]. The
        coordinates of gt_boxes are in the pixel coordinates of the scaled
        image. This tensor might have padding of values -1 indicating the
        invalid box coordinates.
      gt_classes: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES].
        This tensor might have paddings with values of -1 indicating the
        invalid classes.

    Returns:
      sampled_rois: a tensor of shape of [batch_size, K, 4], representing the
        coordinates of the sampled RoIs, where K is the number of the sampled
        RoIs, i.e. K = num_samples_per_image.
      sampled_roi_scores: a tensor of shape of [batch_size, K], the scores of
        the sampled RoIs (same indices as `sampled_rois`).
      sampled_gt_boxes: a tensor of shape of [batch_size, K, 4], storing the
        box coordinates of the matched groundtruth boxes of the samples RoIs.
      sampled_gt_classes: a tensor of shape of [batch_size, K], storing the
        classes of the matched groundtruth boxes of the sampled RoIs.
    """
    # Sampling hyper-parameters come from the ROISampler base-class config.
    (sampled_rois, sampled_roi_scores, sampled_gt_boxes, sampled_gt_classes,
     sampled_gt_indices) = (
         self.assign_and_sample_proposals_and_scores(
             rois,
             roi_scores,
             gt_boxes,
             gt_classes,
             num_samples_per_image=self._num_samples_per_image,
             mix_gt_boxes=self._mix_gt_boxes,
             fg_fraction=self._fg_fraction,
             fg_iou_thresh=self._fg_iou_thresh,
             bg_iou_thresh_hi=self._bg_iou_thresh_hi,
             bg_iou_thresh_lo=self._bg_iou_thresh_lo))
    return (sampled_rois, sampled_roi_scores, sampled_gt_boxes,
            sampled_gt_classes, sampled_gt_indices)

  def assign_and_sample_proposals_and_scores(self,
                                             proposed_boxes,
                                             proposed_scores,
                                             gt_boxes,
                                             gt_classes,
                                             num_samples_per_image=512,
                                             mix_gt_boxes=True,
                                             fg_fraction=0.25,
                                             fg_iou_thresh=0.5,
                                             bg_iou_thresh_hi=0.5,
                                             bg_iou_thresh_lo=0.0):
    """Assigns the proposals with groundtruth classes and performs subsampling.

    Given `proposed_boxes`, `gt_boxes`, and `gt_classes`, the function uses
    the following algorithm to generate the final `num_samples_per_image`
    RoIs.
      1. Calculates the IoU between each proposal box and each gt_boxes.
      2. Assigns each proposed box with a groundtruth class and box by
         choosing the largest IoU overlap.
      3. Samples `num_samples_per_image` boxes from all proposed boxes, and
         returns box_targets, class_targets, and RoIs.

    Args:
      proposed_boxes: a tensor of shape of [batch_size, N, 4]. N is the
        number of proposals before groundtruth assignment. The last dimension
        is the box coordinates w.r.t. the scaled images in
        [ymin, xmin, ymax, xmax] format.
      proposed_scores: a tensor of shape of [batch_size, N]. N is the number
        of proposals before groundtruth assignment. It is the rpn scores for
        all proposed boxes which can be either their classification or
        centerness scores.
      gt_boxes: a tensor of shape of [batch_size, MAX_NUM_INSTANCES, 4]. The
        coordinates of gt_boxes are in the pixel coordinates of the scaled
        image. This tensor might have padding of values -1 indicating the
        invalid box coordinates.
      gt_classes: a tensor with a shape of [batch_size, MAX_NUM_INSTANCES].
        This tensor might have paddings with values of -1 indicating the
        invalid classes.
      num_samples_per_image: a integer represents RoI minibatch size per
        image.
      mix_gt_boxes: a bool indicating whether to mix the groundtruth boxes
        before sampling proposals.
      fg_fraction: a float represents the target fraction of RoI minibatch
        that is labeled foreground (i.e., class > 0).
      fg_iou_thresh: a float represents the IoU overlap threshold for an RoI
        to be considered foreground (if >= fg_iou_thresh).
      bg_iou_thresh_hi: a float represents the IoU overlap threshold for an
        RoI to be considered background (class = 0 if overlap in [LO, HI)).
      bg_iou_thresh_lo: a float represents the IoU overlap threshold for an
        RoI to be considered background (class = 0 if overlap in [LO, HI)).

    Returns:
      sampled_rois: a tensor of shape of [batch_size, K, 4], representing the
        coordinates of the sampled RoIs, where K is the number of the sampled
        RoIs, i.e. K = num_samples_per_image.
      sampled_scores: a tensor of shape of [batch_size, K], representing the
        confidence score of the sampled RoIs, where K is the number of the
        sampled RoIs, i.e. K = num_samples_per_image.
      sampled_gt_boxes: a tensor of shape of [batch_size, K, 4], storing the
        box coordinates of the matched groundtruth boxes of the samples RoIs.
      sampled_gt_classes: a tensor of shape of [batch_size, K], storing the
        classes of the matched groundtruth boxes of the sampled RoIs.
      sampled_gt_indices: a tensor of shape of [batch_size, K], storing the
        indices of the sampled groundtruth boxes in the original `gt_boxes`
        tensor, i.e. gt_boxes[sampled_gt_indices[:, i]] =
        sampled_gt_boxes[:, i].
    """
    with tf.name_scope('sample_proposals_and_scores'):
      if mix_gt_boxes:
        # Append groundtruth boxes to the proposal pool; each gt box gets a
        # score of 1.0 so it is never filtered out by score.
        boxes = tf.concat([proposed_boxes, gt_boxes], axis=1)
        gt_scores = tf.ones_like(gt_boxes[:, :, 0])
        scores = tf.concat([proposed_scores, gt_scores], axis=1)
      else:
        boxes = proposed_boxes
        scores = proposed_scores

      (matched_gt_boxes, matched_gt_classes, matched_gt_indices, matched_iou,
       _) = box_matching(boxes, gt_boxes, gt_classes)

      # Partition proposals by IoU with their best-matching groundtruth:
      # foreground (> fg_iou_thresh), background ([lo, hi)), or ignored
      # (negative IoU, i.e. matched to padded/invalid groundtruth).
      positive_match = tf.greater(matched_iou, fg_iou_thresh)
      negative_match = tf.logical_and(
          tf.greater_equal(matched_iou, bg_iou_thresh_lo),
          tf.less(matched_iou, bg_iou_thresh_hi))
      ignored_match = tf.less(matched_iou, 0.0)

      # re-assign negatively matched boxes to the background class.
      matched_gt_classes = tf.where(negative_match,
                                    tf.zeros_like(matched_gt_classes),
                                    matched_gt_classes)
      matched_gt_indices = tf.where(negative_match,
                                    tf.zeros_like(matched_gt_indices),
                                    matched_gt_indices)

      sample_candidates = tf.logical_and(
          tf.logical_or(positive_match, negative_match),
          tf.logical_not(ignored_match))

      sampler = (
          balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
              positive_fraction=fg_fraction, is_static=True))

      batch_size, _ = sample_candidates.get_shape().as_list()

      # Subsample each image independently to a fixed-size, class-balanced
      # minibatch of indicators.
      sampled_indicators = []
      for i in range(batch_size):
        sampled_indicator = sampler.subsample(sample_candidates[i],
                                              num_samples_per_image,
                                              positive_match[i])
        sampled_indicators.append(sampled_indicator)
      sampled_indicators = tf.stack(sampled_indicators)

      # top_k over the 0/1 indicators yields the indices of the selected
      # proposals (exactly num_samples_per_image per image).
      _, sampled_indices = tf.nn.top_k(
          tf.cast(sampled_indicators, dtype=tf.int32),
          k=num_samples_per_image,
          sorted=True)

      # Build [batch, sample] index pairs for gather_nd.
      sampled_indices_shape = tf.shape(sampled_indices)
      batch_indices = (
          tf.expand_dims(tf.range(sampled_indices_shape[0]), axis=-1) *
          tf.ones([1, sampled_indices_shape[-1]], dtype=tf.int32))
      gather_nd_indices = tf.stack([batch_indices, sampled_indices], axis=-1)

      sampled_rois = tf.gather_nd(boxes, gather_nd_indices)
      sampled_roi_scores = tf.gather_nd(scores, gather_nd_indices)
      sampled_gt_boxes = tf.gather_nd(matched_gt_boxes, gather_nd_indices)
      sampled_gt_classes = tf.gather_nd(matched_gt_classes,
                                        gather_nd_indices)
      sampled_gt_indices = tf.gather_nd(matched_gt_indices,
                                        gather_nd_indices)

    return (sampled_rois, sampled_roi_scores, sampled_gt_boxes,
            sampled_gt_classes, sampled_gt_indices)
class
MaskSampler
(
object
):
"""Samples and creates mask training targets."""
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment