dcuai / dlexamples / Commits

Commit 142dcf29, authored Apr 15, 2022 by hepj
Parent: 7f99c1c3

    Add Conformer code
Changes (317): showing 20 changed files with 6422 additions and 0 deletions (+6422 -0).
PyTorch/NLP/Conformer-main/mmdetection/mmdet/models/dense_heads/dense_test_mixins.py  +97 -0
PyTorch/NLP/Conformer-main/mmdetection/mmdet/models/dense_heads/embedding_rpn_head.py  +120 -0
PyTorch/NLP/Conformer-main/mmdetection/mmdet/models/dense_heads/fcos_head.py  +577 -0
PyTorch/NLP/Conformer-main/mmdetection/mmdet/models/dense_heads/fovea_head.py  +341 -0
PyTorch/NLP/Conformer-main/mmdetection/mmdet/models/dense_heads/free_anchor_retina_head.py  +270 -0
PyTorch/NLP/Conformer-main/mmdetection/mmdet/models/dense_heads/fsaf_head.py  +422 -0
PyTorch/NLP/Conformer-main/mmdetection/mmdet/models/dense_heads/ga_retina_head.py  +109 -0
PyTorch/NLP/Conformer-main/mmdetection/mmdet/models/dense_heads/ga_rpn_head.py  +133 -0
PyTorch/NLP/Conformer-main/mmdetection/mmdet/models/dense_heads/gfl_head.py  +632 -0
PyTorch/NLP/Conformer-main/mmdetection/mmdet/models/dense_heads/guided_anchor_head.py  +860 -0
PyTorch/NLP/Conformer-main/mmdetection/mmdet/models/dense_heads/nasfcos_head.py  +75 -0
PyTorch/NLP/Conformer-main/mmdetection/mmdet/models/dense_heads/paa_head.py  +655 -0
PyTorch/NLP/Conformer-main/mmdetection/mmdet/models/dense_heads/pisa_retinanet_head.py  +154 -0
PyTorch/NLP/Conformer-main/mmdetection/mmdet/models/dense_heads/pisa_ssd_head.py  +139 -0
PyTorch/NLP/Conformer-main/mmdetection/mmdet/models/dense_heads/reppoints_head.py  +763 -0
PyTorch/NLP/Conformer-main/mmdetection/mmdet/models/dense_heads/retina_head.py  +114 -0
PyTorch/NLP/Conformer-main/mmdetection/mmdet/models/dense_heads/retina_sepbn_head.py  +113 -0
PyTorch/NLP/Conformer-main/mmdetection/mmdet/models/dense_heads/rpn_head.py  +168 -0
PyTorch/NLP/Conformer-main/mmdetection/mmdet/models/dense_heads/rpn_test_mixin.py  +59 -0
PyTorch/NLP/Conformer-main/mmdetection/mmdet/models/dense_heads/sabl_retina_head.py  +621 -0
PyTorch/NLP/Conformer-main/mmdetection/mmdet/models/dense_heads/dense_test_mixins.py (new file, 0 → 100644)
from inspect import signature

import torch

from mmdet.core import bbox2result, bbox_mapping_back, multiclass_nms


class BBoxTestMixin(object):
    """Mixin class for test time augmentation of bboxes."""

    def merge_aug_bboxes(self, aug_bboxes, aug_scores, img_metas):
        """Merge augmented detection bboxes and scores.

        Args:
            aug_bboxes (list[Tensor]): shape (n, 4*#class)
            aug_scores (list[Tensor] or None): shape (n, #class)
            img_metas (list[list[dict]]): meta information of each image.

        Returns:
            tuple: (bboxes, scores)
        """
        recovered_bboxes = []
        for bboxes, img_info in zip(aug_bboxes, img_metas):
            img_shape = img_info[0]['img_shape']
            scale_factor = img_info[0]['scale_factor']
            flip = img_info[0]['flip']
            flip_direction = img_info[0]['flip_direction']
            bboxes = bbox_mapping_back(bboxes, img_shape, scale_factor, flip,
                                       flip_direction)
            recovered_bboxes.append(bboxes)
        bboxes = torch.cat(recovered_bboxes, dim=0)
        if aug_scores is None:
            return bboxes
        else:
            scores = torch.cat(aug_scores, dim=0)
            return bboxes, scores

    def aug_test_bboxes(self, feats, img_metas, rescale=False):
        """Test det bboxes with test time augmentation.

        Args:
            feats (list[Tensor]): the outer list indicates test-time
                augmentations and inner Tensor should have a shape NxCxHxW,
                which contains features for all images in the batch.
            img_metas (list[list[dict]]): the outer list indicates test-time
                augs (multiscale, flip, etc.) and the inner list indicates
                images in a batch. each dict has image information.
            rescale (bool, optional): Whether to rescale the results.
                Defaults to False.

        Returns:
            list[ndarray]: bbox results of each class
        """
        # check with_nms argument
        gb_sig = signature(self.get_bboxes)
        gb_args = [p.name for p in gb_sig.parameters.values()]
        gbs_sig = signature(self._get_bboxes_single)
        gbs_args = [p.name for p in gbs_sig.parameters.values()]
        assert ('with_nms' in gb_args) and ('with_nms' in gbs_args), \
            f'{self.__class__.__name__}' \
            ' does not support test-time augmentation'

        aug_bboxes = []
        aug_scores = []
        aug_factors = []  # score_factors for NMS
        for x, img_meta in zip(feats, img_metas):
            # only one image in the batch
            outs = self.forward(x)
            bbox_inputs = outs + (img_meta, self.test_cfg, False, False)
            bbox_outputs = self.get_bboxes(*bbox_inputs)[0]
            aug_bboxes.append(bbox_outputs[0])
            aug_scores.append(bbox_outputs[1])
            # bbox_outputs of some detectors (e.g., ATSS, FCOS, YOLOv3)
            # contains additional element to adjust scores before NMS
            if len(bbox_outputs) >= 3:
                aug_factors.append(bbox_outputs[2])

        # after merging, bboxes will be rescaled to the original image size
        merged_bboxes, merged_scores = self.merge_aug_bboxes(
            aug_bboxes, aug_scores, img_metas)
        merged_factors = torch.cat(aug_factors, dim=0) if aug_factors else None
        det_bboxes, det_labels = multiclass_nms(
            merged_bboxes,
            merged_scores,
            self.test_cfg.score_thr,
            self.test_cfg.nms,
            self.test_cfg.max_per_img,
            score_factors=merged_factors)

        if rescale:
            _det_bboxes = det_bboxes
        else:
            _det_bboxes = det_bboxes.clone()
            _det_bboxes[:, :4] *= det_bboxes.new_tensor(
                img_metas[0][0]['scale_factor'])
        bbox_results = bbox2result(_det_bboxes, det_labels, self.num_classes)
        return bbox_results
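
The test-time-augmentation entry point above gates itself with `inspect.signature`, refusing TTA for heads whose `get_bboxes` and `_get_bboxes_single` lack a `with_nms` parameter. A minimal standalone sketch of that introspection pattern (the `ToyHead` class here is hypothetical, purely for illustration):

from inspect import signature


class ToyHead:
    # hypothetical stand-in for a dense head; only the signature matters
    def get_bboxes(self, feats, img_metas, cfg=None, with_nms=True):
        return []


def supports_kwarg(method, name):
    """Return True if `method` declares a parameter called `name`."""
    return name in [p.name for p in signature(method).parameters.values()]


head = ToyHead()
assert supports_kwarg(head.get_bboxes, 'with_nms')       # TTA is usable
assert not supports_kwarg(head.get_bboxes, 'score_thr')  # would fail the check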
PyTorch/NLP/Conformer-main/mmdetection/mmdet/models/dense_heads/embedding_rpn_head.py (new file, 0 → 100644)
import mmcv
import torch
import torch.nn as nn
from mmcv import tensor2imgs

from mmdet.models.builder import HEADS
from ...core import bbox_cxcywh_to_xyxy


@HEADS.register_module()
class EmbeddingRPNHead(nn.Module):
    """RPNHead in the `Sparse R-CNN <https://arxiv.org/abs/2011.12450>`_ .

    Unlike traditional RPNHead, this module does not need FPN input, but just
    decode `init_proposal_bboxes` and expand the first dimension of
    `init_proposal_bboxes` and `init_proposal_features` to the batch_size.

    Args:
        num_proposals (int): Number of init_proposals. Default 100.
        proposal_feature_channel (int): Channel number of
            init_proposal_feature. Defaults to 256.
    """

    def __init__(self,
                 num_proposals=100,
                 proposal_feature_channel=256,
                 **kwargs):
        super(EmbeddingRPNHead, self).__init__()
        self.num_proposals = num_proposals
        self.proposal_feature_channel = proposal_feature_channel
        self._init_layers()

    def _init_layers(self):
        """Initialize a sparse set of proposal boxes and proposal features."""
        self.init_proposal_bboxes = nn.Embedding(self.num_proposals, 4)
        self.init_proposal_features = nn.Embedding(
            self.num_proposals, self.proposal_feature_channel)

    def init_weights(self):
        """Initialize the init_proposal_bboxes as normalized
        [c_x, c_y, w, h], and we initialize it to the size of the entire
        image.
        """
        nn.init.constant_(self.init_proposal_bboxes.weight[:, :2], 0.5)
        nn.init.constant_(self.init_proposal_bboxes.weight[:, 2:], 1)

    def _decode_init_proposals(self, imgs, img_metas):
        """Decode init_proposal_bboxes according to the size of images and
        expand dimension of init_proposal_features to batch_size.

        Args:
            imgs (list[Tensor]): List of FPN features.
            img_metas (list[dict]): List of meta-information of
                images. Need the img_shape to decode the init_proposals.

        Returns:
            Tuple(Tensor):

                - proposals (Tensor): Decoded proposal bboxes,
                  has shape (batch_size, num_proposals, 4).
                - init_proposal_features (Tensor): Expanded proposal
                  features, has shape
                  (batch_size, num_proposals, proposal_feature_channel).
                - imgs_whwh (Tensor): Tensor with shape
                  (batch_size, 4), the dimension means
                  [img_width, img_height, img_width, img_height].
        """
        proposals = self.init_proposal_bboxes.weight.clone()
        proposals = bbox_cxcywh_to_xyxy(proposals)
        num_imgs = len(imgs[0])
        imgs_whwh = []
        for meta in img_metas:
            h, w, _ = meta['img_shape']
            imgs_whwh.append(imgs[0].new_tensor([[w, h, w, h]]))
        imgs_whwh = torch.cat(imgs_whwh, dim=0)
        imgs_whwh = imgs_whwh[:, None, :]

        # imgs_whwh has shape (batch_size, 1, 4)
        # The shape of proposals change from (num_proposals, 4)
        # to (batch_size, num_proposals, 4)
        proposals = proposals * imgs_whwh

        init_proposal_features = self.init_proposal_features.weight.clone()
        init_proposal_features = init_proposal_features[None].expand(
            num_imgs, *init_proposal_features.size())
        return proposals, init_proposal_features, imgs_whwh

    def forward_dummy(self, img, img_metas):
        """Dummy forward function.

        Used in flops calculation.
        """
        return self._decode_init_proposals(img, img_metas)

    def forward_train(self, img, img_metas):
        """Forward function in training stage."""
        return self._decode_init_proposals(img, img_metas)

    def simple_test_rpn(self, img, img_metas):
        """Forward function in testing stage."""
        return self._decode_init_proposals(img, img_metas)

    def show_result(self, data):
        """Show the init proposals in EmbeddingRPN.

        Args:
            data (dict): Dict contains image and
                corresponding meta information.
        """
        img_tensor = data['img'][0]
        img_metas = data['img_metas'][0].data[0]
        imgs = tensor2imgs(img_tensor, **img_metas[0]['img_norm_cfg'])
        # _decode_init_proposals returns three values
        proposals, _, _ = self._decode_init_proposals(data['img'],
                                                      data['img_metas'])
        assert len(imgs) == len(img_metas)
        for img, img_meta in zip(imgs, img_metas):
            h, w, _ = img_meta['img_shape']
            img_show = img[:h, :w, :]
            mmcv.imshow_bboxes(img_show, proposals)
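
`_decode_init_proposals` above amounts to converting the learned normalized [cx, cy, w, h] embeddings to corner form and rescaling them by each image's [w, h, w, h]. A small self-contained sketch of the same arithmetic (the inline `cxcywh_to_xyxy` helper is assumed to mirror `bbox_cxcywh_to_xyxy`):

import torch


def cxcywh_to_xyxy(boxes):
    # assumed to mirror mmdet's bbox_cxcywh_to_xyxy:
    # (cx, cy, w, h) -> (x1, y1, x2, y2)
    cx, cy, w, h = boxes.unbind(-1)
    return torch.stack([cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2], -1)


# two normalized proposals, as nn.Embedding(num_proposals, 4) would hold
proposals = torch.tensor([[0.50, 0.50, 1.0, 1.0],    # whole image
                          [0.25, 0.25, 0.5, 0.5]])   # top-left quarter
proposals = cxcywh_to_xyxy(proposals)

# one image of width 640 and height 480 -> [w, h, w, h]
imgs_whwh = torch.tensor([[640., 480., 640., 480.]])[:, None, :]
decoded = proposals * imgs_whwh  # (batch_size=1, num_proposals=2, 4)
print(decoded[0, 0])  # tensor([  0.,   0., 640., 480.])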
PyTorch/NLP/Conformer-main/mmdetection/mmdet/models/dense_heads/fcos_head.py (new file, 0 → 100644)
import torch
import torch.nn as nn
import torch.nn.functional as F
from mmcv.cnn import Scale, normal_init
from mmcv.runner import force_fp32

from mmdet.core import distance2bbox, multi_apply, multiclass_nms, reduce_mean
from ..builder import HEADS, build_loss
from .anchor_free_head import AnchorFreeHead

INF = 1e8


@HEADS.register_module()
class FCOSHead(AnchorFreeHead):
    """Anchor-free head used in `FCOS <https://arxiv.org/abs/1904.01355>`_.

    The FCOS head does not use anchor boxes. Instead bounding boxes are
    predicted at each pixel and a centerness measure is used to suppress
    low-quality predictions. Here norm_on_bbox, centerness_on_reg and
    dcn_on_last_conv are training tricks used in the official repo, which
    bring remarkable mAP gains of up to 4.9. Please see
    https://github.com/tianzhi0549/FCOS for more detail.

    Args:
        num_classes (int): Number of categories excluding the background
            category.
        in_channels (int): Number of channels in the input feature map.
        strides (list[int] | list[tuple[int, int]]): Strides of points
            in multiple feature levels. Default: (4, 8, 16, 32, 64).
        regress_ranges (tuple[tuple[int, int]]): Regress range of multiple
            level points.
        center_sampling (bool): If true, use center sampling. Default: False.
        center_sample_radius (float): Radius of center sampling. Default: 1.5.
        norm_on_bbox (bool): If true, normalize the regression targets
            with FPN strides. Default: False.
        centerness_on_reg (bool): If true, position centerness on the
            regress branch. Please refer to
            https://github.com/tianzhi0549/FCOS/issues/89#issuecomment-516877042.
            Default: False.
        conv_bias (bool | str): If specified as `auto`, it will be decided by
            the norm_cfg. Bias of conv will be set as True if `norm_cfg` is
            None, otherwise False. Default: "auto".
        loss_cls (dict): Config of classification loss.
        loss_bbox (dict): Config of localization loss.
        loss_centerness (dict): Config of centerness loss.
        norm_cfg (dict): dictionary to construct and config norm layer.
            Default: norm_cfg=dict(type='GN', num_groups=32,
            requires_grad=True).

    Example:
        >>> self = FCOSHead(11, 7)
        >>> feats = [torch.rand(1, 7, s, s) for s in [4, 8, 16, 32, 64]]
        >>> cls_score, bbox_pred, centerness = self.forward(feats)
        >>> assert len(cls_score) == len(self.scales)
    """  # noqa: E501

    def __init__(self,
                 num_classes,
                 in_channels,
                 regress_ranges=((-1, 64), (64, 128), (128, 256), (256, 512),
                                 (512, INF)),
                 center_sampling=False,
                 center_sample_radius=1.5,
                 norm_on_bbox=False,
                 centerness_on_reg=False,
                 loss_cls=dict(
                     type='FocalLoss',
                     use_sigmoid=True,
                     gamma=2.0,
                     alpha=0.25,
                     loss_weight=1.0),
                 loss_bbox=dict(type='IoULoss', loss_weight=1.0),
                 loss_centerness=dict(
                     type='CrossEntropyLoss',
                     use_sigmoid=True,
                     loss_weight=1.0),
                 norm_cfg=dict(type='GN', num_groups=32, requires_grad=True),
                 **kwargs):
        self.regress_ranges = regress_ranges
        self.center_sampling = center_sampling
        self.center_sample_radius = center_sample_radius
        self.norm_on_bbox = norm_on_bbox
        self.centerness_on_reg = centerness_on_reg
        super().__init__(
            num_classes,
            in_channels,
            loss_cls=loss_cls,
            loss_bbox=loss_bbox,
            norm_cfg=norm_cfg,
            **kwargs)
        self.loss_centerness = build_loss(loss_centerness)

    def _init_layers(self):
        """Initialize layers of the head."""
        super()._init_layers()
        self.conv_centerness = nn.Conv2d(self.feat_channels, 1, 3, padding=1)
        self.scales = nn.ModuleList([Scale(1.0) for _ in self.strides])

    def init_weights(self):
        """Initialize weights of the head."""
        super().init_weights()
        normal_init(self.conv_centerness, std=0.01)

    def forward(self, feats):
        """Forward features from the upstream network.

        Args:
            feats (tuple[Tensor]): Features from the upstream network, each is
                a 4D-tensor.

        Returns:
            tuple:
                cls_scores (list[Tensor]): Box scores for each scale level, \
                    each is a 4D-tensor, the channel number is \
                    num_points * num_classes.
                bbox_preds (list[Tensor]): Box energies / deltas for each \
                    scale level, each is a 4D-tensor, the channel number is \
                    num_points * 4.
                centernesses (list[Tensor]): centerness for each scale level, \
                    each is a 4D-tensor, the channel number is num_points * 1.
        """
        return multi_apply(self.forward_single, feats, self.scales,
                           self.strides)

    def forward_single(self, x, scale, stride):
        """Forward features of a single scale level.

        Args:
            x (Tensor): FPN feature maps of the specified stride.
            scale (:obj: `mmcv.cnn.Scale`): Learnable scale module to resize
                the bbox prediction.
            stride (int): The corresponding stride for feature maps, only
                used to normalize the bbox prediction when self.norm_on_bbox
                is True.

        Returns:
            tuple: scores for each class, bbox predictions and centerness \
                predictions of input feature maps.
        """
        cls_score, bbox_pred, cls_feat, reg_feat = super().forward_single(x)
        if self.centerness_on_reg:
            centerness = self.conv_centerness(reg_feat)
        else:
            centerness = self.conv_centerness(cls_feat)
        # scale the bbox_pred of different level
        # float to avoid overflow when enabling FP16
        bbox_pred = scale(bbox_pred).float()
        if self.norm_on_bbox:
            bbox_pred = F.relu(bbox_pred)
            if not self.training:
                bbox_pred *= stride
        else:
            bbox_pred = bbox_pred.exp()
        return cls_score, bbox_pred, centerness

    @force_fp32(apply_to=('cls_scores', 'bbox_preds', 'centernesses'))
    def loss(self,
             cls_scores,
             bbox_preds,
             centernesses,
             gt_bboxes,
             gt_labels,
             img_metas,
             gt_bboxes_ignore=None):
        """Compute loss of the head.

        Args:
            cls_scores (list[Tensor]): Box scores for each scale level,
                each is a 4D-tensor, the channel number is
                num_points * num_classes.
            bbox_preds (list[Tensor]): Box energies / deltas for each scale
                level, each is a 4D-tensor, the channel number is
                num_points * 4.
            centernesses (list[Tensor]): centerness for each scale level, each
                is a 4D-tensor, the channel number is num_points * 1.
            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with
                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.
            gt_labels (list[Tensor]): class indices corresponding to each box
            img_metas (list[dict]): Meta information of each image, e.g.,
                image size, scaling factor, etc.
            gt_bboxes_ignore (None | list[Tensor]): specify which bounding
                boxes can be ignored when computing the loss.

        Returns:
            dict[str, Tensor]: A dictionary of loss components.
        """
        assert len(cls_scores) == len(bbox_preds) == len(centernesses)
        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
        all_level_points = self.get_points(featmap_sizes, bbox_preds[0].dtype,
                                           bbox_preds[0].device)
        labels, bbox_targets = self.get_targets(all_level_points, gt_bboxes,
                                                gt_labels)

        num_imgs = cls_scores[0].size(0)
        # flatten cls_scores, bbox_preds and centerness
        flatten_cls_scores = [
            cls_score.permute(0, 2, 3, 1).reshape(-1, self.cls_out_channels)
            for cls_score in cls_scores
        ]
        flatten_bbox_preds = [
            bbox_pred.permute(0, 2, 3, 1).reshape(-1, 4)
            for bbox_pred in bbox_preds
        ]
        flatten_centerness = [
            centerness.permute(0, 2, 3, 1).reshape(-1)
            for centerness in centernesses
        ]
        flatten_cls_scores = torch.cat(flatten_cls_scores)
        flatten_bbox_preds = torch.cat(flatten_bbox_preds)
        flatten_centerness = torch.cat(flatten_centerness)
        flatten_labels = torch.cat(labels)
        flatten_bbox_targets = torch.cat(bbox_targets)
        # repeat points to align with bbox_preds
        flatten_points = torch.cat(
            [points.repeat(num_imgs, 1) for points in all_level_points])

        # FG cat_id: [0, num_classes -1], BG cat_id: num_classes
        bg_class_ind = self.num_classes
        pos_inds = ((flatten_labels >= 0)
                    & (flatten_labels < bg_class_ind)).nonzero().reshape(-1)
        num_pos = torch.tensor(
            len(pos_inds), dtype=torch.float, device=bbox_preds[0].device)
        num_pos = max(reduce_mean(num_pos), 1.0)
        loss_cls = self.loss_cls(
            flatten_cls_scores, flatten_labels, avg_factor=num_pos)

        pos_bbox_preds = flatten_bbox_preds[pos_inds]
        pos_centerness = flatten_centerness[pos_inds]

        if len(pos_inds) > 0:
            pos_bbox_targets = flatten_bbox_targets[pos_inds]
            pos_centerness_targets = self.centerness_target(pos_bbox_targets)
            pos_points = flatten_points[pos_inds]
            pos_decoded_bbox_preds = distance2bbox(pos_points, pos_bbox_preds)
            pos_decoded_target_preds = distance2bbox(pos_points,
                                                     pos_bbox_targets)
            # centerness weighted iou loss
            centerness_denorm = max(
                reduce_mean(pos_centerness_targets.sum().detach()), 1e-6)
            loss_bbox = self.loss_bbox(
                pos_decoded_bbox_preds,
                pos_decoded_target_preds,
                weight=pos_centerness_targets,
                avg_factor=centerness_denorm)
            loss_centerness = self.loss_centerness(
                pos_centerness, pos_centerness_targets, avg_factor=num_pos)
        else:
            loss_bbox = pos_bbox_preds.sum()
            loss_centerness = pos_centerness.sum()

        return dict(
            loss_cls=loss_cls,
            loss_bbox=loss_bbox,
            loss_centerness=loss_centerness)

    @force_fp32(apply_to=('cls_scores', 'bbox_preds', 'centernesses'))
    def get_bboxes(self,
                   cls_scores,
                   bbox_preds,
                   centernesses,
                   img_metas,
                   cfg=None,
                   rescale=False,
                   with_nms=True):
        """Transform network output for a batch into bbox predictions.

        Args:
            cls_scores (list[Tensor]): Box scores for each scale level
                with shape (N, num_points * num_classes, H, W).
            bbox_preds (list[Tensor]): Box energies / deltas for each scale
                level with shape (N, num_points * 4, H, W).
            centernesses (list[Tensor]): Centerness for each scale level with
                shape (N, num_points * 1, H, W).
            img_metas (list[dict]): Meta information of each image, e.g.,
                image size, scaling factor, etc.
            cfg (mmcv.Config | None): Test / postprocessing configuration,
                if None, test_cfg would be used. Default: None.
            rescale (bool): If True, return boxes in original image space.
                Default: False.
            with_nms (bool): If True, do nms before return boxes.
                Default: True.

        Returns:
            list[tuple[Tensor, Tensor]]: Each item in result_list is 2-tuple.
                The first item is an (n, 5) tensor, where the first 4 columns
                are bounding box positions (tl_x, tl_y, br_x, br_y) and the
                5-th column is a score between 0 and 1. The second item is a
                (n,) tensor where each item is the predicted class label of the
                corresponding box.
        """
        assert len(cls_scores) == len(bbox_preds)
        num_levels = len(cls_scores)

        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
        mlvl_points = self.get_points(featmap_sizes, bbox_preds[0].dtype,
                                      bbox_preds[0].device)
        result_list = []
        for img_id in range(len(img_metas)):
            cls_score_list = [
                cls_scores[i][img_id].detach() for i in range(num_levels)
            ]
            bbox_pred_list = [
                bbox_preds[i][img_id].detach() for i in range(num_levels)
            ]
            centerness_pred_list = [
                centernesses[i][img_id].detach() for i in range(num_levels)
            ]
            img_shape = img_metas[img_id]['img_shape']
            scale_factor = img_metas[img_id]['scale_factor']
            det_bboxes = self._get_bboxes_single(
                cls_score_list, bbox_pred_list, centerness_pred_list,
                mlvl_points, img_shape, scale_factor, cfg, rescale, with_nms)
            result_list.append(det_bboxes)
        return result_list

    def _get_bboxes_single(self,
                           cls_scores,
                           bbox_preds,
                           centernesses,
                           mlvl_points,
                           img_shape,
                           scale_factor,
                           cfg,
                           rescale=False,
                           with_nms=True):
        """Transform outputs for a single batch item into bbox predictions.

        Args:
            cls_scores (list[Tensor]): Box scores for a single scale level
                with shape (num_points * num_classes, H, W).
            bbox_preds (list[Tensor]): Box energies / deltas for a single scale
                level with shape (num_points * 4, H, W).
            centernesses (list[Tensor]): Centerness for a single scale level
                with shape (num_points * 1, H, W).
            mlvl_points (list[Tensor]): Box reference for a single scale level
                with shape (num_total_points, 4).
            img_shape (tuple[int]): Shape of the input image,
                (height, width, 3).
            scale_factor (ndarray): Scale factor of the image arranged as
                (w_scale, h_scale, w_scale, h_scale).
            cfg (mmcv.Config | None): Test / postprocessing configuration,
                if None, test_cfg would be used.
            rescale (bool): If True, return boxes in original image space.
                Default: False.
            with_nms (bool): If True, do nms before return boxes.
                Default: True.

        Returns:
            tuple(Tensor):
                det_bboxes (Tensor): BBox predictions in shape (n, 5), where
                    the first 4 columns are bounding box positions
                    (tl_x, tl_y, br_x, br_y) and the 5-th column is a score
                    between 0 and 1.
                det_labels (Tensor): A (n,) tensor where each item is the
                    predicted class label of the corresponding box.
        """
        cfg = self.test_cfg if cfg is None else cfg
        assert len(cls_scores) == len(bbox_preds) == len(mlvl_points)
        mlvl_bboxes = []
        mlvl_scores = []
        mlvl_centerness = []
        for cls_score, bbox_pred, centerness, points in zip(
                cls_scores, bbox_preds, centernesses, mlvl_points):
            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
            scores = cls_score.permute(1, 2, 0).reshape(
                -1, self.cls_out_channels).sigmoid()
            centerness = centerness.permute(1, 2, 0).reshape(-1).sigmoid()

            bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
            nms_pre = cfg.get('nms_pre', -1)
            if nms_pre > 0 and scores.shape[0] > nms_pre:
                max_scores, _ = (scores * centerness[:, None]).max(dim=1)
                _, topk_inds = max_scores.topk(nms_pre)
                points = points[topk_inds, :]
                bbox_pred = bbox_pred[topk_inds, :]
                scores = scores[topk_inds, :]
                centerness = centerness[topk_inds]
            bboxes = distance2bbox(points, bbox_pred, max_shape=img_shape)
            mlvl_bboxes.append(bboxes)
            mlvl_scores.append(scores)
            mlvl_centerness.append(centerness)
        mlvl_bboxes = torch.cat(mlvl_bboxes)
        if rescale:
            mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
        mlvl_scores = torch.cat(mlvl_scores)
        padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
        # remind that we set FG labels to [0, num_class-1] since mmdet v2.0
        # BG cat_id: num_class
        mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)
        mlvl_centerness = torch.cat(mlvl_centerness)

        if with_nms:
            det_bboxes, det_labels = multiclass_nms(
                mlvl_bboxes,
                mlvl_scores,
                cfg.score_thr,
                cfg.nms,
                cfg.max_per_img,
                score_factors=mlvl_centerness)
            return det_bboxes, det_labels
        else:
            return mlvl_bboxes, mlvl_scores, mlvl_centerness

    def _get_points_single(self,
                           featmap_size,
                           stride,
                           dtype,
                           device,
                           flatten=False):
        """Get points according to feature map sizes."""
        y, x = super()._get_points_single(featmap_size, stride, dtype, device)
        points = torch.stack((x.reshape(-1) * stride, y.reshape(-1) * stride),
                             dim=-1) + stride // 2
        return points

    def get_targets(self, points, gt_bboxes_list, gt_labels_list):
        """Compute regression, classification and centerness targets for
        points in multiple images.

        Args:
            points (list[Tensor]): Points of each fpn level, each has shape
                (num_points, 2).
            gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image,
                each has shape (num_gt, 4).
            gt_labels_list (list[Tensor]): Ground truth labels of each box,
                each has shape (num_gt,).

        Returns:
            tuple:
                concat_lvl_labels (list[Tensor]): Labels of each level. \
                concat_lvl_bbox_targets (list[Tensor]): BBox targets of each \
                    level.
        """
        assert len(points) == len(self.regress_ranges)
        num_levels = len(points)
        # expand regress ranges to align with points
        expanded_regress_ranges = [
            points[i].new_tensor(self.regress_ranges[i])[None].expand_as(
                points[i]) for i in range(num_levels)
        ]
        # concat all levels points and regress ranges
        concat_regress_ranges = torch.cat(expanded_regress_ranges, dim=0)
        concat_points = torch.cat(points, dim=0)

        # the number of points per img, per lvl
        num_points = [center.size(0) for center in points]

        # get labels and bbox_targets of each image
        labels_list, bbox_targets_list = multi_apply(
            self._get_target_single,
            gt_bboxes_list,
            gt_labels_list,
            points=concat_points,
            regress_ranges=concat_regress_ranges,
            num_points_per_lvl=num_points)

        # split to per img, per level
        labels_list = [labels.split(num_points, 0) for labels in labels_list]
        bbox_targets_list = [
            bbox_targets.split(num_points, 0)
            for bbox_targets in bbox_targets_list
        ]

        # concat per level image
        concat_lvl_labels = []
        concat_lvl_bbox_targets = []
        for i in range(num_levels):
            concat_lvl_labels.append(
                torch.cat([labels[i] for labels in labels_list]))
            bbox_targets = torch.cat(
                [bbox_targets[i] for bbox_targets in bbox_targets_list])
            if self.norm_on_bbox:
                bbox_targets = bbox_targets / self.strides[i]
            concat_lvl_bbox_targets.append(bbox_targets)
        return concat_lvl_labels, concat_lvl_bbox_targets

    def _get_target_single(self, gt_bboxes, gt_labels, points, regress_ranges,
                           num_points_per_lvl):
        """Compute regression and classification targets for a single image."""
        num_points = points.size(0)
        num_gts = gt_labels.size(0)
        if num_gts == 0:
            return gt_labels.new_full((num_points,), self.num_classes), \
                   gt_bboxes.new_zeros((num_points, 4))

        areas = (gt_bboxes[:, 2] - gt_bboxes[:, 0]) * (
            gt_bboxes[:, 3] - gt_bboxes[:, 1])
        # TODO: figure out why these two are different
        # areas = areas[None].expand(num_points, num_gts)
        areas = areas[None].repeat(num_points, 1)
        regress_ranges = regress_ranges[:, None, :].expand(
            num_points, num_gts, 2)
        gt_bboxes = gt_bboxes[None].expand(num_points, num_gts, 4)
        xs, ys = points[:, 0], points[:, 1]
        xs = xs[:, None].expand(num_points, num_gts)
        ys = ys[:, None].expand(num_points, num_gts)

        left = xs - gt_bboxes[..., 0]
        right = gt_bboxes[..., 2] - xs
        top = ys - gt_bboxes[..., 1]
        bottom = gt_bboxes[..., 3] - ys
        bbox_targets = torch.stack((left, top, right, bottom), -1)

        if self.center_sampling:
            # condition1: inside a `center bbox`
            radius = self.center_sample_radius
            center_xs = (gt_bboxes[..., 0] + gt_bboxes[..., 2]) / 2
            center_ys = (gt_bboxes[..., 1] + gt_bboxes[..., 3]) / 2
            center_gts = torch.zeros_like(gt_bboxes)
            stride = center_xs.new_zeros(center_xs.shape)

            # project the points on current lvl back to the `original` sizes
            lvl_begin = 0
            for lvl_idx, num_points_lvl in enumerate(num_points_per_lvl):
                lvl_end = lvl_begin + num_points_lvl
                stride[lvl_begin:lvl_end] = self.strides[lvl_idx] * radius
                lvl_begin = lvl_end

            x_mins = center_xs - stride
            y_mins = center_ys - stride
            x_maxs = center_xs + stride
            y_maxs = center_ys + stride
            center_gts[..., 0] = torch.where(x_mins > gt_bboxes[..., 0],
                                             x_mins, gt_bboxes[..., 0])
            center_gts[..., 1] = torch.where(y_mins > gt_bboxes[..., 1],
                                             y_mins, gt_bboxes[..., 1])
            center_gts[..., 2] = torch.where(x_maxs > gt_bboxes[..., 2],
                                             gt_bboxes[..., 2], x_maxs)
            center_gts[..., 3] = torch.where(y_maxs > gt_bboxes[..., 3],
                                             gt_bboxes[..., 3], y_maxs)
            cb_dist_left = xs - center_gts[..., 0]
            cb_dist_right = center_gts[..., 2] - xs
            cb_dist_top = ys - center_gts[..., 1]
            cb_dist_bottom = center_gts[..., 3] - ys
            center_bbox = torch.stack(
                (cb_dist_left, cb_dist_top, cb_dist_right, cb_dist_bottom), -1)
            inside_gt_bbox_mask = center_bbox.min(-1)[0] > 0
        else:
            # condition1: inside a gt bbox
            inside_gt_bbox_mask = bbox_targets.min(-1)[0] > 0

        # condition2: limit the regression range for each location
        max_regress_distance = bbox_targets.max(-1)[0]
        inside_regress_range = (
            (max_regress_distance >= regress_ranges[..., 0])
            & (max_regress_distance <= regress_ranges[..., 1]))

        # if there are still more than one objects for a location,
        # we choose the one with minimal area
        areas[inside_gt_bbox_mask == 0] = INF
        areas[inside_regress_range == 0] = INF
        min_area, min_area_inds = areas.min(dim=1)

        labels = gt_labels[min_area_inds]
        labels[min_area == INF] = self.num_classes  # set as BG
        bbox_targets = bbox_targets[range(num_points), min_area_inds]

        return labels, bbox_targets

    def centerness_target(self, pos_bbox_targets):
        """Compute centerness targets.

        Args:
            pos_bbox_targets (Tensor): BBox targets of positive bboxes in shape
                (num_pos, 4)

        Returns:
            Tensor: Centerness target.
        """
        # only calculate pos centerness targets, otherwise there may be nan
        left_right = pos_bbox_targets[:, [0, 2]]
        top_bottom = pos_bbox_targets[:, [1, 3]]
        centerness_targets = (
            left_right.min(dim=-1)[0] / left_right.max(dim=-1)[0]) * (
                top_bottom.min(dim=-1)[0] / top_bottom.max(dim=-1)[0])
        return torch.sqrt(centerness_targets)
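
`centerness_target` above is the FCOS formula sqrt(min(l, r)/max(l, r) * min(t, b)/max(t, b)) applied to the (l, t, r, b) regression targets. A standalone sketch on a toy batch, with hand-picked targets, shows how the value decays away from the box center:

import torch

# (l, t, r, b) distances from three points to their gt box edges
pos_bbox_targets = torch.tensor([
    [10., 10., 10., 10.],  # point at the box center
    [2., 10., 18., 10.],   # off-center horizontally
    [1., 1., 19., 19.],    # near a corner
])

left_right = pos_bbox_targets[:, [0, 2]]
top_bottom = pos_bbox_targets[:, [1, 3]]
centerness = torch.sqrt(
    (left_right.min(dim=-1)[0] / left_right.max(dim=-1)[0]) *
    (top_bottom.min(dim=-1)[0] / top_bottom.max(dim=-1)[0]))
print(centerness)  # ~[1.000, 0.333, 0.053]; exactly 1 only at the center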
PyTorch/NLP/Conformer-main/mmdetection/mmdet/models/dense_heads/fovea_head.py (new file, 0 → 100644)
import torch
import torch.nn as nn
from mmcv.cnn import ConvModule, normal_init
from mmcv.ops import DeformConv2d

from mmdet.core import multi_apply, multiclass_nms
from ..builder import HEADS
from .anchor_free_head import AnchorFreeHead

INF = 1e8


class FeatureAlign(nn.Module):

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size=3,
                 deform_groups=4):
        super(FeatureAlign, self).__init__()
        offset_channels = kernel_size * kernel_size * 2
        self.conv_offset = nn.Conv2d(
            4, deform_groups * offset_channels, 1, bias=False)
        self.conv_adaption = DeformConv2d(
            in_channels,
            out_channels,
            kernel_size=kernel_size,
            padding=(kernel_size - 1) // 2,
            deform_groups=deform_groups)
        self.relu = nn.ReLU(inplace=True)

    def init_weights(self):
        normal_init(self.conv_offset, std=0.1)
        normal_init(self.conv_adaption, std=0.01)

    def forward(self, x, shape):
        offset = self.conv_offset(shape)
        x = self.relu(self.conv_adaption(x, offset))
        return x


@HEADS.register_module()
class FoveaHead(AnchorFreeHead):
    """FoveaBox: Beyond Anchor-based Object Detector
    https://arxiv.org/abs/1904.03797
    """

    def __init__(self,
                 num_classes,
                 in_channels,
                 base_edge_list=(16, 32, 64, 128, 256),
                 scale_ranges=((8, 32), (16, 64), (32, 128), (64, 256),
                               (128, 512)),
                 sigma=0.4,
                 with_deform=False,
                 deform_groups=4,
                 **kwargs):
        self.base_edge_list = base_edge_list
        self.scale_ranges = scale_ranges
        self.sigma = sigma
        self.with_deform = with_deform
        self.deform_groups = deform_groups
        super().__init__(num_classes, in_channels, **kwargs)

    def _init_layers(self):
        # box branch
        super()._init_reg_convs()
        self.conv_reg = nn.Conv2d(self.feat_channels, 4, 3, padding=1)
        # cls branch
        if not self.with_deform:
            super()._init_cls_convs()
            self.conv_cls = nn.Conv2d(
                self.feat_channels, self.cls_out_channels, 3, padding=1)
        else:
            self.cls_convs = nn.ModuleList()
            self.cls_convs.append(
                ConvModule(
                    self.feat_channels, (self.feat_channels * 4),
                    3,
                    stride=1,
                    padding=1,
                    conv_cfg=self.conv_cfg,
                    norm_cfg=self.norm_cfg,
                    bias=self.norm_cfg is None))
            self.cls_convs.append(
                ConvModule((self.feat_channels * 4), (self.feat_channels * 4),
                           1,
                           stride=1,
                           padding=0,
                           conv_cfg=self.conv_cfg,
                           norm_cfg=self.norm_cfg,
                           bias=self.norm_cfg is None))
            self.feature_adaption = FeatureAlign(
                self.feat_channels,
                self.feat_channels,
                kernel_size=3,
                deform_groups=self.deform_groups)
            self.conv_cls = nn.Conv2d(
                int(self.feat_channels * 4),
                self.cls_out_channels,
                3,
                padding=1)

    def init_weights(self):
        super().init_weights()
        if self.with_deform:
            self.feature_adaption.init_weights()

    def forward_single(self, x):
        cls_feat = x
        reg_feat = x
        for reg_layer in self.reg_convs:
            reg_feat = reg_layer(reg_feat)
        bbox_pred = self.conv_reg(reg_feat)
        if self.with_deform:
            cls_feat = self.feature_adaption(cls_feat, bbox_pred.exp())
        for cls_layer in self.cls_convs:
            cls_feat = cls_layer(cls_feat)
        cls_score = self.conv_cls(cls_feat)
        return cls_score, bbox_pred

    def _get_points_single(self, *args, **kwargs):
        y, x = super()._get_points_single(*args, **kwargs)
        return y + 0.5, x + 0.5

    def loss(self,
             cls_scores,
             bbox_preds,
             gt_bbox_list,
             gt_label_list,
             img_metas,
             gt_bboxes_ignore=None):
        assert len(cls_scores) == len(bbox_preds)
        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
        points = self.get_points(featmap_sizes, bbox_preds[0].dtype,
                                 bbox_preds[0].device)
        num_imgs = cls_scores[0].size(0)
        flatten_cls_scores = [
            cls_score.permute(0, 2, 3, 1).reshape(-1, self.cls_out_channels)
            for cls_score in cls_scores
        ]
        flatten_bbox_preds = [
            bbox_pred.permute(0, 2, 3, 1).reshape(-1, 4)
            for bbox_pred in bbox_preds
        ]
        flatten_cls_scores = torch.cat(flatten_cls_scores)
        flatten_bbox_preds = torch.cat(flatten_bbox_preds)
        flatten_labels, flatten_bbox_targets = self.get_targets(
            gt_bbox_list, gt_label_list, featmap_sizes, points)
        # FG cat_id: [0, num_classes -1], BG cat_id: num_classes
        pos_inds = ((flatten_labels >= 0)
                    & (flatten_labels < self.num_classes)).nonzero().view(-1)
        num_pos = len(pos_inds)
        loss_cls = self.loss_cls(
            flatten_cls_scores, flatten_labels, avg_factor=num_pos + num_imgs)
        if num_pos > 0:
            pos_bbox_preds = flatten_bbox_preds[pos_inds]
            pos_bbox_targets = flatten_bbox_targets[pos_inds]
            pos_weights = pos_bbox_targets.new_zeros(
                pos_bbox_targets.size()) + 1.0
            loss_bbox = self.loss_bbox(
                pos_bbox_preds,
                pos_bbox_targets,
                pos_weights,
                avg_factor=num_pos)
        else:
            loss_bbox = torch.tensor(
                0,
                dtype=flatten_bbox_preds.dtype,
                device=flatten_bbox_preds.device)
        return dict(loss_cls=loss_cls, loss_bbox=loss_bbox)

    def get_targets(self, gt_bbox_list, gt_label_list, featmap_sizes, points):
        label_list, bbox_target_list = multi_apply(
            self._get_target_single,
            gt_bbox_list,
            gt_label_list,
            featmap_size_list=featmap_sizes,
            point_list=points)
        flatten_labels = [
            torch.cat([
                labels_level_img.flatten()
                for labels_level_img in labels_level
            ]) for labels_level in zip(*label_list)
        ]
        flatten_bbox_targets = [
            torch.cat([
                bbox_targets_level_img.reshape(-1, 4)
                for bbox_targets_level_img in bbox_targets_level
            ]) for bbox_targets_level in zip(*bbox_target_list)
        ]
        flatten_labels = torch.cat(flatten_labels)
        flatten_bbox_targets = torch.cat(flatten_bbox_targets)
        return flatten_labels, flatten_bbox_targets

    def _get_target_single(self,
                           gt_bboxes_raw,
                           gt_labels_raw,
                           featmap_size_list=None,
                           point_list=None):
        gt_areas = torch.sqrt((gt_bboxes_raw[:, 2] - gt_bboxes_raw[:, 0]) *
                              (gt_bboxes_raw[:, 3] - gt_bboxes_raw[:, 1]))
        label_list = []
        bbox_target_list = []
        # for each pyramid, find the cls and box target
        for base_len, (lower_bound, upper_bound), stride, featmap_size, \
                (y, x) in zip(self.base_edge_list, self.scale_ranges,
                              self.strides, featmap_size_list, point_list):
            # FG cat_id: [0, num_classes -1], BG cat_id: num_classes
            labels = gt_labels_raw.new_zeros(featmap_size) + self.num_classes
            bbox_targets = gt_bboxes_raw.new(featmap_size[0], featmap_size[1],
                                             4) + 1
            # scale assignment
            hit_indices = ((gt_areas >= lower_bound) &
                           (gt_areas <= upper_bound)).nonzero().flatten()
            if len(hit_indices) == 0:
                label_list.append(labels)
                bbox_target_list.append(torch.log(bbox_targets))
                continue
            _, hit_index_order = torch.sort(-gt_areas[hit_indices])
            hit_indices = hit_indices[hit_index_order]
            gt_bboxes = gt_bboxes_raw[hit_indices, :] / stride
            gt_labels = gt_labels_raw[hit_indices]
            half_w = 0.5 * (gt_bboxes[:, 2] - gt_bboxes[:, 0])
            half_h = 0.5 * (gt_bboxes[:, 3] - gt_bboxes[:, 1])
            # valid fovea area: left, right, top, down
            pos_left = torch.ceil(
                gt_bboxes[:, 0] + (1 - self.sigma) * half_w - 0.5).long().\
                clamp(0, featmap_size[1] - 1)
            pos_right = torch.floor(
                gt_bboxes[:, 0] + (1 + self.sigma) * half_w - 0.5).long().\
                clamp(0, featmap_size[1] - 1)
            pos_top = torch.ceil(
                gt_bboxes[:, 1] + (1 - self.sigma) * half_h - 0.5).long().\
                clamp(0, featmap_size[0] - 1)
            pos_down = torch.floor(
                gt_bboxes[:, 1] + (1 + self.sigma) * half_h - 0.5).long().\
                clamp(0, featmap_size[0] - 1)
            for px1, py1, px2, py2, label, (gt_x1, gt_y1, gt_x2, gt_y2) in \
                    zip(pos_left, pos_top, pos_right, pos_down, gt_labels,
                        gt_bboxes_raw[hit_indices, :]):
                labels[py1:py2 + 1, px1:px2 + 1] = label
                bbox_targets[py1:py2 + 1, px1:px2 + 1, 0] = \
                    (stride * x[py1:py2 + 1, px1:px2 + 1] - gt_x1) / base_len
                bbox_targets[py1:py2 + 1, px1:px2 + 1, 1] = \
                    (stride * y[py1:py2 + 1, px1:px2 + 1] - gt_y1) / base_len
                bbox_targets[py1:py2 + 1, px1:px2 + 1, 2] = \
                    (gt_x2 - stride * x[py1:py2 + 1, px1:px2 + 1]) / base_len
                bbox_targets[py1:py2 + 1, px1:px2 + 1, 3] = \
                    (gt_y2 - stride * y[py1:py2 + 1, px1:px2 + 1]) / base_len
            bbox_targets = bbox_targets.clamp(min=1. / 16, max=16.)
            label_list.append(labels)
            bbox_target_list.append(torch.log(bbox_targets))
        return label_list, bbox_target_list

    def get_bboxes(self,
                   cls_scores,
                   bbox_preds,
                   img_metas,
                   cfg=None,
                   rescale=None):
        assert len(cls_scores) == len(bbox_preds)
        num_levels = len(cls_scores)
        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
        points = self.get_points(
            featmap_sizes,
            bbox_preds[0].dtype,
            bbox_preds[0].device,
            flatten=True)
        result_list = []
        for img_id in range(len(img_metas)):
            cls_score_list = [
                cls_scores[i][img_id].detach() for i in range(num_levels)
            ]
            bbox_pred_list = [
                bbox_preds[i][img_id].detach() for i in range(num_levels)
            ]
            img_shape = img_metas[img_id]['img_shape']
            scale_factor = img_metas[img_id]['scale_factor']
            det_bboxes = self._get_bboxes_single(cls_score_list,
                                                 bbox_pred_list,
                                                 featmap_sizes, points,
                                                 img_shape, scale_factor, cfg,
                                                 rescale)
            result_list.append(det_bboxes)
        return result_list

    def _get_bboxes_single(self,
                           cls_scores,
                           bbox_preds,
                           featmap_sizes,
                           point_list,
                           img_shape,
                           scale_factor,
                           cfg,
                           rescale=False):
        cfg = self.test_cfg if cfg is None else cfg
        assert len(cls_scores) == len(bbox_preds) == len(point_list)
        det_bboxes = []
        det_scores = []
        for cls_score, bbox_pred, featmap_size, stride, base_len, (y, x) \
                in zip(cls_scores, bbox_preds, featmap_sizes, self.strides,
                       self.base_edge_list, point_list):
            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
            scores = cls_score.permute(1, 2, 0).reshape(
                -1, self.cls_out_channels).sigmoid()
            bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4).exp()
            nms_pre = cfg.get('nms_pre', -1)
            if (nms_pre > 0) and (scores.shape[0] > nms_pre):
                max_scores, _ = scores.max(dim=1)
                _, topk_inds = max_scores.topk(nms_pre)
                bbox_pred = bbox_pred[topk_inds, :]
                scores = scores[topk_inds, :]
                y = y[topk_inds]
                x = x[topk_inds]
            x1 = (stride * x - base_len * bbox_pred[:, 0]).\
                clamp(min=0, max=img_shape[1] - 1)
            y1 = (stride * y - base_len * bbox_pred[:, 1]).\
                clamp(min=0, max=img_shape[0] - 1)
            x2 = (stride * x + base_len * bbox_pred[:, 2]).\
                clamp(min=0, max=img_shape[1] - 1)
            y2 = (stride * y + base_len * bbox_pred[:, 3]).\
                clamp(min=0, max=img_shape[0] - 1)
            bboxes = torch.stack([x1, y1, x2, y2], -1)
            det_bboxes.append(bboxes)
            det_scores.append(scores)
        det_bboxes = torch.cat(det_bboxes)
        if rescale:
            det_bboxes /= det_bboxes.new_tensor(scale_factor)
        det_scores = torch.cat(det_scores)
        padding = det_scores.new_zeros(det_scores.shape[0], 1)
        # remind that we set FG labels to [0, num_class-1] since mmdet v2.0
        # BG cat_id: num_class
        det_scores = torch.cat([det_scores, padding], dim=1)
        det_bboxes, det_labels = multiclass_nms(det_bboxes, det_scores,
                                                cfg.score_thr, cfg.nms,
                                                cfg.max_per_img)
        return det_bboxes, det_labels
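
In `_get_target_single` above, FoveaBox marks as positive only the cells inside each ground-truth box shrunk around its center by the factor `sigma`. A quick sketch of that index computation for a single box on a single level (the box and feature-map sizes are made up for illustration):

import torch

sigma = 0.4
featmap_w, featmap_h, stride = 32, 32, 8

# one gt box in image coordinates, projected onto the feature map
gt = torch.tensor([[32., 48., 160., 176.]]) / stride  # (x1, y1, x2, y2)
half_w = 0.5 * (gt[:, 2] - gt[:, 0])
half_h = 0.5 * (gt[:, 3] - gt[:, 1])

# shrunk "fovea" region, clamped to the feature map (mirrors the head)
pos_left = torch.ceil(
    gt[:, 0] + (1 - sigma) * half_w - 0.5).long().clamp(0, featmap_w - 1)
pos_right = torch.floor(
    gt[:, 0] + (1 + sigma) * half_w - 0.5).long().clamp(0, featmap_w - 1)
pos_top = torch.ceil(
    gt[:, 1] + (1 - sigma) * half_h - 0.5).long().clamp(0, featmap_h - 1)
pos_down = torch.floor(
    gt[:, 1] + (1 + sigma) * half_h - 0.5).long().clamp(0, featmap_h - 1)
print(pos_left.item(), pos_right.item(), pos_top.item(), pos_down.item())
# only cells in [pos_left, pos_right] x [pos_top, pos_down] become positives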
PyTorch/NLP/Conformer-main/mmdetection/mmdet/models/dense_heads/free_anchor_retina_head.py (new file, 0 → 100644)
import torch
import torch.nn.functional as F

from mmdet.core import bbox_overlaps
from ..builder import HEADS
from .retina_head import RetinaHead

EPS = 1e-12


@HEADS.register_module()
class FreeAnchorRetinaHead(RetinaHead):
    """FreeAnchor RetinaHead used in https://arxiv.org/abs/1909.02466.

    Args:
        num_classes (int): Number of categories excluding the background
            category.
        in_channels (int): Number of channels in the input feature map.
        stacked_convs (int): Number of conv layers in cls and reg tower.
            Default: 4.
        conv_cfg (dict): dictionary to construct and config conv layer.
            Default: None.
        norm_cfg (dict): dictionary to construct and config norm layer.
            Default: norm_cfg=dict(type='GN', num_groups=32,
            requires_grad=True).
        pre_anchor_topk (int): Number of boxes that are taken into each bag.
        bbox_thr (float): The threshold of the saturated linear function. It
            is usually the same as the IoU threshold used in NMS.
        gamma (float): Gamma parameter in focal loss.
        alpha (float): Alpha parameter in focal loss.
    """  # noqa: W605

    def __init__(self,
                 num_classes,
                 in_channels,
                 stacked_convs=4,
                 conv_cfg=None,
                 norm_cfg=None,
                 pre_anchor_topk=50,
                 bbox_thr=0.6,
                 gamma=2.0,
                 alpha=0.5,
                 **kwargs):
        super(FreeAnchorRetinaHead,
              self).__init__(num_classes, in_channels, stacked_convs,
                             conv_cfg, norm_cfg, **kwargs)
        self.pre_anchor_topk = pre_anchor_topk
        self.bbox_thr = bbox_thr
        self.gamma = gamma
        self.alpha = alpha

    def loss(self,
             cls_scores,
             bbox_preds,
             gt_bboxes,
             gt_labels,
             img_metas,
             gt_bboxes_ignore=None):
        """Compute losses of the head.

        Args:
            cls_scores (list[Tensor]): Box scores for each scale level
                Has shape (N, num_anchors * num_classes, H, W)
            bbox_preds (list[Tensor]): Box energies / deltas for each scale
                level with shape (N, num_anchors * 4, H, W)
            gt_bboxes (list[Tensor]): each item are the truth boxes for each
                image in [tl_x, tl_y, br_x, br_y] format.
            gt_labels (list[Tensor]): class indices corresponding to each box
            img_metas (list[dict]): Meta information of each image, e.g.,
                image size, scaling factor, etc.
            gt_bboxes_ignore (None | list[Tensor]): specify which bounding
                boxes can be ignored when computing the loss.

        Returns:
            dict[str, Tensor]: A dictionary of loss components.
        """
        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
        assert len(featmap_sizes) == len(self.anchor_generator.base_anchors)

        anchor_list, _ = self.get_anchors(featmap_sizes, img_metas)
        anchors = [torch.cat(anchor) for anchor in anchor_list]

        # concatenate each level
        cls_scores = [
            cls.permute(0, 2, 3,
                        1).reshape(cls.size(0), -1, self.cls_out_channels)
            for cls in cls_scores
        ]
        bbox_preds = [
            bbox_pred.permute(0, 2, 3, 1).reshape(bbox_pred.size(0), -1, 4)
            for bbox_pred in bbox_preds
        ]
        cls_scores = torch.cat(cls_scores, dim=1)
        bbox_preds = torch.cat(bbox_preds, dim=1)

        cls_prob = torch.sigmoid(cls_scores)
        box_prob = []
        num_pos = 0
        positive_losses = []
        for _, (anchors_, gt_labels_, gt_bboxes_, cls_prob_,
                bbox_preds_) in enumerate(
                    zip(anchors, gt_labels, gt_bboxes, cls_prob, bbox_preds)):

            with torch.no_grad():
                if len(gt_bboxes_) == 0:
                    image_box_prob = torch.zeros(
                        anchors_.size(0),
                        self.cls_out_channels).type_as(bbox_preds_)
                else:
                    # box_localization: a_{j}^{loc}, shape: [j, 4]
                    pred_boxes = self.bbox_coder.decode(anchors_, bbox_preds_)

                    # object_box_iou: IoU_{ij}^{loc}, shape: [i, j]
                    object_box_iou = bbox_overlaps(gt_bboxes_, pred_boxes)

                    # object_box_prob: P{a_{j} -> b_{i}}, shape: [i, j]
                    t1 = self.bbox_thr
                    t2 = object_box_iou.max(
                        dim=1, keepdim=True).values.clamp(min=t1 + 1e-12)
                    object_box_prob = ((object_box_iou - t1) /
                                       (t2 - t1)).clamp(min=0, max=1)

                    # object_cls_box_prob: P{a_{j} -> b_{i}}, shape: [i, c, j]
                    num_obj = gt_labels_.size(0)
                    indices = torch.stack([
                        torch.arange(num_obj).type_as(gt_labels_), gt_labels_
                    ], dim=0)
                    object_cls_box_prob = torch.sparse_coo_tensor(
                        indices, object_box_prob)

                    # image_box_iou: P{a_{j} \in A_{+}}, shape: [c, j]
                    """
                    from "start" to "end" implement:
                    image_box_iou = torch.sparse.max(object_cls_box_prob,
                                                     dim=0).t()
                    """
                    # start
                    box_cls_prob = torch.sparse.sum(
                        object_cls_box_prob, dim=0).to_dense()

                    indices = torch.nonzero(box_cls_prob, as_tuple=False).t_()
                    if indices.numel() == 0:
                        image_box_prob = torch.zeros(
                            anchors_.size(0),
                            self.cls_out_channels).type_as(object_box_prob)
                    else:
                        nonzero_box_prob = torch.where(
                            (gt_labels_.unsqueeze(dim=-1) == indices[0]),
                            object_box_prob[:, indices[1]],
                            torch.tensor(
                                [0]).type_as(object_box_prob)).max(dim=0).values

                        # upmap to shape [j, c]
                        image_box_prob = torch.sparse_coo_tensor(
                            indices.flip([0]),
                            nonzero_box_prob,
                            size=(anchors_.size(0),
                                  self.cls_out_channels)).to_dense()
                    # end

                box_prob.append(image_box_prob)

            # construct bags for objects
            match_quality_matrix = bbox_overlaps(gt_bboxes_, anchors_)
            _, matched = torch.topk(
                match_quality_matrix,
                self.pre_anchor_topk,
                dim=1,
                sorted=False)
            del match_quality_matrix

            # matched_cls_prob: P_{ij}^{cls}
            matched_cls_prob = torch.gather(
                cls_prob_[matched], 2,
                gt_labels_.view(-1, 1, 1).repeat(1, self.pre_anchor_topk,
                                                 1)).squeeze(2)

            # matched_box_prob: P_{ij}^{loc}
            matched_anchors = anchors_[matched]
            matched_object_targets = self.bbox_coder.encode(
                matched_anchors,
                gt_bboxes_.unsqueeze(dim=1).expand_as(matched_anchors))
            loss_bbox = self.loss_bbox(
                bbox_preds_[matched],
                matched_object_targets,
                reduction_override='none').sum(-1)
            matched_box_prob = torch.exp(-loss_bbox)

            # positive_losses: {-log( Mean-max(P_{ij}^{cls} * P_{ij}^{loc}) )}
            num_pos += len(gt_bboxes_)
            positive_losses.append(
                self.positive_bag_loss(matched_cls_prob, matched_box_prob))
        positive_loss = torch.cat(positive_losses).sum() / max(1, num_pos)

        # box_prob: P{a_{j} \in A_{+}}
        box_prob = torch.stack(box_prob, dim=0)

        # negative_loss:
        # \sum_{j}{ FL((1 - P{a_{j} \in A_{+}}) * (1 - P_{j}^{bg})) } / n||B||
        negative_loss = self.negative_bag_loss(cls_prob, box_prob).sum() / max(
            1, num_pos * self.pre_anchor_topk)

        # avoid the absence of gradients in regression subnet
        # when no ground-truth in a batch
        if num_pos == 0:
            positive_loss = bbox_preds.sum() * 0

        losses = {
            'positive_bag_loss': positive_loss,
            'negative_bag_loss': negative_loss
        }
        return losses

    def positive_bag_loss(self, matched_cls_prob, matched_box_prob):
        """Compute positive bag loss.

        :math:`-log( Mean-max(P_{ij}^{cls} * P_{ij}^{loc}) )`.

        :math:`P_{ij}^{cls}`: matched_cls_prob, classification probability of matched samples.

        :math:`P_{ij}^{loc}`: matched_box_prob, box probability of matched samples.

        Args:
            matched_cls_prob (Tensor): Classification probability of matched
                samples in shape (num_gt, pre_anchor_topk).
            matched_box_prob (Tensor): BBox probability of matched samples,
                in shape (num_gt, pre_anchor_topk).

        Returns:
            Tensor: Positive bag loss in shape (num_gt,).
        """  # noqa: E501, W605
        # bag_prob = Mean-max(matched_prob)
        matched_prob = matched_cls_prob * matched_box_prob
        weight = 1 / torch.clamp(1 - matched_prob, 1e-12, None)
        weight /= weight.sum(dim=1).unsqueeze(dim=-1)
        bag_prob = (weight * matched_prob).sum(dim=1)
        # positive_bag_loss = -self.alpha * log(bag_prob)
        return self.alpha * F.binary_cross_entropy(
            bag_prob, torch.ones_like(bag_prob), reduction='none')

    def negative_bag_loss(self, cls_prob, box_prob):
        """Compute negative bag loss.

        :math:`FL((1 - P_{a_{j} \in A_{+}}) * (1 - P_{j}^{bg}))`.

        :math:`P_{a_{j} \in A_{+}}`: Box_probability of matched samples.

        :math:`P_{j}^{bg}`: Classification probability of negative samples.

        Args:
            cls_prob (Tensor): Classification probability, in shape
                (num_img, num_anchors, num_classes).
            box_prob (Tensor): Box probability, in shape
                (num_img, num_anchors, num_classes).

        Returns:
            Tensor: Negative bag loss in shape
                (num_img, num_anchors, num_classes).
        """  # noqa: E501, W605
        prob = cls_prob * (1 - box_prob)
        # There are some cases when neg_prob = 0.
        # This will cause the neg_prob.log() to be inf without clamp.
        prob = prob.clamp(min=EPS, max=1 - EPS)
        negative_bag_loss = prob**self.gamma * F.binary_cross_entropy(
            prob, torch.zeros_like(prob), reduction='none')
        return (1 - self.alpha) * negative_bag_loss
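
`positive_bag_loss` above implements the paper's Mean-max function: a weighted mean whose weights 1/(1 - p), once normalized, concentrate on the largest probabilities in the bag. A standalone sketch on one hand-made bag of three anchor probabilities:

import torch

# probabilities of the top-k anchors in one bag (cls_prob * box_prob)
matched_prob = torch.tensor([[0.1, 0.5, 0.9]])

weight = 1 / torch.clamp(1 - matched_prob, 1e-12, None)  # 1/(1 - p)
weight = weight / weight.sum(dim=1, keepdim=True)
bag_prob = (weight * matched_prob).sum(dim=1)
print(bag_prob)  # ~0.77: much closer to max(p) = 0.9 than the plain mean 0.5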
PyTorch/NLP/Conformer-main/mmdetection/mmdet/models/dense_heads/fsaf_head.py (new file, 0 → 100644)
import numpy as np
import torch
from mmcv.cnn import normal_init
from mmcv.runner import force_fp32

from mmdet.core import (anchor_inside_flags, images_to_levels, multi_apply,
                        unmap)
from ..builder import HEADS
from ..losses.accuracy import accuracy
from ..losses.utils import weight_reduce_loss
from .retina_head import RetinaHead


@HEADS.register_module()
class FSAFHead(RetinaHead):
    """Anchor-free head used in `FSAF <https://arxiv.org/abs/1903.00621>`_.

    The head contains two subnetworks. The first classifies anchor boxes and
    the second regresses deltas for the anchors (num_anchors is 1 for anchor-
    free methods)

    Args:
        *args: Same as its base class in :class:`RetinaHead`
        score_threshold (float, optional): The score_threshold to calculate
            positive recall. If given, prediction scores lower than this
            value are counted as incorrect predictions. Defaults to None.
        **kwargs: Same as its base class in :class:`RetinaHead`

    Example:
        >>> import torch
        >>> self = FSAFHead(11, 7)
        >>> x = torch.rand(1, 7, 32, 32)
        >>> cls_score, bbox_pred = self.forward_single(x)
        >>> # Each anchor predicts a score for each class except background
        >>> cls_per_anchor = cls_score.shape[1] / self.num_anchors
        >>> box_per_anchor = bbox_pred.shape[1] / self.num_anchors
        >>> assert cls_per_anchor == self.num_classes
        >>> assert box_per_anchor == 4
    """

    def __init__(self, *args, score_threshold=None, **kwargs):
        super().__init__(*args, **kwargs)
        self.score_threshold = score_threshold

    def forward_single(self, x):
        """Forward feature map of a single scale level.

        Args:
            x (Tensor): Feature map of a single scale level.

        Returns:
            tuple (Tensor):
                cls_score (Tensor): Box scores for each scale level
                    Has shape (N, num_points * num_classes, H, W).
                bbox_pred (Tensor): Box energies / deltas for each scale
                    level with shape (N, num_points * 4, H, W).
        """
        cls_score, bbox_pred = super().forward_single(x)
        # relu: TBLR encoder only accepts positive bbox_pred
        return cls_score, self.relu(bbox_pred)

    def init_weights(self):
        """Initialize weights of the head."""
        super(FSAFHead, self).init_weights()
        # The positive bias in self.retina_reg conv is to prevent predicted \
        # bbox with 0 area
        normal_init(self.retina_reg, std=0.01, bias=0.25)

    def _get_targets_single(self,
                            flat_anchors,
                            valid_flags,
                            gt_bboxes,
                            gt_bboxes_ignore,
                            gt_labels,
                            img_meta,
                            label_channels=1,
                            unmap_outputs=True):
        """Compute regression and classification targets for anchors in a
        single image.

        Most of the codes are the same with the base class
        :obj: `AnchorHead`, except that it also collects and returns
        the matched gt index in the image (from 0 to num_gt-1). If the
        anchor bbox is not matched to any gt, the corresponding value in
        pos_gt_inds is -1.
        """
        inside_flags = anchor_inside_flags(flat_anchors, valid_flags,
                                           img_meta['img_shape'][:2],
                                           self.train_cfg.allowed_border)
        if not inside_flags.any():
            return (None, ) * 7
        # Assign gt and sample anchors
        anchors = flat_anchors[inside_flags.type(torch.bool), :]
        assign_result = self.assigner.assign(
            anchors, gt_bboxes, gt_bboxes_ignore,
            None if self.sampling else gt_labels)

        sampling_result = self.sampler.sample(assign_result, anchors,
                                              gt_bboxes)

        num_valid_anchors = anchors.shape[0]
        bbox_targets = torch.zeros_like(anchors)
        bbox_weights = torch.zeros_like(anchors)
        labels = anchors.new_full((num_valid_anchors, ),
                                  self.num_classes,
                                  dtype=torch.long)
        label_weights = anchors.new_zeros((num_valid_anchors, label_channels),
                                          dtype=torch.float)
        pos_gt_inds = anchors.new_full((num_valid_anchors, ),
                                       -1,
                                       dtype=torch.long)
        pos_inds = sampling_result.pos_inds
        neg_inds = sampling_result.neg_inds
        if len(pos_inds) > 0:
            if not self.reg_decoded_bbox:
                pos_bbox_targets = self.bbox_coder.encode(
                    sampling_result.pos_bboxes, sampling_result.pos_gt_bboxes)
            else:
                # When the regression loss (e.g. `IouLoss`, `GIouLoss`)
                # is applied directly on the decoded bounding boxes, both
                # the predicted boxes and regression targets should be with
                # absolute coordinate format.
                pos_bbox_targets = sampling_result.pos_gt_bboxes
            bbox_targets[pos_inds, :] = pos_bbox_targets
            bbox_weights[pos_inds, :] = 1.0
            # The assigned gt_index for each anchor. (0-based)
            pos_gt_inds[pos_inds] = sampling_result.pos_assigned_gt_inds
            if gt_labels is None:
                # Only rpn gives gt_labels as None
                # Foreground is the first class
                labels[pos_inds] = 0
            else:
                labels[pos_inds] = gt_labels[
                    sampling_result.pos_assigned_gt_inds]
            if self.train_cfg.pos_weight <= 0:
                label_weights[pos_inds] = 1.0
            else:
                label_weights[pos_inds] = self.train_cfg.pos_weight
        if len(neg_inds) > 0:
            label_weights[neg_inds] = 1.0

        # shadowed_labels is a tensor composed of tuples
        # (anchor_inds, class_label) that indicate those anchors lying in the
        # outer region of a gt or overlapped by another gt with a smaller
        # area.
        #
        # Therefore, only the shadowed labels are ignored for loss calculation.
        # the key `shadowed_labels` is defined in :obj:`CenterRegionAssigner`
        shadowed_labels = assign_result.get_extra_property('shadowed_labels')
        if shadowed_labels is not None and shadowed_labels.numel():
            if len(shadowed_labels.shape) == 2:
                idx_, label_ = shadowed_labels[:, 0], shadowed_labels[:, 1]
                assert (labels[idx_] != label_).all(), \
                    'One label cannot be both positive and ignored'
                label_weights[idx_, label_] = 0
            else:
                label_weights[shadowed_labels] = 0

        # map up to original set of anchors
        if unmap_outputs:
            num_total_anchors = flat_anchors.size(0)
            labels = unmap(labels, num_total_anchors, inside_flags)
            label_weights = unmap(label_weights, num_total_anchors,
                                  inside_flags)
            bbox_targets = unmap(bbox_targets, num_total_anchors, inside_flags)
            bbox_weights = unmap(bbox_weights, num_total_anchors, inside_flags)
            pos_gt_inds = unmap(
                pos_gt_inds, num_total_anchors, inside_flags, fill=-1)

        return (labels, label_weights, bbox_targets, bbox_weights, pos_inds,
                neg_inds, sampling_result, pos_gt_inds)

    @force_fp32(apply_to=('cls_scores', 'bbox_preds'))
    def loss(self,
             cls_scores,
             bbox_preds,
             gt_bboxes,
             gt_labels,
             img_metas,
             gt_bboxes_ignore=None):
        """Compute loss of the head.

        Args:
            cls_scores (list[Tensor]): Box scores for each scale level
                Has shape (N, num_points * num_classes, H, W).
            bbox_preds (list[Tensor]): Box energies / deltas for each scale
                level with shape (N, num_points * 4, H, W).
            gt_bboxes (list[Tensor]): each item are the truth boxes for each
                image in [tl_x, tl_y, br_x, br_y] format.
            gt_labels (list[Tensor]): class indices corresponding to each box
            img_metas (list[dict]): Meta information of each image, e.g.,
                image size, scaling factor, etc.
            gt_bboxes_ignore (None | list[Tensor]): specify which bounding
                boxes can be ignored when computing the loss.

        Returns:
            dict[str, Tensor]: A dictionary of loss components.
        """
        for i in range(len(bbox_preds)):  # loop over fpn level
            # avoid 0 area of the predicted bbox
            bbox_preds[i] = bbox_preds[i].clamp(min=1e-4)
        # TODO: It may directly use the base-class loss function.
        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
        assert len(featmap_sizes) == self.anchor_generator.num_levels
        batch_size = len(gt_bboxes)
        device = cls_scores[0].device
        anchor_list, valid_flag_list = self.get_anchors(
            featmap_sizes, img_metas, device=device)
        label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1
        cls_reg_targets = self.get_targets(
            anchor_list,
            valid_flag_list,
            gt_bboxes,
            img_metas,
            gt_bboxes_ignore_list=gt_bboxes_ignore,
            gt_labels_list=gt_labels,
            label_channels=label_channels)
        if cls_reg_targets is None:
            return None

        (labels_list, label_weights_list, bbox_targets_list,
         bbox_weights_list, num_total_pos, num_total_neg,
         pos_assigned_gt_inds_list) = cls_reg_targets

        num_gts = np.array(list(map(len, gt_labels)))
        num_total_samples = (
            num_total_pos + num_total_neg if self.sampling else num_total_pos)
        # anchor number of multi levels
        num_level_anchors = [anchors
.
size
(
0
)
for
anchors
in
anchor_list
[
0
]]
# concat all level anchors and flags to a single tensor
concat_anchor_list
=
[]
for
i
in
range
(
len
(
anchor_list
)):
concat_anchor_list
.
append
(
torch
.
cat
(
anchor_list
[
i
]))
all_anchor_list
=
images_to_levels
(
concat_anchor_list
,
num_level_anchors
)
losses_cls
,
losses_bbox
=
multi_apply
(
self
.
loss_single
,
cls_scores
,
bbox_preds
,
all_anchor_list
,
labels_list
,
label_weights_list
,
bbox_targets_list
,
bbox_weights_list
,
num_total_samples
=
num_total_samples
)
# `pos_assigned_gt_inds_list` (length: fpn_levels) stores the assigned
# gt index of each anchor bbox in each fpn level.
cum_num_gts
=
list
(
np
.
cumsum
(
num_gts
))
# length of batch_size
for
i
,
assign
in
enumerate
(
pos_assigned_gt_inds_list
):
# loop over fpn levels
for
j
in
range
(
1
,
batch_size
):
# loop over batch size
# Convert gt indices in each img to those in the batch
assign
[
j
][
assign
[
j
]
>=
0
]
+=
int
(
cum_num_gts
[
j
-
1
])
pos_assigned_gt_inds_list
[
i
]
=
assign
.
flatten
()
labels_list
[
i
]
=
labels_list
[
i
].
flatten
()
num_gts
=
sum
(
map
(
len
,
gt_labels
))
# total number of gt in the batch
# The unique label index of each gt in the batch
label_sequence
=
torch
.
arange
(
num_gts
,
device
=
device
)
# Collect the average loss of each gt in each level
with
torch
.
no_grad
():
loss_levels
,
=
multi_apply
(
self
.
collect_loss_level_single
,
losses_cls
,
losses_bbox
,
pos_assigned_gt_inds_list
,
labels_seq
=
label_sequence
)
# Shape: (fpn_levels, num_gts). Loss of each gt at each fpn level
loss_levels
=
torch
.
stack
(
loss_levels
,
dim
=
0
)
# Locate the best fpn level for loss back-propagation
if
loss_levels
.
numel
()
==
0
:
# zero gt
argmin
=
loss_levels
.
new_empty
((
num_gts
,
),
dtype
=
torch
.
long
)
else
:
_
,
argmin
=
loss_levels
.
min
(
dim
=
0
)
# Reweight the loss of each (anchor, label) pair, so that only those
# at the best gt level are back-propagated.
losses_cls
,
losses_bbox
,
pos_inds
=
multi_apply
(
self
.
reweight_loss_single
,
losses_cls
,
losses_bbox
,
pos_assigned_gt_inds_list
,
labels_list
,
list
(
range
(
len
(
losses_cls
))),
min_levels
=
argmin
)
num_pos
=
torch
.
cat
(
pos_inds
,
0
).
sum
().
float
()
pos_recall
=
self
.
calculate_pos_recall
(
cls_scores
,
labels_list
,
pos_inds
)
if
num_pos
==
0
:
# No gt
avg_factor
=
num_pos
+
float
(
num_total_neg
)
else
:
avg_factor
=
num_pos
for
i
in
range
(
len
(
losses_cls
)):
losses_cls
[
i
]
/=
avg_factor
losses_bbox
[
i
]
/=
avg_factor
return
dict
(
loss_cls
=
losses_cls
,
loss_bbox
=
losses_bbox
,
num_pos
=
num_pos
/
batch_size
,
pos_recall
=
pos_recall
)
def
calculate_pos_recall
(
self
,
cls_scores
,
labels_list
,
pos_inds
):
"""Calculate positive recall with score threshold.
Args:
cls_scores (list[Tensor]): Classification scores at all fpn levels.
Each tensor is in shape (N, num_classes * num_anchors, H, W)
labels_list (list[Tensor]): The label that each anchor is assigned
to. Shape (N * H * W * num_anchors, )
pos_inds (list[Tensor]): List of bool tensors indicating whether
the anchor is assigned to a positive label.
Shape (N * H * W * num_anchors, )
Returns:
Tensor: A single float number indicating the positive recall.
"""
with
torch
.
no_grad
():
num_class
=
self
.
num_classes
scores
=
[
cls
.
permute
(
0
,
2
,
3
,
1
).
reshape
(
-
1
,
num_class
)[
pos
]
for
cls
,
pos
in
zip
(
cls_scores
,
pos_inds
)
]
labels
=
[
label
.
reshape
(
-
1
)[
pos
]
for
label
,
pos
in
zip
(
labels_list
,
pos_inds
)
]
scores
=
torch
.
cat
(
scores
,
dim
=
0
)
labels
=
torch
.
cat
(
labels
,
dim
=
0
)
if
self
.
use_sigmoid_cls
:
scores
=
scores
.
sigmoid
()
else
:
scores
=
scores
.
softmax
(
dim
=
1
)
return
accuracy
(
scores
,
labels
,
thresh
=
self
.
score_threshold
)
def
collect_loss_level_single
(
self
,
cls_loss
,
reg_loss
,
assigned_gt_inds
,
labels_seq
):
"""Get the average loss in each FPN level w.r.t. each gt label.
Args:
cls_loss (Tensor): Classification loss of each feature map pixel,
shape (num_anchor, num_class)
reg_loss (Tensor): Regression loss of each feature map pixel,
shape (num_anchor, 4)
assigned_gt_inds (Tensor): It indicates which gt the prior is
assigned to (0-based, -1: no assignment). shape (num_anchor),
labels_seq: The rank of labels. shape (num_gt)
Returns:
shape: (num_gt), average loss of each gt in this level
"""
if
len
(
reg_loss
.
shape
)
==
2
:
# iou loss has shape (num_prior, 4)
reg_loss
=
reg_loss
.
sum
(
dim
=-
1
)
# sum loss in tblr dims
if
len
(
cls_loss
.
shape
)
==
2
:
cls_loss
=
cls_loss
.
sum
(
dim
=-
1
)
# sum loss in class dims
loss
=
cls_loss
+
reg_loss
assert
loss
.
size
(
0
)
==
assigned_gt_inds
.
size
(
0
)
# Default loss value is 1e6 for a layer where no anchor is positive
# to ensure it will not be chosen to back-propagate gradient
losses_
=
loss
.
new_full
(
labels_seq
.
shape
,
1e6
)
for
i
,
l
in
enumerate
(
labels_seq
):
match
=
assigned_gt_inds
==
l
if
match
.
any
():
losses_
[
i
]
=
loss
[
match
].
mean
()
return
losses_
,
def
reweight_loss_single
(
self
,
cls_loss
,
reg_loss
,
assigned_gt_inds
,
labels
,
level
,
min_levels
):
"""Reweight loss values at each level.
Reassign loss values at each level by masking those where the
pre-calculated loss is too large. Then return the reduced losses.
Args:
cls_loss (Tensor): Element-wise classification loss.
Shape: (num_anchors, num_classes)
reg_loss (Tensor): Element-wise regression loss.
Shape: (num_anchors, 4)
assigned_gt_inds (Tensor): The gt indices that each anchor bbox
is assigned to. -1 denotes a negative anchor, otherwise it is the
gt index (0-based). Shape: (num_anchors, ),
labels (Tensor): Label assigned to anchors. Shape: (num_anchors, ).
level (int): The current level index in the pyramid
(0-4 for RetinaNet)
min_levels (Tensor): The best-matching level for each gt.
Shape: (num_gts, ),
Returns:
tuple:
- cls_loss: Reduced corrected classification loss. Scalar.
- reg_loss: Reduced corrected regression loss. Scalar.
- pos_flags (Tensor): Corrected bool tensor indicating the
final postive anchors. Shape: (num_anchors, ).
"""
loc_weight
=
torch
.
ones_like
(
reg_loss
)
cls_weight
=
torch
.
ones_like
(
cls_loss
)
pos_flags
=
assigned_gt_inds
>=
0
# positive pixel flag
pos_indices
=
torch
.
nonzero
(
pos_flags
,
as_tuple
=
False
).
flatten
()
if
pos_flags
.
any
():
# pos pixels exist
pos_assigned_gt_inds
=
assigned_gt_inds
[
pos_flags
]
zeroing_indices
=
(
min_levels
[
pos_assigned_gt_inds
]
!=
level
)
neg_indices
=
pos_indices
[
zeroing_indices
]
if
neg_indices
.
numel
():
pos_flags
[
neg_indices
]
=
0
loc_weight
[
neg_indices
]
=
0
# Only the weight corresponding to the label is
# zeroed out if not selected
zeroing_labels
=
labels
[
neg_indices
]
assert
(
zeroing_labels
>=
0
).
all
()
cls_weight
[
neg_indices
,
zeroing_labels
]
=
0
# Weighted loss for both cls and reg loss
cls_loss
=
weight_reduce_loss
(
cls_loss
,
cls_weight
,
reduction
=
'sum'
)
reg_loss
=
weight_reduce_loss
(
reg_loss
,
loc_weight
,
reduction
=
'sum'
)
return
cls_loss
,
reg_loss
,
pos_flags
PyTorch/NLP/Conformer-main/mmdetection/mmdet/models/dense_heads/ga_retina_head.py
0 → 100644
View file @
142dcf29
import
torch.nn
as
nn
from
mmcv.cnn
import
ConvModule
,
bias_init_with_prob
,
normal_init
from
mmcv.ops
import
MaskedConv2d
from
..builder
import
HEADS
from
.guided_anchor_head
import
FeatureAdaption
,
GuidedAnchorHead
@
HEADS
.
register_module
()
class
GARetinaHead
(
GuidedAnchorHead
):
"""Guided-Anchor-based RetinaNet head."""
def
__init__
(
self
,
num_classes
,
in_channels
,
stacked_convs
=
4
,
conv_cfg
=
None
,
norm_cfg
=
None
,
**
kwargs
):
self
.
stacked_convs
=
stacked_convs
self
.
conv_cfg
=
conv_cfg
self
.
norm_cfg
=
norm_cfg
super
(
GARetinaHead
,
self
).
__init__
(
num_classes
,
in_channels
,
**
kwargs
)
def
_init_layers
(
self
):
"""Initialize layers of the head."""
self
.
relu
=
nn
.
ReLU
(
inplace
=
True
)
self
.
cls_convs
=
nn
.
ModuleList
()
self
.
reg_convs
=
nn
.
ModuleList
()
for
i
in
range
(
self
.
stacked_convs
):
chn
=
self
.
in_channels
if
i
==
0
else
self
.
feat_channels
self
.
cls_convs
.
append
(
ConvModule
(
chn
,
self
.
feat_channels
,
3
,
stride
=
1
,
padding
=
1
,
conv_cfg
=
self
.
conv_cfg
,
norm_cfg
=
self
.
norm_cfg
))
self
.
reg_convs
.
append
(
ConvModule
(
chn
,
self
.
feat_channels
,
3
,
stride
=
1
,
padding
=
1
,
conv_cfg
=
self
.
conv_cfg
,
norm_cfg
=
self
.
norm_cfg
))
self
.
conv_loc
=
nn
.
Conv2d
(
self
.
feat_channels
,
1
,
1
)
self
.
conv_shape
=
nn
.
Conv2d
(
self
.
feat_channels
,
self
.
num_anchors
*
2
,
1
)
self
.
feature_adaption_cls
=
FeatureAdaption
(
self
.
feat_channels
,
self
.
feat_channels
,
kernel_size
=
3
,
deform_groups
=
self
.
deform_groups
)
self
.
feature_adaption_reg
=
FeatureAdaption
(
self
.
feat_channels
,
self
.
feat_channels
,
kernel_size
=
3
,
deform_groups
=
self
.
deform_groups
)
self
.
retina_cls
=
MaskedConv2d
(
self
.
feat_channels
,
self
.
num_anchors
*
self
.
cls_out_channels
,
3
,
padding
=
1
)
self
.
retina_reg
=
MaskedConv2d
(
self
.
feat_channels
,
self
.
num_anchors
*
4
,
3
,
padding
=
1
)
def
init_weights
(
self
):
"""Initialize weights of the layer."""
for
m
in
self
.
cls_convs
:
normal_init
(
m
.
conv
,
std
=
0.01
)
for
m
in
self
.
reg_convs
:
normal_init
(
m
.
conv
,
std
=
0.01
)
self
.
feature_adaption_cls
.
init_weights
()
self
.
feature_adaption_reg
.
init_weights
()
bias_cls
=
bias_init_with_prob
(
0.01
)
normal_init
(
self
.
conv_loc
,
std
=
0.01
,
bias
=
bias_cls
)
normal_init
(
self
.
conv_shape
,
std
=
0.01
)
normal_init
(
self
.
retina_cls
,
std
=
0.01
,
bias
=
bias_cls
)
normal_init
(
self
.
retina_reg
,
std
=
0.01
)
def
forward_single
(
self
,
x
):
"""Forward feature map of a single scale level."""
cls_feat
=
x
reg_feat
=
x
for
cls_conv
in
self
.
cls_convs
:
cls_feat
=
cls_conv
(
cls_feat
)
for
reg_conv
in
self
.
reg_convs
:
reg_feat
=
reg_conv
(
reg_feat
)
loc_pred
=
self
.
conv_loc
(
cls_feat
)
shape_pred
=
self
.
conv_shape
(
reg_feat
)
cls_feat
=
self
.
feature_adaption_cls
(
cls_feat
,
shape_pred
)
reg_feat
=
self
.
feature_adaption_reg
(
reg_feat
,
shape_pred
)
if
not
self
.
training
:
mask
=
loc_pred
.
sigmoid
()[
0
]
>=
self
.
loc_filter_thr
else
:
mask
=
None
cls_score
=
self
.
retina_cls
(
cls_feat
,
mask
)
bbox_pred
=
self
.
retina_reg
(
reg_feat
,
mask
)
return
cls_score
,
bbox_pred
,
shape_pred
,
loc_pred
PyTorch/NLP/Conformer-main/mmdetection/mmdet/models/dense_heads/ga_rpn_head.py
0 → 100644
View file @
142dcf29
import
torch
import
torch.nn
as
nn
import
torch.nn.functional
as
F
from
mmcv.cnn
import
normal_init
from
mmcv.ops
import
nms
from
..builder
import
HEADS
from
.guided_anchor_head
import
GuidedAnchorHead
from
.rpn_test_mixin
import
RPNTestMixin
@
HEADS
.
register_module
()
class
GARPNHead
(
RPNTestMixin
,
GuidedAnchorHead
):
"""Guided-Anchor-based RPN head."""
def
__init__
(
self
,
in_channels
,
**
kwargs
):
super
(
GARPNHead
,
self
).
__init__
(
1
,
in_channels
,
**
kwargs
)
def
_init_layers
(
self
):
"""Initialize layers of the head."""
self
.
rpn_conv
=
nn
.
Conv2d
(
self
.
in_channels
,
self
.
feat_channels
,
3
,
padding
=
1
)
super
(
GARPNHead
,
self
).
_init_layers
()
def
init_weights
(
self
):
"""Initialize weights of the head."""
normal_init
(
self
.
rpn_conv
,
std
=
0.01
)
super
(
GARPNHead
,
self
).
init_weights
()
def
forward_single
(
self
,
x
):
"""Forward feature of a single scale level."""
x
=
self
.
rpn_conv
(
x
)
x
=
F
.
relu
(
x
,
inplace
=
True
)
(
cls_score
,
bbox_pred
,
shape_pred
,
loc_pred
)
=
super
(
GARPNHead
,
self
).
forward_single
(
x
)
return
cls_score
,
bbox_pred
,
shape_pred
,
loc_pred
def
loss
(
self
,
cls_scores
,
bbox_preds
,
shape_preds
,
loc_preds
,
gt_bboxes
,
img_metas
,
gt_bboxes_ignore
=
None
):
losses
=
super
(
GARPNHead
,
self
).
loss
(
cls_scores
,
bbox_preds
,
shape_preds
,
loc_preds
,
gt_bboxes
,
None
,
img_metas
,
gt_bboxes_ignore
=
gt_bboxes_ignore
)
return
dict
(
loss_rpn_cls
=
losses
[
'loss_cls'
],
loss_rpn_bbox
=
losses
[
'loss_bbox'
],
loss_anchor_shape
=
losses
[
'loss_shape'
],
loss_anchor_loc
=
losses
[
'loss_loc'
])
def
_get_bboxes_single
(
self
,
cls_scores
,
bbox_preds
,
mlvl_anchors
,
mlvl_masks
,
img_shape
,
scale_factor
,
cfg
,
rescale
=
False
):
cfg
=
self
.
test_cfg
if
cfg
is
None
else
cfg
mlvl_proposals
=
[]
for
idx
in
range
(
len
(
cls_scores
)):
rpn_cls_score
=
cls_scores
[
idx
]
rpn_bbox_pred
=
bbox_preds
[
idx
]
anchors
=
mlvl_anchors
[
idx
]
mask
=
mlvl_masks
[
idx
]
assert
rpn_cls_score
.
size
()[
-
2
:]
==
rpn_bbox_pred
.
size
()[
-
2
:]
# if no location is kept, end.
if
mask
.
sum
()
==
0
:
continue
rpn_cls_score
=
rpn_cls_score
.
permute
(
1
,
2
,
0
)
if
self
.
use_sigmoid_cls
:
rpn_cls_score
=
rpn_cls_score
.
reshape
(
-
1
)
scores
=
rpn_cls_score
.
sigmoid
()
else
:
rpn_cls_score
=
rpn_cls_score
.
reshape
(
-
1
,
2
)
# remind that we set FG labels to [0, num_class-1]
# since mmdet v2.0
# BG cat_id: num_class
scores
=
rpn_cls_score
.
softmax
(
dim
=
1
)[:,
:
-
1
]
# filter scores, bbox_pred w.r.t. mask.
# anchors are filtered in get_anchors() beforehand.
scores
=
scores
[
mask
]
rpn_bbox_pred
=
rpn_bbox_pred
.
permute
(
1
,
2
,
0
).
reshape
(
-
1
,
4
)[
mask
,
:]
if
scores
.
dim
()
==
0
:
rpn_bbox_pred
=
rpn_bbox_pred
.
unsqueeze
(
0
)
anchors
=
anchors
.
unsqueeze
(
0
)
scores
=
scores
.
unsqueeze
(
0
)
# filter anchors, bbox_pred, scores w.r.t. scores
if
cfg
.
nms_pre
>
0
and
scores
.
shape
[
0
]
>
cfg
.
nms_pre
:
_
,
topk_inds
=
scores
.
topk
(
cfg
.
nms_pre
)
rpn_bbox_pred
=
rpn_bbox_pred
[
topk_inds
,
:]
anchors
=
anchors
[
topk_inds
,
:]
scores
=
scores
[
topk_inds
]
# get proposals w.r.t. anchors and rpn_bbox_pred
proposals
=
self
.
bbox_coder
.
decode
(
anchors
,
rpn_bbox_pred
,
max_shape
=
img_shape
)
# filter out too small bboxes
if
cfg
.
min_bbox_size
>
0
:
w
=
proposals
[:,
2
]
-
proposals
[:,
0
]
h
=
proposals
[:,
3
]
-
proposals
[:,
1
]
valid_inds
=
torch
.
nonzero
(
(
w
>=
cfg
.
min_bbox_size
)
&
(
h
>=
cfg
.
min_bbox_size
),
as_tuple
=
False
).
squeeze
()
proposals
=
proposals
[
valid_inds
,
:]
scores
=
scores
[
valid_inds
]
# NMS in current level
proposals
,
_
=
nms
(
proposals
,
scores
,
cfg
.
nms_thr
)
proposals
=
proposals
[:
cfg
.
nms_post
,
:]
mlvl_proposals
.
append
(
proposals
)
proposals
=
torch
.
cat
(
mlvl_proposals
,
0
)
if
cfg
.
nms_across_levels
:
# NMS across multi levels
proposals
,
_
=
nms
(
proposals
[:,
:
4
],
proposals
[:,
-
1
],
cfg
.
nms_thr
)
proposals
=
proposals
[:
cfg
.
max_num
,
:]
else
:
scores
=
proposals
[:,
4
]
num
=
min
(
cfg
.
max_num
,
proposals
.
shape
[
0
])
_
,
topk_inds
=
scores
.
topk
(
num
)
proposals
=
proposals
[
topk_inds
,
:]
return
proposals
PyTorch/NLP/Conformer-main/mmdetection/mmdet/models/dense_heads/gfl_head.py
0 → 100644
View file @
142dcf29
import
torch
import
torch.nn
as
nn
import
torch.nn.functional
as
F
from
mmcv.cnn
import
ConvModule
,
Scale
,
bias_init_with_prob
,
normal_init
from
mmcv.runner
import
force_fp32
from
mmdet.core
import
(
anchor_inside_flags
,
bbox2distance
,
bbox_overlaps
,
build_assigner
,
build_sampler
,
distance2bbox
,
images_to_levels
,
multi_apply
,
multiclass_nms
,
reduce_mean
,
unmap
)
from
..builder
import
HEADS
,
build_loss
from
.anchor_head
import
AnchorHead
class
Integral
(
nn
.
Module
):
"""A fixed layer for calculating integral result from distribution.
This layer calculates the target location by :math: `sum{P(y_i) * y_i}`,
P(y_i) denotes the softmax vector that represents the discrete distribution
y_i denotes the discrete set, usually {0, 1, 2, ..., reg_max}
Args:
reg_max (int): The maximal value of the discrete set. Default: 16. You
may want to reset it according to your new dataset or related
settings.
"""
def
__init__
(
self
,
reg_max
=
16
):
super
(
Integral
,
self
).
__init__
()
self
.
reg_max
=
reg_max
self
.
register_buffer
(
'project'
,
torch
.
linspace
(
0
,
self
.
reg_max
,
self
.
reg_max
+
1
))
def
forward
(
self
,
x
):
"""Forward feature from the regression head to get integral result of
bounding box location.
Args:
x (Tensor): Features of the regression head, shape (N, 4*(n+1)),
n is self.reg_max.
Returns:
x (Tensor): Integral result of box locations, i.e., distance
offsets from the box center in four directions, shape (N, 4).
"""
x
=
F
.
softmax
(
x
.
reshape
(
-
1
,
self
.
reg_max
+
1
),
dim
=
1
)
x
=
F
.
linear
(
x
,
self
.
project
.
type_as
(
x
)).
reshape
(
-
1
,
4
)
return
x
@
HEADS
.
register_module
()
class
GFLHead
(
AnchorHead
):
"""Generalized Focal Loss: Learning Qualified and Distributed Bounding
Boxes for Dense Object Detection.
GFL head structure is similar with ATSS, however GFL uses
1) joint representation for classification and localization quality, and
2) flexible General distribution for bounding box locations,
which are supervised by
Quality Focal Loss (QFL) and Distribution Focal Loss (DFL), respectively
https://arxiv.org/abs/2006.04388
Args:
num_classes (int): Number of categories excluding the background
category.
in_channels (int): Number of channels in the input feature map.
stacked_convs (int): Number of conv layers in cls and reg tower.
Default: 4.
conv_cfg (dict): dictionary to construct and config conv layer.
Default: None.
norm_cfg (dict): dictionary to construct and config norm layer.
Default: dict(type='GN', num_groups=32, requires_grad=True).
loss_qfl (dict): Config of Quality Focal Loss (QFL).
reg_max (int): Max value of integral set :math: `{0, ..., reg_max}`
in QFL setting. Default: 16.
Example:
>>> self = GFLHead(11, 7)
>>> feats = [torch.rand(1, 7, s, s) for s in [4, 8, 16, 32, 64]]
>>> cls_quality_score, bbox_pred = self.forward(feats)
>>> assert len(cls_quality_score) == len(self.scales)
"""
def
__init__
(
self
,
num_classes
,
in_channels
,
stacked_convs
=
4
,
conv_cfg
=
None
,
norm_cfg
=
dict
(
type
=
'GN'
,
num_groups
=
32
,
requires_grad
=
True
),
loss_dfl
=
dict
(
type
=
'DistributionFocalLoss'
,
loss_weight
=
0.25
),
reg_max
=
16
,
**
kwargs
):
self
.
stacked_convs
=
stacked_convs
self
.
conv_cfg
=
conv_cfg
self
.
norm_cfg
=
norm_cfg
self
.
reg_max
=
reg_max
super
(
GFLHead
,
self
).
__init__
(
num_classes
,
in_channels
,
**
kwargs
)
self
.
sampling
=
False
if
self
.
train_cfg
:
self
.
assigner
=
build_assigner
(
self
.
train_cfg
.
assigner
)
# SSD sampling=False so use PseudoSampler
sampler_cfg
=
dict
(
type
=
'PseudoSampler'
)
self
.
sampler
=
build_sampler
(
sampler_cfg
,
context
=
self
)
self
.
integral
=
Integral
(
self
.
reg_max
)
self
.
loss_dfl
=
build_loss
(
loss_dfl
)
def
_init_layers
(
self
):
"""Initialize layers of the head."""
self
.
relu
=
nn
.
ReLU
(
inplace
=
True
)
self
.
cls_convs
=
nn
.
ModuleList
()
self
.
reg_convs
=
nn
.
ModuleList
()
for
i
in
range
(
self
.
stacked_convs
):
chn
=
self
.
in_channels
if
i
==
0
else
self
.
feat_channels
self
.
cls_convs
.
append
(
ConvModule
(
chn
,
self
.
feat_channels
,
3
,
stride
=
1
,
padding
=
1
,
conv_cfg
=
self
.
conv_cfg
,
norm_cfg
=
self
.
norm_cfg
))
self
.
reg_convs
.
append
(
ConvModule
(
chn
,
self
.
feat_channels
,
3
,
stride
=
1
,
padding
=
1
,
conv_cfg
=
self
.
conv_cfg
,
norm_cfg
=
self
.
norm_cfg
))
assert
self
.
num_anchors
==
1
,
'anchor free version'
self
.
gfl_cls
=
nn
.
Conv2d
(
self
.
feat_channels
,
self
.
cls_out_channels
,
3
,
padding
=
1
)
self
.
gfl_reg
=
nn
.
Conv2d
(
self
.
feat_channels
,
4
*
(
self
.
reg_max
+
1
),
3
,
padding
=
1
)
self
.
scales
=
nn
.
ModuleList
(
[
Scale
(
1.0
)
for
_
in
self
.
anchor_generator
.
strides
])
def
init_weights
(
self
):
"""Initialize weights of the head."""
for
m
in
self
.
cls_convs
:
normal_init
(
m
.
conv
,
std
=
0.01
)
for
m
in
self
.
reg_convs
:
normal_init
(
m
.
conv
,
std
=
0.01
)
bias_cls
=
bias_init_with_prob
(
0.01
)
normal_init
(
self
.
gfl_cls
,
std
=
0.01
,
bias
=
bias_cls
)
normal_init
(
self
.
gfl_reg
,
std
=
0.01
)
def
forward
(
self
,
feats
):
"""Forward features from the upstream network.
Args:
feats (tuple[Tensor]): Features from the upstream network, each is
a 4D-tensor.
Returns:
tuple: Usually a tuple of classification scores and bbox prediction
cls_scores (list[Tensor]): Classification and quality (IoU)
joint scores for all scale levels, each is a 4D-tensor,
the channel number is num_classes.
bbox_preds (list[Tensor]): Box distribution logits for all
scale levels, each is a 4D-tensor, the channel number is
4*(n+1), n is max value of integral set.
"""
return
multi_apply
(
self
.
forward_single
,
feats
,
self
.
scales
)
def
forward_single
(
self
,
x
,
scale
):
"""Forward feature of a single scale level.
Args:
x (Tensor): Features of a single scale level.
scale (:obj: `mmcv.cnn.Scale`): Learnable scale module to resize
the bbox prediction.
Returns:
tuple:
cls_score (Tensor): Cls and quality joint scores for a single
scale level the channel number is num_classes.
bbox_pred (Tensor): Box distribution logits for a single scale
level, the channel number is 4*(n+1), n is max value of
integral set.
"""
cls_feat
=
x
reg_feat
=
x
for
cls_conv
in
self
.
cls_convs
:
cls_feat
=
cls_conv
(
cls_feat
)
for
reg_conv
in
self
.
reg_convs
:
reg_feat
=
reg_conv
(
reg_feat
)
cls_score
=
self
.
gfl_cls
(
cls_feat
)
bbox_pred
=
scale
(
self
.
gfl_reg
(
reg_feat
)).
float
()
return
cls_score
,
bbox_pred
def
anchor_center
(
self
,
anchors
):
"""Get anchor centers from anchors.
Args:
anchors (Tensor): Anchor list with shape (N, 4), "xyxy" format.
Returns:
Tensor: Anchor centers with shape (N, 2), "xy" format.
"""
anchors_cx
=
(
anchors
[:,
2
]
+
anchors
[:,
0
])
/
2
anchors_cy
=
(
anchors
[:,
3
]
+
anchors
[:,
1
])
/
2
return
torch
.
stack
([
anchors_cx
,
anchors_cy
],
dim
=-
1
)
def
loss_single
(
self
,
anchors
,
cls_score
,
bbox_pred
,
labels
,
label_weights
,
bbox_targets
,
stride
,
num_total_samples
):
"""Compute loss of a single scale level.
Args:
anchors (Tensor): Box reference for each scale level with shape
(N, num_total_anchors, 4).
cls_score (Tensor): Cls and quality joint scores for each scale
level has shape (N, num_classes, H, W).
bbox_pred (Tensor): Box distribution logits for each scale
level with shape (N, 4*(n+1), H, W), n is max value of integral
set.
labels (Tensor): Labels of each anchors with shape
(N, num_total_anchors).
label_weights (Tensor): Label weights of each anchor with shape
(N, num_total_anchors)
bbox_targets (Tensor): BBox regression targets of each anchor wight
shape (N, num_total_anchors, 4).
stride (tuple): Stride in this scale level.
num_total_samples (int): Number of positive samples that is
reduced over all GPUs.
Returns:
dict[str, Tensor]: A dictionary of loss components.
"""
assert
stride
[
0
]
==
stride
[
1
],
'h stride is not equal to w stride!'
anchors
=
anchors
.
reshape
(
-
1
,
4
)
cls_score
=
cls_score
.
permute
(
0
,
2
,
3
,
1
).
reshape
(
-
1
,
self
.
cls_out_channels
)
bbox_pred
=
bbox_pred
.
permute
(
0
,
2
,
3
,
1
).
reshape
(
-
1
,
4
*
(
self
.
reg_max
+
1
))
bbox_targets
=
bbox_targets
.
reshape
(
-
1
,
4
)
labels
=
labels
.
reshape
(
-
1
)
label_weights
=
label_weights
.
reshape
(
-
1
)
# FG cat_id: [0, num_classes -1], BG cat_id: num_classes
bg_class_ind
=
self
.
num_classes
pos_inds
=
((
labels
>=
0
)
&
(
labels
<
bg_class_ind
)).
nonzero
().
squeeze
(
1
)
score
=
label_weights
.
new_zeros
(
labels
.
shape
)
if
len
(
pos_inds
)
>
0
:
pos_bbox_targets
=
bbox_targets
[
pos_inds
]
pos_bbox_pred
=
bbox_pred
[
pos_inds
]
pos_anchors
=
anchors
[
pos_inds
]
pos_anchor_centers
=
self
.
anchor_center
(
pos_anchors
)
/
stride
[
0
]
weight_targets
=
cls_score
.
detach
().
sigmoid
()
weight_targets
=
weight_targets
.
max
(
dim
=
1
)[
0
][
pos_inds
]
pos_bbox_pred_corners
=
self
.
integral
(
pos_bbox_pred
)
pos_decode_bbox_pred
=
distance2bbox
(
pos_anchor_centers
,
pos_bbox_pred_corners
)
pos_decode_bbox_targets
=
pos_bbox_targets
/
stride
[
0
]
score
[
pos_inds
]
=
bbox_overlaps
(
pos_decode_bbox_pred
.
detach
(),
pos_decode_bbox_targets
,
is_aligned
=
True
)
pred_corners
=
pos_bbox_pred
.
reshape
(
-
1
,
self
.
reg_max
+
1
)
target_corners
=
bbox2distance
(
pos_anchor_centers
,
pos_decode_bbox_targets
,
self
.
reg_max
).
reshape
(
-
1
)
# regression loss
loss_bbox
=
self
.
loss_bbox
(
pos_decode_bbox_pred
,
pos_decode_bbox_targets
,
weight
=
weight_targets
,
avg_factor
=
1.0
)
# dfl loss
loss_dfl
=
self
.
loss_dfl
(
pred_corners
,
target_corners
,
weight
=
weight_targets
[:,
None
].
expand
(
-
1
,
4
).
reshape
(
-
1
),
avg_factor
=
4.0
)
else
:
loss_bbox
=
bbox_pred
.
sum
()
*
0
loss_dfl
=
bbox_pred
.
sum
()
*
0
weight_targets
=
torch
.
tensor
(
0
).
cuda
()
# cls (qfl) loss
loss_cls
=
self
.
loss_cls
(
cls_score
,
(
labels
,
score
),
weight
=
label_weights
,
avg_factor
=
num_total_samples
)
return
loss_cls
,
loss_bbox
,
loss_dfl
,
weight_targets
.
sum
()
@
force_fp32
(
apply_to
=
(
'cls_scores'
,
'bbox_preds'
))
def
loss
(
self
,
cls_scores
,
bbox_preds
,
gt_bboxes
,
gt_labels
,
img_metas
,
gt_bboxes_ignore
=
None
):
"""Compute losses of the head.
Args:
cls_scores (list[Tensor]): Cls and quality scores for each scale
level has shape (N, num_classes, H, W).
bbox_preds (list[Tensor]): Box distribution logits for each scale
level with shape (N, 4*(n+1), H, W), n is max value of integral
set.
gt_bboxes (list[Tensor]): Ground truth bboxes for each image with
shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.
gt_labels (list[Tensor]): class indices corresponding to each box
img_metas (list[dict]): Meta information of each image, e.g.,
image size, scaling factor, etc.
gt_bboxes_ignore (list[Tensor] | None): specify which bounding
boxes can be ignored when computing the loss.
Returns:
dict[str, Tensor]: A dictionary of loss components.
"""
featmap_sizes
=
[
featmap
.
size
()[
-
2
:]
for
featmap
in
cls_scores
]
assert
len
(
featmap_sizes
)
==
self
.
anchor_generator
.
num_levels
device
=
cls_scores
[
0
].
device
anchor_list
,
valid_flag_list
=
self
.
get_anchors
(
featmap_sizes
,
img_metas
,
device
=
device
)
label_channels
=
self
.
cls_out_channels
if
self
.
use_sigmoid_cls
else
1
cls_reg_targets
=
self
.
get_targets
(
anchor_list
,
valid_flag_list
,
gt_bboxes
,
img_metas
,
gt_bboxes_ignore_list
=
gt_bboxes_ignore
,
gt_labels_list
=
gt_labels
,
label_channels
=
label_channels
)
if
cls_reg_targets
is
None
:
return
None
(
anchor_list
,
labels_list
,
label_weights_list
,
bbox_targets_list
,
bbox_weights_list
,
num_total_pos
,
num_total_neg
)
=
cls_reg_targets
num_total_samples
=
reduce_mean
(
torch
.
tensor
(
num_total_pos
,
dtype
=
torch
.
float
,
device
=
device
)).
item
()
num_total_samples
=
max
(
num_total_samples
,
1.0
)
losses_cls
,
losses_bbox
,
losses_dfl
,
\
avg_factor
=
multi_apply
(
self
.
loss_single
,
anchor_list
,
cls_scores
,
bbox_preds
,
labels_list
,
label_weights_list
,
bbox_targets_list
,
self
.
anchor_generator
.
strides
,
num_total_samples
=
num_total_samples
)
avg_factor
=
sum
(
avg_factor
)
avg_factor
=
reduce_mean
(
avg_factor
).
item
()
losses_bbox
=
list
(
map
(
lambda
x
:
x
/
avg_factor
,
losses_bbox
))
losses_dfl
=
list
(
map
(
lambda
x
:
x
/
avg_factor
,
losses_dfl
))
return
dict
(
loss_cls
=
losses_cls
,
loss_bbox
=
losses_bbox
,
loss_dfl
=
losses_dfl
)
def
_get_bboxes_single
(
self
,
cls_scores
,
bbox_preds
,
mlvl_anchors
,
img_shape
,
scale_factor
,
cfg
,
rescale
=
False
,
with_nms
=
True
):
"""Transform outputs for a single batch item into labeled boxes.
Args:
cls_scores (list[Tensor]): Box scores for a single scale level
has shape (num_classes, H, W).
bbox_preds (list[Tensor]): Box distribution logits for a single
scale level with shape (4*(n+1), H, W), n is max value of
integral set.
mlvl_anchors (list[Tensor]): Box reference for a single scale level
with shape (num_total_anchors, 4).
img_shape (tuple[int]): Shape of the input image,
(height, width, 3).
scale_factor (ndarray): Scale factor of the image arange as
(w_scale, h_scale, w_scale, h_scale).
cfg (mmcv.Config | None): Test / postprocessing configuration,
if None, test_cfg would be used.
rescale (bool): If True, return boxes in original image space.
Default: False.
with_nms (bool): If True, do nms before return boxes.
Default: True.
Returns:
tuple(Tensor):
det_bboxes (Tensor): Bbox predictions in shape (N, 5), where
the first 4 columns are bounding box positions
(tl_x, tl_y, br_x, br_y) and the 5-th column is a score
between 0 and 1.
det_labels (Tensor): A (N,) tensor where each item is the
predicted class label of the corresponding box.
"""
cfg
=
self
.
test_cfg
if
cfg
is
None
else
cfg
assert
len
(
cls_scores
)
==
len
(
bbox_preds
)
==
len
(
mlvl_anchors
)
mlvl_bboxes
=
[]
mlvl_scores
=
[]
for
cls_score
,
bbox_pred
,
stride
,
anchors
in
zip
(
cls_scores
,
bbox_preds
,
self
.
anchor_generator
.
strides
,
mlvl_anchors
):
assert
cls_score
.
size
()[
-
2
:]
==
bbox_pred
.
size
()[
-
2
:]
assert
stride
[
0
]
==
stride
[
1
]
scores
=
cls_score
.
permute
(
1
,
2
,
0
).
reshape
(
-
1
,
self
.
cls_out_channels
).
sigmoid
()
bbox_pred
=
bbox_pred
.
permute
(
1
,
2
,
0
)
bbox_pred
=
self
.
integral
(
bbox_pred
)
*
stride
[
0
]
nms_pre
=
cfg
.
get
(
'nms_pre'
,
-
1
)
if
nms_pre
>
0
and
scores
.
shape
[
0
]
>
nms_pre
:
max_scores
,
_
=
scores
.
max
(
dim
=
1
)
_
,
topk_inds
=
max_scores
.
topk
(
nms_pre
)
anchors
=
anchors
[
topk_inds
,
:]
bbox_pred
=
bbox_pred
[
topk_inds
,
:]
scores
=
scores
[
topk_inds
,
:]
bboxes
=
distance2bbox
(
self
.
anchor_center
(
anchors
),
bbox_pred
,
max_shape
=
img_shape
)
mlvl_bboxes
.
append
(
bboxes
)
mlvl_scores
.
append
(
scores
)
mlvl_bboxes
=
torch
.
cat
(
mlvl_bboxes
)
if
rescale
:
mlvl_bboxes
/=
mlvl_bboxes
.
new_tensor
(
scale_factor
)
mlvl_scores
=
torch
.
cat
(
mlvl_scores
)
# Add a dummy background class to the backend when using sigmoid
# remind that we set FG labels to [0, num_class-1] since mmdet v2.0
# BG cat_id: num_class
padding
=
mlvl_scores
.
new_zeros
(
mlvl_scores
.
shape
[
0
],
1
)
mlvl_scores
=
torch
.
cat
([
mlvl_scores
,
padding
],
dim
=
1
)
if
with_nms
:
det_bboxes
,
det_labels
=
multiclass_nms
(
mlvl_bboxes
,
mlvl_scores
,
cfg
.
score_thr
,
cfg
.
nms
,
cfg
.
max_per_img
)
return
det_bboxes
,
det_labels
else
:
return
mlvl_bboxes
,
mlvl_scores
def
get_targets
(
self
,
anchor_list
,
valid_flag_list
,
gt_bboxes_list
,
img_metas
,
gt_bboxes_ignore_list
=
None
,
gt_labels_list
=
None
,
label_channels
=
1
,
unmap_outputs
=
True
):
"""Get targets for GFL head.
This method is almost the same as `AnchorHead.get_targets()`. Besides
returning the targets as the parent method does, it also returns the
anchors as the first element of the returned tuple.
"""
num_imgs
=
len
(
img_metas
)
assert
len
(
anchor_list
)
==
len
(
valid_flag_list
)
==
num_imgs
# anchor number of multi levels
num_level_anchors
=
[
anchors
.
size
(
0
)
for
anchors
in
anchor_list
[
0
]]
num_level_anchors_list
=
[
num_level_anchors
]
*
num_imgs
# concat all level anchors and flags to a single tensor
for
i
in
range
(
num_imgs
):
assert
len
(
anchor_list
[
i
])
==
len
(
valid_flag_list
[
i
])
anchor_list
[
i
]
=
torch
.
cat
(
anchor_list
[
i
])
valid_flag_list
[
i
]
=
torch
.
cat
(
valid_flag_list
[
i
])
# compute targets for each image
if
gt_bboxes_ignore_list
is
None
:
gt_bboxes_ignore_list
=
[
None
for
_
in
range
(
num_imgs
)]
if
gt_labels_list
is
None
:
gt_labels_list
=
[
None
for
_
in
range
(
num_imgs
)]
(
all_anchors
,
all_labels
,
all_label_weights
,
all_bbox_targets
,
all_bbox_weights
,
pos_inds_list
,
neg_inds_list
)
=
multi_apply
(
self
.
_get_target_single
,
anchor_list
,
valid_flag_list
,
num_level_anchors_list
,
gt_bboxes_list
,
gt_bboxes_ignore_list
,
gt_labels_list
,
img_metas
,
label_channels
=
label_channels
,
unmap_outputs
=
unmap_outputs
)
# no valid anchors
if
any
([
labels
is
None
for
labels
in
all_labels
]):
return
None
# sampled anchors of all images
num_total_pos
=
sum
([
max
(
inds
.
numel
(),
1
)
for
inds
in
pos_inds_list
])
num_total_neg
=
sum
([
max
(
inds
.
numel
(),
1
)
for
inds
in
neg_inds_list
])
# split targets to a list w.r.t. multiple levels
anchors_list
=
images_to_levels
(
all_anchors
,
num_level_anchors
)
labels_list
=
images_to_levels
(
all_labels
,
num_level_anchors
)
label_weights_list
=
images_to_levels
(
all_label_weights
,
num_level_anchors
)
bbox_targets_list
=
images_to_levels
(
all_bbox_targets
,
num_level_anchors
)
bbox_weights_list
=
images_to_levels
(
all_bbox_weights
,
num_level_anchors
)
return
(
anchors_list
,
labels_list
,
label_weights_list
,
bbox_targets_list
,
bbox_weights_list
,
num_total_pos
,
num_total_neg
)
def
_get_target_single
(
self
,
flat_anchors
,
valid_flags
,
num_level_anchors
,
gt_bboxes
,
gt_bboxes_ignore
,
gt_labels
,
img_meta
,
label_channels
=
1
,
unmap_outputs
=
True
):
"""Compute regression, classification targets for anchors in a single
image.
Args:
flat_anchors (Tensor): Multi-level anchors of the image, which are
concatenated into a single tensor of shape (num_anchors, 4)
valid_flags (Tensor): Multi level valid flags of the image,
which are concatenated into a single tensor of
shape (num_anchors,).
num_level_anchors Tensor): Number of anchors of each scale level.
gt_bboxes (Tensor): Ground truth bboxes of the image,
shape (num_gts, 4).
gt_bboxes_ignore (Tensor): Ground truth bboxes to be
ignored, shape (num_ignored_gts, 4).
gt_labels (Tensor): Ground truth labels of each box,
shape (num_gts,).
img_meta (dict): Meta info of the image.
label_channels (int): Channel of label.
unmap_outputs (bool): Whether to map outputs back to the original
set of anchors.
Returns:
tuple: N is the number of total anchors in the image.
anchors (Tensor): All anchors in the image with shape (N, 4).
labels (Tensor): Labels of all anchors in the image with shape
(N,).
label_weights (Tensor): Label weights of all anchor in the
image with shape (N,).
bbox_targets (Tensor): BBox targets of all anchors in the
image with shape (N, 4).
bbox_weights (Tensor): BBox weights of all anchors in the
image with shape (N, 4).
pos_inds (Tensor): Indices of postive anchor with shape
(num_pos,).
neg_inds (Tensor): Indices of negative anchor with shape
(num_neg,).
"""
inside_flags
=
anchor_inside_flags
(
flat_anchors
,
valid_flags
,
img_meta
[
'img_shape'
][:
2
],
self
.
train_cfg
.
allowed_border
)
if
not
inside_flags
.
any
():
return
(
None
,
)
*
7
# assign gt and sample anchors
anchors
=
flat_anchors
[
inside_flags
,
:]
num_level_anchors_inside
=
self
.
get_num_level_anchors_inside
(
num_level_anchors
,
inside_flags
)
assign_result
=
self
.
assigner
.
assign
(
anchors
,
num_level_anchors_inside
,
gt_bboxes
,
gt_bboxes_ignore
,
gt_labels
)
sampling_result
=
self
.
sampler
.
sample
(
assign_result
,
anchors
,
gt_bboxes
)
num_valid_anchors
=
anchors
.
shape
[
0
]
bbox_targets
=
torch
.
zeros_like
(
anchors
)
bbox_weights
=
torch
.
zeros_like
(
anchors
)
labels
=
anchors
.
new_full
((
num_valid_anchors
,
),
self
.
num_classes
,
dtype
=
torch
.
long
)
label_weights
=
anchors
.
new_zeros
(
num_valid_anchors
,
dtype
=
torch
.
float
)
pos_inds
=
sampling_result
.
pos_inds
neg_inds
=
sampling_result
.
neg_inds
if
len
(
pos_inds
)
>
0
:
pos_bbox_targets
=
sampling_result
.
pos_gt_bboxes
bbox_targets
[
pos_inds
,
:]
=
pos_bbox_targets
bbox_weights
[
pos_inds
,
:]
=
1.0
if
gt_labels
is
None
:
# Only rpn gives gt_labels as None
# Foreground is the first class
labels
[
pos_inds
]
=
0
else
:
labels
[
pos_inds
]
=
gt_labels
[
sampling_result
.
pos_assigned_gt_inds
]
if
self
.
train_cfg
.
pos_weight
<=
0
:
label_weights
[
pos_inds
]
=
1.0
else
:
label_weights
[
pos_inds
]
=
self
.
train_cfg
.
pos_weight
if
len
(
neg_inds
)
>
0
:
label_weights
[
neg_inds
]
=
1.0
# map up to original set of anchors
if
unmap_outputs
:
num_total_anchors
=
flat_anchors
.
size
(
0
)
anchors
=
unmap
(
anchors
,
num_total_anchors
,
inside_flags
)
labels
=
unmap
(
labels
,
num_total_anchors
,
inside_flags
,
fill
=
self
.
num_classes
)
label_weights
=
unmap
(
label_weights
,
num_total_anchors
,
inside_flags
)
bbox_targets
=
unmap
(
bbox_targets
,
num_total_anchors
,
inside_flags
)
bbox_weights
=
unmap
(
bbox_weights
,
num_total_anchors
,
inside_flags
)
return
(
anchors
,
labels
,
label_weights
,
bbox_targets
,
bbox_weights
,
pos_inds
,
neg_inds
)
def
get_num_level_anchors_inside
(
self
,
num_level_anchors
,
inside_flags
):
split_inside_flags
=
torch
.
split
(
inside_flags
,
num_level_anchors
)
num_level_anchors_inside
=
[
int
(
flags
.
sum
())
for
flags
in
split_inside_flags
]
return
num_level_anchors_inside
PyTorch/NLP/Conformer-main/mmdetection/mmdet/models/dense_heads/guided_anchor_head.py
0 → 100644
View file @
142dcf29
import
torch
import
torch.nn
as
nn
from
mmcv.cnn
import
bias_init_with_prob
,
normal_init
from
mmcv.ops
import
DeformConv2d
,
MaskedConv2d
from
mmcv.runner
import
force_fp32
from
mmdet.core
import
(
anchor_inside_flags
,
build_anchor_generator
,
build_assigner
,
build_bbox_coder
,
build_sampler
,
calc_region
,
images_to_levels
,
multi_apply
,
multiclass_nms
,
unmap
)
from
..builder
import
HEADS
,
build_loss
from
.anchor_head
import
AnchorHead
class
FeatureAdaption
(
nn
.
Module
):
"""Feature Adaption Module.
Feature Adaption Module is implemented based on DCN v1.
It uses anchor shape prediction rather than feature map to
predict offsets of deform conv layer.
Args:
in_channels (int): Number of channels in the input feature map.
out_channels (int): Number of channels in the output feature map.
kernel_size (int): Deformable conv kernel size.
deform_groups (int): Deformable conv group size.
"""
def
__init__
(
self
,
in_channels
,
out_channels
,
kernel_size
=
3
,
deform_groups
=
4
):
super
(
FeatureAdaption
,
self
).
__init__
()
offset_channels
=
kernel_size
*
kernel_size
*
2
self
.
conv_offset
=
nn
.
Conv2d
(
2
,
deform_groups
*
offset_channels
,
1
,
bias
=
False
)
self
.
conv_adaption
=
DeformConv2d
(
in_channels
,
out_channels
,
kernel_size
=
kernel_size
,
padding
=
(
kernel_size
-
1
)
//
2
,
deform_groups
=
deform_groups
)
self
.
relu
=
nn
.
ReLU
(
inplace
=
True
)
def
init_weights
(
self
):
normal_init
(
self
.
conv_offset
,
std
=
0.1
)
normal_init
(
self
.
conv_adaption
,
std
=
0.01
)
def
forward
(
self
,
x
,
shape
):
offset
=
self
.
conv_offset
(
shape
.
detach
())
x
=
self
.
relu
(
self
.
conv_adaption
(
x
,
offset
))
return
x
@
HEADS
.
register_module
()
class
GuidedAnchorHead
(
AnchorHead
):
"""Guided-Anchor-based head (GA-RPN, GA-RetinaNet, etc.).
This GuidedAnchorHead will predict high-quality feature guided
anchors and locations where anchors will be kept in inference.
There are mainly 3 categories of bounding-boxes.
- Sampled 9 pairs for target assignment. (approxes)
- The square boxes where the predicted anchors are based on. (squares)
- Guided anchors.
Please refer to https://arxiv.org/abs/1901.03278 for more details.
Args:
num_classes (int): Number of classes.
in_channels (int): Number of channels in the input feature map.
feat_channels (int): Number of hidden channels.
approx_anchor_generator (dict): Config dict for approx generator
square_anchor_generator (dict): Config dict for square generator
anchor_coder (dict): Config dict for anchor coder
bbox_coder (dict): Config dict for bbox coder
reg_decoded_bbox (bool): If true, the regression loss would be
applied directly on decoded bounding boxes, converting both
the predicted boxes and regression targets to absolute
coordinates format. Default False. It should be `True` when
using `IoULoss`, `GIoULoss`, or `DIoULoss` in the bbox head.
deform_groups: (int): Group number of DCN in
FeatureAdaption module.
loc_filter_thr (float): Threshold to filter out unconcerned regions.
loss_loc (dict): Config of location loss.
loss_shape (dict): Config of anchor shape loss.
loss_cls (dict): Config of classification loss.
loss_bbox (dict): Config of bbox regression loss.
"""
def
__init__
(
self
,
num_classes
,
in_channels
,
feat_channels
=
256
,
approx_anchor_generator
=
dict
(
type
=
'AnchorGenerator'
,
octave_base_scale
=
8
,
scales_per_octave
=
3
,
ratios
=
[
0.5
,
1.0
,
2.0
],
strides
=
[
4
,
8
,
16
,
32
,
64
]),
square_anchor_generator
=
dict
(
type
=
'AnchorGenerator'
,
ratios
=
[
1.0
],
scales
=
[
8
],
strides
=
[
4
,
8
,
16
,
32
,
64
]),
anchor_coder
=
dict
(
type
=
'DeltaXYWHBBoxCoder'
,
target_means
=
[.
0
,
.
0
,
.
0
,
.
0
],
target_stds
=
[
1.0
,
1.0
,
1.0
,
1.0
]
),
bbox_coder
=
dict
(
type
=
'DeltaXYWHBBoxCoder'
,
target_means
=
[.
0
,
.
0
,
.
0
,
.
0
],
target_stds
=
[
1.0
,
1.0
,
1.0
,
1.0
]
),
reg_decoded_bbox
=
False
,
deform_groups
=
4
,
loc_filter_thr
=
0.01
,
train_cfg
=
None
,
test_cfg
=
None
,
loss_loc
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
1.0
),
loss_shape
=
dict
(
type
=
'BoundedIoULoss'
,
beta
=
0.2
,
loss_weight
=
1.0
),
loss_cls
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
True
,
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
1.0
,
loss_weight
=
1.0
)):
# yapf: disable
super
(
AnchorHead
,
self
).
__init__
()
self
.
in_channels
=
in_channels
self
.
num_classes
=
num_classes
self
.
feat_channels
=
feat_channels
self
.
deform_groups
=
deform_groups
self
.
loc_filter_thr
=
loc_filter_thr
# build approx_anchor_generator and square_anchor_generator
assert
(
approx_anchor_generator
[
'octave_base_scale'
]
==
square_anchor_generator
[
'scales'
][
0
])
assert
(
approx_anchor_generator
[
'strides'
]
==
square_anchor_generator
[
'strides'
])
self
.
approx_anchor_generator
=
build_anchor_generator
(
approx_anchor_generator
)
self
.
square_anchor_generator
=
build_anchor_generator
(
square_anchor_generator
)
self
.
approxs_per_octave
=
self
.
approx_anchor_generator
\
.
num_base_anchors
[
0
]
self
.
reg_decoded_bbox
=
reg_decoded_bbox
# one anchor per location
self
.
num_anchors
=
1
self
.
use_sigmoid_cls
=
loss_cls
.
get
(
'use_sigmoid'
,
False
)
self
.
loc_focal_loss
=
loss_loc
[
'type'
]
in
[
'FocalLoss'
]
self
.
sampling
=
loss_cls
[
'type'
]
not
in
[
'FocalLoss'
]
self
.
ga_sampling
=
train_cfg
is
not
None
and
hasattr
(
train_cfg
,
'ga_sampler'
)
if
self
.
use_sigmoid_cls
:
self
.
cls_out_channels
=
self
.
num_classes
else
:
self
.
cls_out_channels
=
self
.
num_classes
+
1
# build bbox_coder
self
.
anchor_coder
=
build_bbox_coder
(
anchor_coder
)
self
.
bbox_coder
=
build_bbox_coder
(
bbox_coder
)
# build losses
self
.
loss_loc
=
build_loss
(
loss_loc
)
self
.
loss_shape
=
build_loss
(
loss_shape
)
self
.
loss_cls
=
build_loss
(
loss_cls
)
self
.
loss_bbox
=
build_loss
(
loss_bbox
)
self
.
train_cfg
=
train_cfg
self
.
test_cfg
=
test_cfg
if
self
.
train_cfg
:
self
.
assigner
=
build_assigner
(
self
.
train_cfg
.
assigner
)
# use PseudoSampler when sampling is False
if
self
.
sampling
and
hasattr
(
self
.
train_cfg
,
'sampler'
):
sampler_cfg
=
self
.
train_cfg
.
sampler
else
:
sampler_cfg
=
dict
(
type
=
'PseudoSampler'
)
self
.
sampler
=
build_sampler
(
sampler_cfg
,
context
=
self
)
self
.
ga_assigner
=
build_assigner
(
self
.
train_cfg
.
ga_assigner
)
if
self
.
ga_sampling
:
ga_sampler_cfg
=
self
.
train_cfg
.
ga_sampler
else
:
ga_sampler_cfg
=
dict
(
type
=
'PseudoSampler'
)
self
.
ga_sampler
=
build_sampler
(
ga_sampler_cfg
,
context
=
self
)
self
.
fp16_enabled
=
False
self
.
_init_layers
()
def
_init_layers
(
self
):
self
.
relu
=
nn
.
ReLU
(
inplace
=
True
)
self
.
conv_loc
=
nn
.
Conv2d
(
self
.
in_channels
,
1
,
1
)
self
.
conv_shape
=
nn
.
Conv2d
(
self
.
in_channels
,
self
.
num_anchors
*
2
,
1
)
self
.
feature_adaption
=
FeatureAdaption
(
self
.
in_channels
,
self
.
feat_channels
,
kernel_size
=
3
,
deform_groups
=
self
.
deform_groups
)
self
.
conv_cls
=
MaskedConv2d
(
self
.
feat_channels
,
self
.
num_anchors
*
self
.
cls_out_channels
,
1
)
self
.
conv_reg
=
MaskedConv2d
(
self
.
feat_channels
,
self
.
num_anchors
*
4
,
1
)
def
init_weights
(
self
):
normal_init
(
self
.
conv_cls
,
std
=
0.01
)
normal_init
(
self
.
conv_reg
,
std
=
0.01
)
bias_cls
=
bias_init_with_prob
(
0.01
)
normal_init
(
self
.
conv_loc
,
std
=
0.01
,
bias
=
bias_cls
)
normal_init
(
self
.
conv_shape
,
std
=
0.01
)
self
.
feature_adaption
.
init_weights
()
def
forward_single
(
self
,
x
):
loc_pred
=
self
.
conv_loc
(
x
)
shape_pred
=
self
.
conv_shape
(
x
)
x
=
self
.
feature_adaption
(
x
,
shape_pred
)
# masked conv is only used during inference for speed-up
if
not
self
.
training
:
mask
=
loc_pred
.
sigmoid
()[
0
]
>=
self
.
loc_filter_thr
else
:
mask
=
None
cls_score
=
self
.
conv_cls
(
x
,
mask
)
bbox_pred
=
self
.
conv_reg
(
x
,
mask
)
return
cls_score
,
bbox_pred
,
shape_pred
,
loc_pred
def
forward
(
self
,
feats
):
return
multi_apply
(
self
.
forward_single
,
feats
)
def
get_sampled_approxs
(
self
,
featmap_sizes
,
img_metas
,
device
=
'cuda'
):
"""Get sampled approxs and inside flags according to feature map sizes.
Args:
featmap_sizes (list[tuple]): Multi-level feature map sizes.
img_metas (list[dict]): Image meta info.
device (torch.device | str): device for returned tensors
Returns:
tuple: approxes of each image, inside flags of each image
"""
num_imgs
=
len
(
img_metas
)
# since feature map sizes of all images are the same, we only compute
# approxes for one time
multi_level_approxs
=
self
.
approx_anchor_generator
.
grid_anchors
(
featmap_sizes
,
device
=
device
)
approxs_list
=
[
multi_level_approxs
for
_
in
range
(
num_imgs
)]
# for each image, we compute inside flags of multi level approxes
inside_flag_list
=
[]
for
img_id
,
img_meta
in
enumerate
(
img_metas
):
multi_level_flags
=
[]
multi_level_approxs
=
approxs_list
[
img_id
]
# obtain valid flags for each approx first
multi_level_approx_flags
=
self
.
approx_anchor_generator
\
.
valid_flags
(
featmap_sizes
,
img_meta
[
'pad_shape'
],
device
=
device
)
for
i
,
flags
in
enumerate
(
multi_level_approx_flags
):
approxs
=
multi_level_approxs
[
i
]
inside_flags_list
=
[]
for
i
in
range
(
self
.
approxs_per_octave
):
split_valid_flags
=
flags
[
i
::
self
.
approxs_per_octave
]
split_approxs
=
approxs
[
i
::
self
.
approxs_per_octave
,
:]
inside_flags
=
anchor_inside_flags
(
split_approxs
,
split_valid_flags
,
img_meta
[
'img_shape'
][:
2
],
self
.
train_cfg
.
allowed_border
)
inside_flags_list
.
append
(
inside_flags
)
# inside_flag for a position is true if any anchor in this
# position is true
inside_flags
=
(
torch
.
stack
(
inside_flags_list
,
0
).
sum
(
dim
=
0
)
>
0
)
multi_level_flags
.
append
(
inside_flags
)
inside_flag_list
.
append
(
multi_level_flags
)
return
approxs_list
,
inside_flag_list
def
get_anchors
(
self
,
featmap_sizes
,
shape_preds
,
loc_preds
,
img_metas
,
use_loc_filter
=
False
,
device
=
'cuda'
):
"""Get squares according to feature map sizes and guided anchors.
Args:
featmap_sizes (list[tuple]): Multi-level feature map sizes.
shape_preds (list[tensor]): Multi-level shape predictions.
loc_preds (list[tensor]): Multi-level location predictions.
img_metas (list[dict]): Image meta info.
use_loc_filter (bool): Use loc filter or not.
device (torch.device | str): device for returned tensors
Returns:
tuple: square approxs of each image, guided anchors of each image,
loc masks of each image
"""
num_imgs
=
len
(
img_metas
)
num_levels
=
len
(
featmap_sizes
)
# since feature map sizes of all images are the same, we only compute
# squares for one time
multi_level_squares
=
self
.
square_anchor_generator
.
grid_anchors
(
featmap_sizes
,
device
=
device
)
squares_list
=
[
multi_level_squares
for
_
in
range
(
num_imgs
)]
# for each image, we compute multi level guided anchors
guided_anchors_list
=
[]
loc_mask_list
=
[]
for
img_id
,
img_meta
in
enumerate
(
img_metas
):
multi_level_guided_anchors
=
[]
multi_level_loc_mask
=
[]
for
i
in
range
(
num_levels
):
squares
=
squares_list
[
img_id
][
i
]
shape_pred
=
shape_preds
[
i
][
img_id
]
loc_pred
=
loc_preds
[
i
][
img_id
]
guided_anchors
,
loc_mask
=
self
.
_get_guided_anchors_single
(
squares
,
shape_pred
,
loc_pred
,
use_loc_filter
=
use_loc_filter
)
multi_level_guided_anchors
.
append
(
guided_anchors
)
multi_level_loc_mask
.
append
(
loc_mask
)
guided_anchors_list
.
append
(
multi_level_guided_anchors
)
loc_mask_list
.
append
(
multi_level_loc_mask
)
return
squares_list
,
guided_anchors_list
,
loc_mask_list
def
_get_guided_anchors_single
(
self
,
squares
,
shape_pred
,
loc_pred
,
use_loc_filter
=
False
):
"""Get guided anchors and loc masks for a single level.
Args:
square (tensor): Squares of a single level.
shape_pred (tensor): Shape predections of a single level.
loc_pred (tensor): Loc predections of a single level.
use_loc_filter (list[tensor]): Use loc filter or not.
Returns:
tuple: guided anchors, location masks
"""
# calculate location filtering mask
loc_pred
=
loc_pred
.
sigmoid
().
detach
()
if
use_loc_filter
:
loc_mask
=
loc_pred
>=
self
.
loc_filter_thr
else
:
loc_mask
=
loc_pred
>=
0.0
mask
=
loc_mask
.
permute
(
1
,
2
,
0
).
expand
(
-
1
,
-
1
,
self
.
num_anchors
)
mask
=
mask
.
contiguous
().
view
(
-
1
)
# calculate guided anchors
squares
=
squares
[
mask
]
anchor_deltas
=
shape_pred
.
permute
(
1
,
2
,
0
).
contiguous
().
view
(
-
1
,
2
).
detach
()[
mask
]
bbox_deltas
=
anchor_deltas
.
new_full
(
squares
.
size
(),
0
)
bbox_deltas
[:,
2
:]
=
anchor_deltas
guided_anchors
=
self
.
anchor_coder
.
decode
(
squares
,
bbox_deltas
,
wh_ratio_clip
=
1e-6
)
return
guided_anchors
,
mask
def
ga_loc_targets
(
self
,
gt_bboxes_list
,
featmap_sizes
):
"""Compute location targets for guided anchoring.
Each feature map is divided into positive, negative and ignore regions.
- positive regions: target 1, weight 1
- ignore regions: target 0, weight 0
- negative regions: target 0, weight 0.1
Args:
gt_bboxes_list (list[Tensor]): Gt bboxes of each image.
featmap_sizes (list[tuple]): Multi level sizes of each feature
maps.
Returns:
tuple
"""
anchor_scale
=
self
.
approx_anchor_generator
.
octave_base_scale
anchor_strides
=
self
.
approx_anchor_generator
.
strides
# Currently only supports same stride in x and y direction.
for
stride
in
anchor_strides
:
assert
(
stride
[
0
]
==
stride
[
1
])
anchor_strides
=
[
stride
[
0
]
for
stride
in
anchor_strides
]
center_ratio
=
self
.
train_cfg
.
center_ratio
ignore_ratio
=
self
.
train_cfg
.
ignore_ratio
img_per_gpu
=
len
(
gt_bboxes_list
)
num_lvls
=
len
(
featmap_sizes
)
r1
=
(
1
-
center_ratio
)
/
2
r2
=
(
1
-
ignore_ratio
)
/
2
all_loc_targets
=
[]
all_loc_weights
=
[]
all_ignore_map
=
[]
for
lvl_id
in
range
(
num_lvls
):
h
,
w
=
featmap_sizes
[
lvl_id
]
loc_targets
=
torch
.
zeros
(
img_per_gpu
,
1
,
h
,
w
,
device
=
gt_bboxes_list
[
0
].
device
,
dtype
=
torch
.
float32
)
loc_weights
=
torch
.
full_like
(
loc_targets
,
-
1
)
ignore_map
=
torch
.
zeros_like
(
loc_targets
)
all_loc_targets
.
append
(
loc_targets
)
all_loc_weights
.
append
(
loc_weights
)
all_ignore_map
.
append
(
ignore_map
)
for
img_id
in
range
(
img_per_gpu
):
gt_bboxes
=
gt_bboxes_list
[
img_id
]
scale
=
torch
.
sqrt
((
gt_bboxes
[:,
2
]
-
gt_bboxes
[:,
0
])
*
(
gt_bboxes
[:,
3
]
-
gt_bboxes
[:,
1
]))
min_anchor_size
=
scale
.
new_full
(
(
1
,
),
float
(
anchor_scale
*
anchor_strides
[
0
]))
# assign gt bboxes to different feature levels w.r.t. their scales
target_lvls
=
torch
.
floor
(
torch
.
log2
(
scale
)
-
torch
.
log2
(
min_anchor_size
)
+
0.5
)
target_lvls
=
target_lvls
.
clamp
(
min
=
0
,
max
=
num_lvls
-
1
).
long
()
for
gt_id
in
range
(
gt_bboxes
.
size
(
0
)):
lvl
=
target_lvls
[
gt_id
].
item
()
# rescaled to corresponding feature map
gt_
=
gt_bboxes
[
gt_id
,
:
4
]
/
anchor_strides
[
lvl
]
# calculate ignore regions
ignore_x1
,
ignore_y1
,
ignore_x2
,
ignore_y2
=
calc_region
(
gt_
,
r2
,
featmap_sizes
[
lvl
])
# calculate positive (center) regions
ctr_x1
,
ctr_y1
,
ctr_x2
,
ctr_y2
=
calc_region
(
gt_
,
r1
,
featmap_sizes
[
lvl
])
all_loc_targets
[
lvl
][
img_id
,
0
,
ctr_y1
:
ctr_y2
+
1
,
ctr_x1
:
ctr_x2
+
1
]
=
1
all_loc_weights
[
lvl
][
img_id
,
0
,
ignore_y1
:
ignore_y2
+
1
,
ignore_x1
:
ignore_x2
+
1
]
=
0
all_loc_weights
[
lvl
][
img_id
,
0
,
ctr_y1
:
ctr_y2
+
1
,
ctr_x1
:
ctr_x2
+
1
]
=
1
# calculate ignore map on nearby low level feature
if
lvl
>
0
:
d_lvl
=
lvl
-
1
# rescaled to corresponding feature map
gt_
=
gt_bboxes
[
gt_id
,
:
4
]
/
anchor_strides
[
d_lvl
]
ignore_x1
,
ignore_y1
,
ignore_x2
,
ignore_y2
=
calc_region
(
gt_
,
r2
,
featmap_sizes
[
d_lvl
])
all_ignore_map
[
d_lvl
][
img_id
,
0
,
ignore_y1
:
ignore_y2
+
1
,
ignore_x1
:
ignore_x2
+
1
]
=
1
# calculate ignore map on nearby high level feature
if
lvl
<
num_lvls
-
1
:
u_lvl
=
lvl
+
1
# rescaled to corresponding feature map
gt_
=
gt_bboxes
[
gt_id
,
:
4
]
/
anchor_strides
[
u_lvl
]
ignore_x1
,
ignore_y1
,
ignore_x2
,
ignore_y2
=
calc_region
(
gt_
,
r2
,
featmap_sizes
[
u_lvl
])
all_ignore_map
[
u_lvl
][
img_id
,
0
,
ignore_y1
:
ignore_y2
+
1
,
ignore_x1
:
ignore_x2
+
1
]
=
1
for
lvl_id
in
range
(
num_lvls
):
# ignore negative regions w.r.t. ignore map
all_loc_weights
[
lvl_id
][(
all_loc_weights
[
lvl_id
]
<
0
)
&
(
all_ignore_map
[
lvl_id
]
>
0
)]
=
0
# set negative regions with weight 0.1
all_loc_weights
[
lvl_id
][
all_loc_weights
[
lvl_id
]
<
0
]
=
0.1
# loc average factor to balance loss
loc_avg_factor
=
sum
(
[
t
.
size
(
0
)
*
t
.
size
(
-
1
)
*
t
.
size
(
-
2
)
for
t
in
all_loc_targets
])
/
200
return
all_loc_targets
,
all_loc_weights
,
loc_avg_factor
    def _ga_shape_target_single(self,
                                flat_approxs,
                                inside_flags,
                                flat_squares,
                                gt_bboxes,
                                gt_bboxes_ignore,
                                img_meta,
                                unmap_outputs=True):
        """Compute guided anchoring targets.

        This function returns sampled anchors and gt bboxes directly
        rather than calculating regression targets.

        Args:
            flat_approxs (Tensor): flat approxs of a single image,
                shape (n, 4)
            inside_flags (Tensor): inside flags of a single image,
                shape (n, ).
            flat_squares (Tensor): flat squares of a single image,
                shape (approxs_per_octave * n, 4)
            gt_bboxes (Tensor): Ground truth bboxes of a single image.
            gt_bboxes_ignore (Tensor): Ground truth bboxes to be ignored.
            img_meta (dict): Meta info of a single image.
            unmap_outputs (bool): unmap outputs or not.

        Returns:
            tuple
        """
        if not inside_flags.any():
            return (None, ) * 5
        # assign gt and sample anchors
        expand_inside_flags = inside_flags[:, None].expand(
            -1, self.approxs_per_octave).reshape(-1)
        approxs = flat_approxs[expand_inside_flags, :]
        squares = flat_squares[inside_flags, :]

        assign_result = self.ga_assigner.assign(approxs, squares,
                                                self.approxs_per_octave,
                                                gt_bboxes, gt_bboxes_ignore)
        sampling_result = self.ga_sampler.sample(assign_result, squares,
                                                 gt_bboxes)

        bbox_anchors = torch.zeros_like(squares)
        bbox_gts = torch.zeros_like(squares)
        bbox_weights = torch.zeros_like(squares)

        pos_inds = sampling_result.pos_inds
        neg_inds = sampling_result.neg_inds
        if len(pos_inds) > 0:
            bbox_anchors[pos_inds, :] = sampling_result.pos_bboxes
            bbox_gts[pos_inds, :] = sampling_result.pos_gt_bboxes
            bbox_weights[pos_inds, :] = 1.0

        # map up to original set of anchors
        if unmap_outputs:
            num_total_anchors = flat_squares.size(0)
            bbox_anchors = unmap(bbox_anchors, num_total_anchors, inside_flags)
            bbox_gts = unmap(bbox_gts, num_total_anchors, inside_flags)
            bbox_weights = unmap(bbox_weights, num_total_anchors, inside_flags)

        return (bbox_anchors, bbox_gts, bbox_weights, pos_inds, neg_inds)
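    # --- Illustrative sketch (editor's addition, not in the original file) ---
    # `unmap` above scatters per-valid-anchor results back to the full anchor
    # set. A rough re-implementation of the idea (the real helper lives in
    # mmdet.core; treat this as an assumption, not its exact code):
    #
    #     import torch
    #     def unmap_sketch(data, count, inds, fill=0):
    #         ret = data.new_full((count, ) + data.size()[1:], fill)
    #         ret[inds.type(torch.bool)] = data
    #         return ret
    #
    #     inside = torch.tensor([True, False, True])
    #     vals = torch.tensor([[1., 2., 3., 4.], [5., 6., 7., 8.]])
    #     unmap_sketch(vals, 3, inside)   # rows 0 and 2 filled, row 1 zeros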
    def ga_shape_targets(self,
                         approx_list,
                         inside_flag_list,
                         square_list,
                         gt_bboxes_list,
                         img_metas,
                         gt_bboxes_ignore_list=None,
                         unmap_outputs=True):
        """Compute guided anchoring targets.

        Args:
            approx_list (list[list]): Multi level approxs of each image.
            inside_flag_list (list[list]): Multi level inside flags of each
                image.
            square_list (list[list]): Multi level squares of each image.
            gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image.
            img_metas (list[dict]): Meta info of each image.
            gt_bboxes_ignore_list (list[Tensor]): ignore list of gt bboxes.
            unmap_outputs (bool): unmap outputs or not.

        Returns:
            tuple
        """
        num_imgs = len(img_metas)
        assert len(approx_list) == len(inside_flag_list) == len(
            square_list) == num_imgs
        # anchor number of multi levels
        num_level_squares = [squares.size(0) for squares in square_list[0]]
        # concat all level anchors and flags to a single tensor
        inside_flag_flat_list = []
        approx_flat_list = []
        square_flat_list = []
        for i in range(num_imgs):
            assert len(square_list[i]) == len(inside_flag_list[i])
            inside_flag_flat_list.append(torch.cat(inside_flag_list[i]))
            approx_flat_list.append(torch.cat(approx_list[i]))
            square_flat_list.append(torch.cat(square_list[i]))

        # compute targets for each image
        if gt_bboxes_ignore_list is None:
            gt_bboxes_ignore_list = [None for _ in range(num_imgs)]
        (all_bbox_anchors, all_bbox_gts, all_bbox_weights, pos_inds_list,
         neg_inds_list) = multi_apply(
             self._ga_shape_target_single,
             approx_flat_list,
             inside_flag_flat_list,
             square_flat_list,
             gt_bboxes_list,
             gt_bboxes_ignore_list,
             img_metas,
             unmap_outputs=unmap_outputs)
        # no valid anchors
        if any([bbox_anchors is None for bbox_anchors in all_bbox_anchors]):
            return None
        # sampled anchors of all images
        num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list])
        num_total_neg = sum([max(inds.numel(), 1) for inds in neg_inds_list])
        # split targets to a list w.r.t. multiple levels
        bbox_anchors_list = images_to_levels(all_bbox_anchors,
                                             num_level_squares)
        bbox_gts_list = images_to_levels(all_bbox_gts, num_level_squares)
        bbox_weights_list = images_to_levels(all_bbox_weights,
                                             num_level_squares)
        return (bbox_anchors_list, bbox_gts_list, bbox_weights_list,
                num_total_pos, num_total_neg)
    def loss_shape_single(self, shape_pred, bbox_anchors, bbox_gts,
                          anchor_weights, anchor_total_num):
        shape_pred = shape_pred.permute(0, 2, 3, 1).contiguous().view(-1, 2)
        bbox_anchors = bbox_anchors.contiguous().view(-1, 4)
        bbox_gts = bbox_gts.contiguous().view(-1, 4)
        anchor_weights = anchor_weights.contiguous().view(-1, 4)
        bbox_deltas = bbox_anchors.new_full(bbox_anchors.size(), 0)
        bbox_deltas[:, 2:] += shape_pred
        # filter out negative samples to speed-up weighted_bounded_iou_loss
        inds = torch.nonzero(
            anchor_weights[:, 0] > 0, as_tuple=False).squeeze(1)
        bbox_deltas_ = bbox_deltas[inds]
        bbox_anchors_ = bbox_anchors[inds]
        bbox_gts_ = bbox_gts[inds]
        anchor_weights_ = anchor_weights[inds]
        pred_anchors_ = self.anchor_coder.decode(
            bbox_anchors_, bbox_deltas_, wh_ratio_clip=1e-6)
        loss_shape = self.loss_shape(
            pred_anchors_,
            bbox_gts_,
            anchor_weights_,
            avg_factor=anchor_total_num)
        return loss_shape
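    # --- Illustrative sketch (editor's addition, not in the original file) ---
    # The positive-sample filter above hinges on torch.nonzero with
    # as_tuple=False returning an (n, 1) index tensor that squeeze(1)
    # flattens into plain indices:
    #
    #     import torch
    #     w = torch.tensor([0., 1., 0., 2.])
    #     inds = torch.nonzero(w > 0, as_tuple=False).squeeze(1)
    #     # inds -> tensor([1, 3])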
    def loss_loc_single(self, loc_pred, loc_target, loc_weight,
                        loc_avg_factor):
        loss_loc = self.loss_loc(
            loc_pred.reshape(-1, 1),
            loc_target.reshape(-1).long(),
            loc_weight.reshape(-1),
            avg_factor=loc_avg_factor)
        return loss_loc
    @force_fp32(
        apply_to=('cls_scores', 'bbox_preds', 'shape_preds', 'loc_preds'))
    def loss(self,
             cls_scores,
             bbox_preds,
             shape_preds,
             loc_preds,
             gt_bboxes,
             gt_labels,
             img_metas,
             gt_bboxes_ignore=None):
        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
        assert len(featmap_sizes) == self.approx_anchor_generator.num_levels

        device = cls_scores[0].device

        # get loc targets
        loc_targets, loc_weights, loc_avg_factor = self.ga_loc_targets(
            gt_bboxes, featmap_sizes)

        # get sampled approxes
        approxs_list, inside_flag_list = self.get_sampled_approxs(
            featmap_sizes, img_metas, device=device)
        # get squares and guided anchors
        squares_list, guided_anchors_list, _ = self.get_anchors(
            featmap_sizes, shape_preds, loc_preds, img_metas, device=device)

        # get shape targets
        shape_targets = self.ga_shape_targets(approxs_list, inside_flag_list,
                                              squares_list, gt_bboxes,
                                              img_metas)
        if shape_targets is None:
            return None
        (bbox_anchors_list, bbox_gts_list, anchor_weights_list, anchor_fg_num,
         anchor_bg_num) = shape_targets
        anchor_total_num = (
            anchor_fg_num if not self.ga_sampling else anchor_fg_num +
            anchor_bg_num)

        # get anchor targets
        label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1
        cls_reg_targets = self.get_targets(
            guided_anchors_list,
            inside_flag_list,
            gt_bboxes,
            img_metas,
            gt_bboxes_ignore_list=gt_bboxes_ignore,
            gt_labels_list=gt_labels,
            label_channels=label_channels)
        if cls_reg_targets is None:
            return None
        (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list,
         num_total_pos, num_total_neg) = cls_reg_targets
        num_total_samples = (
            num_total_pos + num_total_neg if self.sampling else num_total_pos)

        # anchor number of multi levels
        num_level_anchors = [
            anchors.size(0) for anchors in guided_anchors_list[0]
        ]
        # concat all level anchors to a single tensor
        concat_anchor_list = []
        for i in range(len(guided_anchors_list)):
            concat_anchor_list.append(torch.cat(guided_anchors_list[i]))
        all_anchor_list = images_to_levels(concat_anchor_list,
                                           num_level_anchors)

        # get classification and bbox regression losses
        losses_cls, losses_bbox = multi_apply(
            self.loss_single,
            cls_scores,
            bbox_preds,
            all_anchor_list,
            labels_list,
            label_weights_list,
            bbox_targets_list,
            bbox_weights_list,
            num_total_samples=num_total_samples)

        # get anchor location loss
        losses_loc = []
        for i in range(len(loc_preds)):
            loss_loc = self.loss_loc_single(
                loc_preds[i],
                loc_targets[i],
                loc_weights[i],
                loc_avg_factor=loc_avg_factor)
            losses_loc.append(loss_loc)
        # get anchor shape loss
        losses_shape = []
        for i in range(len(shape_preds)):
            loss_shape = self.loss_shape_single(
                shape_preds[i],
                bbox_anchors_list[i],
                bbox_gts_list[i],
                anchor_weights_list[i],
                anchor_total_num=anchor_total_num)
            losses_shape.append(loss_shape)
        return dict(
            loss_cls=losses_cls,
            loss_bbox=losses_bbox,
            loss_shape=losses_shape,
            loss_loc=losses_loc)
    @force_fp32(
        apply_to=('cls_scores', 'bbox_preds', 'shape_preds', 'loc_preds'))
    def get_bboxes(self,
                   cls_scores,
                   bbox_preds,
                   shape_preds,
                   loc_preds,
                   img_metas,
                   cfg=None,
                   rescale=False):
        assert len(cls_scores) == len(bbox_preds) == len(shape_preds) == len(
            loc_preds)
        num_levels = len(cls_scores)
        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
        device = cls_scores[0].device
        # get guided anchors
        _, guided_anchors, loc_masks = self.get_anchors(
            featmap_sizes,
            shape_preds,
            loc_preds,
            img_metas,
            use_loc_filter=not self.training,
            device=device)
        result_list = []
        for img_id in range(len(img_metas)):
            cls_score_list = [
                cls_scores[i][img_id].detach() for i in range(num_levels)
            ]
            bbox_pred_list = [
                bbox_preds[i][img_id].detach() for i in range(num_levels)
            ]
            guided_anchor_list = [
                guided_anchors[img_id][i].detach() for i in range(num_levels)
            ]
            loc_mask_list = [
                loc_masks[img_id][i].detach() for i in range(num_levels)
            ]
            img_shape = img_metas[img_id]['img_shape']
            scale_factor = img_metas[img_id]['scale_factor']
            proposals = self._get_bboxes_single(cls_score_list, bbox_pred_list,
                                                guided_anchor_list,
                                                loc_mask_list, img_shape,
                                                scale_factor, cfg, rescale)
            result_list.append(proposals)
        return result_list
    def _get_bboxes_single(self,
                           cls_scores,
                           bbox_preds,
                           mlvl_anchors,
                           mlvl_masks,
                           img_shape,
                           scale_factor,
                           cfg,
                           rescale=False):
        cfg = self.test_cfg if cfg is None else cfg
        assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)
        mlvl_bboxes = []
        mlvl_scores = []
        for cls_score, bbox_pred, anchors, mask in zip(cls_scores, bbox_preds,
                                                       mlvl_anchors,
                                                       mlvl_masks):
            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
            # if no location is kept, end.
            if mask.sum() == 0:
                continue
            # reshape scores and bbox_pred
            cls_score = cls_score.permute(1, 2,
                                          0).reshape(-1, self.cls_out_channels)
            if self.use_sigmoid_cls:
                scores = cls_score.sigmoid()
            else:
                scores = cls_score.softmax(-1)
            bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
            # filter scores, bbox_pred w.r.t. mask.
            # anchors are filtered in get_anchors() beforehand.
            scores = scores[mask, :]
            bbox_pred = bbox_pred[mask, :]
            if scores.dim() == 0:
                anchors = anchors.unsqueeze(0)
                scores = scores.unsqueeze(0)
                bbox_pred = bbox_pred.unsqueeze(0)
            # filter anchors, bbox_pred, scores w.r.t. scores
            nms_pre = cfg.get('nms_pre', -1)
            if nms_pre > 0 and scores.shape[0] > nms_pre:
                if self.use_sigmoid_cls:
                    max_scores, _ = scores.max(dim=1)
                else:
                    # remind that we set FG labels to [0, num_class-1]
                    # since mmdet v2.0
                    # BG cat_id: num_class
                    max_scores, _ = scores[:, :-1].max(dim=1)
                _, topk_inds = max_scores.topk(nms_pre)
                anchors = anchors[topk_inds, :]
                bbox_pred = bbox_pred[topk_inds, :]
                scores = scores[topk_inds, :]
            bboxes = self.bbox_coder.decode(
                anchors, bbox_pred, max_shape=img_shape)
            mlvl_bboxes.append(bboxes)
            mlvl_scores.append(scores)
        mlvl_bboxes = torch.cat(mlvl_bboxes)
        if rescale:
            mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
        mlvl_scores = torch.cat(mlvl_scores)
        if self.use_sigmoid_cls:
            # Add a dummy background class to the backend when using sigmoid
            # remind that we set FG labels to [0, num_class-1] since mmdet v2.0
            # BG cat_id: num_class
            padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
            mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)
        # multi class NMS
        det_bboxes, det_labels = multiclass_nms(mlvl_bboxes, mlvl_scores,
                                                cfg.score_thr, cfg.nms,
                                                cfg.max_per_img)
        return det_bboxes, det_labels
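Editor's note: a minimal, self-contained sketch of the dummy-background padding performed at the end of `_get_bboxes_single` above, with made-up shapes (80 foreground classes); `multiclass_nms` itself is omitted:

import torch

mlvl_scores = torch.rand(6, 80)                # 6 boxes, sigmoid FG scores
padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)
assert mlvl_scores.shape == (6, 81)            # BG appended as class index 80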
PyTorch/NLP/Conformer-main/mmdetection/mmdet/models/dense_heads/nasfcos_head.py
import copy

import torch.nn as nn
from mmcv.cnn import (ConvModule, Scale, bias_init_with_prob,
                      caffe2_xavier_init, normal_init)

from mmdet.models.dense_heads.fcos_head import FCOSHead
from ..builder import HEADS


@HEADS.register_module()
class NASFCOSHead(FCOSHead):
    """Anchor-free head used in `NASFCOS <https://arxiv.org/abs/1906.04423>`_.

    It is quite similar to the FCOS head, except for the searched structure
    of the classification branch and bbox regression branch, where a
    structure of "dconv3x3, conv3x3, dconv3x3, conv1x1" is utilized instead.
    """

    def _init_layers(self):
        """Initialize layers of the head."""
        dconv3x3_config = dict(
            type='DCNv2',
            kernel_size=3,
            use_bias=True,
            deform_groups=2,
            padding=1)
        conv3x3_config = dict(type='Conv', kernel_size=3, padding=1)
        conv1x1_config = dict(type='Conv', kernel_size=1)

        self.arch_config = [
            dconv3x3_config, conv3x3_config, dconv3x3_config, conv1x1_config
        ]
        self.cls_convs = nn.ModuleList()
        self.reg_convs = nn.ModuleList()
        for i, op_ in enumerate(self.arch_config):
            op = copy.deepcopy(op_)
            chn = self.in_channels if i == 0 else self.feat_channels
            assert isinstance(op, dict)
            use_bias = op.pop('use_bias', False)
            padding = op.pop('padding', 0)
            kernel_size = op.pop('kernel_size')
            module = ConvModule(
                chn,
                self.feat_channels,
                kernel_size,
                stride=1,
                padding=padding,
                norm_cfg=self.norm_cfg,
                bias=use_bias,
                conv_cfg=op)

            self.cls_convs.append(copy.deepcopy(module))
            self.reg_convs.append(copy.deepcopy(module))

        self.conv_cls = nn.Conv2d(
            self.feat_channels, self.cls_out_channels, 3, padding=1)
        self.conv_reg = nn.Conv2d(self.feat_channels, 4, 3, padding=1)
        self.conv_centerness = nn.Conv2d(self.feat_channels, 1, 3, padding=1)

        self.scales = nn.ModuleList([Scale(1.0) for _ in self.strides])

    def init_weights(self):
        """Initialize weights of the head."""
        # retinanet_bias_init
        bias_cls = bias_init_with_prob(0.01)
        normal_init(self.conv_reg, std=0.01)
        normal_init(self.conv_centerness, std=0.01)
        normal_init(self.conv_cls, std=0.01, bias=bias_cls)

        for branch in [self.cls_convs, self.reg_convs]:
            for module in branch.modules():
                if isinstance(module, ConvModule) \
                        and isinstance(module.conv, nn.Conv2d):
                    caffe2_xavier_init(module.conv)
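Editor's note: `bias_init_with_prob(0.01)` above sets the classification bias so the head's initial sigmoid foreground probability is roughly 0.01 (the RetinaNet focal-loss prior trick). A sketch of the underlying arithmetic:

import math

prior_prob = 0.01
bias = -math.log((1 - prior_prob) / prior_prob)    # ~ -4.595
assert abs(1 / (1 + math.exp(-bias)) - prior_prob) < 1e-9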
PyTorch/NLP/Conformer-main/mmdetection/mmdet/models/dense_heads/paa_head.py
import numpy as np
import torch
from mmcv.runner import force_fp32

from mmdet.core import multi_apply, multiclass_nms
from mmdet.core.bbox.iou_calculators import bbox_overlaps
from mmdet.models import HEADS
from mmdet.models.dense_heads import ATSSHead

EPS = 1e-12
try:
    import sklearn.mixture as skm
except ImportError:
    skm = None
def levels_to_images(mlvl_tensor):
    """Concat multi-level feature maps by image.

    [feature_level0, feature_level1...] -> [feature_image0, feature_image1...]
    Convert the shape of each element in mlvl_tensor from (N, C, H, W) to
    (N, H*W, C), then split each element into N elements with shape (H*W, C),
    and concat the elements belonging to the same image across all levels
    along the first dimension.

    Args:
        mlvl_tensor (list[torch.Tensor]): list of Tensor which collect from
            corresponding level. Each element is of shape (N, C, H, W)

    Returns:
        list[torch.Tensor]: A list that contains N tensors and each tensor is
        of shape (num_elements, C)
    """
    batch_size = mlvl_tensor[0].size(0)
    batch_list = [[] for _ in range(batch_size)]
    channels = mlvl_tensor[0].size(1)
    for t in mlvl_tensor:
        t = t.permute(0, 2, 3, 1)
        t = t.view(batch_size, -1, channels).contiguous()
        for img in range(batch_size):
            batch_list[img].append(t[img])
    return [torch.cat(item, 0) for item in batch_list]
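# --- Illustrative sketch (editor's addition, not in the original file) ---
# Shape check for levels_to_images with two FPN levels and batch size 2:
#
#     import torch
#     mlvl = [torch.randn(2, 8, 4, 4), torch.randn(2, 8, 2, 2)]
#     per_img = levels_to_images(mlvl)
#     assert len(per_img) == 2
#     assert per_img[0].shape == (4 * 4 + 2 * 2, 8)   # (20, 8)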
@HEADS.register_module()
class PAAHead(ATSSHead):
    """Head of PAAAssignment: Probabilistic Anchor Assignment with IoU
    Prediction for Object Detection.

    Code is modified from the `official github repo
    <https://github.com/kkhoot/PAA/blob/master/paa_core
    /modeling/rpn/paa/loss.py>`_.

    More details can be found in the `paper
    <https://arxiv.org/abs/2007.08103>`_ .

    Args:
        topk (int): Select topk samples with smallest loss in
            each level.
        score_voting (bool): Whether to use score voting in post-process.
        covariance_type : String describing the type of covariance parameters
            to be used in :class:`sklearn.mixture.GaussianMixture`.
            It must be one of:

            - 'full': each component has its own general covariance matrix
            - 'tied': all components share the same general covariance matrix
            - 'diag': each component has its own diagonal covariance matrix
            - 'spherical': each component has its own single variance
            Default: 'diag'. From 'full' to 'spherical', the gmm fitting
            process is faster yet the performance could be influenced. For
            most cases, 'diag' should be a good choice.
    """

    def __init__(self,
                 *args,
                 topk=9,
                 score_voting=True,
                 covariance_type='diag',
                 **kwargs):
        # topk used in paa reassign process
        self.topk = topk
        self.with_score_voting = score_voting
        self.covariance_type = covariance_type
        super(PAAHead, self).__init__(*args, **kwargs)
    @force_fp32(apply_to=('cls_scores', 'bbox_preds', 'iou_preds'))
    def loss(self,
             cls_scores,
             bbox_preds,
             iou_preds,
             gt_bboxes,
             gt_labels,
             img_metas,
             gt_bboxes_ignore=None):
        """Compute losses of the head.

        Args:
            cls_scores (list[Tensor]): Box scores for each scale level.
                Has shape (N, num_anchors * num_classes, H, W)
            bbox_preds (list[Tensor]): Box energies / deltas for each scale
                level with shape (N, num_anchors * 4, H, W)
            iou_preds (list[Tensor]): iou_preds for each scale
                level with shape (N, num_anchors * 1, H, W)
            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with
                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.
            gt_labels (list[Tensor]): class indices corresponding to each box
            img_metas (list[dict]): Meta information of each image, e.g.,
                image size, scaling factor, etc.
            gt_bboxes_ignore (list[Tensor] | None): Specify which bounding
                boxes can be ignored when computing the loss.

        Returns:
            dict[str, Tensor]: A dictionary of loss components.
        """
        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
        assert len(featmap_sizes) == self.anchor_generator.num_levels

        device = cls_scores[0].device
        anchor_list, valid_flag_list = self.get_anchors(
            featmap_sizes, img_metas, device=device)
        label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1
        cls_reg_targets = self.get_targets(
            anchor_list,
            valid_flag_list,
            gt_bboxes,
            img_metas,
            gt_bboxes_ignore_list=gt_bboxes_ignore,
            gt_labels_list=gt_labels,
            label_channels=label_channels,
        )
        (labels, labels_weight, bboxes_target, bboxes_weight, pos_inds,
         pos_gt_index) = cls_reg_targets
        cls_scores = levels_to_images(cls_scores)
        cls_scores = [
            item.reshape(-1, self.cls_out_channels) for item in cls_scores
        ]
        bbox_preds = levels_to_images(bbox_preds)
        bbox_preds = [item.reshape(-1, 4) for item in bbox_preds]
        iou_preds = levels_to_images(iou_preds)
        iou_preds = [item.reshape(-1, 1) for item in iou_preds]
        pos_losses_list, = multi_apply(self.get_pos_loss, anchor_list,
                                       cls_scores, bbox_preds, labels,
                                       labels_weight, bboxes_target,
                                       bboxes_weight, pos_inds)

        with torch.no_grad():
            reassign_labels, reassign_label_weight, \
                reassign_bbox_weights, num_pos = multi_apply(
                    self.paa_reassign,
                    pos_losses_list,
                    labels,
                    labels_weight,
                    bboxes_weight,
                    pos_inds,
                    pos_gt_index,
                    anchor_list)
            num_pos = sum(num_pos)
        # convert all tensor list to a flatten tensor
        cls_scores = torch.cat(cls_scores, 0).view(-1, cls_scores[0].size(-1))
        bbox_preds = torch.cat(bbox_preds, 0).view(-1, bbox_preds[0].size(-1))
        iou_preds = torch.cat(iou_preds, 0).view(-1, iou_preds[0].size(-1))
        labels = torch.cat(reassign_labels, 0).view(-1)
        flatten_anchors = torch.cat(
            [torch.cat(item, 0) for item in anchor_list])
        labels_weight = torch.cat(reassign_label_weight, 0).view(-1)
        bboxes_target = torch.cat(bboxes_target,
                                  0).view(-1, bboxes_target[0].size(-1))

        pos_inds_flatten = ((labels >= 0)
                            &
                            (labels < self.num_classes)).nonzero().reshape(-1)

        losses_cls = self.loss_cls(
            cls_scores,
            labels,
            labels_weight,
            avg_factor=max(num_pos, len(img_metas)))  # avoid num_pos=0
        if num_pos:
            pos_bbox_pred = self.bbox_coder.decode(
                flatten_anchors[pos_inds_flatten],
                bbox_preds[pos_inds_flatten])
            pos_bbox_target = bboxes_target[pos_inds_flatten]
            iou_target = bbox_overlaps(
                pos_bbox_pred.detach(), pos_bbox_target, is_aligned=True)
            losses_iou = self.loss_centerness(
                iou_preds[pos_inds_flatten],
                iou_target.unsqueeze(-1),
                avg_factor=num_pos)
            losses_bbox = self.loss_bbox(
                pos_bbox_pred,
                pos_bbox_target,
                iou_target.clamp(min=EPS),
                avg_factor=iou_target.sum())
        else:
            losses_iou = iou_preds.sum() * 0
            losses_bbox = bbox_preds.sum() * 0

        return dict(
            loss_cls=losses_cls, loss_bbox=losses_bbox, loss_iou=losses_iou)
    def get_pos_loss(self, anchors, cls_score, bbox_pred, label, label_weight,
                     bbox_target, bbox_weight, pos_inds):
        """Calculate loss of all potential positive samples obtained from the
        first match process.

        Args:
            anchors (list[Tensor]): Anchors of each scale.
            cls_score (Tensor): Box scores of single image with shape
                (num_anchors, num_classes)
            bbox_pred (Tensor): Box energies / deltas of single image
                with shape (num_anchors, 4)
            label (Tensor): classification target of each anchor with
                shape (num_anchors,)
            label_weight (Tensor): Classification loss weight of each
                anchor with shape (num_anchors).
            bbox_target (Tensor): Regression target of each anchor with
                shape (num_anchors, 4).
            bbox_weight (Tensor): Bbox weight of each anchor with shape
                (num_anchors, 4).
            pos_inds (Tensor): Index of all positive samples obtained from
                the first assign process.

        Returns:
            Tensor: Losses of all positive samples in single image.
        """
        if not len(pos_inds):
            return cls_score.new([]),
        anchors_all_level = torch.cat(anchors, 0)
        pos_scores = cls_score[pos_inds]
        pos_bbox_pred = bbox_pred[pos_inds]
        pos_label = label[pos_inds]
        pos_label_weight = label_weight[pos_inds]
        pos_bbox_target = bbox_target[pos_inds]
        pos_bbox_weight = bbox_weight[pos_inds]
        pos_anchors = anchors_all_level[pos_inds]
        pos_bbox_pred = self.bbox_coder.decode(pos_anchors, pos_bbox_pred)

        # to keep loss dimension
        loss_cls = self.loss_cls(
            pos_scores,
            pos_label,
            pos_label_weight,
            avg_factor=self.loss_cls.loss_weight,
            reduction_override='none')

        loss_bbox = self.loss_bbox(
            pos_bbox_pred,
            pos_bbox_target,
            pos_bbox_weight,
            avg_factor=self.loss_cls.loss_weight,
            reduction_override='none')

        loss_cls = loss_cls.sum(-1)
        pos_loss = loss_bbox + loss_cls
        return pos_loss,
    def paa_reassign(self, pos_losses, label, label_weight, bbox_weight,
                     pos_inds, pos_gt_inds, anchors):
        """Fit loss to GMM distribution and separate positive, ignore and
        negative samples again with the GMM model.

        Args:
            pos_losses (Tensor): Losses of all positive samples in
                single image.
            label (Tensor): classification target of each anchor with
                shape (num_anchors,)
            label_weight (Tensor): Classification loss weight of each
                anchor with shape (num_anchors).
            bbox_weight (Tensor): Bbox weight of each anchor with shape
                (num_anchors, 4).
            pos_inds (Tensor): Index of all positive samples obtained from
                the first assign process.
            pos_gt_inds (Tensor): Gt_index of all positive samples obtained
                from the first assign process.
            anchors (list[Tensor]): Anchors of each scale.

        Returns:
            tuple: Usually returns a tuple containing learning targets.

                - label (Tensor): classification target of each anchor after
                  paa assign, with shape (num_anchors,)
                - label_weight (Tensor): Classification loss weight of each
                  anchor after paa assign, with shape (num_anchors).
                - bbox_weight (Tensor): Bbox weight of each anchor with shape
                  (num_anchors, 4).
                - num_pos (int): The number of positive samples after paa
                  assign.
        """
        if not len(pos_inds):
            return label, label_weight, bbox_weight, 0
        label = label.clone()
        label_weight = label_weight.clone()
        bbox_weight = bbox_weight.clone()
        num_gt = pos_gt_inds.max() + 1
        num_level = len(anchors)
        num_anchors_each_level = [item.size(0) for item in anchors]
        num_anchors_each_level.insert(0, 0)
        inds_level_interval = np.cumsum(num_anchors_each_level)
        pos_level_mask = []
        for i in range(num_level):
            mask = (pos_inds >= inds_level_interval[i]) & (
                pos_inds < inds_level_interval[i + 1])
            pos_level_mask.append(mask)
        pos_inds_after_paa = [label.new_tensor([])]
        ignore_inds_after_paa = [label.new_tensor([])]
        for gt_ind in range(num_gt):
            pos_inds_gmm = []
            pos_loss_gmm = []
            gt_mask = pos_gt_inds == gt_ind
            for level in range(num_level):
                level_mask = pos_level_mask[level]
                level_gt_mask = level_mask & gt_mask
                value, topk_inds = pos_losses[level_gt_mask].topk(
                    min(level_gt_mask.sum(), self.topk), largest=False)
                pos_inds_gmm.append(pos_inds[level_gt_mask][topk_inds])
                pos_loss_gmm.append(value)
            pos_inds_gmm = torch.cat(pos_inds_gmm)
            pos_loss_gmm = torch.cat(pos_loss_gmm)
            # the gmm fit needs at least two samples
            if len(pos_inds_gmm) < 2:
                continue
            device = pos_inds_gmm.device
            pos_loss_gmm, sort_inds = pos_loss_gmm.sort()
            pos_inds_gmm = pos_inds_gmm[sort_inds]
            pos_loss_gmm = pos_loss_gmm.view(-1, 1).cpu().numpy()
            min_loss, max_loss = pos_loss_gmm.min(), pos_loss_gmm.max()
            means_init = np.array([min_loss, max_loss]).reshape(2, 1)
            weights_init = np.array([0.5, 0.5])
            precisions_init = np.array([1.0, 1.0]).reshape(2, 1, 1)  # full
            if self.covariance_type == 'spherical':
                precisions_init = precisions_init.reshape(2)
            elif self.covariance_type == 'diag':
                precisions_init = precisions_init.reshape(2, 1)
            elif self.covariance_type == 'tied':
                precisions_init = np.array([[1.0]])
            if skm is None:
                raise ImportError('Please run "pip install sklearn" '
                                  'to install sklearn first.')
            gmm = skm.GaussianMixture(
                2,
                weights_init=weights_init,
                means_init=means_init,
                precisions_init=precisions_init,
                covariance_type=self.covariance_type)
            gmm.fit(pos_loss_gmm)
            gmm_assignment = gmm.predict(pos_loss_gmm)
            scores = gmm.score_samples(pos_loss_gmm)
            gmm_assignment = torch.from_numpy(gmm_assignment).to(device)
            scores = torch.from_numpy(scores).to(device)

            pos_inds_temp, ignore_inds_temp = self.gmm_separation_scheme(
                gmm_assignment, scores, pos_inds_gmm)
            pos_inds_after_paa.append(pos_inds_temp)
            ignore_inds_after_paa.append(ignore_inds_temp)

        pos_inds_after_paa = torch.cat(pos_inds_after_paa)
        ignore_inds_after_paa = torch.cat(ignore_inds_after_paa)
        reassign_mask = (pos_inds.unsqueeze(1) != pos_inds_after_paa).all(1)
        reassign_ids = pos_inds[reassign_mask]
        label[reassign_ids] = self.num_classes
        label_weight[ignore_inds_after_paa] = 0
        bbox_weight[reassign_ids] = 0
        num_pos = len(pos_inds_after_paa)
        return label, label_weight, bbox_weight, num_pos
    def gmm_separation_scheme(self, gmm_assignment, scores, pos_inds_gmm):
        """A general separation scheme for the gmm model.

        It separates a GMM distribution of candidate samples into three
        parts: 0, 1 and uncertain areas; other separation schemes can be
        implemented by rewriting this function.

        Args:
            gmm_assignment (Tensor): The prediction of GMM which is of shape
                (num_samples,). The 0/1 value indicates the distribution
                that each sample comes from.
            scores (Tensor): The probability of sample coming from the
                fit GMM distribution. The tensor is of shape (num_samples,).
            pos_inds_gmm (Tensor): All the indexes of samples which are used
                to fit GMM model. The tensor is of shape (num_samples,)

        Returns:
            tuple[Tensor]: The indices of positive and ignored samples.

                - pos_inds_temp (Tensor): Indices of positive samples.
                - ignore_inds_temp (Tensor): Indices of ignore samples.
        """
        # The implementation is (c) in Fig.3 of the original paper instead
        # of (b). You can refer to issues such as
        # https://github.com/kkhoot/PAA/issues/8 and
        # https://github.com/kkhoot/PAA/issues/9.
        fgs = gmm_assignment == 0
        pos_inds_temp = fgs.new_tensor([], dtype=torch.long)
        ignore_inds_temp = fgs.new_tensor([], dtype=torch.long)
        if fgs.nonzero().numel():
            _, pos_thr_ind = scores[fgs].topk(1)
            pos_inds_temp = pos_inds_gmm[fgs][:pos_thr_ind + 1]
            ignore_inds_temp = pos_inds_gmm.new_tensor([])
        return pos_inds_temp, ignore_inds_temp
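    # --- Illustrative sketch (editor's addition, not in the original file) ---
    # paa_reassign fits a two-component 1-D GMM to sorted per-anchor losses
    # and keeps component 0 (low loss) as positive. With made-up losses and
    # covariance_type='diag':
    #
    #     import numpy as np
    #     import sklearn.mixture as skm
    #     losses = np.sort([0.10, 0.15, 0.20, 2.0, 2.2]).reshape(-1, 1)
    #     gmm = skm.GaussianMixture(
    #         2,
    #         weights_init=np.array([0.5, 0.5]),
    #         means_init=np.array([[losses.min()], [losses.max()]]),
    #         precisions_init=np.array([1.0, 1.0]).reshape(2, 1),
    #         covariance_type='diag')
    #     gmm.fit(losses)
    #     gmm.predict(losses)   # low-loss samples land in component 0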
    def get_targets(
        self,
        anchor_list,
        valid_flag_list,
        gt_bboxes_list,
        img_metas,
        gt_bboxes_ignore_list=None,
        gt_labels_list=None,
        label_channels=1,
        unmap_outputs=True,
    ):
        """Get targets for PAA head.

        This method is almost the same as `AnchorHead.get_targets()`. We
        directly return the results from `_get_targets_single` instead of
        mapping them to levels by the `images_to_levels` function.

        Args:
            anchor_list (list[list[Tensor]]): Multi level anchors of each
                image. The outer list indicates images, and the inner list
                corresponds to feature levels of the image. Each element of
                the inner list is a tensor of shape (num_anchors, 4).
            valid_flag_list (list[list[Tensor]]): Multi level valid flags of
                each image. The outer list indicates images, and the inner
                list corresponds to feature levels of the image. Each element
                of the inner list is a tensor of shape (num_anchors, )
            gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image.
            img_metas (list[dict]): Meta info of each image.
            gt_bboxes_ignore_list (list[Tensor]): Ground truth bboxes to be
                ignored.
            gt_labels_list (list[Tensor]): Ground truth labels of each box.
            label_channels (int): Channel of label.
            unmap_outputs (bool): Whether to map outputs back to the original
                set of anchors.

        Returns:
            tuple: Usually returns a tuple containing learning targets.

                - labels (list[Tensor]): Labels of all anchors, each with
                  shape (num_anchors,).
                - label_weights (list[Tensor]): Label weights of all anchors,
                  each with shape (num_anchors,).
                - bbox_targets (list[Tensor]): BBox targets of all anchors,
                  each with shape (num_anchors, 4).
                - bbox_weights (list[Tensor]): BBox weights of all anchors,
                  each with shape (num_anchors, 4).
                - pos_inds (list[Tensor]): Contains all index of positive
                  sample in all anchor.
                - gt_inds (list[Tensor]): Contains all gt_index of positive
                  sample in all anchor.
        """
        num_imgs = len(img_metas)
        assert len(anchor_list) == len(valid_flag_list) == num_imgs

        concat_anchor_list = []
        concat_valid_flag_list = []
        for i in range(num_imgs):
            assert len(anchor_list[i]) == len(valid_flag_list[i])
            concat_anchor_list.append(torch.cat(anchor_list[i]))
            concat_valid_flag_list.append(torch.cat(valid_flag_list[i]))

        # compute targets for each image
        if gt_bboxes_ignore_list is None:
            gt_bboxes_ignore_list = [None for _ in range(num_imgs)]
        if gt_labels_list is None:
            gt_labels_list = [None for _ in range(num_imgs)]
        results = multi_apply(
            self._get_targets_single,
            concat_anchor_list,
            concat_valid_flag_list,
            gt_bboxes_list,
            gt_bboxes_ignore_list,
            gt_labels_list,
            img_metas,
            label_channels=label_channels,
            unmap_outputs=unmap_outputs)

        (labels, label_weights, bbox_targets, bbox_weights, valid_pos_inds,
         valid_neg_inds, sampling_result) = results

        # Due to valid flag of anchors, we have to calculate the real pos_inds
        # in origin anchor set.
        pos_inds = []
        for i, single_labels in enumerate(labels):
            pos_mask = (0 <= single_labels) & (
                single_labels < self.num_classes)
            pos_inds.append(pos_mask.nonzero().view(-1))

        gt_inds = [item.pos_assigned_gt_inds for item in sampling_result]
        return (labels, label_weights, bbox_targets, bbox_weights, pos_inds,
                gt_inds)
    def _get_targets_single(self,
                            flat_anchors,
                            valid_flags,
                            gt_bboxes,
                            gt_bboxes_ignore,
                            gt_labels,
                            img_meta,
                            label_channels=1,
                            unmap_outputs=True):
        """Compute regression and classification targets for anchors in a
        single image.

        This method is the same as `AnchorHead._get_targets_single()`.
        """
        assert unmap_outputs, 'We must map outputs back to the original' \
            'set of anchors in PAAhead'
        return super(ATSSHead, self)._get_targets_single(
            flat_anchors,
            valid_flags,
            gt_bboxes,
            gt_bboxes_ignore,
            gt_labels,
            img_meta,
            label_channels=1,
            unmap_outputs=True)
    def _get_bboxes_single(self,
                           cls_scores,
                           bbox_preds,
                           iou_preds,
                           mlvl_anchors,
                           img_shape,
                           scale_factor,
                           cfg,
                           rescale=False,
                           with_nms=True):
        """Transform outputs for a single batch item into labeled boxes.

        This method is almost the same as `ATSSHead._get_bboxes_single()`.
        We use sqrt(iou_preds * cls_scores) in the NMS process instead of just
        cls_scores. Besides, score voting is used when `score_voting` is set
        to True.
        """
        assert with_nms, 'PAA only supports "with_nms=True" now'
        assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)
        mlvl_bboxes = []
        mlvl_scores = []
        mlvl_iou_preds = []
        for cls_score, bbox_pred, iou_preds, anchors in zip(
                cls_scores, bbox_preds, iou_preds, mlvl_anchors):
            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]

            scores = cls_score.permute(1, 2, 0).reshape(
                -1, self.cls_out_channels).sigmoid()
            bbox_pred = bbox_pred.permute(1, 2, 0).reshape(-1, 4)
            iou_preds = iou_preds.permute(1, 2, 0).reshape(-1).sigmoid()
            nms_pre = cfg.get('nms_pre', -1)
            if nms_pre > 0 and scores.shape[0] > nms_pre:
                max_scores, _ = (scores * iou_preds[:, None]).sqrt().max(dim=1)
                _, topk_inds = max_scores.topk(nms_pre)
                anchors = anchors[topk_inds, :]
                bbox_pred = bbox_pred[topk_inds, :]
                scores = scores[topk_inds, :]
                iou_preds = iou_preds[topk_inds]

            bboxes = self.bbox_coder.decode(
                anchors, bbox_pred, max_shape=img_shape)
            mlvl_bboxes.append(bboxes)
            mlvl_scores.append(scores)
            mlvl_iou_preds.append(iou_preds)

        mlvl_bboxes = torch.cat(mlvl_bboxes)
        if rescale:
            mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
        mlvl_scores = torch.cat(mlvl_scores)
        # Add a dummy background class to the backend when using sigmoid
        # remind that we set FG labels to [0, num_class-1] since mmdet v2.0
        # BG cat_id: num_class
        padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
        mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)
        mlvl_iou_preds = torch.cat(mlvl_iou_preds)
        mlvl_nms_scores = (mlvl_scores * mlvl_iou_preds[:, None]).sqrt()
        det_bboxes, det_labels = multiclass_nms(
            mlvl_bboxes,
            mlvl_nms_scores,
            cfg.score_thr,
            cfg.nms,
            cfg.max_per_img,
            score_factors=None)
        if self.with_score_voting and len(det_bboxes) > 0:
            det_bboxes, det_labels = self.score_voting(det_bboxes, det_labels,
                                                       mlvl_bboxes,
                                                       mlvl_nms_scores,
                                                       cfg.score_thr)

        return det_bboxes, det_labels
    def score_voting(self, det_bboxes, det_labels, mlvl_bboxes,
                     mlvl_nms_scores, score_thr):
        """Implementation of the score voting method, which works on the
        remaining boxes after the NMS procedure.

        Args:
            det_bboxes (Tensor): Remaining boxes after NMS procedure,
                with shape (k, 5), each dimension means
                (x1, y1, x2, y2, score).
            det_labels (Tensor): The labels of remaining boxes, with shape
                (k, ). Labels are 0-based.
            mlvl_bboxes (Tensor): All boxes before the NMS procedure,
                with shape (num_anchors, 4).
            mlvl_nms_scores (Tensor): The scores of all boxes which are used
                in the NMS procedure, with shape (num_anchors, num_class)
            score_thr (float): The score threshold of bboxes.

        Returns:
            tuple: Usually returns a tuple containing voting results.

                - det_bboxes_voted (Tensor): Remaining boxes after
                  score voting procedure, with shape (k, 5), each
                  dimension means (x1, y1, x2, y2, score).
                - det_labels_voted (Tensor): Label of remaining bboxes
                  after voting, with shape (num_anchors,).
        """
        candidate_mask = mlvl_nms_scores > score_thr
        candidate_mask_nozeros = candidate_mask.nonzero()
        candidate_inds = candidate_mask_nozeros[:, 0]
        candidate_labels = candidate_mask_nozeros[:, 1]
        candidate_bboxes = mlvl_bboxes[candidate_inds]
        candidate_scores = mlvl_nms_scores[candidate_mask]
        det_bboxes_voted = []
        det_labels_voted = []
        for cls in range(self.cls_out_channels):
            candidate_cls_mask = candidate_labels == cls
            if not candidate_cls_mask.any():
                continue
            candidate_cls_scores = candidate_scores[candidate_cls_mask]
            candidate_cls_bboxes = candidate_bboxes[candidate_cls_mask]
            det_cls_mask = det_labels == cls
            det_cls_bboxes = det_bboxes[det_cls_mask].view(
                -1, det_bboxes.size(-1))
            det_candidate_ious = bbox_overlaps(det_cls_bboxes[:, :4],
                                               candidate_cls_bboxes)
            for det_ind in range(len(det_cls_bboxes)):
                single_det_ious = det_candidate_ious[det_ind]
                pos_ious_mask = single_det_ious > 0.01
                pos_ious = single_det_ious[pos_ious_mask]
                pos_bboxes = candidate_cls_bboxes[pos_ious_mask]
                pos_scores = candidate_cls_scores[pos_ious_mask]
                pis = (torch.exp(-(1 - pos_ious)**2 / 0.025) *
                       pos_scores)[:, None]
                voted_box = torch.sum(
                    pis * pos_bboxes, dim=0) / torch.sum(
                        pis, dim=0)
                voted_score = det_cls_bboxes[det_ind][-1:][None, :]
                det_bboxes_voted.append(
                    torch.cat((voted_box[None, :], voted_score), dim=1))
                det_labels_voted.append(cls)

        det_bboxes_voted = torch.cat(det_bboxes_voted, dim=0)
        det_labels_voted = det_labels.new_tensor(det_labels_voted)
        return det_bboxes_voted, det_labels_voted
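Editor's note: a quick numeric check of the voting weight `exp(-(1 - IoU)^2 / 0.025) * score` used in `score_voting` above (illustrative values only); the weight collapses quickly as IoU with the kept detection drops, so near-duplicates dominate the voted box:

import torch

ious = torch.tensor([1.0, 0.9, 0.7])
scores = torch.full((3, ), 0.8)
pis = torch.exp(-(1 - ious)**2 / 0.025) * scores
# pis ≈ tensor([0.8000, 0.5362, 0.0219])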
PyTorch/NLP/Conformer-main/mmdetection/mmdet/models/dense_heads/pisa_retinanet_head.py
import torch
from mmcv.runner import force_fp32

from mmdet.core import images_to_levels
from ..builder import HEADS
from ..losses import carl_loss, isr_p
from .retina_head import RetinaHead


@HEADS.register_module()
class PISARetinaHead(RetinaHead):
    """PISA Retinanet Head.

    The head shares the same structure as the RetinaNet head, but differs in
    two aspects:
    1. Importance-based Sample Reweighting Positive (ISR-P) is applied to
       change the positive loss weights.
    2. Classification-aware regression loss is adopted as a third loss.
    """
    @force_fp32(apply_to=('cls_scores', 'bbox_preds'))
    def loss(self,
             cls_scores,
             bbox_preds,
             gt_bboxes,
             gt_labels,
             img_metas,
             gt_bboxes_ignore=None):
        """Compute losses of the head.

        Args:
            cls_scores (list[Tensor]): Box scores for each scale level.
                Has shape (N, num_anchors * num_classes, H, W)
            bbox_preds (list[Tensor]): Box energies / deltas for each scale
                level with shape (N, num_anchors * 4, H, W)
            gt_bboxes (list[Tensor]): Ground truth bboxes of each image
                with shape (num_obj, 4).
            gt_labels (list[Tensor]): Ground truth labels of each image
                with shape (num_obj, ).
            img_metas (list[dict]): Meta information of each image, e.g.,
                image size, scaling factor, etc.
            gt_bboxes_ignore (list[Tensor]): Ignored gt bboxes of each image.
                Default: None.

        Returns:
            dict: Loss dict, comprising classification loss, regression loss
                and CARL loss.
        """
        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
        assert len(featmap_sizes) == self.anchor_generator.num_levels

        device = cls_scores[0].device

        anchor_list, valid_flag_list = self.get_anchors(
            featmap_sizes, img_metas, device=device)
        label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1
        cls_reg_targets = self.get_targets(
            anchor_list,
            valid_flag_list,
            gt_bboxes,
            img_metas,
            gt_bboxes_ignore_list=gt_bboxes_ignore,
            gt_labels_list=gt_labels,
            label_channels=label_channels,
            return_sampling_results=True)
        if cls_reg_targets is None:
            return None
        (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list,
         num_total_pos, num_total_neg, sampling_results_list) = cls_reg_targets
        num_total_samples = (
            num_total_pos + num_total_neg if self.sampling else num_total_pos)

        # anchor number of multi levels
        num_level_anchors = [anchors.size(0) for anchors in anchor_list[0]]
        # concat all level anchors and flags to a single tensor
        concat_anchor_list = []
        for i in range(len(anchor_list)):
            concat_anchor_list.append(torch.cat(anchor_list[i]))
        all_anchor_list = images_to_levels(concat_anchor_list,
                                           num_level_anchors)

        num_imgs = len(img_metas)
        flatten_cls_scores = [
            cls_score.permute(0, 2, 3, 1).reshape(num_imgs, -1, label_channels)
            for cls_score in cls_scores
        ]
        flatten_cls_scores = torch.cat(
            flatten_cls_scores, dim=1).reshape(-1,
                                               flatten_cls_scores[0].size(-1))
        flatten_bbox_preds = [
            bbox_pred.permute(0, 2, 3, 1).reshape(num_imgs, -1, 4)
            for bbox_pred in bbox_preds
        ]
        flatten_bbox_preds = torch.cat(
            flatten_bbox_preds, dim=1).view(-1, flatten_bbox_preds[0].size(-1))
        flatten_labels = torch.cat(labels_list, dim=1).reshape(-1)
        flatten_label_weights = torch.cat(
            label_weights_list, dim=1).reshape(-1)
        flatten_anchors = torch.cat(all_anchor_list, dim=1).reshape(-1, 4)
        flatten_bbox_targets = torch.cat(
            bbox_targets_list, dim=1).reshape(-1, 4)
        flatten_bbox_weights = torch.cat(
            bbox_weights_list, dim=1).reshape(-1, 4)

        # Apply ISR-P
        isr_cfg = self.train_cfg.get('isr', None)
        if isr_cfg is not None:
            all_targets = (flatten_labels, flatten_label_weights,
                           flatten_bbox_targets, flatten_bbox_weights)
            with torch.no_grad():
                all_targets = isr_p(
                    flatten_cls_scores,
                    flatten_bbox_preds,
                    all_targets,
                    flatten_anchors,
                    sampling_results_list,
                    bbox_coder=self.bbox_coder,
                    loss_cls=self.loss_cls,
                    num_class=self.num_classes,
                    **self.train_cfg.isr)
            (flatten_labels, flatten_label_weights, flatten_bbox_targets,
             flatten_bbox_weights) = all_targets

        # For convenience we compute the loss once instead of separating it by
        # FPN level, so that we don't need to separate the weights by level
        # again. The result should be the same
        losses_cls = self.loss_cls(
            flatten_cls_scores,
            flatten_labels,
            flatten_label_weights,
            avg_factor=num_total_samples)
        losses_bbox = self.loss_bbox(
            flatten_bbox_preds,
            flatten_bbox_targets,
            flatten_bbox_weights,
            avg_factor=num_total_samples)
        loss_dict = dict(loss_cls=losses_cls, loss_bbox=losses_bbox)

        # CARL Loss
        carl_cfg = self.train_cfg.get('carl', None)
        if carl_cfg is not None:
            loss_carl = carl_loss(
                flatten_cls_scores,
                flatten_labels,
                flatten_bbox_preds,
                flatten_bbox_targets,
                self.loss_bbox,
                **self.train_cfg.carl,
                avg_factor=num_total_pos,
                sigmoid=True,
                num_class=self.num_classes)
            loss_dict.update(loss_carl)
        return loss_dict
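Editor's note: a self-contained shape check for the multi-level flattening used above, assuming 9 anchors and 80 classes per location (values are illustrative):

import torch

num_imgs, A, C = 2, 9, 80
cls_scores = [torch.randn(num_imgs, A * C, 8, 8),
              torch.randn(num_imgs, A * C, 4, 4)]
flat = [s.permute(0, 2, 3, 1).reshape(num_imgs, -1, C) for s in cls_scores]
flat = torch.cat(flat, dim=1).reshape(-1, C)
assert flat.shape == (num_imgs * A * (8 * 8 + 4 * 4), C)   # (1440, 80)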
PyTorch/NLP/Conformer-main/mmdetection/mmdet/models/dense_heads/pisa_ssd_head.py
import torch

from mmdet.core import multi_apply
from ..builder import HEADS
from ..losses import CrossEntropyLoss, SmoothL1Loss, carl_loss, isr_p
from .ssd_head import SSDHead


# TODO: add loss evaluator for SSD
@HEADS.register_module()
class PISASSDHead(SSDHead):

    def loss(self,
             cls_scores,
             bbox_preds,
             gt_bboxes,
             gt_labels,
             img_metas,
             gt_bboxes_ignore=None):
        """Compute losses of the head.

        Args:
            cls_scores (list[Tensor]): Box scores for each scale level.
                Has shape (N, num_anchors * num_classes, H, W)
            bbox_preds (list[Tensor]): Box energies / deltas for each scale
                level with shape (N, num_anchors * 4, H, W)
            gt_bboxes (list[Tensor]): Ground truth bboxes of each image
                with shape (num_obj, 4).
            gt_labels (list[Tensor]): Ground truth labels of each image
                with shape (num_obj, ).
            img_metas (list[dict]): Meta information of each image, e.g.,
                image size, scaling factor, etc.
            gt_bboxes_ignore (list[Tensor]): Ignored gt bboxes of each image.
                Default: None.

        Returns:
            dict: Loss dict, comprising classification loss, regression loss
                and CARL loss.
        """
        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
        assert len(featmap_sizes) == self.anchor_generator.num_levels

        device = cls_scores[0].device

        anchor_list, valid_flag_list = self.get_anchors(
            featmap_sizes, img_metas, device=device)
        cls_reg_targets = self.get_targets(
            anchor_list,
            valid_flag_list,
            gt_bboxes,
            img_metas,
            gt_bboxes_ignore_list=gt_bboxes_ignore,
            gt_labels_list=gt_labels,
            label_channels=1,
            unmap_outputs=False,
            return_sampling_results=True)
        if cls_reg_targets is None:
            return None
        (labels_list, label_weights_list, bbox_targets_list, bbox_weights_list,
         num_total_pos, num_total_neg, sampling_results_list) = cls_reg_targets

        num_images = len(img_metas)
        all_cls_scores = torch.cat([
            s.permute(0, 2, 3, 1).reshape(
                num_images, -1, self.cls_out_channels) for s in cls_scores
        ], 1)
        all_labels = torch.cat(labels_list, -1).view(num_images, -1)
        all_label_weights = torch.cat(label_weights_list,
                                      -1).view(num_images, -1)
        all_bbox_preds = torch.cat([
            b.permute(0, 2, 3, 1).reshape(num_images, -1, 4)
            for b in bbox_preds
        ], -2)
        all_bbox_targets = torch.cat(bbox_targets_list,
                                     -2).view(num_images, -1, 4)
        all_bbox_weights = torch.cat(bbox_weights_list,
                                     -2).view(num_images, -1, 4)

        # concat all level anchors to a single tensor
        all_anchors = []
        for i in range(num_images):
            all_anchors.append(torch.cat(anchor_list[i]))

        isr_cfg = self.train_cfg.get('isr', None)
        all_targets = (all_labels.view(-1), all_label_weights.view(-1),
                       all_bbox_targets.view(-1,
                                             4), all_bbox_weights.view(-1, 4))
        # apply ISR-P
        if isr_cfg is not None:
            all_targets = isr_p(
                all_cls_scores.view(-1, all_cls_scores.size(-1)),
                all_bbox_preds.view(-1, 4),
                all_targets,
                torch.cat(all_anchors),
                sampling_results_list,
                loss_cls=CrossEntropyLoss(),
                bbox_coder=self.bbox_coder,
                **self.train_cfg.isr,
                num_class=self.num_classes)
            (new_labels, new_label_weights, new_bbox_targets,
             new_bbox_weights) = all_targets
            all_labels = new_labels.view(all_labels.shape)
            all_label_weights = new_label_weights.view(all_label_weights.shape)
            all_bbox_targets = new_bbox_targets.view(all_bbox_targets.shape)
            all_bbox_weights = new_bbox_weights.view(all_bbox_weights.shape)

        # add CARL loss
        carl_loss_cfg = self.train_cfg.get('carl', None)
        if carl_loss_cfg is not None:
            loss_carl = carl_loss(
                all_cls_scores.view(-1, all_cls_scores.size(-1)),
                all_targets[0],
                all_bbox_preds.view(-1, 4),
                all_targets[2],
                SmoothL1Loss(beta=1.),
                **self.train_cfg.carl,
                avg_factor=num_total_pos,
                num_class=self.num_classes)

        # check NaN and Inf
        assert torch.isfinite(all_cls_scores).all().item(), \
            'classification scores become infinite or NaN!'
        assert torch.isfinite(all_bbox_preds).all().item(), \
            'bbox predictions become infinite or NaN!'

        losses_cls, losses_bbox = multi_apply(
            self.loss_single,
            all_cls_scores,
            all_bbox_preds,
            all_anchors,
            all_labels,
            all_label_weights,
            all_bbox_targets,
            all_bbox_weights,
            num_total_samples=num_total_pos)
        loss_dict = dict(loss_cls=losses_cls, loss_bbox=losses_bbox)
        if carl_loss_cfg is not None:
            loss_dict.update(loss_carl)
        return loss_dict
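Editor's note: the NaN/Inf guard above relies on `torch.isfinite(...).all().item()` reducing to a plain Python bool; a minimal check:

import torch

ok = torch.tensor([0.5, 1.0])
bad = torch.tensor([0.5, float('nan')])
assert torch.isfinite(ok).all().item()
assert not torch.isfinite(bad).all().item()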
PyTorch/NLP/Conformer-main/mmdetection/mmdet/models/dense_heads/reppoints_head.py
import numpy as np
import torch
import torch.nn as nn
from mmcv.cnn import ConvModule, bias_init_with_prob, normal_init
from mmcv.ops import DeformConv2d

from mmdet.core import (PointGenerator, build_assigner, build_sampler,
                        images_to_levels, multi_apply, multiclass_nms, unmap)
from ..builder import HEADS, build_loss
from .anchor_free_head import AnchorFreeHead


@HEADS.register_module()
class RepPointsHead(AnchorFreeHead):
    """RepPoint head.

    Args:
        point_feat_channels (int): Number of channels of points features.
        gradient_mul (float): The multiplier to gradients from
            points refinement and recognition.
        point_strides (Iterable): points strides.
        point_base_scale (int): bbox scale for assigning labels.
        loss_cls (dict): Config of classification loss.
        loss_bbox_init (dict): Config of initial points loss.
        loss_bbox_refine (dict): Config of points loss in refinement.
        use_grid_points (bool): If we use the bounding box representation,
            the reppoints are represented as grid points on the bounding box.
        center_init (bool): Whether to use center point assignment.
        transform_method (str): The methods to transform RepPoints to bbox.
    """  # noqa: W605
    def __init__(self,
                 num_classes,
                 in_channels,
                 point_feat_channels=256,
                 num_points=9,
                 gradient_mul=0.1,
                 point_strides=[8, 16, 32, 64, 128],
                 point_base_scale=4,
                 loss_cls=dict(
                     type='FocalLoss',
                     use_sigmoid=True,
                     gamma=2.0,
                     alpha=0.25,
                     loss_weight=1.0),
                 loss_bbox_init=dict(
                     type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=0.5),
                 loss_bbox_refine=dict(
                     type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
                 use_grid_points=False,
                 center_init=True,
                 transform_method='moment',
                 moment_mul=0.01,
                 **kwargs):
        self.num_points = num_points
        self.point_feat_channels = point_feat_channels
        self.use_grid_points = use_grid_points
        self.center_init = center_init

        # we use deform conv to extract points features
        self.dcn_kernel = int(np.sqrt(num_points))
        self.dcn_pad = int((self.dcn_kernel - 1) / 2)
        assert self.dcn_kernel * self.dcn_kernel == num_points, \
            'The points number should be a square number.'
        assert self.dcn_kernel % 2 == 1, \
            'The points number should be an odd square number.'
        dcn_base = np.arange(-self.dcn_pad,
                             self.dcn_pad + 1).astype(np.float64)
        dcn_base_y = np.repeat(dcn_base, self.dcn_kernel)
        dcn_base_x = np.tile(dcn_base, self.dcn_kernel)
        dcn_base_offset = np.stack([dcn_base_y, dcn_base_x], axis=1).reshape(
            (-1))
        self.dcn_base_offset = torch.tensor(dcn_base_offset).view(1, -1, 1, 1)

        super().__init__(num_classes, in_channels, loss_cls=loss_cls, **kwargs)

        self.gradient_mul = gradient_mul
        self.point_base_scale = point_base_scale
        self.point_strides = point_strides
        self.point_generators = [PointGenerator() for _ in self.point_strides]

        self.sampling = loss_cls['type'] not in ['FocalLoss']
        if self.train_cfg:
            self.init_assigner = build_assigner(self.train_cfg.init.assigner)
            self.refine_assigner = build_assigner(
                self.train_cfg.refine.assigner)
            # use PseudoSampler when sampling is False
            if self.sampling and hasattr(self.train_cfg, 'sampler'):
                sampler_cfg = self.train_cfg.sampler
            else:
                sampler_cfg = dict(type='PseudoSampler')
            self.sampler = build_sampler(sampler_cfg, context=self)
        self.transform_method = transform_method
        if self.transform_method == 'moment':
            self.moment_transfer = nn.Parameter(
                data=torch.zeros(2), requires_grad=True)
            self.moment_mul = moment_mul

        self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)
        if self.use_sigmoid_cls:
            self.cls_out_channels = self.num_classes
        else:
            self.cls_out_channels = self.num_classes + 1
        self.loss_bbox_init = build_loss(loss_bbox_init)
        self.loss_bbox_refine = build_loss(loss_bbox_refine)
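    # --- Illustrative sketch (editor's addition, not in the original file) ---
    # For the default num_points=9 (dcn_kernel=3, dcn_pad=1), the code above
    # builds a 3x3 grid of (y, x) offsets:
    #
    #     import numpy as np
    #     dcn_base = np.arange(-1, 2)              # [-1, 0, 1]
    #     dcn_base_y = np.repeat(dcn_base, 3)      # [-1 -1 -1  0  0  0  1  1  1]
    #     dcn_base_x = np.tile(dcn_base, 3)        # [-1  0  1 -1  0  1 -1  0  1]
    #     offset = np.stack([dcn_base_y, dcn_base_x], axis=1).reshape(-1)
    #     assert offset.shape == (18, )            # 9 (y, x) pairs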
    def _init_layers(self):
        """Initialize layers of the head."""
        self.relu = nn.ReLU(inplace=True)
        self.cls_convs = nn.ModuleList()
        self.reg_convs = nn.ModuleList()
        for i in range(self.stacked_convs):
            chn = self.in_channels if i == 0 else self.feat_channels
            self.cls_convs.append(
                ConvModule(
                    chn,
                    self.feat_channels,
                    3,
                    stride=1,
                    padding=1,
                    conv_cfg=self.conv_cfg,
                    norm_cfg=self.norm_cfg))
            self.reg_convs.append(
                ConvModule(
                    chn,
                    self.feat_channels,
                    3,
                    stride=1,
                    padding=1,
                    conv_cfg=self.conv_cfg,
                    norm_cfg=self.norm_cfg))
        pts_out_dim = 4 if self.use_grid_points else 2 * self.num_points
        self.reppoints_cls_conv = DeformConv2d(self.feat_channels,
                                               self.point_feat_channels,
                                               self.dcn_kernel, 1,
                                               self.dcn_pad)
        self.reppoints_cls_out = nn.Conv2d(self.point_feat_channels,
                                           self.cls_out_channels, 1, 1, 0)
        self.reppoints_pts_init_conv = nn.Conv2d(self.feat_channels,
                                                 self.point_feat_channels, 3,
                                                 1, 1)
        self.reppoints_pts_init_out = nn.Conv2d(self.point_feat_channels,
                                                pts_out_dim, 1, 1, 0)
        self.reppoints_pts_refine_conv = DeformConv2d(self.feat_channels,
                                                      self.point_feat_channels,
                                                      self.dcn_kernel, 1,
                                                      self.dcn_pad)
        self.reppoints_pts_refine_out = nn.Conv2d(self.point_feat_channels,
                                                  pts_out_dim, 1, 1, 0)
    def init_weights(self):
        """Initialize weights of the head."""
        for m in self.cls_convs:
            normal_init(m.conv, std=0.01)
        for m in self.reg_convs:
            normal_init(m.conv, std=0.01)
        bias_cls = bias_init_with_prob(0.01)
        normal_init(self.reppoints_cls_conv, std=0.01)
        normal_init(self.reppoints_cls_out, std=0.01, bias=bias_cls)
        normal_init(self.reppoints_pts_init_conv, std=0.01)
        normal_init(self.reppoints_pts_init_out, std=0.01)
        normal_init(self.reppoints_pts_refine_conv, std=0.01)
        normal_init(self.reppoints_pts_refine_out, std=0.01)
    def points2bbox(self, pts, y_first=True):
        """Convert a point set into a bounding box.

        :param pts: the input point sets (fields); each point set
            (fields) is represented as 2n scalars.
        :param y_first: if y_first=True, the point set is represented as
            [y1, x1, y2, x2 ... yn, xn], otherwise the point set is
            represented as [x1, y1, x2, y2 ... xn, yn].
        :return: each point set is converted to a bbox [x1, y1, x2, y2].
        """
        pts_reshape = pts.view(pts.shape[0], -1, 2, *pts.shape[2:])
        pts_y = pts_reshape[:, :, 0, ...] if y_first else pts_reshape[:, :, 1,
                                                                      ...]
        pts_x = pts_reshape[:, :, 1, ...] if y_first else pts_reshape[:, :, 0,
                                                                      ...]
        if self.transform_method == 'minmax':
            bbox_left = pts_x.min(dim=1, keepdim=True)[0]
            bbox_right = pts_x.max(dim=1, keepdim=True)[0]
            bbox_up = pts_y.min(dim=1, keepdim=True)[0]
            bbox_bottom = pts_y.max(dim=1, keepdim=True)[0]
            bbox = torch.cat([bbox_left, bbox_up, bbox_right, bbox_bottom],
                             dim=1)
        elif self.transform_method == 'partial_minmax':
            pts_y = pts_y[:, :4, ...]
            pts_x = pts_x[:, :4, ...]
            bbox_left = pts_x.min(dim=1, keepdim=True)[0]
            bbox_right = pts_x.max(dim=1, keepdim=True)[0]
            bbox_up = pts_y.min(dim=1, keepdim=True)[0]
            bbox_bottom = pts_y.max(dim=1, keepdim=True)[0]
            bbox = torch.cat([bbox_left, bbox_up, bbox_right, bbox_bottom],
                             dim=1)
        elif self.transform_method == 'moment':
            pts_y_mean = pts_y.mean(dim=1, keepdim=True)
            pts_x_mean = pts_x.mean(dim=1, keepdim=True)
            pts_y_std = torch.std(pts_y - pts_y_mean, dim=1, keepdim=True)
            pts_x_std = torch.std(pts_x - pts_x_mean, dim=1, keepdim=True)
            moment_transfer = (self.moment_transfer * self.moment_mul) + (
                self.moment_transfer.detach() * (1 - self.moment_mul))
            moment_width_transfer = moment_transfer[0]
            moment_height_transfer = moment_transfer[1]
            half_width = pts_x_std * torch.exp(moment_width_transfer)
            half_height = pts_y_std * torch.exp(moment_height_transfer)
            bbox = torch.cat([
                pts_x_mean - half_width, pts_y_mean - half_height,
                pts_x_mean + half_width, pts_y_mean + half_height
            ],
                             dim=1)
        else:
            raise NotImplementedError
        return bbox
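    # --- Illustrative sketch (editor's addition, not in the original file) ---
    # The 'minmax' branch above reduces a point set to its axis-aligned
    # bounding box; for a single set of three (x, y) points:
    #
    #     import torch
    #     pts_x = torch.tensor([1., 4., 2.])
    #     pts_y = torch.tensor([3., 0., 5.])
    #     bbox = [pts_x.min(), pts_y.min(), pts_x.max(), pts_y.max()]
    #     # -> [1., 0., 4., 5.] as (x1, y1, x2, y2)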
    def gen_grid_from_reg(self, reg, previous_boxes):
        """Based on the previous bboxes and regression values, we compute the
        regressed bboxes and generate the grids on the bboxes.

        :param reg: the regression values relative to previous bboxes.
        :param previous_boxes: previous bboxes.
        :return: generated grids on the regressed bboxes.
        """
        b, _, h, w = reg.shape
        bxy = (previous_boxes[:, :2, ...] + previous_boxes[:, 2:, ...]) / 2.
        bwh = (previous_boxes[:, 2:, ...] -
               previous_boxes[:, :2, ...]).clamp(min=1e-6)
        grid_topleft = bxy + bwh * reg[:, :2, ...] - 0.5 * bwh * torch.exp(
            reg[:, 2:, ...])
        grid_wh = bwh * torch.exp(reg[:, 2:, ...])
        grid_left = grid_topleft[:, [0], ...]
        grid_top = grid_topleft[:, [1], ...]
        grid_width = grid_wh[:, [0], ...]
        grid_height = grid_wh[:, [1], ...]
        interval = torch.linspace(0., 1., self.dcn_kernel).view(
            1, self.dcn_kernel, 1, 1).type_as(reg)
        grid_x = grid_left + grid_width * interval
        grid_x = grid_x.unsqueeze(1).repeat(1, self.dcn_kernel, 1, 1, 1)
        grid_x = grid_x.view(b, -1, h, w)
        grid_y = grid_top + grid_height * interval
        grid_y = grid_y.unsqueeze(2).repeat(1, 1, self.dcn_kernel, 1, 1)
        grid_y = grid_y.view(b, -1, h, w)
        grid_yx = torch.stack([grid_y, grid_x], dim=2)
        grid_yx = grid_yx.view(b, -1, h, w)
        regressed_bbox = torch.cat([
            grid_left, grid_top, grid_left + grid_width,
            grid_top + grid_height
        ], 1)
        return grid_yx, regressed_bbox
def
forward
(
self
,
feats
):
return
multi_apply
(
self
.
forward_single
,
feats
)
def
forward_single
(
self
,
x
):
"""Forward feature map of a single FPN level."""
dcn_base_offset
=
self
.
dcn_base_offset
.
type_as
(
x
)
# If we use center_init, the initial reppoints is from center points.
# If we use bounding bbox representation, the initial reppoints is
# from regular grid placed on a pre-defined bbox.
if
self
.
use_grid_points
or
not
self
.
center_init
:
scale
=
self
.
point_base_scale
/
2
points_init
=
dcn_base_offset
/
dcn_base_offset
.
max
()
*
scale
bbox_init
=
x
.
new_tensor
([
-
scale
,
-
scale
,
scale
,
scale
]).
view
(
1
,
4
,
1
,
1
)
else
:
points_init
=
0
cls_feat
=
x
pts_feat
=
x
for
cls_conv
in
self
.
cls_convs
:
cls_feat
=
cls_conv
(
cls_feat
)
for
reg_conv
in
self
.
reg_convs
:
pts_feat
=
reg_conv
(
pts_feat
)
# initialize reppoints
pts_out_init
=
self
.
reppoints_pts_init_out
(
self
.
relu
(
self
.
reppoints_pts_init_conv
(
pts_feat
)))
if
self
.
use_grid_points
:
pts_out_init
,
bbox_out_init
=
self
.
gen_grid_from_reg
(
pts_out_init
,
bbox_init
.
detach
())
else
:
pts_out_init
=
pts_out_init
+
points_init
# refine and classify reppoints
pts_out_init_grad_mul
=
(
1
-
self
.
gradient_mul
)
*
pts_out_init
.
detach
(
)
+
self
.
gradient_mul
*
pts_out_init
dcn_offset
=
pts_out_init_grad_mul
-
dcn_base_offset
cls_out
=
self
.
reppoints_cls_out
(
self
.
relu
(
self
.
reppoints_cls_conv
(
cls_feat
,
dcn_offset
)))
pts_out_refine
=
self
.
reppoints_pts_refine_out
(
self
.
relu
(
self
.
reppoints_pts_refine_conv
(
pts_feat
,
dcn_offset
)))
if
self
.
use_grid_points
:
pts_out_refine
,
bbox_out_refine
=
self
.
gen_grid_from_reg
(
pts_out_refine
,
bbox_out_init
.
detach
())
else
:
pts_out_refine
=
pts_out_refine
+
pts_out_init
.
detach
()
return
cls_out
,
pts_out_init
,
pts_out_refine
def
get_points
(
self
,
featmap_sizes
,
img_metas
,
device
):
"""Get points according to feature map sizes.
Args:
featmap_sizes (list[tuple]): Multi-level feature map sizes.
img_metas (list[dict]): Image meta info.
Returns:
tuple: points of each image, valid flags of each image
"""
num_imgs
=
len
(
img_metas
)
num_levels
=
len
(
featmap_sizes
)
# since feature map sizes of all images are the same, we only compute
# points center for one time
multi_level_points
=
[]
for
i
in
range
(
num_levels
):
points
=
self
.
point_generators
[
i
].
grid_points
(
featmap_sizes
[
i
],
self
.
point_strides
[
i
],
device
)
multi_level_points
.
append
(
points
)
points_list
=
[[
point
.
clone
()
for
point
in
multi_level_points
]
for
_
in
range
(
num_imgs
)]
# for each image, we compute valid flags of multi level grids
valid_flag_list
=
[]
for
img_id
,
img_meta
in
enumerate
(
img_metas
):
multi_level_flags
=
[]
for
i
in
range
(
num_levels
):
point_stride
=
self
.
point_strides
[
i
]
feat_h
,
feat_w
=
featmap_sizes
[
i
]
h
,
w
=
img_meta
[
'pad_shape'
][:
2
]
valid_feat_h
=
min
(
int
(
np
.
ceil
(
h
/
point_stride
)),
feat_h
)
valid_feat_w
=
min
(
int
(
np
.
ceil
(
w
/
point_stride
)),
feat_w
)
flags
=
self
.
point_generators
[
i
].
valid_flags
(
(
feat_h
,
feat_w
),
(
valid_feat_h
,
valid_feat_w
),
device
)
multi_level_flags
.
append
(
flags
)
valid_flag_list
.
append
(
multi_level_flags
)
return
points_list
,
valid_flag_list
def
centers_to_bboxes
(
self
,
point_list
):
"""Get bboxes according to center points.
Only used in :class:`MaxIoUAssigner`.
"""
bbox_list
=
[]
for
i_img
,
point
in
enumerate
(
point_list
):
bbox
=
[]
for
i_lvl
in
range
(
len
(
self
.
point_strides
)):
scale
=
self
.
point_base_scale
*
self
.
point_strides
[
i_lvl
]
*
0.5
bbox_shift
=
torch
.
Tensor
([
-
scale
,
-
scale
,
scale
,
scale
]).
view
(
1
,
4
).
type_as
(
point
[
0
])
bbox_center
=
torch
.
cat
(
[
point
[
i_lvl
][:,
:
2
],
point
[
i_lvl
][:,
:
2
]],
dim
=
1
)
bbox
.
append
(
bbox_center
+
bbox_shift
)
bbox_list
.
append
(
bbox
)
return
bbox_list
def
offset_to_pts
(
self
,
center_list
,
pred_list
):
"""Change from point offset to point coordinate."""
pts_list
=
[]
for
i_lvl
in
range
(
len
(
self
.
point_strides
)):
pts_lvl
=
[]
for
i_img
in
range
(
len
(
center_list
)):
pts_center
=
center_list
[
i_img
][
i_lvl
][:,
:
2
].
repeat
(
1
,
self
.
num_points
)
pts_shift
=
pred_list
[
i_lvl
][
i_img
]
yx_pts_shift
=
pts_shift
.
permute
(
1
,
2
,
0
).
view
(
-
1
,
2
*
self
.
num_points
)
y_pts_shift
=
yx_pts_shift
[...,
0
::
2
]
x_pts_shift
=
yx_pts_shift
[...,
1
::
2
]
xy_pts_shift
=
torch
.
stack
([
x_pts_shift
,
y_pts_shift
],
-
1
)
xy_pts_shift
=
xy_pts_shift
.
view
(
*
yx_pts_shift
.
shape
[:
-
1
],
-
1
)
pts
=
xy_pts_shift
*
self
.
point_strides
[
i_lvl
]
+
pts_center
pts_lvl
.
append
(
pts
)
pts_lvl
=
torch
.
stack
(
pts_lvl
,
0
)
pts_list
.
append
(
pts_lvl
)
return
pts_list
def
_point_target_single
(
self
,
flat_proposals
,
valid_flags
,
gt_bboxes
,
gt_bboxes_ignore
,
gt_labels
,
label_channels
=
1
,
stage
=
'init'
,
unmap_outputs
=
True
):
inside_flags
=
valid_flags
if
not
inside_flags
.
any
():
return
(
None
,
)
*
7
# assign gt and sample proposals
proposals
=
flat_proposals
[
inside_flags
,
:]
if
stage
==
'init'
:
assigner
=
self
.
init_assigner
pos_weight
=
self
.
train_cfg
.
init
.
pos_weight
else
:
assigner
=
self
.
refine_assigner
pos_weight
=
self
.
train_cfg
.
refine
.
pos_weight
assign_result
=
assigner
.
assign
(
proposals
,
gt_bboxes
,
gt_bboxes_ignore
,
None
if
self
.
sampling
else
gt_labels
)
sampling_result
=
self
.
sampler
.
sample
(
assign_result
,
proposals
,
gt_bboxes
)
num_valid_proposals
=
proposals
.
shape
[
0
]
bbox_gt
=
proposals
.
new_zeros
([
num_valid_proposals
,
4
])
pos_proposals
=
torch
.
zeros_like
(
proposals
)
proposals_weights
=
proposals
.
new_zeros
([
num_valid_proposals
,
4
])
labels
=
proposals
.
new_full
((
num_valid_proposals
,
),
self
.
num_classes
,
dtype
=
torch
.
long
)
label_weights
=
proposals
.
new_zeros
(
num_valid_proposals
,
dtype
=
torch
.
float
)
pos_inds
=
sampling_result
.
pos_inds
neg_inds
=
sampling_result
.
neg_inds
if
len
(
pos_inds
)
>
0
:
pos_gt_bboxes
=
sampling_result
.
pos_gt_bboxes
bbox_gt
[
pos_inds
,
:]
=
pos_gt_bboxes
pos_proposals
[
pos_inds
,
:]
=
proposals
[
pos_inds
,
:]
proposals_weights
[
pos_inds
,
:]
=
1.0
if
gt_labels
is
None
:
# Only rpn gives gt_labels as None
# Foreground is the first class
labels
[
pos_inds
]
=
0
else
:
labels
[
pos_inds
]
=
gt_labels
[
sampling_result
.
pos_assigned_gt_inds
]
if
pos_weight
<=
0
:
label_weights
[
pos_inds
]
=
1.0
else
:
label_weights
[
pos_inds
]
=
pos_weight
if
len
(
neg_inds
)
>
0
:
label_weights
[
neg_inds
]
=
1.0
# map up to original set of proposals
if
unmap_outputs
:
num_total_proposals
=
flat_proposals
.
size
(
0
)
labels
=
unmap
(
labels
,
num_total_proposals
,
inside_flags
)
label_weights
=
unmap
(
label_weights
,
num_total_proposals
,
inside_flags
)
bbox_gt
=
unmap
(
bbox_gt
,
num_total_proposals
,
inside_flags
)
pos_proposals
=
unmap
(
pos_proposals
,
num_total_proposals
,
inside_flags
)
proposals_weights
=
unmap
(
proposals_weights
,
num_total_proposals
,
inside_flags
)
return
(
labels
,
label_weights
,
bbox_gt
,
pos_proposals
,
proposals_weights
,
pos_inds
,
neg_inds
)
def
get_targets
(
self
,
proposals_list
,
valid_flag_list
,
gt_bboxes_list
,
img_metas
,
gt_bboxes_ignore_list
=
None
,
gt_labels_list
=
None
,
stage
=
'init'
,
label_channels
=
1
,
unmap_outputs
=
True
):
"""Compute corresponding GT box and classification targets for
proposals.
Args:
proposals_list (list[list]): Multi level points/bboxes of each
image.
valid_flag_list (list[list]): Multi level valid flags of each
image.
gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image.
img_metas (list[dict]): Meta info of each image.
gt_bboxes_ignore_list (list[Tensor]): Ground truth bboxes to be
ignored.
gt_bboxes_list (list[Tensor]): Ground truth labels of each box.
stage (str): `init` or `refine`. Generate target for init stage or
refine stage
label_channels (int): Channel of label.
unmap_outputs (bool): Whether to map outputs back to the original
set of anchors.
Returns:
tuple:
- labels_list (list[Tensor]): Labels of each level.
- label_weights_list (list[Tensor]): Label weights of each level. # noqa: E501
- bbox_gt_list (list[Tensor]): Ground truth bbox of each level.
- proposal_list (list[Tensor]): Proposals(points/bboxes) of each level. # noqa: E501
- proposal_weights_list (list[Tensor]): Proposal weights of each level. # noqa: E501
- num_total_pos (int): Number of positive samples in all images. # noqa: E501
- num_total_neg (int): Number of negative samples in all images. # noqa: E501
"""
assert
stage
in
[
'init'
,
'refine'
]
num_imgs
=
len
(
img_metas
)
assert
len
(
proposals_list
)
==
len
(
valid_flag_list
)
==
num_imgs
# points number of multi levels
num_level_proposals
=
[
points
.
size
(
0
)
for
points
in
proposals_list
[
0
]]
# concat all level points and flags to a single tensor
for
i
in
range
(
num_imgs
):
assert
len
(
proposals_list
[
i
])
==
len
(
valid_flag_list
[
i
])
proposals_list
[
i
]
=
torch
.
cat
(
proposals_list
[
i
])
valid_flag_list
[
i
]
=
torch
.
cat
(
valid_flag_list
[
i
])
# compute targets for each image
if
gt_bboxes_ignore_list
is
None
:
gt_bboxes_ignore_list
=
[
None
for
_
in
range
(
num_imgs
)]
if
gt_labels_list
is
None
:
gt_labels_list
=
[
None
for
_
in
range
(
num_imgs
)]
(
all_labels
,
all_label_weights
,
all_bbox_gt
,
all_proposals
,
all_proposal_weights
,
pos_inds_list
,
neg_inds_list
)
=
multi_apply
(
self
.
_point_target_single
,
proposals_list
,
valid_flag_list
,
gt_bboxes_list
,
gt_bboxes_ignore_list
,
gt_labels_list
,
stage
=
stage
,
label_channels
=
label_channels
,
unmap_outputs
=
unmap_outputs
)
# no valid points
if
any
([
labels
is
None
for
labels
in
all_labels
]):
return
None
# sampled points of all images
num_total_pos
=
sum
([
max
(
inds
.
numel
(),
1
)
for
inds
in
pos_inds_list
])
num_total_neg
=
sum
([
max
(
inds
.
numel
(),
1
)
for
inds
in
neg_inds_list
])
labels_list
=
images_to_levels
(
all_labels
,
num_level_proposals
)
label_weights_list
=
images_to_levels
(
all_label_weights
,
num_level_proposals
)
bbox_gt_list
=
images_to_levels
(
all_bbox_gt
,
num_level_proposals
)
proposals_list
=
images_to_levels
(
all_proposals
,
num_level_proposals
)
proposal_weights_list
=
images_to_levels
(
all_proposal_weights
,
num_level_proposals
)
return
(
labels_list
,
label_weights_list
,
bbox_gt_list
,
proposals_list
,
proposal_weights_list
,
num_total_pos
,
num_total_neg
)
def
loss_single
(
self
,
cls_score
,
pts_pred_init
,
pts_pred_refine
,
labels
,
label_weights
,
bbox_gt_init
,
bbox_weights_init
,
bbox_gt_refine
,
bbox_weights_refine
,
stride
,
num_total_samples_init
,
num_total_samples_refine
):
# classification loss
labels
=
labels
.
reshape
(
-
1
)
label_weights
=
label_weights
.
reshape
(
-
1
)
cls_score
=
cls_score
.
permute
(
0
,
2
,
3
,
1
).
reshape
(
-
1
,
self
.
cls_out_channels
)
cls_score
=
cls_score
.
contiguous
()
loss_cls
=
self
.
loss_cls
(
cls_score
,
labels
,
label_weights
,
avg_factor
=
num_total_samples_refine
)
# points loss
bbox_gt_init
=
bbox_gt_init
.
reshape
(
-
1
,
4
)
bbox_weights_init
=
bbox_weights_init
.
reshape
(
-
1
,
4
)
bbox_pred_init
=
self
.
points2bbox
(
pts_pred_init
.
reshape
(
-
1
,
2
*
self
.
num_points
),
y_first
=
False
)
bbox_gt_refine
=
bbox_gt_refine
.
reshape
(
-
1
,
4
)
bbox_weights_refine
=
bbox_weights_refine
.
reshape
(
-
1
,
4
)
bbox_pred_refine
=
self
.
points2bbox
(
pts_pred_refine
.
reshape
(
-
1
,
2
*
self
.
num_points
),
y_first
=
False
)
normalize_term
=
self
.
point_base_scale
*
stride
loss_pts_init
=
self
.
loss_bbox_init
(
bbox_pred_init
/
normalize_term
,
bbox_gt_init
/
normalize_term
,
bbox_weights_init
,
avg_factor
=
num_total_samples_init
)
loss_pts_refine
=
self
.
loss_bbox_refine
(
bbox_pred_refine
/
normalize_term
,
bbox_gt_refine
/
normalize_term
,
bbox_weights_refine
,
avg_factor
=
num_total_samples_refine
)
return
loss_cls
,
loss_pts_init
,
loss_pts_refine
def
loss
(
self
,
cls_scores
,
pts_preds_init
,
pts_preds_refine
,
gt_bboxes
,
gt_labels
,
img_metas
,
gt_bboxes_ignore
=
None
):
featmap_sizes
=
[
featmap
.
size
()[
-
2
:]
for
featmap
in
cls_scores
]
assert
len
(
featmap_sizes
)
==
len
(
self
.
point_generators
)
device
=
cls_scores
[
0
].
device
label_channels
=
self
.
cls_out_channels
if
self
.
use_sigmoid_cls
else
1
# target for initial stage
center_list
,
valid_flag_list
=
self
.
get_points
(
featmap_sizes
,
img_metas
,
device
)
pts_coordinate_preds_init
=
self
.
offset_to_pts
(
center_list
,
pts_preds_init
)
if
self
.
train_cfg
.
init
.
assigner
[
'type'
]
==
'PointAssigner'
:
# Assign target for center list
candidate_list
=
center_list
else
:
# transform center list to bbox list and
# assign target for bbox list
bbox_list
=
self
.
centers_to_bboxes
(
center_list
)
candidate_list
=
bbox_list
cls_reg_targets_init
=
self
.
get_targets
(
candidate_list
,
valid_flag_list
,
gt_bboxes
,
img_metas
,
gt_bboxes_ignore_list
=
gt_bboxes_ignore
,
gt_labels_list
=
gt_labels
,
stage
=
'init'
,
label_channels
=
label_channels
)
(
*
_
,
bbox_gt_list_init
,
candidate_list_init
,
bbox_weights_list_init
,
num_total_pos_init
,
num_total_neg_init
)
=
cls_reg_targets_init
num_total_samples_init
=
(
num_total_pos_init
+
num_total_neg_init
if
self
.
sampling
else
num_total_pos_init
)
# target for refinement stage
center_list
,
valid_flag_list
=
self
.
get_points
(
featmap_sizes
,
img_metas
,
device
)
pts_coordinate_preds_refine
=
self
.
offset_to_pts
(
center_list
,
pts_preds_refine
)
bbox_list
=
[]
for
i_img
,
center
in
enumerate
(
center_list
):
bbox
=
[]
for
i_lvl
in
range
(
len
(
pts_preds_refine
)):
bbox_preds_init
=
self
.
points2bbox
(
pts_preds_init
[
i_lvl
].
detach
())
bbox_shift
=
bbox_preds_init
*
self
.
point_strides
[
i_lvl
]
bbox_center
=
torch
.
cat
(
[
center
[
i_lvl
][:,
:
2
],
center
[
i_lvl
][:,
:
2
]],
dim
=
1
)
bbox
.
append
(
bbox_center
+
bbox_shift
[
i_img
].
permute
(
1
,
2
,
0
).
reshape
(
-
1
,
4
))
bbox_list
.
append
(
bbox
)
cls_reg_targets_refine
=
self
.
get_targets
(
bbox_list
,
valid_flag_list
,
gt_bboxes
,
img_metas
,
gt_bboxes_ignore_list
=
gt_bboxes_ignore
,
gt_labels_list
=
gt_labels
,
stage
=
'refine'
,
label_channels
=
label_channels
)
(
labels_list
,
label_weights_list
,
bbox_gt_list_refine
,
candidate_list_refine
,
bbox_weights_list_refine
,
num_total_pos_refine
,
num_total_neg_refine
)
=
cls_reg_targets_refine
num_total_samples_refine
=
(
num_total_pos_refine
+
num_total_neg_refine
if
self
.
sampling
else
num_total_pos_refine
)
# compute loss
losses_cls
,
losses_pts_init
,
losses_pts_refine
=
multi_apply
(
self
.
loss_single
,
cls_scores
,
pts_coordinate_preds_init
,
pts_coordinate_preds_refine
,
labels_list
,
label_weights_list
,
bbox_gt_list_init
,
bbox_weights_list_init
,
bbox_gt_list_refine
,
bbox_weights_list_refine
,
self
.
point_strides
,
num_total_samples_init
=
num_total_samples_init
,
num_total_samples_refine
=
num_total_samples_refine
)
loss_dict_all
=
{
'loss_cls'
:
losses_cls
,
'loss_pts_init'
:
losses_pts_init
,
'loss_pts_refine'
:
losses_pts_refine
}
return
loss_dict_all
def
get_bboxes
(
self
,
cls_scores
,
pts_preds_init
,
pts_preds_refine
,
img_metas
,
cfg
=
None
,
rescale
=
False
,
with_nms
=
True
):
assert
len
(
cls_scores
)
==
len
(
pts_preds_refine
)
device
=
cls_scores
[
0
].
device
bbox_preds_refine
=
[
self
.
points2bbox
(
pts_pred_refine
)
for
pts_pred_refine
in
pts_preds_refine
]
num_levels
=
len
(
cls_scores
)
mlvl_points
=
[
self
.
point_generators
[
i
].
grid_points
(
cls_scores
[
i
].
size
()[
-
2
:],
self
.
point_strides
[
i
],
device
)
for
i
in
range
(
num_levels
)
]
result_list
=
[]
for
img_id
in
range
(
len
(
img_metas
)):
cls_score_list
=
[
cls_scores
[
i
][
img_id
].
detach
()
for
i
in
range
(
num_levels
)
]
bbox_pred_list
=
[
bbox_preds_refine
[
i
][
img_id
].
detach
()
for
i
in
range
(
num_levels
)
]
img_shape
=
img_metas
[
img_id
][
'img_shape'
]
scale_factor
=
img_metas
[
img_id
][
'scale_factor'
]
proposals
=
self
.
_get_bboxes_single
(
cls_score_list
,
bbox_pred_list
,
mlvl_points
,
img_shape
,
scale_factor
,
cfg
,
rescale
,
with_nms
)
result_list
.
append
(
proposals
)
return
result_list
def
_get_bboxes_single
(
self
,
cls_scores
,
bbox_preds
,
mlvl_points
,
img_shape
,
scale_factor
,
cfg
,
rescale
=
False
,
with_nms
=
True
):
cfg
=
self
.
test_cfg
if
cfg
is
None
else
cfg
assert
len
(
cls_scores
)
==
len
(
bbox_preds
)
==
len
(
mlvl_points
)
mlvl_bboxes
=
[]
mlvl_scores
=
[]
for
i_lvl
,
(
cls_score
,
bbox_pred
,
points
)
in
enumerate
(
zip
(
cls_scores
,
bbox_preds
,
mlvl_points
)):
assert
cls_score
.
size
()[
-
2
:]
==
bbox_pred
.
size
()[
-
2
:]
cls_score
=
cls_score
.
permute
(
1
,
2
,
0
).
reshape
(
-
1
,
self
.
cls_out_channels
)
if
self
.
use_sigmoid_cls
:
scores
=
cls_score
.
sigmoid
()
else
:
scores
=
cls_score
.
softmax
(
-
1
)
bbox_pred
=
bbox_pred
.
permute
(
1
,
2
,
0
).
reshape
(
-
1
,
4
)
nms_pre
=
cfg
.
get
(
'nms_pre'
,
-
1
)
if
nms_pre
>
0
and
scores
.
shape
[
0
]
>
nms_pre
:
if
self
.
use_sigmoid_cls
:
max_scores
,
_
=
scores
.
max
(
dim
=
1
)
else
:
# remind that we set FG labels to [0, num_class-1]
# since mmdet v2.0
# BG cat_id: num_class
max_scores
,
_
=
scores
[:,
:
-
1
].
max
(
dim
=
1
)
_
,
topk_inds
=
max_scores
.
topk
(
nms_pre
)
points
=
points
[
topk_inds
,
:]
bbox_pred
=
bbox_pred
[
topk_inds
,
:]
scores
=
scores
[
topk_inds
,
:]
bbox_pos_center
=
torch
.
cat
([
points
[:,
:
2
],
points
[:,
:
2
]],
dim
=
1
)
bboxes
=
bbox_pred
*
self
.
point_strides
[
i_lvl
]
+
bbox_pos_center
x1
=
bboxes
[:,
0
].
clamp
(
min
=
0
,
max
=
img_shape
[
1
])
y1
=
bboxes
[:,
1
].
clamp
(
min
=
0
,
max
=
img_shape
[
0
])
x2
=
bboxes
[:,
2
].
clamp
(
min
=
0
,
max
=
img_shape
[
1
])
y2
=
bboxes
[:,
3
].
clamp
(
min
=
0
,
max
=
img_shape
[
0
])
bboxes
=
torch
.
stack
([
x1
,
y1
,
x2
,
y2
],
dim
=-
1
)
mlvl_bboxes
.
append
(
bboxes
)
mlvl_scores
.
append
(
scores
)
mlvl_bboxes
=
torch
.
cat
(
mlvl_bboxes
)
if
rescale
:
mlvl_bboxes
/=
mlvl_bboxes
.
new_tensor
(
scale_factor
)
mlvl_scores
=
torch
.
cat
(
mlvl_scores
)
if
self
.
use_sigmoid_cls
:
# Add a dummy background class to the backend when using sigmoid
# remind that we set FG labels to [0, num_class-1] since mmdet v2.0
# BG cat_id: num_class
padding
=
mlvl_scores
.
new_zeros
(
mlvl_scores
.
shape
[
0
],
1
)
mlvl_scores
=
torch
.
cat
([
mlvl_scores
,
padding
],
dim
=
1
)
if
with_nms
:
det_bboxes
,
det_labels
=
multiclass_nms
(
mlvl_bboxes
,
mlvl_scores
,
cfg
.
score_thr
,
cfg
.
nms
,
cfg
.
max_per_img
)
return
det_bboxes
,
det_labels
else
:
return
mlvl_bboxes
,
mlvl_scores
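The 'minmax' branch of points2bbox above simply takes the per-axis extremes of the point set. A minimal standalone sketch (not the class method; the function name and the (batch, 2 * num_points) layout with x-first ordering are assumptions made purely for illustration):

# Sketch of the 'minmax' transform in points2bbox, assuming the flattened
# (batch, 2 * num_points) layout with y_first=False.
import torch

def minmax_points2bbox(pts):
    # pts: (batch, 2 * num_points) laid out as [x1, y1, x2, y2, ...]
    pts_reshape = pts.view(pts.shape[0], -1, 2)
    pts_x = pts_reshape[:, :, 0]
    pts_y = pts_reshape[:, :, 1]
    bbox_left = pts_x.min(dim=1, keepdim=True)[0]
    bbox_right = pts_x.max(dim=1, keepdim=True)[0]
    bbox_up = pts_y.min(dim=1, keepdim=True)[0]
    bbox_bottom = pts_y.max(dim=1, keepdim=True)[0]
    return torch.cat([bbox_left, bbox_up, bbox_right, bbox_bottom], dim=1)

# Three points (1, 2), (4, 6), (3, 3) -> enclosing box [1, 2, 4, 6]
pts = torch.tensor([[1., 2., 4., 6., 3., 3.]])
print(minmax_points2bbox(pts))  # tensor([[1., 2., 4., 6.]])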
PyTorch/NLP/Conformer-main/mmdetection/mmdet/models/dense_heads/retina_head.py (new file)
import torch.nn as nn
from mmcv.cnn import ConvModule, bias_init_with_prob, normal_init

from ..builder import HEADS
from .anchor_head import AnchorHead


@HEADS.register_module()
class RetinaHead(AnchorHead):
    r"""An anchor-based head used in `RetinaNet
    <https://arxiv.org/pdf/1708.02002.pdf>`_.

    The head contains two subnetworks. The first classifies anchor boxes and
    the second regresses deltas for the anchors.

    Example:
        >>> import torch
        >>> self = RetinaHead(11, 7)
        >>> x = torch.rand(1, 7, 32, 32)
        >>> cls_score, bbox_pred = self.forward_single(x)
        >>> # Each anchor predicts a score for each class except background
        >>> cls_per_anchor = cls_score.shape[1] / self.num_anchors
        >>> box_per_anchor = bbox_pred.shape[1] / self.num_anchors
        >>> assert cls_per_anchor == (self.num_classes)
        >>> assert box_per_anchor == 4
    """

    def __init__(self,
                 num_classes,
                 in_channels,
                 stacked_convs=4,
                 conv_cfg=None,
                 norm_cfg=None,
                 anchor_generator=dict(
                     type='AnchorGenerator',
                     octave_base_scale=4,
                     scales_per_octave=3,
                     ratios=[0.5, 1.0, 2.0],
                     strides=[8, 16, 32, 64, 128]),
                 **kwargs):
        self.stacked_convs = stacked_convs
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        super(RetinaHead, self).__init__(
            num_classes,
            in_channels,
            anchor_generator=anchor_generator,
            **kwargs)

    def _init_layers(self):
        """Initialize layers of the head."""
        self.relu = nn.ReLU(inplace=True)
        self.cls_convs = nn.ModuleList()
        self.reg_convs = nn.ModuleList()
        for i in range(self.stacked_convs):
            chn = self.in_channels if i == 0 else self.feat_channels
            self.cls_convs.append(
                ConvModule(
                    chn,
                    self.feat_channels,
                    3,
                    stride=1,
                    padding=1,
                    conv_cfg=self.conv_cfg,
                    norm_cfg=self.norm_cfg))
            self.reg_convs.append(
                ConvModule(
                    chn,
                    self.feat_channels,
                    3,
                    stride=1,
                    padding=1,
                    conv_cfg=self.conv_cfg,
                    norm_cfg=self.norm_cfg))
        self.retina_cls = nn.Conv2d(
            self.feat_channels,
            self.num_anchors * self.cls_out_channels,
            3,
            padding=1)
        self.retina_reg = nn.Conv2d(
            self.feat_channels, self.num_anchors * 4, 3, padding=1)

    def init_weights(self):
        """Initialize weights of the head."""
        for m in self.cls_convs:
            normal_init(m.conv, std=0.01)
        for m in self.reg_convs:
            normal_init(m.conv, std=0.01)
        bias_cls = bias_init_with_prob(0.01)
        normal_init(self.retina_cls, std=0.01, bias=bias_cls)
        normal_init(self.retina_reg, std=0.01)

    def forward_single(self, x):
        """Forward feature of a single scale level.

        Args:
            x (Tensor): Features of a single scale level.

        Returns:
            tuple:
                cls_score (Tensor): Cls scores for a single scale level,
                    the channels number is num_anchors * num_classes.
                bbox_pred (Tensor): Box energies / deltas for a single scale
                    level, the channels number is num_anchors * 4.
        """
        cls_feat = x
        reg_feat = x
        for cls_conv in self.cls_convs:
            cls_feat = cls_conv(cls_feat)
        for reg_conv in self.reg_convs:
            reg_feat = reg_conv(reg_feat)
        cls_score = self.retina_cls(cls_feat)
        bbox_pred = self.retina_reg(reg_feat)
        return cls_score, bbox_pred
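Building on the docstring example, a quick multi-level shape check might look like the sketch below. The channel and level sizes are made up; `forward` is inherited from AnchorHead, which fans `forward_single` over the FPN levels via `multi_apply`:

import torch

# Hypothetical numbers purely for a shape check; not a trained configuration.
head = RetinaHead(num_classes=11, in_channels=7)
feats = [torch.rand(1, 7, s, s) for s in (32, 16, 8)]  # three fake FPN levels
cls_scores, bbox_preds = head.forward(feats)
for cls_score, bbox_pred in zip(cls_scores, bbox_preds):
    # One score per (anchor, class) and four box deltas per anchor.
    assert cls_score.shape[1] == head.num_anchors * head.cls_out_channels
    assert bbox_pred.shape[1] == head.num_anchors * 4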
PyTorch/NLP/Conformer-main/mmdetection/mmdet/models/dense_heads/retina_sepbn_head.py (new file)
import torch.nn as nn
from mmcv.cnn import ConvModule, bias_init_with_prob, normal_init

from ..builder import HEADS
from .anchor_head import AnchorHead


@HEADS.register_module()
class RetinaSepBNHead(AnchorHead):
    """RetinaHead with separate BN.

    In RetinaHead, conv/norm layers are shared across different FPN levels,
    while in RetinaSepBNHead, conv layers are shared across different FPN
    levels, but BN layers are separated.
    """

    def __init__(self,
                 num_classes,
                 num_ins,
                 in_channels,
                 stacked_convs=4,
                 conv_cfg=None,
                 norm_cfg=None,
                 **kwargs):
        self.stacked_convs = stacked_convs
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.num_ins = num_ins
        super(RetinaSepBNHead, self).__init__(num_classes, in_channels,
                                              **kwargs)

    def _init_layers(self):
        """Initialize layers of the head."""
        self.relu = nn.ReLU(inplace=True)
        self.cls_convs = nn.ModuleList()
        self.reg_convs = nn.ModuleList()
        for i in range(self.num_ins):
            cls_convs = nn.ModuleList()
            reg_convs = nn.ModuleList()
            for j in range(self.stacked_convs):
                chn = self.in_channels if j == 0 else self.feat_channels
                cls_convs.append(
                    ConvModule(
                        chn,
                        self.feat_channels,
                        3,
                        stride=1,
                        padding=1,
                        conv_cfg=self.conv_cfg,
                        norm_cfg=self.norm_cfg))
                reg_convs.append(
                    ConvModule(
                        chn,
                        self.feat_channels,
                        3,
                        stride=1,
                        padding=1,
                        conv_cfg=self.conv_cfg,
                        norm_cfg=self.norm_cfg))
            self.cls_convs.append(cls_convs)
            self.reg_convs.append(reg_convs)
        # share the conv weights (but not the norm layers) across levels
        for i in range(self.stacked_convs):
            for j in range(1, self.num_ins):
                self.cls_convs[j][i].conv = self.cls_convs[0][i].conv
                self.reg_convs[j][i].conv = self.reg_convs[0][i].conv
        self.retina_cls = nn.Conv2d(
            self.feat_channels,
            self.num_anchors * self.cls_out_channels,
            3,
            padding=1)
        self.retina_reg = nn.Conv2d(
            self.feat_channels, self.num_anchors * 4, 3, padding=1)

    def init_weights(self):
        """Initialize weights of the head."""
        for m in self.cls_convs[0]:
            normal_init(m.conv, std=0.01)
        for m in self.reg_convs[0]:
            normal_init(m.conv, std=0.01)
        bias_cls = bias_init_with_prob(0.01)
        normal_init(self.retina_cls, std=0.01, bias=bias_cls)
        normal_init(self.retina_reg, std=0.01)

    def forward(self, feats):
        """Forward features from the upstream network.

        Args:
            feats (tuple[Tensor]): Features from the upstream network, each is
                a 4D-tensor.

        Returns:
            tuple: Usually a tuple of classification scores and bbox prediction
                cls_scores (list[Tensor]): Classification scores for all scale
                    levels, each is a 4D-tensor, the channels number is
                    num_anchors * num_classes.
                bbox_preds (list[Tensor]): Box energies / deltas for all scale
                    levels, each is a 4D-tensor, the channels number is
                    num_anchors * 4.
        """
        cls_scores = []
        bbox_preds = []
        for i, x in enumerate(feats):
            cls_feat = feats[i]
            reg_feat = feats[i]
            for cls_conv in self.cls_convs[i]:
                cls_feat = cls_conv(cls_feat)
            for reg_conv in self.reg_convs[i]:
                reg_feat = reg_conv(reg_feat)
            cls_score = self.retina_cls(cls_feat)
            bbox_pred = self.retina_reg(reg_feat)
            cls_scores.append(cls_score)
            bbox_preds.append(bbox_pred)
        return cls_scores, bbox_preds
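The conv-sharing wiring in _init_layers can be sanity-checked directly: at each depth the conv object is identical across levels, while the norm layers remain per-level. A sketch, assuming mmcv's ConvModule exposes its norm layer through the .norm property and using made-up sizes:

# Hypothetical sizes; num_ins is the number of FPN levels. norm_cfg must be
# set for the per-level BN layers to exist at all.
head = RetinaSepBNHead(
    num_classes=4, num_ins=5, in_channels=256,
    norm_cfg=dict(type='BN', requires_grad=True))
# Conv weights are the same object across levels at each depth...
assert head.cls_convs[1][0].conv is head.cls_convs[0][0].conv
# ...but the BN layers are separate per level (assumed .norm accessor).
assert head.cls_convs[1][0].norm is not head.cls_convs[0][0].norm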
PyTorch/NLP/Conformer-main/mmdetection/mmdet/models/dense_heads/rpn_head.py (new file)
import torch
import torch.nn as nn
import torch.nn.functional as F
from mmcv.cnn import normal_init
from mmcv.ops import batched_nms

from ..builder import HEADS
from .anchor_head import AnchorHead
from .rpn_test_mixin import RPNTestMixin


@HEADS.register_module()
class RPNHead(RPNTestMixin, AnchorHead):
    """RPN head.

    Args:
        in_channels (int): Number of channels in the input feature map.
    """  # noqa: W605

    def __init__(self, in_channels, **kwargs):
        super(RPNHead, self).__init__(1, in_channels, **kwargs)

    def _init_layers(self):
        """Initialize layers of the head."""
        self.rpn_conv = nn.Conv2d(
            self.in_channels, self.feat_channels, 3, padding=1)
        self.rpn_cls = nn.Conv2d(self.feat_channels,
                                 self.num_anchors * self.cls_out_channels, 1)
        self.rpn_reg = nn.Conv2d(self.feat_channels, self.num_anchors * 4, 1)

    def init_weights(self):
        """Initialize weights of the head."""
        normal_init(self.rpn_conv, std=0.01)
        normal_init(self.rpn_cls, std=0.01)
        normal_init(self.rpn_reg, std=0.01)

    def forward_single(self, x):
        """Forward feature map of a single scale level."""
        x = self.rpn_conv(x)
        x = F.relu(x, inplace=True)
        rpn_cls_score = self.rpn_cls(x)
        rpn_bbox_pred = self.rpn_reg(x)
        return rpn_cls_score, rpn_bbox_pred

    def loss(self,
             cls_scores,
             bbox_preds,
             gt_bboxes,
             img_metas,
             gt_bboxes_ignore=None):
        """Compute losses of the head.

        Args:
            cls_scores (list[Tensor]): Box scores for each scale level,
                has shape (N, num_anchors * num_classes, H, W).
            bbox_preds (list[Tensor]): Box energies / deltas for each scale
                level with shape (N, num_anchors * 4, H, W).
            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with
                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.
            img_metas (list[dict]): Meta information of each image, e.g.,
                image size, scaling factor, etc.
            gt_bboxes_ignore (None | list[Tensor]): specify which bounding
                boxes can be ignored when computing the loss.

        Returns:
            dict[str, Tensor]: A dictionary of loss components.
        """
        losses = super(RPNHead, self).loss(
            cls_scores,
            bbox_preds,
            gt_bboxes,
            None,
            img_metas,
            gt_bboxes_ignore=gt_bboxes_ignore)
        return dict(
            loss_rpn_cls=losses['loss_cls'], loss_rpn_bbox=losses['loss_bbox'])

    def _get_bboxes_single(self,
                           cls_scores,
                           bbox_preds,
                           mlvl_anchors,
                           img_shape,
                           scale_factor,
                           cfg,
                           rescale=False):
        """Transform outputs for a single batch item into bbox predictions.

        Args:
            cls_scores (list[Tensor]): Box scores for each scale level,
                has shape (num_anchors * num_classes, H, W).
            bbox_preds (list[Tensor]): Box energies / deltas for each scale
                level with shape (num_anchors * 4, H, W).
            mlvl_anchors (list[Tensor]): Box reference for each scale level
                with shape (num_total_anchors, 4).
            img_shape (tuple[int]): Shape of the input image,
                (height, width, 3).
            scale_factor (ndarray): Scale factor of the image arranged as
                (w_scale, h_scale, w_scale, h_scale).
            cfg (mmcv.Config): Test / postprocessing configuration,
                if None, test_cfg would be used.
            rescale (bool): If True, return boxes in original image space.

        Returns:
            Tensor: Labeled boxes in shape (n, 5), where the first 4 columns
                are bounding box positions (tl_x, tl_y, br_x, br_y) and the
                5-th column is a score between 0 and 1.
        """
        cfg = self.test_cfg if cfg is None else cfg
        # bboxes from different level should be independent during NMS,
        # level_ids are used as labels for batched NMS to separate them
        level_ids = []
        mlvl_scores = []
        mlvl_bbox_preds = []
        mlvl_valid_anchors = []
        for idx in range(len(cls_scores)):
            rpn_cls_score = cls_scores[idx]
            rpn_bbox_pred = bbox_preds[idx]
            assert rpn_cls_score.size()[-2:] == rpn_bbox_pred.size()[-2:]
            rpn_cls_score = rpn_cls_score.permute(1, 2, 0)
            if self.use_sigmoid_cls:
                rpn_cls_score = rpn_cls_score.reshape(-1)
                scores = rpn_cls_score.sigmoid()
            else:
                rpn_cls_score = rpn_cls_score.reshape(-1, 2)
                # We set FG labels to [0, num_class-1] and BG label to
                # num_class in RPN head since mmdet v2.5, which is unified to
                # be consistent with other head since mmdet v2.0. In mmdet v2.0
                # to v2.4 we keep BG label as 0 and FG label as 1 in rpn head.
                scores = rpn_cls_score.softmax(dim=1)[:, 0]
            rpn_bbox_pred = rpn_bbox_pred.permute(1, 2, 0).reshape(-1, 4)
            anchors = mlvl_anchors[idx]
            if cfg.nms_pre > 0 and scores.shape[0] > cfg.nms_pre:
                # sort is faster than topk
                # _, topk_inds = scores.topk(cfg.nms_pre)
                ranked_scores, rank_inds = scores.sort(descending=True)
                topk_inds = rank_inds[:cfg.nms_pre]
                scores = ranked_scores[:cfg.nms_pre]
                rpn_bbox_pred = rpn_bbox_pred[topk_inds, :]
                anchors = anchors[topk_inds, :]
            mlvl_scores.append(scores)
            mlvl_bbox_preds.append(rpn_bbox_pred)
            mlvl_valid_anchors.append(anchors)
            level_ids.append(
                scores.new_full((scores.size(0), ), idx, dtype=torch.long))

        scores = torch.cat(mlvl_scores)
        anchors = torch.cat(mlvl_valid_anchors)
        rpn_bbox_pred = torch.cat(mlvl_bbox_preds)
        proposals = self.bbox_coder.decode(
            anchors, rpn_bbox_pred, max_shape=img_shape)
        ids = torch.cat(level_ids)

        if cfg.min_bbox_size > 0:
            w = proposals[:, 2] - proposals[:, 0]
            h = proposals[:, 3] - proposals[:, 1]
            valid_inds = torch.nonzero(
                (w >= cfg.min_bbox_size) & (h >= cfg.min_bbox_size),
                as_tuple=False).squeeze()
            if valid_inds.sum().item() != len(proposals):
                proposals = proposals[valid_inds, :]
                scores = scores[valid_inds]
                ids = ids[valid_inds]

        # TODO: remove the hard coded nms type
        nms_cfg = dict(type='nms', iou_threshold=cfg.nms_thr)
        dets, keep = batched_nms(proposals, scores, ids, nms_cfg)
        return dets[:cfg.nms_post]
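The level_ids trick above keeps proposals from different FPN levels independent during NMS: batched_nms treats the ids as group labels, so boxes in different groups can never suppress each other. A minimal sketch of that behaviour with two hand-made, heavily overlapping boxes (IoU = 0.81):

import torch
from mmcv.ops import batched_nms

boxes = torch.tensor([[0., 0., 10., 10.],
                      [1., 1., 10., 10.]])  # heavily overlapping pair
scores = torch.tensor([0.9, 0.8])
same_level = torch.tensor([0, 0])
diff_level = torch.tensor([0, 1])
nms_cfg = dict(type='nms', iou_threshold=0.5)
# Same id: the lower-scoring box is suppressed.
dets, keep = batched_nms(boxes, scores, same_level, nms_cfg)
assert len(keep) == 1
# Different ids: both boxes survive, as if NMS ran per level.
dets, keep = batched_nms(boxes, scores, diff_level, nms_cfg)
assert len(keep) == 2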
PyTorch/NLP/Conformer-main/mmdetection/mmdet/models/dense_heads/rpn_test_mixin.py (new file)
import sys

from mmdet.core import merge_aug_proposals

if sys.version_info >= (3, 7):
    from mmdet.utils.contextmanagers import completed


class RPNTestMixin(object):
    """Test methods of RPN."""

    if sys.version_info >= (3, 7):

        async def async_simple_test_rpn(self, x, img_metas):
            sleep_interval = self.test_cfg.pop('async_sleep_interval', 0.025)
            async with completed(
                    __name__, 'rpn_head_forward',
                    sleep_interval=sleep_interval):
                rpn_outs = self(x)

            proposal_list = self.get_bboxes(*rpn_outs, img_metas)
            return proposal_list

    def simple_test_rpn(self, x, img_metas):
        """Test without augmentation.

        Args:
            x (tuple[Tensor]): Features from the upstream network, each is
                a 4D-tensor.
            img_metas (list[dict]): Meta info of each image.

        Returns:
            list[Tensor]: Proposals of each image.
        """
        rpn_outs = self(x)
        proposal_list = self.get_bboxes(*rpn_outs, img_metas)
        return proposal_list

    def aug_test_rpn(self, feats, img_metas):
        samples_per_gpu = len(img_metas[0])
        aug_proposals = [[] for _ in range(samples_per_gpu)]
        for x, img_meta in zip(feats, img_metas):
            proposal_list = self.simple_test_rpn(x, img_meta)
            for i, proposals in enumerate(proposal_list):
                aug_proposals[i].append(proposals)
        # reorganize the order of 'img_metas' to match the dimensions
        # of 'aug_proposals'
        aug_img_metas = []
        for i in range(samples_per_gpu):
            aug_img_meta = []
            for j in range(len(img_metas)):
                aug_img_meta.append(img_metas[j][i])
            aug_img_metas.append(aug_img_meta)
        # after merging, proposals will be rescaled to the original image size
        merged_proposals = [
            merge_aug_proposals(proposals, aug_img_meta, self.test_cfg)
            for proposals, aug_img_meta in zip(aug_proposals, aug_img_metas)
        ]
        return merged_proposals
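The img_metas reshuffling in aug_test_rpn is just a list-of-lists transpose, from [augmentation][image] to [image][augmentation], so each image's metas line up with its own aug_proposals entry. A standalone sketch with placeholder strings standing in for the meta dicts:

# img_metas arrives as [augmentation][image]; aug_proposals is built as
# [image][augmentation], so the metas are transposed to match.
img_metas = [['aug0_img0', 'aug0_img1'], ['aug1_img0', 'aug1_img1']]
aug_img_metas = [list(metas) for metas in zip(*img_metas)]
assert aug_img_metas == [['aug0_img0', 'aug1_img0'],
                         ['aug0_img1', 'aug1_img1']]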
PyTorch/NLP/Conformer-main/mmdetection/mmdet/models/dense_heads/sabl_retina_head.py (new file)
import numpy as np
import torch
import torch.nn as nn
from mmcv.cnn import ConvModule, bias_init_with_prob, normal_init
from mmcv.runner import force_fp32

from mmdet.core import (build_anchor_generator, build_assigner,
                        build_bbox_coder, build_sampler, images_to_levels,
                        multi_apply, multiclass_nms, unmap)
from ..builder import HEADS, build_loss
from .base_dense_head import BaseDenseHead
from .guided_anchor_head import GuidedAnchorHead


@HEADS.register_module()
class SABLRetinaHead(BaseDenseHead):
    """Side-Aware Boundary Localization (SABL) for RetinaNet.

    The anchor generation, assigning and sampling in SABLRetinaHead
    are the same as GuidedAnchorHead for guided anchoring.

    Please refer to https://arxiv.org/abs/1912.04260 for more details.

    Args:
        num_classes (int): Number of classes.
        in_channels (int): Number of channels in the input feature map.
        stacked_convs (int): Number of Convs for classification and
            regression branches. Defaults to 4.
        feat_channels (int): Number of hidden channels. Defaults to 256.
        approx_anchor_generator (dict): Config dict for approx generator.
        square_anchor_generator (dict): Config dict for square generator.
        conv_cfg (dict): Config dict for ConvModule. Defaults to None.
        norm_cfg (dict): Config dict for Norm Layer. Defaults to None.
        bbox_coder (dict): Config dict for bbox coder.
        reg_decoded_bbox (bool): If true, the regression loss would be
            applied directly on decoded bounding boxes, converting both
            the predicted boxes and regression targets to absolute
            coordinates format. Default False. It should be `True` when
            using `IoULoss`, `GIoULoss`, or `DIoULoss` in the bbox head.
        train_cfg (dict): Training config of SABLRetinaHead.
        test_cfg (dict): Testing config of SABLRetinaHead.
        loss_cls (dict): Config of classification loss.
        loss_bbox_cls (dict): Config of classification loss for bbox branch.
        loss_bbox_reg (dict): Config of regression loss for bbox branch.
    """

    def __init__(self,
                 num_classes,
                 in_channels,
                 stacked_convs=4,
                 feat_channels=256,
                 approx_anchor_generator=dict(
                     type='AnchorGenerator',
                     octave_base_scale=4,
                     scales_per_octave=3,
                     ratios=[0.5, 1.0, 2.0],
                     strides=[8, 16, 32, 64, 128]),
                 square_anchor_generator=dict(
                     type='AnchorGenerator',
                     ratios=[1.0],
                     scales=[4],
                     strides=[8, 16, 32, 64, 128]),
                 conv_cfg=None,
                 norm_cfg=None,
                 bbox_coder=dict(
                     type='BucketingBBoxCoder',
                     num_buckets=14,
                     scale_factor=3.0),
                 reg_decoded_bbox=False,
                 train_cfg=None,
                 test_cfg=None,
                 loss_cls=dict(
                     type='FocalLoss',
                     use_sigmoid=True,
                     gamma=2.0,
                     alpha=0.25,
                     loss_weight=1.0),
                 loss_bbox_cls=dict(
                     type='CrossEntropyLoss',
                     use_sigmoid=True,
                     loss_weight=1.5),
                 loss_bbox_reg=dict(
                     type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.5)):
        super(SABLRetinaHead, self).__init__()
        self.in_channels = in_channels
        self.num_classes = num_classes
        self.feat_channels = feat_channels
        self.num_buckets = bbox_coder['num_buckets']
        self.side_num = int(np.ceil(self.num_buckets / 2))

        assert (approx_anchor_generator['octave_base_scale'] ==
                square_anchor_generator['scales'][0])
        assert (approx_anchor_generator['strides'] ==
                square_anchor_generator['strides'])

        self.approx_anchor_generator = build_anchor_generator(
            approx_anchor_generator)
        self.square_anchor_generator = build_anchor_generator(
            square_anchor_generator)
        self.approxs_per_octave = (
            self.approx_anchor_generator.num_base_anchors[0])

        # one anchor per location
        self.num_anchors = 1
        self.stacked_convs = stacked_convs
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg

        self.reg_decoded_bbox = reg_decoded_bbox

        self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)
        self.sampling = loss_cls['type'] not in [
            'FocalLoss', 'GHMC', 'QualityFocalLoss'
        ]
        if self.use_sigmoid_cls:
            self.cls_out_channels = num_classes
        else:
            self.cls_out_channels = num_classes + 1

        self.bbox_coder = build_bbox_coder(bbox_coder)
        self.loss_cls = build_loss(loss_cls)
        self.loss_bbox_cls = build_loss(loss_bbox_cls)
        self.loss_bbox_reg = build_loss(loss_bbox_reg)

        self.train_cfg = train_cfg
        self.test_cfg = test_cfg

        if self.train_cfg:
            self.assigner = build_assigner(self.train_cfg.assigner)
            # use PseudoSampler when sampling is False
            if self.sampling and hasattr(self.train_cfg, 'sampler'):
                sampler_cfg = self.train_cfg.sampler
            else:
                sampler_cfg = dict(type='PseudoSampler')
            self.sampler = build_sampler(sampler_cfg, context=self)

        self.fp16_enabled = False
        self._init_layers()

    def _init_layers(self):
        self.relu = nn.ReLU(inplace=True)
        self.cls_convs = nn.ModuleList()
        self.reg_convs = nn.ModuleList()
        for i in range(self.stacked_convs):
            chn = self.in_channels if i == 0 else self.feat_channels
            self.cls_convs.append(
                ConvModule(
                    chn,
                    self.feat_channels,
                    3,
                    stride=1,
                    padding=1,
                    conv_cfg=self.conv_cfg,
                    norm_cfg=self.norm_cfg))
            self.reg_convs.append(
                ConvModule(
                    chn,
                    self.feat_channels,
                    3,
                    stride=1,
                    padding=1,
                    conv_cfg=self.conv_cfg,
                    norm_cfg=self.norm_cfg))
        self.retina_cls = nn.Conv2d(
            self.feat_channels, self.cls_out_channels, 3, padding=1)
        self.retina_bbox_reg = nn.Conv2d(
            self.feat_channels, self.side_num * 4, 3, padding=1)
        self.retina_bbox_cls = nn.Conv2d(
            self.feat_channels, self.side_num * 4, 3, padding=1)

    def init_weights(self):
        for m in self.cls_convs:
            normal_init(m.conv, std=0.01)
        for m in self.reg_convs:
            normal_init(m.conv, std=0.01)
        bias_cls = bias_init_with_prob(0.01)
        normal_init(self.retina_cls, std=0.01, bias=bias_cls)
        normal_init(self.retina_bbox_reg, std=0.01)
        normal_init(self.retina_bbox_cls, std=0.01)

    def forward_single(self, x):
        cls_feat = x
        reg_feat = x
        for cls_conv in self.cls_convs:
            cls_feat = cls_conv(cls_feat)
        for reg_conv in self.reg_convs:
            reg_feat = reg_conv(reg_feat)
        cls_score = self.retina_cls(cls_feat)
        bbox_cls_pred = self.retina_bbox_cls(reg_feat)
        bbox_reg_pred = self.retina_bbox_reg(reg_feat)
        bbox_pred = (bbox_cls_pred, bbox_reg_pred)
        return cls_score, bbox_pred

    def forward(self, feats):
        return multi_apply(self.forward_single, feats)

    def get_anchors(self, featmap_sizes, img_metas, device='cuda'):
        """Get squares according to feature map sizes and guided anchors.

        Args:
            featmap_sizes (list[tuple]): Multi-level feature map sizes.
            img_metas (list[dict]): Image meta info.
            device (torch.device | str): device for returned tensors

        Returns:
            tuple: square approxs of each image
        """
        num_imgs = len(img_metas)
        # since feature map sizes of all images are the same, we only compute
        # squares for one time
        multi_level_squares = self.square_anchor_generator.grid_anchors(
            featmap_sizes, device=device)
        squares_list = [multi_level_squares for _ in range(num_imgs)]
        return squares_list

    def get_target(self,
                   approx_list,
                   inside_flag_list,
                   square_list,
                   gt_bboxes_list,
                   img_metas,
                   gt_bboxes_ignore_list=None,
                   gt_labels_list=None,
                   label_channels=None,
                   sampling=True,
                   unmap_outputs=True):
        """Compute bucketing targets.

        Args:
            approx_list (list[list]): Multi level approxs of each image.
            inside_flag_list (list[list]): Multi level inside flags of each
                image.
            square_list (list[list]): Multi level squares of each image.
            gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image.
            img_metas (list[dict]): Meta info of each image.
            gt_bboxes_ignore_list (list[Tensor]): ignore list of gt bboxes.
            gt_labels_list (list[Tensor]): Gt labels of each image.
            label_channels (int): Channel of label.
            sampling (bool): Sample Anchors or not.
            unmap_outputs (bool): unmap outputs or not.

        Returns:
            tuple: Returns a tuple containing learning targets.

                - labels_list (list[Tensor]): Labels of each level.
                - label_weights_list (list[Tensor]): Label weights of each
                  level.
                - bbox_cls_targets_list (list[Tensor]): BBox cls targets of
                  each level.
                - bbox_cls_weights_list (list[Tensor]): BBox cls weights of
                  each level.
                - bbox_reg_targets_list (list[Tensor]): BBox reg targets of
                  each level.
                - bbox_reg_weights_list (list[Tensor]): BBox reg weights of
                  each level.
                - num_total_pos (int): Number of positive samples in all
                  images.
                - num_total_neg (int): Number of negative samples in all
                  images.
        """
        num_imgs = len(img_metas)
        assert len(approx_list) == len(inside_flag_list) == len(
            square_list) == num_imgs
        # anchor number of multi levels
        num_level_squares = [squares.size(0) for squares in square_list[0]]
        # concat all level anchors and flags to a single tensor
        inside_flag_flat_list = []
        approx_flat_list = []
        square_flat_list = []
        for i in range(num_imgs):
            assert len(square_list[i]) == len(inside_flag_list[i])
            inside_flag_flat_list.append(torch.cat(inside_flag_list[i]))
            approx_flat_list.append(torch.cat(approx_list[i]))
            square_flat_list.append(torch.cat(square_list[i]))

        # compute targets for each image
        if gt_bboxes_ignore_list is None:
            gt_bboxes_ignore_list = [None for _ in range(num_imgs)]
        if gt_labels_list is None:
            gt_labels_list = [None for _ in range(num_imgs)]
        (all_labels, all_label_weights, all_bbox_cls_targets,
         all_bbox_cls_weights, all_bbox_reg_targets, all_bbox_reg_weights,
         pos_inds_list, neg_inds_list) = multi_apply(
             self._get_target_single,
             approx_flat_list,
             inside_flag_flat_list,
             square_flat_list,
             gt_bboxes_list,
             gt_bboxes_ignore_list,
             gt_labels_list,
             img_metas,
             label_channels=label_channels,
             sampling=sampling,
             unmap_outputs=unmap_outputs)
        # no valid anchors
        if any([labels is None for labels in all_labels]):
            return None
        # sampled anchors of all images
        num_total_pos = sum([max(inds.numel(), 1) for inds in pos_inds_list])
        num_total_neg = sum([max(inds.numel(), 1) for inds in neg_inds_list])
        # split targets to a list w.r.t. multiple levels
        labels_list = images_to_levels(all_labels, num_level_squares)
        label_weights_list = images_to_levels(all_label_weights,
                                              num_level_squares)
        bbox_cls_targets_list = images_to_levels(all_bbox_cls_targets,
                                                 num_level_squares)
        bbox_cls_weights_list = images_to_levels(all_bbox_cls_weights,
                                                 num_level_squares)
        bbox_reg_targets_list = images_to_levels(all_bbox_reg_targets,
                                                 num_level_squares)
        bbox_reg_weights_list = images_to_levels(all_bbox_reg_weights,
                                                 num_level_squares)
        return (labels_list, label_weights_list, bbox_cls_targets_list,
                bbox_cls_weights_list, bbox_reg_targets_list,
                bbox_reg_weights_list, num_total_pos, num_total_neg)

    def _get_target_single(self,
                           flat_approxs,
                           inside_flags,
                           flat_squares,
                           gt_bboxes,
                           gt_bboxes_ignore,
                           gt_labels,
                           img_meta,
                           label_channels=None,
                           sampling=True,
                           unmap_outputs=True):
        """Compute regression and classification targets for anchors in a
        single image.

        Args:
            flat_approxs (Tensor): flat approxs of a single image,
                shape (n, 4)
            inside_flags (Tensor): inside flags of a single image,
                shape (n, ).
            flat_squares (Tensor): flat squares of a single image,
                shape (approxs_per_octave * n, 4)
            gt_bboxes (Tensor): Ground truth bboxes of a single image,
                shape (num_gts, 4).
            gt_bboxes_ignore (Tensor): Ground truth bboxes to be
                ignored, shape (num_ignored_gts, 4).
            gt_labels (Tensor): Ground truth labels of each box,
                shape (num_gts,).
            img_meta (dict): Meta info of the image.
            label_channels (int): Channel of label.
            sampling (bool): Sample Anchors or not.
            unmap_outputs (bool): unmap outputs or not.

        Returns:
            tuple:
                - labels_list (Tensor): Labels in a single image
                - label_weights (Tensor): Label weights in a single image
                - bbox_cls_targets (Tensor): BBox cls targets in a single image
                - bbox_cls_weights (Tensor): BBox cls weights in a single image
                - bbox_reg_targets (Tensor): BBox reg targets in a single image
                - bbox_reg_weights (Tensor): BBox reg weights in a single image
                - num_total_pos (int): Number of positive samples in a single
                  image
                - num_total_neg (int): Number of negative samples in a single
                  image
        """
        if not inside_flags.any():
            return (None, ) * 8
        # assign gt and sample anchors
        expand_inside_flags = inside_flags[:, None].expand(
            -1, self.approxs_per_octave).reshape(-1)
        approxs = flat_approxs[expand_inside_flags, :]
        squares = flat_squares[inside_flags, :]

        assign_result = self.assigner.assign(approxs, squares,
                                             self.approxs_per_octave,
                                             gt_bboxes, gt_bboxes_ignore)
        sampling_result = self.sampler.sample(assign_result, squares,
                                              gt_bboxes)

        num_valid_squares = squares.shape[0]
        bbox_cls_targets = squares.new_zeros(
            (num_valid_squares, self.side_num * 4))
        bbox_cls_weights = squares.new_zeros(
            (num_valid_squares, self.side_num * 4))
        bbox_reg_targets = squares.new_zeros(
            (num_valid_squares, self.side_num * 4))
        bbox_reg_weights = squares.new_zeros(
            (num_valid_squares, self.side_num * 4))
        labels = squares.new_full((num_valid_squares, ),
                                  self.num_classes,
                                  dtype=torch.long)
        label_weights = squares.new_zeros(num_valid_squares, dtype=torch.float)

        pos_inds = sampling_result.pos_inds
        neg_inds = sampling_result.neg_inds
        if len(pos_inds) > 0:
            (pos_bbox_reg_targets, pos_bbox_reg_weights, pos_bbox_cls_targets,
             pos_bbox_cls_weights) = self.bbox_coder.encode(
                 sampling_result.pos_bboxes, sampling_result.pos_gt_bboxes)
            bbox_cls_targets[pos_inds, :] = pos_bbox_cls_targets
            bbox_reg_targets[pos_inds, :] = pos_bbox_reg_targets
            bbox_cls_weights[pos_inds, :] = pos_bbox_cls_weights
            bbox_reg_weights[pos_inds, :] = pos_bbox_reg_weights
            if gt_labels is None:
                # Only rpn gives gt_labels as None
                # Foreground is the first class
                labels[pos_inds] = 0
            else:
                labels[pos_inds] = gt_labels[
                    sampling_result.pos_assigned_gt_inds]
            if self.train_cfg.pos_weight <= 0:
                label_weights[pos_inds] = 1.0
            else:
                label_weights[pos_inds] = self.train_cfg.pos_weight
        if len(neg_inds) > 0:
            label_weights[neg_inds] = 1.0

        # map up to original set of anchors
        if unmap_outputs:
            num_total_anchors = flat_squares.size(0)
            labels = unmap(
                labels, num_total_anchors, inside_flags, fill=self.num_classes)
            label_weights = unmap(label_weights, num_total_anchors,
                                  inside_flags)
            bbox_cls_targets = unmap(bbox_cls_targets, num_total_anchors,
                                     inside_flags)
            bbox_cls_weights = unmap(bbox_cls_weights, num_total_anchors,
                                     inside_flags)
            bbox_reg_targets = unmap(bbox_reg_targets, num_total_anchors,
                                     inside_flags)
            bbox_reg_weights = unmap(bbox_reg_weights, num_total_anchors,
                                     inside_flags)
        return (labels, label_weights, bbox_cls_targets, bbox_cls_weights,
                bbox_reg_targets, bbox_reg_weights, pos_inds, neg_inds)

    def loss_single(self, cls_score, bbox_pred, labels, label_weights,
                    bbox_cls_targets, bbox_cls_weights, bbox_reg_targets,
                    bbox_reg_weights, num_total_samples):
        # classification loss
        labels = labels.reshape(-1)
        label_weights = label_weights.reshape(-1)
        cls_score = cls_score.permute(0, 2, 3,
                                      1).reshape(-1, self.cls_out_channels)
        loss_cls = self.loss_cls(
            cls_score, labels, label_weights, avg_factor=num_total_samples)
        # regression loss
        bbox_cls_targets = bbox_cls_targets.reshape(-1, self.side_num * 4)
        bbox_cls_weights = bbox_cls_weights.reshape(-1, self.side_num * 4)
        bbox_reg_targets = bbox_reg_targets.reshape(-1, self.side_num * 4)
        bbox_reg_weights = bbox_reg_weights.reshape(-1, self.side_num * 4)
        (bbox_cls_pred, bbox_reg_pred) = bbox_pred
        bbox_cls_pred = bbox_cls_pred.permute(0, 2, 3, 1).reshape(
            -1, self.side_num * 4)
        bbox_reg_pred = bbox_reg_pred.permute(0, 2, 3, 1).reshape(
            -1, self.side_num * 4)
        loss_bbox_cls = self.loss_bbox_cls(
            bbox_cls_pred,
            bbox_cls_targets.long(),
            bbox_cls_weights,
            avg_factor=num_total_samples * 4 * self.side_num)
        loss_bbox_reg = self.loss_bbox_reg(
            bbox_reg_pred,
            bbox_reg_targets,
            bbox_reg_weights,
            avg_factor=num_total_samples * 4 * self.bbox_coder.offset_topk)
        return loss_cls, loss_bbox_cls, loss_bbox_reg

    @force_fp32(apply_to=('cls_scores', 'bbox_preds'))
    def loss(self,
             cls_scores,
             bbox_preds,
             gt_bboxes,
             gt_labels,
             img_metas,
             gt_bboxes_ignore=None):
        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
        assert len(featmap_sizes) == self.approx_anchor_generator.num_levels

        device = cls_scores[0].device

        # get sampled approxes
        approxs_list, inside_flag_list = GuidedAnchorHead.get_sampled_approxs(
            self, featmap_sizes, img_metas, device=device)

        square_list = self.get_anchors(featmap_sizes, img_metas, device=device)

        label_channels = self.cls_out_channels if self.use_sigmoid_cls else 1

        cls_reg_targets = self.get_target(
            approxs_list,
            inside_flag_list,
            square_list,
            gt_bboxes,
            img_metas,
            gt_bboxes_ignore_list=gt_bboxes_ignore,
            gt_labels_list=gt_labels,
            label_channels=label_channels,
            sampling=self.sampling)
        if cls_reg_targets is None:
            return None
        (labels_list, label_weights_list, bbox_cls_targets_list,
         bbox_cls_weights_list, bbox_reg_targets_list, bbox_reg_weights_list,
         num_total_pos, num_total_neg) = cls_reg_targets
        num_total_samples = (
            num_total_pos + num_total_neg if self.sampling else num_total_pos)
        losses_cls, losses_bbox_cls, losses_bbox_reg = multi_apply(
            self.loss_single,
            cls_scores,
            bbox_preds,
            labels_list,
            label_weights_list,
            bbox_cls_targets_list,
            bbox_cls_weights_list,
            bbox_reg_targets_list,
            bbox_reg_weights_list,
            num_total_samples=num_total_samples)
        return dict(
            loss_cls=losses_cls,
            loss_bbox_cls=losses_bbox_cls,
            loss_bbox_reg=losses_bbox_reg)

    @force_fp32(apply_to=('cls_scores', 'bbox_preds'))
    def get_bboxes(self,
                   cls_scores,
                   bbox_preds,
                   img_metas,
                   cfg=None,
                   rescale=False):
        assert len(cls_scores) == len(bbox_preds)
        num_levels = len(cls_scores)
        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]

        device = cls_scores[0].device
        mlvl_anchors = self.get_anchors(featmap_sizes, img_metas,
                                        device=device)
        result_list = []
        for img_id in range(len(img_metas)):
            cls_score_list = [
                cls_scores[i][img_id].detach() for i in range(num_levels)
            ]
            bbox_cls_pred_list = [
                bbox_preds[i][0][img_id].detach() for i in range(num_levels)
            ]
            bbox_reg_pred_list = [
                bbox_preds[i][1][img_id].detach() for i in range(num_levels)
            ]
            img_shape = img_metas[img_id]['img_shape']
            scale_factor = img_metas[img_id]['scale_factor']
            proposals = self.get_bboxes_single(cls_score_list,
                                               bbox_cls_pred_list,
                                               bbox_reg_pred_list,
                                               mlvl_anchors[img_id], img_shape,
                                               scale_factor, cfg, rescale)
            result_list.append(proposals)
        return result_list

    def get_bboxes_single(self,
                          cls_scores,
                          bbox_cls_preds,
                          bbox_reg_preds,
                          mlvl_anchors,
                          img_shape,
                          scale_factor,
                          cfg,
                          rescale=False):
        cfg = self.test_cfg if cfg is None else cfg
        mlvl_bboxes = []
        mlvl_scores = []
        mlvl_confids = []
        assert len(cls_scores) == len(bbox_cls_preds) == len(
            bbox_reg_preds) == len(mlvl_anchors)
        for cls_score, bbox_cls_pred, bbox_reg_pred, anchors in zip(
                cls_scores, bbox_cls_preds, bbox_reg_preds, mlvl_anchors):
            assert cls_score.size()[-2:] == bbox_cls_pred.size(
            )[-2:] == bbox_reg_pred.size()[-2:]
            cls_score = cls_score.permute(1, 2,
                                          0).reshape(-1, self.cls_out_channels)
            if self.use_sigmoid_cls:
                scores = cls_score.sigmoid()
            else:
                scores = cls_score.softmax(-1)
            bbox_cls_pred = bbox_cls_pred.permute(1, 2, 0).reshape(
                -1, self.side_num * 4)
            bbox_reg_pred = bbox_reg_pred.permute(1, 2, 0).reshape(
                -1, self.side_num * 4)
            nms_pre = cfg.get('nms_pre', -1)
            if nms_pre > 0 and scores.shape[0] > nms_pre:
                if self.use_sigmoid_cls:
                    max_scores, _ = scores.max(dim=1)
                else:
                    max_scores, _ = scores[:, :-1].max(dim=1)
                _, topk_inds = max_scores.topk(nms_pre)
                anchors = anchors[topk_inds, :]
                bbox_cls_pred = bbox_cls_pred[topk_inds, :]
                bbox_reg_pred = bbox_reg_pred[topk_inds, :]
                scores = scores[topk_inds, :]
            bbox_preds = [
                bbox_cls_pred.contiguous(),
                bbox_reg_pred.contiguous()
            ]
            bboxes, confids = self.bbox_coder.decode(
                anchors.contiguous(), bbox_preds, max_shape=img_shape)
            mlvl_bboxes.append(bboxes)
            mlvl_scores.append(scores)
            mlvl_confids.append(confids)
        mlvl_bboxes = torch.cat(mlvl_bboxes)
        if rescale:
            mlvl_bboxes /= mlvl_bboxes.new_tensor(scale_factor)
        mlvl_scores = torch.cat(mlvl_scores)
        mlvl_confids = torch.cat(mlvl_confids)
        if self.use_sigmoid_cls:
            padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
            mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)
        det_bboxes, det_labels = multiclass_nms(
            mlvl_bboxes,
            mlvl_scores,
            cfg.score_thr,
            cfg.nms,
            cfg.max_per_img,
            score_factors=mlvl_confids)
        return det_bboxes, det_labels
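The side_num arithmetic in __init__ follows SABL's bucketing scheme: each of the four box sides gets ceil(num_buckets / 2) buckets, so both prediction convs (retina_bbox_cls and retina_bbox_reg) emit side_num * 4 channels. A worked check for the default num_buckets=14 in the bbox_coder above:

import numpy as np

num_buckets = 14                          # default in bbox_coder above
side_num = int(np.ceil(num_buckets / 2))  # buckets assigned to each box side
assert side_num == 7
# Both retina_bbox_cls and retina_bbox_reg output side_num * 4 channels,
# i.e. one bucket set for each of the four box sides.
assert side_num * 4 == 28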