OpenDAS / mmdetection3d / Commits / 32a4328b

Commit 32a4328b (unverified), authored Feb 24, 2022 by Wenwei Zhang, committed by GitHub on Feb 24, 2022.

Bump version to V1.0.0rc0

Parents: 86cc487c, a8817998
Changes: 414 files in total; showing 20 changed files with 1145 additions and 246 deletions (+1145, -246).
mmdet3d/models/backbones/nostem_regnet.py                +1    -1
mmdet3d/models/backbones/pointnet2_sa_msg.py             +6    -2
mmdet3d/models/backbones/pointnet2_sa_ssg.py             +3    -3
mmdet3d/models/backbones/second.py                       +1    -0
mmdet3d/models/builder.py                                +1    -0
mmdet3d/models/decode_heads/__init__.py                  +2    -1
mmdet3d/models/decode_heads/decode_head.py               +11   -9
mmdet3d/models/decode_heads/dgcnn_head.py                +67   -0
mmdet3d/models/decode_heads/paconv_head.py               +1    -1
mmdet3d/models/decode_heads/pointnet2_head.py            +1    -1
mmdet3d/models/dense_heads/__init__.py                   +6    -1
mmdet3d/models/dense_heads/anchor3d_head.py              +18   -14
mmdet3d/models/dense_heads/anchor_free_mono3d_head.py    +81   -47
mmdet3d/models/dense_heads/base_mono3d_dense_head.py     +1    -0
mmdet3d/models/dense_heads/centerpoint_head.py           +57   -56
mmdet3d/models/dense_heads/fcos_mono3d_head.py           +87   -82
mmdet3d/models/dense_heads/free_anchor3d_head.py         +1    -0
mmdet3d/models/dense_heads/groupfree3d_head.py           +19   -18
mmdet3d/models/dense_heads/monoflex_head.py              +771  -0
mmdet3d/models/dense_heads/parta2_rpn_head.py            +10   -10
mmdet3d/models/backbones/nostem_regnet.py

@@ -16,7 +16,7 @@ class NoStemRegNet(RegNet):
         - wm (float): Quantization parameter to quantize the width.
         - depth (int): Depth of the backbone.
         - group_w (int): Width of group.
-        - bot_mul (float): Bottleneck ratio, i.e. expansion of bottlneck.
+        - bot_mul (float): Bottleneck ratio, i.e. expansion of bottleneck.
         strides (Sequence[int]): Strides of the first block of each stage.
         base_channels (int): Base channels after stem layer.
         in_channels (int): Number of input image channels. Normally 3.
mmdet3d/models/backbones/pointnet2_sa_msg.py

@@ -64,7 +64,11 @@ class PointNet2SAMSG(BasePointNet):
         self.out_indices = out_indices
         assert max(out_indices) < self.num_sa
         assert len(num_points) == len(radii) == len(num_samples) == len(
-            sa_channels) == len(aggregation_channels)
+            sa_channels)
+        if aggregation_channels is not None:
+            assert len(sa_channels) == len(aggregation_channels)
+        else:
+            aggregation_channels = [None] * len(sa_channels)

         self.SA_modules = nn.ModuleList()
         self.aggregation_mlps = nn.ModuleList()
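The new branch can be exercised in isolation; a minimal sketch (not part of the diff) of the None-padding idiom introduced above, so later per-SA-module loops can zip the two lists evenly:

    sa_channels = ((16, 32), (32, 64), (64, 128))
    aggregation_channels = None
    if aggregation_channels is None:
        # pad with None so every SA module gets an (optional) aggregation MLP slot
        aggregation_channels = [None] * len(sa_channels)
    assert len(aggregation_channels) == len(sa_channels)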
@@ -134,7 +138,7 @@ class PointNet2SAMSG(BasePointNet):
                 - sa_xyz (torch.Tensor): The coordinates of sa features.
                 - sa_features (torch.Tensor): The features from the
                     last Set Aggregation Layers.
-                - sa_indices (torch.Tensor): Indices of the \
+                - sa_indices (torch.Tensor): Indices of the
                     input points.
         """
         xyz, features = self._split_point_feats(points)
mmdet3d/models/backbones/pointnet2_sa_ssg.py

@@ -97,11 +97,11 @@ class PointNet2SASSG(BasePointNet):
         Returns:
             dict[str, list[torch.Tensor]]: Outputs after SA and FP modules.
-                - fp_xyz (list[torch.Tensor]): The coordinates of \
+                - fp_xyz (list[torch.Tensor]): The coordinates of
                     each fp features.
-                - fp_features (list[torch.Tensor]): The features \
+                - fp_features (list[torch.Tensor]): The features
                     from each Feature Propagate Layers.
-                - fp_indices (list[torch.Tensor]): Indices of the \
+                - fp_indices (list[torch.Tensor]): Indices of the
                     input points.
         """
         xyz, features = self._split_point_feats(points)
mmdet3d/models/backbones/second.py

# Copyright (c) OpenMMLab. All rights reserved.
import warnings

from mmcv.cnn import build_conv_layer, build_norm_layer
from mmcv.runner import BaseModule
from torch import nn as nn
mmdet3d/models/builder.py

# Copyright (c) OpenMMLab. All rights reserved.
import warnings

from mmcv.cnn import MODELS as MMCV_MODELS
from mmcv.utils import Registry
mmdet3d/models/decode_heads/__init__.py

 # Copyright (c) OpenMMLab. All rights reserved.
+from .dgcnn_head import DGCNNHead
 from .paconv_head import PAConvHead
 from .pointnet2_head import PointNet2Head

-__all__ = ['PointNet2Head', 'PAConvHead']
+__all__ = ['PointNet2Head', 'DGCNNHead', 'PAConvHead']
mmdet3d/models/decode_heads/decode_head.py

# Copyright (c) OpenMMLab. All rights reserved.
from abc import ABCMeta, abstractmethod

from mmcv.cnn import normal_init
from mmcv.runner import BaseModule, auto_fp16, force_fp32
from torch import nn as nn

@@ -13,17 +14,18 @@ class Base3DDecodeHead(BaseModule, metaclass=ABCMeta):
     Args:
         channels (int): Channels after modules, before conv_seg.
         num_classes (int): Number of classes.
-        dropout_ratio (float): Ratio of dropout layer. Default: 0.5.
-        conv_cfg (dict|None): Config of conv layers.
+        dropout_ratio (float, optional): Ratio of dropout layer. Default: 0.5.
+        conv_cfg (dict, optional): Config of conv layers.
             Default: dict(type='Conv1d').
-        norm_cfg (dict|None): Config of norm layers.
+        norm_cfg (dict, optional): Config of norm layers.
             Default: dict(type='BN1d').
-        act_cfg (dict): Config of activation layers.
+        act_cfg (dict, optional): Config of activation layers.
             Default: dict(type='ReLU').
-        loss_decode (dict): Config of decode loss.
+        loss_decode (dict, optional): Config of decode loss.
             Default: dict(type='CrossEntropyLoss').
-        ignore_index (int | None): The label index to be ignored. When using
-            masked BCE loss, ignore_index should be set to None. Default: 255.
+        ignore_index (int, optional): The label index to be ignored.
+            When using masked BCE loss, ignore_index should be set to None.
+            Default: 255.
     """

     def __init__(self,

@@ -110,9 +112,9 @@ class Base3DDecodeHead(BaseModule, metaclass=ABCMeta):
         """Compute semantic segmentation loss.

         Args:
-            seg_logit (torch.Tensor): Predicted per-point segmentation logits \
+            seg_logit (torch.Tensor): Predicted per-point segmentation logits
                 of shape [B, num_classes, N].
-            seg_label (torch.Tensor): Ground-truth segmentation label of \
+            seg_label (torch.Tensor): Ground-truth segmentation label of
                 shape [B, N].
         """
         loss = dict()
mmdet3d/models/decode_heads/dgcnn_head.py (new file, 0 → 100644)

# Copyright (c) OpenMMLab. All rights reserved.
from mmcv.cnn.bricks import ConvModule

from mmdet3d.ops import DGCNNFPModule
from mmdet.models import HEADS
from .decode_head import Base3DDecodeHead


@HEADS.register_module()
class DGCNNHead(Base3DDecodeHead):
    r"""DGCNN decoder head.

    Decoder head used in `DGCNN <https://arxiv.org/abs/1801.07829>`_.
    Refer to the
    `reimplementation code <https://github.com/AnTao97/dgcnn.pytorch>`_.

    Args:
        fp_channels (tuple[int], optional): Tuple of mlp channels in feature
            propagation (FP) modules. Defaults to (1216, 512).
    """

    def __init__(self, fp_channels=(1216, 512), **kwargs):
        super(DGCNNHead, self).__init__(**kwargs)

        self.FP_module = DGCNNFPModule(
            mlp_channels=fp_channels, act_cfg=self.act_cfg)

        # https://github.com/charlesq34/pointnet2/blob/master/models/pointnet2_sem_seg.py#L40
        self.pre_seg_conv = ConvModule(
            fp_channels[-1],
            self.channels,
            kernel_size=1,
            bias=False,
            conv_cfg=self.conv_cfg,
            norm_cfg=self.norm_cfg,
            act_cfg=self.act_cfg)

    def _extract_input(self, feat_dict):
        """Extract inputs from features dictionary.

        Args:
            feat_dict (dict): Feature dict from backbone.

        Returns:
            torch.Tensor: points for decoder.
        """
        fa_points = feat_dict['fa_points']

        return fa_points

    def forward(self, feat_dict):
        """Forward pass.

        Args:
            feat_dict (dict): Feature dict from backbone.

        Returns:
            torch.Tensor: Segmentation map of shape [B, num_classes, N].
        """
        fa_points = self._extract_input(feat_dict)

        fp_points = self.FP_module(fa_points)
        fp_points = fp_points.transpose(1, 2).contiguous()
        output = self.pre_seg_conv(fp_points)
        output = self.cls_seg(output)

        return output
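As a usage sketch (not part of the commit), the new head plugs into a segmentor config like the other Base3DDecodeHead subclasses; every value below besides the documented fp_channels default is illustrative only:

    # hypothetical decode_head config for DGCNNHead
    decode_head = dict(
        type='DGCNNHead',
        fp_channels=(1216, 512),   # documented default
        channels=256,              # assumed hidden width before conv_seg
        num_classes=13,            # assumed dataset class count
        dropout_ratio=0.5,
        conv_cfg=dict(type='Conv1d'),
        norm_cfg=dict(type='BN1d'),
        act_cfg=dict(type='ReLU'),
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0))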
mmdet3d/models/decode_heads/paconv_head.py

@@ -14,7 +14,7 @@ class PAConvHead(PointNet2Head):
     Args:
         fp_channels (tuple[tuple[int]]): Tuple of mlp channels in FP modules.
-        fp_norm_cfg (dict|None): Config of norm layers used in FP modules.
+        fp_norm_cfg (dict): Config of norm layers used in FP modules.
             Default: dict(type='BN2d').
     """
mmdet3d/models/decode_heads/pointnet2_head.py

@@ -16,7 +16,7 @@ class PointNet2Head(Base3DDecodeHead):
     Args:
         fp_channels (tuple[tuple[int]]): Tuple of mlp channels in FP modules.
-        fp_norm_cfg (dict|None): Config of norm layers used in FP modules.
+        fp_norm_cfg (dict): Config of norm layers used in FP modules.
             Default: dict(type='BN2d').
     """
mmdet3d/models/dense_heads/__init__.py

@@ -7,8 +7,12 @@ from .centerpoint_head import CenterHead
 from .fcos_mono3d_head import FCOSMono3DHead
 from .free_anchor3d_head import FreeAnchor3DHead
 from .groupfree3d_head import GroupFree3DHead
+from .monoflex_head import MonoFlexHead
 from .parta2_rpn_head import PartA2RPNHead
+from .pgd_head import PGDHead
+from .point_rpn_head import PointRPNHead
 from .shape_aware_head import ShapeAwareHead
+from .smoke_mono3d_head import SMOKEMono3DHead
 from .ssd_3d_head import SSD3DHead
 from .vote_head import VoteHead

@@ -16,5 +20,6 @@ __all__ = [
     'Anchor3DHead', 'FreeAnchor3DHead', 'PartA2RPNHead', 'VoteHead',
     'SSD3DHead', 'BaseConvBboxHead', 'CenterHead', 'ShapeAwareHead',
     'BaseMono3DDenseHead', 'AnchorFreeMono3DHead', 'FCOSMono3DHead',
-    'GroupFree3DHead'
+    'GroupFree3DHead', 'PointRPNHead', 'SMOKEMono3DHead', 'PGDHead',
+    'MonoFlexHead'
 ]
mmdet3d/models/dense_heads/anchor3d_head.py

@@ -51,15 +51,15 @@ class Anchor3DHead(BaseModule, AnchorTrainMixin):
                      type='Anchor3DRangeGenerator',
                      range=[0, -39.68, -1.78, 69.12, 39.68, -1.78],
                      strides=[2],
-                     sizes=[[1.6, 3.9, 1.56]],
+                     sizes=[[3.9, 1.6, 1.56]],
                      rotations=[0, 1.57],
                      custom_values=[],
                      reshape_out=False),
                  assigner_per_size=False,
                  assign_per_class=False,
                  diff_rad_by_sin=True,
-                 dir_offset=0,
-                 dir_limit_offset=1,
+                 dir_offset=-np.pi / 2,
+                 dir_limit_offset=0,
                  bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
                  loss_cls=dict(
                      type='CrossEntropyLoss',

@@ -81,6 +81,10 @@ class Anchor3DHead(BaseModule, AnchorTrainMixin):
         self.assign_per_class = assign_per_class
         self.dir_offset = dir_offset
         self.dir_limit_offset = dir_limit_offset
+        import warnings
+        warnings.warn(
+            'dir_offset and dir_limit_offset will be depressed and be '
+            'incorporated into box coder in the future')
         self.fp16_enabled = False

         # build anchor generator
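A hedged reading of the two value changes above: in v1.0.0rc0 anchor (and box) sizes are stored as (dx, dy, dz) in the LiDAR frame, i.e. (l, w, h) for cars, where earlier configs used (w, l, h), and the direction offset moves into the -pi/2 convention. A minimal conversion sketch for a legacy config, under that assumption:

    legacy_sizes = [[1.6, 3.9, 1.56]]  # pre-1.0.0rc0 order, assumed (w, l, h)
    refactored_sizes = [[s[1], s[0], s[2]] for s in legacy_sizes]
    assert refactored_sizes == [[3.9, 1.6, 1.56]]  # matches the new default above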
@@ -145,7 +149,7 @@ class Anchor3DHead(BaseModule, AnchorTrainMixin):
             x (torch.Tensor): Input features.

         Returns:
-            tuple[torch.Tensor]: Contain score of each class, bbox \
+            tuple[torch.Tensor]: Contain score of each class, bbox
                 regression and direction classification predictions.
         """
         cls_score = self.conv_cls(x)

@@ -163,7 +167,7 @@ class Anchor3DHead(BaseModule, AnchorTrainMixin):
             features produced by FPN.

         Returns:
-            tuple[list[torch.Tensor]]: Multi-level class score, bbox \
+            tuple[list[torch.Tensor]]: Multi-level class score, bbox
                 and direction predictions.
         """
         return multi_apply(self.forward_single, feats)

@@ -177,7 +181,7 @@ class Anchor3DHead(BaseModule, AnchorTrainMixin):
             device (str): device of current module.

         Returns:
-            list[list[torch.Tensor]]: Anchors of each image, valid flags \
+            list[list[torch.Tensor]]: Anchors of each image, valid flags
                 of each image.
         """
         num_imgs = len(input_metas)

@@ -207,7 +211,7 @@ class Anchor3DHead(BaseModule, AnchorTrainMixin):
             num_total_samples (int): The number of valid samples.

         Returns:
-            tuple[torch.Tensor]: Losses of class, bbox \
+            tuple[torch.Tensor]: Losses of class, bbox
                 and direction, respectively.
         """
         # classification loss

@@ -285,7 +289,7 @@ class Anchor3DHead(BaseModule, AnchorTrainMixin):
                 the 7th dimension is rotation dimension.

         Returns:
-            tuple[torch.Tensor]: ``boxes1`` and ``boxes2`` whose 7th \
+            tuple[torch.Tensor]: ``boxes1`` and ``boxes2`` whose 7th
                 dimensions are changed.
         """
         rad_pred_encoding = torch.sin(boxes1[..., 6:7]) * torch.cos(

@@ -318,16 +322,16 @@ class Anchor3DHead(BaseModule, AnchorTrainMixin):
                 of each sample.
             gt_labels (list[torch.Tensor]): Gt labels of each sample.
             input_metas (list[dict]): Contain pcd and img's meta info.
-            gt_bboxes_ignore (None | list[torch.Tensor]): Specify
-                which bounding.
+            gt_bboxes_ignore (list[torch.Tensor]): Specify
+                which bounding boxes to ignore.

         Returns:
-            dict[str, list[torch.Tensor]]: Classification, bbox, and \
+            dict[str, list[torch.Tensor]]: Classification, bbox, and
                 direction losses of each level.

                 - loss_cls (list[torch.Tensor]): Classification losses.
                 - loss_bbox (list[torch.Tensor]): Box regression losses.
-                - loss_dir (list[torch.Tensor]): Direction classification \
+                - loss_dir (list[torch.Tensor]): Direction classification
                     losses.
         """
         featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]

@@ -385,7 +389,7 @@ class Anchor3DHead(BaseModule, AnchorTrainMixin):
             dir_cls_preds (list[torch.Tensor]): Multi-level direction
                 class predictions.
             input_metas (list[dict]): Contain pcd and img's meta info.
-            cfg (None | :obj:`ConfigDict`): Training or testing config.
+            cfg (:obj:`ConfigDict`): Training or testing config.
             rescale (list[torch.Tensor]): Whether to rescale bbox.

         Returns:

@@ -439,7 +443,7 @@ class Anchor3DHead(BaseModule, AnchorTrainMixin):
             mlvl_anchors (List[torch.Tensor]): Multi-level anchors
                 in single batch.
             input_meta (list[dict]): Contain pcd and img's meta info.
-            cfg (None | :obj:`ConfigDict`): Training or testing config.
+            cfg (:obj:`ConfigDict`): Training or testing config.
             rescale (list[torch.Tensor]): whether to rescale bbox.

         Returns:
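The rad_pred_encoding line visible in the hunk above is the first half of the standard sin-difference trick, sin(a - b) = sin(a)cos(b) - cos(a)sin(b); completed as a sketch, assuming the method body matches that identity (it is not shown in full by this diff):

    import torch

    def add_sin_difference(boxes1, boxes2):
        # Replace the raw yaw channel (dim 6) with the two factors so that a
        # plain (boxes1 - boxes2) on that channel equals sin(yaw1 - yaw2).
        rad_pred_encoding = torch.sin(boxes1[..., 6:7]) * torch.cos(boxes2[..., 6:7])
        rad_tg_encoding = torch.cos(boxes1[..., 6:7]) * torch.sin(boxes2[..., 6:7])
        boxes1 = torch.cat([boxes1[..., :6], rad_pred_encoding, boxes1[..., 7:]], dim=-1)
        boxes2 = torch.cat([boxes2[..., :6], rad_tg_encoding, boxes2[..., 7:]], dim=-1)
        return boxes1, boxes2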
mmdet3d/models/dense_heads/anchor_free_mono3d_head.py

 # Copyright (c) OpenMMLab. All rights reserved.
-import torch
 from abc import abstractmethod
+
+import torch
 from mmcv.cnn import ConvModule, bias_init_with_prob, normal_init
 from mmcv.runner import force_fp32
 from torch import nn as nn

@@ -18,35 +19,45 @@ class AnchorFreeMono3DHead(BaseMono3DDenseHead):
         num_classes (int): Number of categories excluding the background
             category.
         in_channels (int): Number of channels in the input feature map.
-        feat_channels (int): Number of hidden channels. Used in child classes.
-        stacked_convs (int): Number of stacking convs of the head.
-        strides (tuple): Downsample factor of each feature map.
-        dcn_on_last_conv (bool): If true, use dcn in the last layer of
-            towers. Default: False.
-        conv_bias (bool | str): If specified as `auto`, it will be decided by
-            the norm_cfg. Bias of conv will be set as True if `norm_cfg` is
-            None, otherwise False. Default: "auto".
-        background_label (int | None): Label ID of background, set as 0 for
-            RPN and num_classes for other heads. It will automatically set as
-            num_classes if None is given.
-        use_direction_classifier (bool): Whether to add a direction classifier.
-        diff_rad_by_sin (bool): Whether to change the difference into sin
-            difference for box regression loss.
-        loss_cls (dict): Config of classification loss.
-        loss_bbox (dict): Config of localization loss.
-        loss_dir (dict): Config of direction classifier loss.
-        loss_attr (dict): Config of attribute classifier loss, which is only
-            active when pred_attrs=True.
-        bbox_code_size (int): Dimensions of predicted bounding boxes.
-        pred_attrs (bool): Whether to predict attributes. Default to False.
-        num_attrs (int): The number of attributes to be predicted. Default: 9.
-        pred_velo (bool): Whether to predict velocity. Default to False.
-        pred_bbox2d (bool): Whether to predict 2D boxes. Default to False.
-        group_reg_dims (tuple[int]): The dimension of each regression target
-            group. Default: (2, 1, 3, 1, 2).
-        cls_branch (tuple[int]): Channels for classification branch.
+        feat_channels (int, optional): Number of hidden channels.
+            Used in child classes. Defaults to 256.
+        stacked_convs (int, optional): Number of stacking convs of the head.
+        strides (tuple, optional): Downsample factor of each feature map.
+        dcn_on_last_conv (bool, optional): If true, use dcn in the last
+            layer of towers. Default: False.
+        conv_bias (bool | str, optional): If specified as `auto`, it will be
+            decided by the norm_cfg. Bias of conv will be set as True
+            if `norm_cfg` is None, otherwise False. Default: 'auto'.
+        background_label (int, optional): Label ID of background,
+            set as 0 for RPN and num_classes for other heads.
+            It will automatically set as `num_classes` if None is given.
+        use_direction_classifier (bool, optional):
+            Whether to add a direction classifier.
+        diff_rad_by_sin (bool, optional): Whether to change the difference
+            into sin difference for box regression loss. Defaults to True.
+        dir_offset (float, optional): Parameter used in direction
+            classification. Defaults to 0.
+        dir_limit_offset (float, optional): Parameter used in direction
+            classification. Defaults to 0.
+        loss_cls (dict, optional): Config of classification loss.
+        loss_bbox (dict, optional): Config of localization loss.
+        loss_dir (dict, optional): Config of direction classifier loss.
+        loss_attr (dict, optional): Config of attribute classifier loss,
+            which is only active when `pred_attrs=True`.
+        bbox_code_size (int, optional): Dimensions of predicted bounding boxes.
+        pred_attrs (bool, optional): Whether to predict attributes.
+            Defaults to False.
+        num_attrs (int, optional): The number of attributes to be predicted.
+            Default: 9.
+        pred_velo (bool, optional): Whether to predict velocity.
+            Defaults to False.
+        pred_bbox2d (bool, optional): Whether to predict 2D boxes.
+            Defaults to False.
+        group_reg_dims (tuple[int], optional): The dimension of each regression
+            target group. Default: (2, 1, 3, 1, 2).
+        cls_branch (tuple[int], optional): Channels for classification branch.
             Default: (128, 64).
-        reg_branch (tuple[tuple]): Channels for regression branch.
+        reg_branch (tuple[tuple], optional): Channels for regression branch.
             Default: (
                 (128, 64),  # offset
                 (128, 64),  # depth

@@ -54,14 +65,16 @@ class AnchorFreeMono3DHead(BaseMono3DDenseHead):
                 (64, ),  # rot
                 ()  # velo
             ),
-        dir_branch (tuple[int]): Channels for direction
-            classification branch.
+        dir_branch (tuple[int], optional): Channels for direction
+            classification branch.
             Default: (64, ).
-        attr_branch (tuple[int]): Channels for classification branch.
+        attr_branch (tuple[int], optional): Channels for classification branch.
             Default: (64, ).
-        conv_cfg (dict): Config dict for convolution layer. Default: None.
-        norm_cfg (dict): Config dict for normalization layer. Default: None.
-        train_cfg (dict): Training config of anchor head.
-        test_cfg (dict): Testing config of anchor head.
+        conv_cfg (dict, optional): Config dict for convolution layer.
+            Default: None.
+        norm_cfg (dict, optional): Config dict for normalization layer.
+            Default: None.
+        train_cfg (dict, optional): Training config of anchor head.
+        test_cfg (dict, optional): Testing config of anchor head.
     """  # noqa: W605

     _version = 1

@@ -79,6 +92,7 @@ class AnchorFreeMono3DHead(BaseMono3DDenseHead):
                  use_direction_classifier=True,
                  diff_rad_by_sin=True,
                  dir_offset=0,
+                 dir_limit_offset=0,
                  loss_cls=dict(
                      type='FocalLoss',
                      use_sigmoid=True,

@@ -125,6 +139,7 @@ class AnchorFreeMono3DHead(BaseMono3DDenseHead):
         self.use_direction_classifier = use_direction_classifier
         self.diff_rad_by_sin = diff_rad_by_sin
         self.dir_offset = dir_offset
+        self.dir_limit_offset = dir_limit_offset
         self.loss_cls = build_loss(loss_cls)
         self.loss_bbox = build_loss(loss_bbox)
         self.loss_dir = build_loss(loss_dir)

@@ -162,13 +177,6 @@ class AnchorFreeMono3DHead(BaseMono3DDenseHead):
         self.attr_branch = attr_branch

         self._init_layers()
-        if init_cfg is None:
-            self.init_cfg = dict(
-                type='Normal',
-                layer='Conv2d',
-                std=0.01,
-                override=dict(
-                    type='Normal', name='conv_cls', std=0.01, bias_prob=0.01))

     def _init_layers(self):
         """Initialize layers of the head."""

@@ -274,8 +282,34 @@ class AnchorFreeMono3DHead(BaseMono3DDenseHead):
             self.conv_attr = nn.Conv2d(self.attr_branch[-1], self.num_attrs, 1)

+    def init_weights(self):
+        """Initialize weights of the head.
+
+        We currently still use the customized defined init_weights because the
+        default init of DCN triggered by the init_cfg will init
+        conv_offset.weight, which mistakenly affects the training stability.
+        """
+        super().init_weights()
+        for modules in [self.cls_convs, self.reg_convs, self.conv_cls_prev]:
+            for m in modules:
+                if isinstance(m.conv, nn.Conv2d):
+                    normal_init(m.conv, std=0.01)
+        for conv_reg_prev in self.conv_reg_prevs:
+            if conv_reg_prev is None:
+                continue
+            for m in conv_reg_prev:
+                if isinstance(m.conv, nn.Conv2d):
+                    normal_init(m.conv, std=0.01)
+        if self.use_direction_classifier:
+            for m in self.conv_dir_cls_prev:
+                if isinstance(m.conv, nn.Conv2d):
+                    normal_init(m.conv, std=0.01)
+        if self.pred_attrs:
+            for m in self.conv_attr_prev:
+                if isinstance(m.conv, nn.Conv2d):
+                    normal_init(m.conv, std=0.01)
+        bias_cls = bias_init_with_prob(0.01)
+        normal_init(self.conv_cls, std=0.01, bias=bias_cls)
+        for conv_reg in self.conv_regs:
+            normal_init(conv_reg, std=0.01)
+        if self.use_direction_classifier:
+            normal_init(self.conv_dir_cls, std=0.01, bias=bias_cls)
+        if self.pred_attrs:

@@ -289,7 +323,7 @@ class AnchorFreeMono3DHead(BaseMono3DDenseHead):
                 a 4D-tensor.

         Returns:
-            tuple: Usually contain classification scores, bbox predictions, \
+            tuple: Usually contain classification scores, bbox predictions,
                 and direction class predictions.

                 cls_scores (list[Tensor]): Box scores for each scale level,
                     each is a 4D-tensor, the channel number is

@@ -307,7 +341,7 @@ class AnchorFreeMono3DHead(BaseMono3DDenseHead):
         return multi_apply(self.forward_single, feats)[:5]

     def forward_single(self, x):
-        """Forward features of a single scale levl.
+        """Forward features of a single scale level.

         Args:
             x (Tensor): FPN feature maps of the specified stride.

@@ -401,7 +435,7 @@ class AnchorFreeMono3DHead(BaseMono3DDenseHead):
                 corresponding to each box
             img_metas (list[dict]): Meta information of each image, e.g.,
                 image size, scaling factor, etc.
-            gt_bboxes_ignore (None | list[Tensor]): specify which bounding
+            gt_bboxes_ignore (list[Tensor]): specify which bounding
                 boxes can be ignored when computing the loss.
         """
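The bias_init_with_prob helper used in the new init_weights is mmcv's focal-loss prior trick: it sets the classifier bias so the initial sigmoid output equals a given prior probability. Its formula, shown standalone:

    import math

    def bias_init_with_prob(prior_prob):
        # solve sigmoid(b) = p  =>  b = -log((1 - p) / p)
        return float(-math.log((1 - prior_prob) / prior_prob))

    assert abs(bias_init_with_prob(0.01) - (-4.595)) < 1e-3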
mmdet3d/models/dense_heads/base_mono3d_dense_head.py

# Copyright (c) OpenMMLab. All rights reserved.
from abc import ABCMeta, abstractmethod

from mmcv.runner import BaseModule
mmdet3d/models/dense_heads/centerpoint_head.py

# Copyright (c) OpenMMLab. All rights reserved.
import copy

import torch
from mmcv.cnn import ConvModule, build_conv_layer
from mmcv.runner import BaseModule, force_fp32

@@ -21,16 +22,16 @@ class SeparateHead(BaseModule):
     Args:
         in_channels (int): Input channels for conv_layer.
         heads (dict): Conv information.
-        head_conv (int): Output channels.
+        head_conv (int, optional): Output channels.
             Default: 64.
-        final_kernal (int): Kernal size for the last conv layer.
-            Deafult: 1.
-        init_bias (float): Initial bias. Default: -2.19.
-        conv_cfg (dict): Config of conv layer.
+        final_kernel (int, optional): Kernel size for the last conv layer.
+            Default: 1.
+        init_bias (float, optional): Initial bias. Default: -2.19.
+        conv_cfg (dict, optional): Config of conv layer.
             Default: dict(type='Conv2d')
-        norm_cfg (dict): Config of norm layer.
+        norm_cfg (dict, optional): Config of norm layer.
             Default: dict(type='BN2d').
-        bias (str): Type of bias. Default: 'auto'.
+        bias (str, optional): Type of bias. Default: 'auto'.
     """

     def __init__(self,

@@ -100,17 +101,17 @@ class SeparateHead(BaseModule):
         Returns:
             dict[str: torch.Tensor]: contains the following keys:

-                -reg (torch.Tensor): 2D regression value with the \
+                -reg (torch.Tensor): 2D regression value with the
                     shape of [B, 2, H, W].
-                -height (torch.Tensor): Height value with the \
+                -height (torch.Tensor): Height value with the
                     shape of [B, 1, H, W].
-                -dim (torch.Tensor): Size value with the shape \
+                -dim (torch.Tensor): Size value with the shape
                     of [B, 3, H, W].
-                -rot (torch.Tensor): Rotation value with the \
+                -rot (torch.Tensor): Rotation value with the
                     shape of [B, 2, H, W].
-                -vel (torch.Tensor): Velocity value with the \
+                -vel (torch.Tensor): Velocity value with the
                     shape of [B, 2, H, W].
-                -heatmap (torch.Tensor): Heatmap with the shape of \
+                -heatmap (torch.Tensor): Heatmap with the shape of
                     [B, N, H, W].
         """
         ret_dict = dict()

@@ -131,18 +132,19 @@ class DCNSeparateHead(BaseModule):
     Args:
         in_channels (int): Input channels for conv_layer.
         num_cls (int): Number of classes.
         heads (dict): Conv information.
         dcn_config (dict): Config of dcn layer.
-        num_cls (int): Output channels.
+        head_conv (int, optional): Output channels.
             Default: 64.
-        final_kernal (int): Kernal size for the last conv
-            layer. Deafult: 1.
-        init_bias (float): Initial bias. Default: -2.19.
-        conv_cfg (dict): Config of conv layer.
+        final_kernel (int, optional): Kernel size for the last conv
+            layer. Default: 1.
+        init_bias (float, optional): Initial bias. Default: -2.19.
+        conv_cfg (dict, optional): Config of conv layer.
             Default: dict(type='Conv2d')
-        norm_cfg (dict): Config of norm layer.
+        norm_cfg (dict, optional): Config of norm layer.
             Default: dict(type='BN2d').
-        bias (str): Type of bias. Default: 'auto'.
+        bias (str, optional): Type of bias. Default: 'auto'.
     """  # noqa: W605

     def __init__(self,

@@ -215,17 +217,17 @@ class DCNSeparateHead(BaseModule):
         Returns:
             dict[str: torch.Tensor]: contains the following keys:

-                -reg (torch.Tensor): 2D regression value with the \
+                -reg (torch.Tensor): 2D regression value with the
                     shape of [B, 2, H, W].
-                -height (torch.Tensor): Height value with the \
+                -height (torch.Tensor): Height value with the
                     shape of [B, 1, H, W].
-                -dim (torch.Tensor): Size value with the shape \
+                -dim (torch.Tensor): Size value with the shape
                     of [B, 3, H, W].
-                -rot (torch.Tensor): Rotation value with the \
+                -rot (torch.Tensor): Rotation value with the
                     shape of [B, 2, H, W].
-                -vel (torch.Tensor): Velocity value with the \
+                -vel (torch.Tensor): Velocity value with the
                     shape of [B, 2, H, W].
-                -heatmap (torch.Tensor): Heatmap with the shape of \
+                -heatmap (torch.Tensor): Heatmap with the shape of
                     [B, N, H, W].
         """
         center_feat = self.feature_adapt_cls(x)

@@ -243,31 +245,30 @@ class CenterHead(BaseModule):
     """CenterHead for CenterPoint.

     Args:
-        mode (str): Mode of the head. Default: '3d'.
-        in_channels (list[int] | int): Channels of the input feature map.
-            Default: [128].
-        tasks (list[dict]): Task information including class number
+        in_channels (list[int] | int, optional): Channels of the input
+            feature map. Default: [128].
+        tasks (list[dict], optional): Task information including class number
             and class names. Default: None.
-        dataset (str): Name of the dataset. Default: 'nuscenes'.
-        weight (float): Weight for location loss. Default: 0.25.
-        code_weights (list[int]): Code weights for location loss. Default: [].
-        common_heads (dict): Conv information for common heads.
+        train_cfg (dict, optional): Train-time configs. Default: None.
+        test_cfg (dict, optional): Test-time configs. Default: None.
+        bbox_coder (dict, optional): Bbox coder configs. Default: None.
+        common_heads (dict, optional): Conv information for common heads.
             Default: dict().
-        loss_cls (dict): Config of classification loss function.
+        loss_cls (dict, optional): Config of classification loss function.
             Default: dict(type='GaussianFocalLoss', reduction='mean').
-        loss_bbox (dict): Config of regression loss function.
+        loss_bbox (dict, optional): Config of regression loss function.
             Default: dict(type='L1Loss', reduction='none').
-        separate_head (dict): Config of separate head. Default: dict(
+        separate_head (dict, optional): Config of separate head. Default: dict(
             type='SeparateHead', init_bias=-2.19, final_kernel=3)
-        share_conv_channel (int): Output channels for share_conv_layer.
-            Default: 64.
-        num_heatmap_convs (int): Number of conv layers for heatmap conv layer.
-            Default: 2.
-        conv_cfg (dict): Config of conv layer.
+        share_conv_channel (int, optional): Output channels for share_conv
+            layer. Default: 64.
+        num_heatmap_convs (int, optional): Number of conv layers for heatmap
+            conv layer. Default: 2.
+        conv_cfg (dict, optional): Config of conv layer.
             Default: dict(type='Conv2d')
-        norm_cfg (dict): Config of norm layer.
+        norm_cfg (dict, optional): Config of norm layer.
             Default: dict(type='BN2d').
-        bias (str): Type of bias. Default: 'auto'.
+        bias (str, optional): Type of bias. Default: 'auto'.
     """

     def __init__(self,

@@ -366,8 +367,8 @@ class CenterHead(BaseModule):
             feat (torch.tensor): Feature map with the shape of [B, H*W, 10].
             ind (torch.Tensor): Index of the ground truth boxes with the
                 shape of [B, max_obj].
-            mask (torch.Tensor): Mask of the feature map with the shape
-                of [B, max_obj]. Default: None.
+            mask (torch.Tensor, optional): Mask of the feature map with the
+                shape of [B, max_obj]. Default: None.

         Returns:
             torch.Tensor: Feature map after gathering with the shape

@@ -403,14 +404,14 @@ class CenterHead(BaseModule):
         Returns:
             Returns:
-                tuple[list[torch.Tensor]]: Tuple of target including \
+                tuple[list[torch.Tensor]]: Tuple of target including
                     the following results in order.

                     - list[torch.Tensor]: Heatmap scores.
                     - list[torch.Tensor]: Ground truth boxes.
-                    - list[torch.Tensor]: Indexes indicating the \
+                    - list[torch.Tensor]: Indexes indicating the
                         position of the valid boxes.
-                    - list[torch.Tensor]: Masks indicating which \
+                    - list[torch.Tensor]: Masks indicating which
                         boxes are valid.
         """
         heatmaps, anno_boxes, inds, masks = multi_apply(

@@ -437,14 +438,14 @@ class CenterHead(BaseModule):
             gt_labels_3d (torch.Tensor): Labels of boxes.

         Returns:
-            tuple[list[torch.Tensor]]: Tuple of target including \
+            tuple[list[torch.Tensor]]: Tuple of target including
                 the following results in order.

                 - list[torch.Tensor]: Heatmap scores.
                 - list[torch.Tensor]: Ground truth boxes.
-                - list[torch.Tensor]: Indexes indicating the position \
+                - list[torch.Tensor]: Indexes indicating the position
                     of the valid boxes.
-                - list[torch.Tensor]: Masks indicating which boxes \
+                - list[torch.Tensor]: Masks indicating which boxes
                     are valid.
         """
         device = gt_labels_3d.device

@@ -728,11 +729,11 @@ class CenterHead(BaseModule):
         Returns:
             list[dict[str: torch.Tensor]]: contains the following keys:

-                -bboxes (torch.Tensor): Prediction bboxes after nms with the \
+                -bboxes (torch.Tensor): Prediction bboxes after nms with the
                     shape of [N, 9].
-                -scores (torch.Tensor): Prediction scores after nms with the \
+                -scores (torch.Tensor): Prediction scores after nms with the
                     shape of [N].
-                -labels (torch.Tensor): Prediction labels after nms with the \
+                -labels (torch.Tensor): Prediction labels after nms with the
                     shape of [N].
         """
         predictions_dicts = []

@@ -781,7 +782,7 @@ class CenterHead(BaseModule):
                     boxes_for_nms,
                     top_scores,
                     thresh=self.test_cfg['nms_thr'],
-                    pre_maxsize=self.test_cfg['pre_max_size'],
+                    pre_max_size=self.test_cfg['pre_max_size'],
                     post_max_size=self.test_cfg['post_max_size'])
             else:
                 selected = []
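A hedged sketch of the heads/common_heads dict consumed by SeparateHead above: each entry maps a branch name to (output_channels, num_convs). The values below follow the usual nuScenes CenterPoint configs and are illustrative, not part of this diff:

    common_heads = dict(
        reg=(2, 2),     # 2D center offset
        height=(1, 2),  # z center
        dim=(3, 2),     # box size
        rot=(2, 2),     # (sin, cos) of yaw
        vel=(2, 2))     # velocity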
mmdet3d/models/dense_heads/fcos_mono3d_head.py

 # Copyright (c) OpenMMLab. All rights reserved.
 from logging import warning

 import numpy as np
 import torch
-from mmcv.cnn import Scale
+from mmcv.cnn import Scale, normal_init
 from mmcv.runner import force_fp32
 from torch import nn as nn

-from mmdet3d.core import box3d_multiclass_nms, limit_period, xywhr2xyxyr
+from mmdet3d.core import (box3d_multiclass_nms, limit_period, points_img2cam,
+                          xywhr2xyxyr)
 from mmdet.core import multi_apply
 from mmdet.core.bbox.builder import build_bbox_coder
 from mmdet.models.builder import HEADS, build_loss
 from .anchor_free_mono3d_head import AnchorFreeMono3DHead

@@ -21,31 +25,29 @@ class FCOSMono3DHead(AnchorFreeMono3DHead):
         num_classes (int): Number of categories excluding the background
             category.
         in_channels (int): Number of channels in the input feature map.
-        regress_ranges (tuple[tuple[int, int]]): Regress range of multiple
+        regress_ranges (tuple[tuple[int, int]], optional): Regress range of multiple
             level points.
-        center_sampling (bool): If true, use center sampling. Default: True.
-        center_sample_radius (float): Radius of center sampling. Default: 1.5.
-        norm_on_bbox (bool): If true, normalize the regression targets
+        center_sampling (bool, optional): If true, use center sampling. Default: True.
+        center_sample_radius (float, optional): Radius of center sampling. Default: 1.5.
+        norm_on_bbox (bool, optional): If true, normalize the regression targets
             with FPN strides. Default: True.
-        centerness_on_reg (bool): If true, position centerness on the
+        centerness_on_reg (bool, optional): If true, position centerness on the
             regress branch. Please refer to https://github.com/tianzhi0549/FCOS/issues/89#issuecomment-516877042.
             Default: True.
-        centerness_alpha: Parameter used to adjust the intensity attenuation
-            from the center to the periphery. Default: 2.5.
-        loss_cls (dict): Config of classification loss.
-        loss_bbox (dict): Config of localization loss.
-        loss_dir (dict): Config of direction classification loss.
-        loss_attr (dict): Config of attribute classification loss.
-        loss_centerness (dict): Config of centerness loss.
-        norm_cfg (dict): dictionary to construct and config norm layer.
+        centerness_alpha (int, optional): Parameter used to adjust the intensity
+            attenuation from the center to the periphery. Default: 2.5.
+        loss_cls (dict, optional): Config of classification loss.
+        loss_bbox (dict, optional): Config of localization loss.
+        loss_dir (dict, optional): Config of direction classification loss.
+        loss_attr (dict, optional): Config of attribute classification loss.
+        loss_centerness (dict, optional): Config of centerness loss.
+        norm_cfg (dict, optional): dictionary to construct and config norm layer.
             Default: norm_cfg=dict(type='GN', num_groups=32, requires_grad=True).
-        centerness_branch (tuple[int]): Channels for centerness branch.
+        centerness_branch (tuple[int], optional): Channels for centerness branch.
             Default: (64, ).
     """  # noqa: E501

     def __init__(self,
                  num_classes,
                  in_channels,
                  regress_ranges=((-1, 48), (48, 96), (96, 192), (192, 384),
                                  (384, INF)),
                  center_sampling=True,

@@ -73,6 +75,7 @@ class FCOSMono3DHead(AnchorFreeMono3DHead):
                      type='CrossEntropyLoss',
                      use_sigmoid=True,
                      loss_weight=1.0),
+                 bbox_coder=dict(type='FCOS3DBBoxCoder', code_size=9),
                  norm_cfg=dict(type='GN', num_groups=32, requires_grad=True),
                  centerness_branch=(64, ),
                  init_cfg=None,

@@ -85,8 +88,6 @@ class FCOSMono3DHead(AnchorFreeMono3DHead):
         self.centerness_alpha = centerness_alpha
         self.centerness_branch = centerness_branch
         super().__init__(
             num_classes,
             in_channels,
             loss_cls=loss_cls,
             loss_bbox=loss_bbox,
             loss_dir=loss_dir,

@@ -95,13 +96,8 @@ class FCOSMono3DHead(AnchorFreeMono3DHead):
             init_cfg=init_cfg,
             **kwargs)
         self.loss_centerness = build_loss(loss_centerness)
-        if init_cfg is None:
-            self.init_cfg = dict(
-                type='Normal',
-                layer='Conv2d',
-                std=0.01,
-                override=dict(
-                    type='Normal', name='conv_cls', std=0.01, bias_prob=0.01))
+        bbox_coder['code_size'] = self.bbox_code_size
+        self.bbox_coder = build_bbox_coder(bbox_coder)

     def _init_layers(self):
         """Initialize layers of the head."""

@@ -110,9 +106,24 @@ class FCOSMono3DHead(AnchorFreeMono3DHead):
             conv_channels=self.centerness_branch,
             conv_strides=(1, ) * len(self.centerness_branch))
         self.conv_centerness = nn.Conv2d(self.centerness_branch[-1], 1, 1)
+        self.scale_dim = 3  # only for offset, depth and size regression
         self.scales = nn.ModuleList([
-            nn.ModuleList([Scale(1.0) for _ in range(3)])
-            for _ in self.strides
-        ])  # only for offset, depth and size regression
+            nn.ModuleList([Scale(1.0) for _ in range(self.scale_dim)])
+            for _ in self.strides
+        ])
+
+    def init_weights(self):
+        """Initialize weights of the head.
+
+        We currently still use the customized init_weights because the default
+        init of DCN triggered by the init_cfg will init conv_offset.weight,
+        which mistakenly affects the training stability.
+        """
+        super().init_weights()
+        for m in self.conv_centerness_prev:
+            if isinstance(m.conv, nn.Conv2d):
+                normal_init(m.conv, std=0.01)
+        normal_init(self.conv_centerness, std=0.01)

     def forward(self, feats):
         """Forward features from the upstream network.

@@ -138,11 +149,12 @@ class FCOSMono3DHead(AnchorFreeMono3DHead):
             centernesses (list[Tensor]): Centerness for each scale level,
                 each is a 4D-tensor, the channel number is num_points * 1.
         """
+        # Note: we use [:5] to filter feats and only return predictions
         return multi_apply(self.forward_single, feats, self.scales,
-                           self.strides)
+                           self.strides)[:5]

     def forward_single(self, x, scale, stride):
-        """Forward features of a single scale levl.
+        """Forward features of a single scale level.

         Args:
             x (Tensor): FPN feature maps of the specified stride.

@@ -153,7 +165,7 @@ class FCOSMono3DHead(AnchorFreeMono3DHead):
                 is True.

         Returns:
-            tuple: scores for each class, bbox and direction class \
+            tuple: scores for each class, bbox and direction class
                 predictions, centerness predictions of input feature maps.
         """
         cls_score, bbox_pred, dir_cls_pred, attr_pred, cls_feat, reg_feat = \

@@ -169,26 +181,12 @@ class FCOSMono3DHead(AnchorFreeMono3DHead):
         for conv_centerness_prev_layer in self.conv_centerness_prev:
             clone_cls_feat = conv_centerness_prev_layer(clone_cls_feat)
         centerness = self.conv_centerness(clone_cls_feat)

         # scale the bbox_pred of different level
         # only apply to offset, depth and size prediction
-        scale_offset, scale_depth, scale_size = scale[0:3]
-
-        clone_bbox_pred = bbox_pred.clone()
-        bbox_pred[:, :2] = scale_offset(clone_bbox_pred[:, :2]).float()
-        bbox_pred[:, 2] = scale_depth(clone_bbox_pred[:, 2]).float()
-        bbox_pred[:, 3:6] = scale_size(clone_bbox_pred[:, 3:6]).float()
-
-        bbox_pred[:, 2] = bbox_pred[:, 2].exp()
-        bbox_pred[:, 3:6] = bbox_pred[:, 3:6].exp() + 1e-6  # avoid size=0
-
-        assert self.norm_on_bbox is True, 'Setting norm_on_bbox to False '\
-            'has not been thoroughly tested for FCOS3D.'
-        if self.norm_on_bbox:
-            if not self.training:
-                # Note that this line is conducted only when testing
-                bbox_pred[:, :2] *= stride
-
-        return cls_score, bbox_pred, dir_cls_pred, attr_pred, centerness
+        bbox_pred = self.bbox_coder.decode(bbox_pred, scale, stride,
+                                           self.training, cls_score)
+
+        return cls_score, bbox_pred, dir_cls_pred, attr_pred, centerness, \
+            cls_feat, reg_feat

     @staticmethod
     def add_sin_difference(boxes1, boxes2):

@@ -201,7 +199,7 @@ class FCOSMono3DHead(AnchorFreeMono3DHead):
                 the 7th dimension is rotation dimension.

         Returns:
-            tuple[torch.Tensor]: ``boxes1`` and ``boxes2`` whose 7th \
+            tuple[torch.Tensor]: ``boxes1`` and ``boxes2`` whose 7th
                 dimensions are changed.
         """
         rad_pred_encoding = torch.sin(boxes1[..., 6:7]) * torch.cos(

@@ -217,21 +215,27 @@ class FCOSMono3DHead(AnchorFreeMono3DHead):
     @staticmethod
-    def get_direction_target(reg_targets, dir_offset=0, num_bins=2,
-                             one_hot=True):
+    def get_direction_target(reg_targets,
+                             dir_offset=0,
+                             dir_limit_offset=0.0,
+                             num_bins=2,
+                             one_hot=True):
         """Encode direction to 0 ~ num_bins-1.

         Args:
             reg_targets (torch.Tensor): Bbox regression targets.
-            dir_offset (int): Direction offset.
-            num_bins (int): Number of bins to divide 2*PI.
-            one_hot (bool): Whether to encode as one hot.
+            dir_offset (int, optional): Direction offset. Default to 0.
+            dir_limit_offset (float, optional): Offset to set the direction
+                range. Default to 0.0.
+            num_bins (int, optional): Number of bins to divide 2*PI.
+                Default to 2.
+            one_hot (bool, optional): Whether to encode as one hot.
+                Default to True.

         Returns:
             torch.Tensor: Encoded direction targets.
         """
         rot_gt = reg_targets[..., 6]
-        offset_rot = limit_period(rot_gt - dir_offset, 0, 2 * np.pi)
+        offset_rot = limit_period(rot_gt - dir_offset, dir_limit_offset,
+                                  2 * np.pi)
         dir_cls_targets = torch.floor(offset_rot / (2 * np.pi / num_bins)).long()
         dir_cls_targets = torch.clamp(dir_cls_targets, min=0, max=num_bins - 1)
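A standalone sketch of the bin encoding in get_direction_target above, with mmdet3d's limit_period inlined (assumption: it matches the library's definition, val - floor(val / period + offset) * period):

    import numpy as np
    import torch

    def limit_period(val, offset=0.5, period=np.pi):
        return val - torch.floor(val / period + offset) * period

    yaw = torch.tensor([0.3, -2.8, 3.0])
    dir_offset, dir_limit_offset, num_bins = 0.0, 0.0, 2
    offset_rot = limit_period(yaw - dir_offset, dir_limit_offset, 2 * np.pi)
    bins = torch.floor(offset_rot / (2 * np.pi / num_bins)).long()
    bins = torch.clamp(bins, min=0, max=num_bins - 1)  # tensor([0, 1, 0])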
@@ -293,7 +297,7 @@ class FCOSMono3DHead(AnchorFreeMono3DHead):
             attr_labels (list[Tensor]): Attributes indices of each box.
             img_metas (list[dict]): Meta information of each image, e.g.,
                 image size, scaling factor, etc.
-            gt_bboxes_ignore (None | list[Tensor]): specify which bounding
+            gt_bboxes_ignore (list[Tensor]): specify which bounding
                 boxes can be ignored when computing the loss.

         Returns:

@@ -377,7 +381,10 @@ class FCOSMono3DHead(AnchorFreeMono3DHead):
         if self.use_direction_classifier:
             pos_dir_cls_targets = self.get_direction_target(
-                pos_bbox_targets_3d, self.dir_offset, one_hot=False)
+                pos_bbox_targets_3d,
+                self.dir_offset,
+                self.dir_limit_offset,
+                one_hot=False)

         if self.diff_rad_by_sin:
             pos_bbox_preds, pos_bbox_targets_3d = self.add_sin_difference(

@@ -502,11 +509,11 @@ class FCOSMono3DHead(AnchorFreeMono3DHead):
             rescale (bool): If True, return boxes in original image space

         Returns:
-            list[tuple[Tensor, Tensor]]: Each item in result_list is 2-tuple. \
-                The first item is an (n, 5) tensor, where the first 4 columns \
-                are bounding box positions (tl_x, tl_y, br_x, br_y) and the \
-                5-th column is a score between 0 and 1. The second item is a \
-                (n,) tensor where each item is the predicted class label of \
+            list[tuple[Tensor, Tensor]]: Each item in result_list is 2-tuple.
+                The first item is an (n, 5) tensor, where the first 4 columns
+                are bounding box positions (tl_x, tl_y, br_x, br_y) and the
+                5-th column is a score between 0 and 1. The second item is a
+                (n,) tensor where each item is the predicted class label of
                 the corresponding box.
         """
         assert len(cls_scores) == len(bbox_preds) == len(dir_cls_preds) == \

@@ -575,7 +582,7 @@ class FCOSMono3DHead(AnchorFreeMono3DHead):
             bbox_preds (list[Tensor]): Box energies / deltas for a single scale
                 level with shape (num_points * bbox_code_size, H, W).
             dir_cls_preds (list[Tensor]): Box scores for direction class
-                predictions on a single scale level with shape \
+                predictions on a single scale level with shape
                 (num_points * 2, H, W)
             attr_preds (list[Tensor]): Attribute scores for each scale level
                 Has shape (N, num_points * num_attrs, H, W)

@@ -634,7 +641,7 @@ class FCOSMono3DHead(AnchorFreeMono3DHead):
             if rescale:
                 bbox_pred[:, :2] /= bbox_pred[:, :2].new_tensor(scale_factor)
             pred_center2d = bbox_pred[:, :3].clone()
-            bbox_pred[:, :3] = self.pts2Dto3D(bbox_pred[:, :3], view)
+            bbox_pred[:, :3] = points_img2cam(bbox_pred[:, :3], view)
             mlvl_centers2d.append(pred_center2d)
             mlvl_bboxes.append(bbox_pred)
             mlvl_scores.append(scores)

@@ -647,19 +654,13 @@ class FCOSMono3DHead(AnchorFreeMono3DHead):
         mlvl_dir_scores = torch.cat(mlvl_dir_scores)

         # change local yaw to global yaw for 3D nms
-        if mlvl_bboxes.shape[0] > 0:
-            dir_rot = limit_period(mlvl_bboxes[..., 6] - self.dir_offset, 0,
-                                   np.pi)
-            mlvl_bboxes[..., 6] = (
-                dir_rot + self.dir_offset +
-                np.pi * mlvl_dir_scores.to(mlvl_bboxes.dtype))
-        cam_intrinsic = mlvl_centers2d.new_zeros((4, 4))
-        cam_intrinsic[:view.shape[0], :view.shape[1]] = \
-            mlvl_centers2d.new_tensor(view)
-        mlvl_bboxes[:, 6] = torch.atan2(
-            mlvl_centers2d[:, 0] - cam_intrinsic[0, 2],
-            cam_intrinsic[0, 0]) + mlvl_bboxes[:, 6]
+        cam2img = mlvl_centers2d.new_zeros((4, 4))
+        cam2img[:view.shape[0], :view.shape[1]] = \
+            mlvl_centers2d.new_tensor(view)
+        mlvl_bboxes = self.bbox_coder.decode_yaw(mlvl_bboxes, mlvl_centers2d,
+                                                 mlvl_dir_scores,
+                                                 self.dir_offset, cam2img)

         mlvl_bboxes_for_nms = xywhr2xyxyr(input_meta['box_type_3d'](
             mlvl_bboxes, box_dim=self.bbox_code_size,
             origin=(0.5, 0.5, 0.5)).bev)

@@ -695,14 +696,18 @@ class FCOSMono3DHead(AnchorFreeMono3DHead):
     def pts2Dto3D(points, view):
         """
         Args:
-            points (torch.Tensor): points in 2D images, [N, 3], \
+            points (torch.Tensor): points in 2D images, [N, 3],
                 3 corresponds with x, y in the image and depth.
-            view (np.ndarray): camera instrinsic, [3, 3]
+            view (np.ndarray): camera intrinsic, [3, 3]

         Returns:
-            torch.Tensor: points in 3D space. [N, 3], \
+            torch.Tensor: points in 3D space. [N, 3],
                 3 corresponds with x, y, z in 3D space.
         """
+        warning.warn('DeprecationWarning: This static method has been moved '
+                     'out of this class to mmdet3d/core. The function '
+                     'pts2Dto3D will be deprecated.')
         assert view.shape[0] <= 4
         assert view.shape[1] <= 4
         assert points.shape[1] == 3

@@ -715,7 +720,7 @@ class FCOSMono3DHead(AnchorFreeMono3DHead):
         viewpad[:view.shape[0], :view.shape[1]] = points2D.new_tensor(view)
         inv_viewpad = torch.inverse(viewpad).transpose(0, 1)

-        # Do operation in homogenous coordinates.
+        # Do operation in homogeneous coordinates.
         nbr_points = unnorm_points2D.shape[0]
         homo_points2D = torch.cat(
             [unnorm_points2D,

@@ -762,8 +767,8 @@ class FCOSMono3DHead(AnchorFreeMono3DHead):
         Returns:
             tuple:
-                concat_lvl_labels (list[Tensor]): Labels of each level. \
-                concat_lvl_bbox_targets (list[Tensor]): BBox targets of each \
+                concat_lvl_labels (list[Tensor]): Labels of each level.
+                concat_lvl_bbox_targets (list[Tensor]): BBox targets of each
                     level.
         """
         assert len(points) == len(self.regress_ranges)
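The inline conversion removed above is kept here as a sketch of what FCOS3DBBoxCoder.decode_yaw now encapsulates: a local (allocentric) yaw becomes a global (egocentric) yaw by adding the angle of the viewing ray through the projected box center:

    import torch

    def local_yaw_to_global(yaw_local, centers2d, cam2img):
        # cam2img is the 4x4 padded intrinsic matrix built above:
        # fx = cam2img[0, 0], cx = cam2img[0, 2]
        return yaw_local + torch.atan2(centers2d[:, 0] - cam2img[0, 2],
                                       cam2img[0, 0])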
mmdet3d/models/dense_heads/free_anchor3d_head.py

@@ -195,6 +195,7 @@ class FreeAnchor3DHead(Anchor3DHead):
                     matched_anchors,
                     matched_object_targets,
                     self.dir_offset,
+                    self.dir_limit_offset,
                     one_hot=False)
                 loss_dir = self.loss_dir(
                     dir_cls_preds_[matched].transpose(-2, -1),
mmdet3d/models/dense_heads/groupfree3d_head.py

# Copyright (c) OpenMMLab. All rights reserved.
import copy

import numpy as np
import torch
from mmcv import ConfigDict

@@ -25,13 +26,13 @@ class PointsObjClsModule(BaseModule):
     Args:
         in_channel (int): number of channels of seed point features.
-        num_convs (int): number of conv layers.
+        num_convs (int, optional): number of conv layers.
             Default: 3.
-        conv_cfg (dict): Config of convolution.
+        conv_cfg (dict, optional): Config of convolution.
             Default: dict(type='Conv1d').
-        norm_cfg (dict): Config of normalization.
+        norm_cfg (dict, optional): Config of normalization.
             Default: dict(type='BN1d').
-        act_cfg (dict): Config of activation.
+        act_cfg (dict, optional): Config of activation.
             Default: dict(type='ReLU').
     """

@@ -299,7 +300,7 @@ class GroupFree3DHead(BaseModule):
         """Forward pass.

         Note:
-            The forward of GroupFree3DHead is devided into 2 steps:
+            The forward of GroupFree3DHead is divided into 2 steps:

                 1. Initial object candidates sampling.
                 2. Iterative object box prediction by transformer decoder.

@@ -405,15 +406,15 @@ class GroupFree3DHead(BaseModule):
         Args:
             bbox_preds (dict): Predictions from forward of vote head.
             points (list[torch.Tensor]): Input points.
-            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \
+            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
                 bboxes of each sample.
             gt_labels_3d (list[torch.Tensor]): Labels of each sample.
-            pts_semantic_mask (None | list[torch.Tensor]): Point-wise
+            pts_semantic_mask (list[torch.Tensor]): Point-wise
                 semantic mask.
-            pts_instance_mask (None | list[torch.Tensor]): Point-wise
+            pts_instance_mask (list[torch.Tensor]): Point-wise
                 instance mask.
             img_metas (list[dict]): Contain pcd and img's meta info.
-            gt_bboxes_ignore (None | list[torch.Tensor]): Specify
+            gt_bboxes_ignore (list[torch.Tensor]): Specify
                 which bounding.
             ret_target (Bool): Return targets or not.

@@ -545,12 +546,12 @@ class GroupFree3DHead(BaseModule):
         Args:
             points (list[torch.Tensor]): Points of each batch.
-            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \
+            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
                 bboxes of each batch.
             gt_labels_3d (list[torch.Tensor]): Labels of each batch.
-            pts_semantic_mask (None | list[torch.Tensor]): Point-wise semantic
+            pts_semantic_mask (list[torch.Tensor]): Point-wise semantic
                 label of each batch.
-            pts_instance_mask (None | list[torch.Tensor]): Point-wise instance
+            pts_instance_mask (list[torch.Tensor]): Point-wise instance
                 label of each batch.
             bbox_preds (torch.Tensor): Bounding box predictions of vote head.
             max_gt_num (int): Max number of GTs for single batch.

@@ -657,12 +658,12 @@ class GroupFree3DHead(BaseModule):
         Args:
             points (torch.Tensor): Points of each batch.
-            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth \
+            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth
                 boxes of each batch.
             gt_labels_3d (torch.Tensor): Labels of each batch.
-            pts_semantic_mask (None | torch.Tensor): Point-wise semantic
+            pts_semantic_mask (torch.Tensor): Point-wise semantic
                 label of each batch.
-            pts_instance_mask (None | torch.Tensor): Point-wise instance
+            pts_instance_mask (torch.Tensor): Point-wise instance
                 label of each batch.
             max_gt_nums (int): Max number of GTs for single batch.
             seed_points (torch.Tensor): Coordinates of seed points.

@@ -710,7 +711,7 @@ class GroupFree3DHead(BaseModule):
         if self.bbox_coder.with_rot:
             vote_targets = points.new_zeros([num_points, 4 * self.gt_per_seed])
             vote_target_idx = points.new_zeros([num_points], dtype=torch.long)
-            box_indices_all = gt_bboxes_3d.points_in_boxes(points)
+            box_indices_all = gt_bboxes_3d.points_in_boxes_part(points)
             for i in range(gt_labels_3d.shape[0]):
                 box_indices = box_indices_all[:, i]
                 indices = torch.nonzero(

@@ -880,7 +881,7 @@ class GroupFree3DHead(BaseModule):
         Returns:
             list[tuple[torch.Tensor]]: Bounding boxes, scores and labels.
         """
-        # support multi-stage predicitons
+        # support multi-stage predictions
         assert self.test_cfg['prediction_stages'] in \
             ['last', 'all', 'last_three']

@@ -951,7 +952,7 @@ class GroupFree3DHead(BaseModule):
                 box_dim=bbox.shape[-1],
                 with_yaw=self.bbox_coder.with_rot,
                 origin=(0.5, 0.5, 0.5))
-            box_indices = bbox.points_in_boxes(points)
+            box_indices = bbox.points_in_boxes_all(points)

             corner3d = bbox.corners
             minmax_box3d = corner3d.new(torch.Size((corner3d.shape[0], 6)))
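As a usage sketch of the rename visible in the two hunks above (assumptions beyond this diff: the underlying ops may require CUDA tensors, and the old points_in_boxes_batch was renamed to points_in_boxes_all in the same release):

    import torch
    from mmdet3d.core.bbox import LiDARInstance3DBoxes

    boxes = LiDARInstance3DBoxes(
        torch.tensor([[0., 0., 0., 4., 2., 1.5, 0.]]).cuda())
    points = torch.zeros((8, 3)).cuda()
    part_ids = boxes.points_in_boxes_part(points)  # (npoints,) index of one containing box
    all_mask = boxes.points_in_boxes_all(points)   # (npoints, nboxes) membership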
mmdet3d/models/dense_heads/monoflex_head.py
0 → 100644
View file @
32a4328b
# Copyright (c) OpenMMLab. All rights reserved.
import
torch
from
mmcv.cnn
import
xavier_init
from
torch
import
nn
as
nn
from
mmdet3d.core.utils
import
get_ellip_gaussian_2D
from
mmdet3d.models.model_utils
import
EdgeFusionModule
from
mmdet3d.models.utils
import
(
filter_outside_objs
,
get_edge_indices
,
get_keypoints
,
handle_proj_objs
)
from
mmdet.core
import
multi_apply
from
mmdet.core.bbox.builder
import
build_bbox_coder
from
mmdet.models.builder
import
HEADS
,
build_loss
from
mmdet.models.utils
import
gaussian_radius
,
gen_gaussian_target
from
mmdet.models.utils.gaussian_target
import
(
get_local_maximum
,
get_topk_from_heatmap
,
transpose_and_gather_feat
)
from
.anchor_free_mono3d_head
import
AnchorFreeMono3DHead
@
HEADS
.
register_module
()
class
MonoFlexHead
(
AnchorFreeMono3DHead
):
r
"""MonoFlex head used in `MonoFlex <https://arxiv.org/abs/2104.02323>`_
.. code-block:: none
/ --> 3 x 3 conv --> 1 x 1 conv --> [edge fusion] --> cls
|
| --> 3 x 3 conv --> 1 x 1 conv --> 2d bbox
|
| --> 3 x 3 conv --> 1 x 1 conv --> [edge fusion] --> 2d offsets
|
| --> 3 x 3 conv --> 1 x 1 conv --> keypoints offsets
|
| --> 3 x 3 conv --> 1 x 1 conv --> keypoints uncertainty
feature
| --> 3 x 3 conv --> 1 x 1 conv --> keypoints uncertainty
|
| --> 3 x 3 conv --> 1 x 1 conv --> 3d dimensions
|
| |--- 1 x 1 conv --> ori cls
| --> 3 x 3 conv --|
| |--- 1 x 1 conv --> ori offsets
|
| --> 3 x 3 conv --> 1 x 1 conv --> depth
|
\ --> 3 x 3 conv --> 1 x 1 conv --> depth uncertainty
Args:
use_edge_fusion (bool): Whether to use edge fusion module while
feature extraction.
edge_fusion_inds (list[tuple]): Indices of feature to use edge fusion.
edge_heatmap_ratio (float): Ratio of generating target heatmap.
filter_outside_objs (bool, optional): Whether to filter the
outside objects. Default: True.
loss_cls (dict, optional): Config of classification loss.
Default: loss_cls=dict(type='GaussionFocalLoss', loss_weight=1.0).
loss_bbox (dict, optional): Config of localization loss.
Default: loss_bbox=dict(type='IOULoss', loss_weight=10.0).
loss_dir (dict, optional): Config of direction classification loss.
Default: dict(type='MultibinLoss', loss_weight=0.1).
loss_keypoints (dict, optional): Config of keypoints loss.
Default: dict(type='L1Loss', loss_weight=0.1).
loss_dims: (dict, optional): Config of dimensions loss.
Default: dict(type='L1Loss', loss_weight=0.1).
loss_offsets2d: (dict, optional): Config of offsets2d loss.
Default: dict(type='L1Loss', loss_weight=0.1).
loss_direct_depth: (dict, optional): Config of directly regression depth loss.
Default: dict(type='L1Loss', loss_weight=0.1).
loss_keypoints_depth: (dict, optional): Config of keypoints decoded depth loss.
Default: dict(type='L1Loss', loss_weight=0.1).
loss_combined_depth: (dict, optional): Config of combined depth loss.
Default: dict(type='L1Loss', loss_weight=0.1).
loss_attr (dict, optional): Config of attribute classification loss.
In MonoFlex, Default: None.
bbox_coder (dict, optional): Bbox coder for encoding and decoding boxes.
Default: dict(type='MonoFlexCoder', code_size=7).
norm_cfg (dict, optional): Dictionary to construct and config norm layer.
Default: norm_cfg=dict(type='GN', num_groups=32, requires_grad=True).
init_cfg (dict): Initialization config dict. Default: None.
"""
# noqa: E501
def
__init__
(
self
,
num_classes
,
in_channels
,
use_edge_fusion
,
edge_fusion_inds
,
edge_heatmap_ratio
,
filter_outside_objs
=
True
,
loss_cls
=
dict
(
type
=
'GaussianFocalLoss'
,
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'IoULoss'
,
loss_weight
=
0.1
),
loss_dir
=
dict
(
type
=
'MultiBinLoss'
,
loss_weight
=
0.1
),
loss_keypoints
=
dict
(
type
=
'L1Loss'
,
loss_weight
=
0.1
),
loss_dims
=
dict
(
type
=
'L1Loss'
,
loss_weight
=
0.1
),
loss_offsets2d
=
dict
(
type
=
'L1Loss'
,
loss_weight
=
0.1
),
loss_direct_depth
=
dict
(
type
=
'L1Loss'
,
loss_weight
=
0.1
),
loss_keypoints_depth
=
dict
(
type
=
'L1Loss'
,
loss_weight
=
0.1
),
loss_combined_depth
=
dict
(
type
=
'L1Loss'
,
loss_weight
=
0.1
),
loss_attr
=
None
,
bbox_coder
=
dict
(
type
=
'MonoFlexCoder'
,
code_size
=
7
),
norm_cfg
=
dict
(
type
=
'BN'
),
init_cfg
=
None
,
init_bias
=-
2.19
,
**
kwargs
):
self
.
use_edge_fusion
=
use_edge_fusion
self
.
edge_fusion_inds
=
edge_fusion_inds
super
().
__init__
(
num_classes
,
in_channels
,
loss_cls
=
loss_cls
,
loss_bbox
=
loss_bbox
,
loss_dir
=
loss_dir
,
loss_attr
=
loss_attr
,
norm_cfg
=
norm_cfg
,
init_cfg
=
init_cfg
,
**
kwargs
)
self
.
filter_outside_objs
=
filter_outside_objs
self
.
edge_heatmap_ratio
=
edge_heatmap_ratio
self
.
init_bias
=
init_bias
self
.
loss_dir
=
build_loss
(
loss_dir
)
self
.
loss_keypoints
=
build_loss
(
loss_keypoints
)
self
.
loss_dims
=
build_loss
(
loss_dims
)
self
.
loss_offsets2d
=
build_loss
(
loss_offsets2d
)
self
.
loss_direct_depth
=
build_loss
(
loss_direct_depth
)
self
.
loss_keypoints_depth
=
build_loss
(
loss_keypoints_depth
)
self
.
loss_combined_depth
=
build_loss
(
loss_combined_depth
)
self
.
bbox_coder
=
build_bbox_coder
(
bbox_coder
)
    def _init_edge_module(self):
        """Initialize edge fusion module for feature extraction."""
        self.edge_fuse_cls = EdgeFusionModule(self.num_classes, 256)
        for i in range(len(self.edge_fusion_inds)):
            reg_inds, out_inds = self.edge_fusion_inds[i]
            out_channels = self.group_reg_dims[reg_inds][out_inds]
            fusion_layer = EdgeFusionModule(out_channels, 256)
            layer_name = f'edge_fuse_reg_{reg_inds}_{out_inds}'
            self.add_module(layer_name, fusion_layer)
    def init_weights(self):
        """Initialize weights."""
        super().init_weights()
        self.conv_cls.bias.data.fill_(self.init_bias)
        xavier_init(self.conv_regs[4][0], gain=0.01)
        xavier_init(self.conv_regs[7][0], gain=0.01)
        for m in self.conv_regs.modules():
            if isinstance(m, nn.Conv2d):
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
    def _init_predictor(self):
        """Initialize predictor layers of the head."""
        self.conv_cls_prev = self._init_branch(
            conv_channels=self.cls_branch,
            conv_strides=(1, ) * len(self.cls_branch))
        self.conv_cls = nn.Conv2d(self.cls_branch[-1], self.cls_out_channels,
                                  1)
        # init regression head
        self.conv_reg_prevs = nn.ModuleList()
        # init output head
        self.conv_regs = nn.ModuleList()
        # group_reg_dims:
        # ((4, ), (2, ), (20, ), (3, ), (3, ), (8, 8), (1, ), (1, ))
        for i in range(len(self.group_reg_dims)):
            reg_dims = self.group_reg_dims[i]
            reg_branch_channels = self.reg_branch[i]
            out_channel = self.out_channels[i]
            reg_list = nn.ModuleList()
            if len(reg_branch_channels) > 0:
                self.conv_reg_prevs.append(
                    self._init_branch(
                        conv_channels=reg_branch_channels,
                        conv_strides=(1, ) * len(reg_branch_channels)))
                for reg_dim in reg_dims:
                    reg_list.append(nn.Conv2d(out_channel, reg_dim, 1))
                self.conv_regs.append(reg_list)
            else:
                self.conv_reg_prevs.append(None)
                for reg_dim in reg_dims:
                    reg_list.append(
                        nn.Conv2d(self.feat_channels, reg_dim, 1))
                self.conv_regs.append(reg_list)
    def _init_layers(self):
        """Initialize layers of the head."""
        self._init_predictor()
        if self.use_edge_fusion:
            self._init_edge_module()
    def forward_train(self, x, input_metas, gt_bboxes, gt_labels,
                      gt_bboxes_3d, gt_labels_3d, centers2d, depths,
                      attr_labels, gt_bboxes_ignore, proposal_cfg, **kwargs):
        """
        Args:
            x (list[Tensor]): Features from FPN.
            input_metas (list[dict]): Meta information of each image, e.g.,
                image size, scaling factor, etc.
            gt_bboxes (list[Tensor]): Ground truth bboxes of the image,
                shape (num_gts, 4).
            gt_labels (list[Tensor]): Ground truth labels of each box,
                shape (num_gts,).
            gt_bboxes_3d (list[Tensor]): 3D ground truth bboxes of the image,
                shape (num_gts, self.bbox_code_size).
            gt_labels_3d (list[Tensor]): 3D ground truth labels of each box,
                shape (num_gts,).
            centers2d (list[Tensor]): Projected 3D center of each box,
                shape (num_gts, 2).
            depths (list[Tensor]): Depth of projected 3D center of each box,
                shape (num_gts,).
            attr_labels (list[Tensor]): Attribute labels of each box,
                shape (num_gts,).
            gt_bboxes_ignore (list[Tensor]): Ground truth bboxes to be
                ignored, shape (num_ignored_gts, 4).
            proposal_cfg (mmcv.Config): Test / postprocessing configuration.
                If None, test_cfg would be used.

        Returns:
            tuple:
                losses (dict[str, Tensor]): A dictionary of loss components.
                proposal_list (list[Tensor]): Proposals of each image.
        """
        outs = self(x, input_metas)
        if gt_labels is None:
            loss_inputs = outs + (gt_bboxes, gt_bboxes_3d, centers2d, depths,
                                  attr_labels, input_metas)
        else:
            loss_inputs = outs + (gt_bboxes, gt_labels, gt_bboxes_3d,
                                  gt_labels_3d, centers2d, depths,
                                  attr_labels, input_metas)
        losses = self.loss(*loss_inputs, gt_bboxes_ignore=gt_bboxes_ignore)
        if proposal_cfg is None:
            return losses
        else:
            proposal_list = self.get_bboxes(
                *outs, input_metas, cfg=proposal_cfg)
            return losses, proposal_list
    def forward(self, feats, input_metas):
        """Forward features from the upstream network.

        Args:
            feats (list[Tensor]): Features from the upstream network, each is
                a 4D-tensor.
            input_metas (list[dict]): Meta information of each image, e.g.,
                image size, scaling factor, etc.

        Returns:
            tuple:
                cls_scores (list[Tensor]): Box scores for each scale level,
                    each is a 4D-tensor, the channel number is
                    num_points * num_classes.
                bbox_preds (list[Tensor]): Box energies / deltas for each
                    scale level, each is a 4D-tensor, the channel number is
                    num_points * bbox_code_size.
        """
        mlvl_input_metas = [input_metas for i in range(len(feats))]
        return multi_apply(self.forward_single, feats, mlvl_input_metas)
    def forward_single(self, x, input_metas):
        """Forward features of a single scale level.

        Args:
            x (Tensor): Feature maps from a specific FPN feature level.
            input_metas (list[dict]): Meta information of each image, e.g.,
                image size, scaling factor, etc.

        Returns:
            tuple: Scores for each class, bbox predictions.
        """
        img_h, img_w = input_metas[0]['pad_shape'][:2]
        batch_size, _, feat_h, feat_w = x.shape
        downsample_ratio = img_h / feat_h

        for conv_cls_prev_layer in self.conv_cls_prev:
            cls_feat = conv_cls_prev_layer(x)
        out_cls = self.conv_cls(cls_feat)

        if self.use_edge_fusion:
            # calculate the edge indices for the batch data
            edge_indices_list = get_edge_indices(
                input_metas, downsample_ratio, device=x.device)
            edge_lens = [
                edge_indices.shape[0] for edge_indices in edge_indices_list
            ]
            max_edge_len = max(edge_lens)
            edge_indices = x.new_zeros((batch_size, max_edge_len, 2),
                                       dtype=torch.long)
            for i in range(batch_size):
                edge_indices[i, :edge_lens[i]] = edge_indices_list[i]
            # cls feature map edge fusion
            out_cls = self.edge_fuse_cls(cls_feat, out_cls, edge_indices,
                                         edge_lens, feat_h, feat_w)

        bbox_pred = []
        for i in range(len(self.group_reg_dims)):
            reg_feat = x.clone()
            # feature regression head
            if len(self.reg_branch[i]) > 0:
                for conv_reg_prev_layer in self.conv_reg_prevs[i]:
                    reg_feat = conv_reg_prev_layer(reg_feat)
            for j, conv_reg in enumerate(self.conv_regs[i]):
                out_reg = conv_reg(reg_feat)
                # Use Edge Fusion Module
                if self.use_edge_fusion and (i, j) in self.edge_fusion_inds:
                    # reg feature map edge fusion
                    out_reg = getattr(self, 'edge_fuse_reg_{}_{}'.format(
                        i, j))(reg_feat, out_reg, edge_indices, edge_lens,
                               feat_h, feat_w)
                bbox_pred.append(out_reg)
        bbox_pred = torch.cat(bbox_pred, dim=1)
        cls_score = out_cls.sigmoid()  # turn to 0-1
        cls_score = cls_score.clamp(min=1e-4, max=1 - 1e-4)

        return cls_score, bbox_pred
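    # Editor's sketch of the edge-index padding used above (assumed shapes):
    # each image yields a variable number of boundary pixel coordinates, which
    # are right-padded with zeros into one (B, max_len, 2) tensor so the edge
    # fusion modules can run batched:
    #
    #     >>> import torch
    #     >>> idx_list = [torch.ones(3, 2).long(), torch.ones(5, 2).long()]
    #     >>> edge_lens = [t.shape[0] for t in idx_list]
    #     >>> padded = torch.zeros(2, max(edge_lens), 2, dtype=torch.long)
    #     >>> for i, t in enumerate(idx_list):
    #     ...     padded[i, :edge_lens[i]] = t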
    def get_bboxes(self, cls_scores, bbox_preds, input_metas):
        """Generate bboxes from bbox head predictions.

        Args:
            cls_scores (list[Tensor]): Box scores for each scale level.
            bbox_preds (list[Tensor]): Box regression for each scale.
            input_metas (list[dict]): Meta information of each image, e.g.,
                image size, scaling factor, etc.

        Returns:
            list[tuple[:obj:`CameraInstance3DBoxes`, Tensor, Tensor, None]]:
                Each item in result_list is a 4-tuple.
        """
        assert len(cls_scores) == len(bbox_preds) == 1
        cam2imgs = torch.stack([
            cls_scores[0].new_tensor(input_meta['cam2img'])
            for input_meta in input_metas
        ])
        batch_bboxes, batch_scores, batch_topk_labels = self.decode_heatmap(
            cls_scores[0],
            bbox_preds[0],
            input_metas,
            cam2imgs=cam2imgs,
            topk=100,
            kernel=3)

        result_list = []
        for img_id in range(len(input_metas)):
            bboxes = batch_bboxes[img_id]
            scores = batch_scores[img_id]
            labels = batch_topk_labels[img_id]

            keep_idx = scores > 0.25
            bboxes = bboxes[keep_idx]
            scores = scores[keep_idx]
            labels = labels[keep_idx]

            bboxes = input_metas[img_id]['box_type_3d'](
                bboxes, box_dim=self.bbox_code_size, origin=(0.5, 0.5, 0.5))
            attrs = None
            result_list.append((bboxes, scores, labels, attrs))

        return result_list
    def decode_heatmap(self,
                       cls_score,
                       reg_pred,
                       input_metas,
                       cam2imgs,
                       topk=100,
                       kernel=3):
        """Transform outputs into detections raw bbox predictions.

        Args:
            cls_score (Tensor): Center predict heatmap,
                shape (B, num_classes, H, W).
            reg_pred (Tensor): Box regression map,
                shape (B, channel, H, W).
            input_metas (list[dict]): Meta information of each image, e.g.,
                image size, scaling factor, etc.
            cam2imgs (Tensor): Camera intrinsic matrix,
                shape (N, 4, 4).
            topk (int, optional): Get top k center keypoints from heatmap.
                Default 100.
            kernel (int, optional): Max pooling kernel for extract local
                maximum pixels. Default 3.

        Returns:
            tuple[torch.Tensor]: Decoded output of MonoFlexHead, containing
                the following Tensors:
              - batch_bboxes (Tensor): Coords of each 3D box.
                    shape (B, k, 7)
              - batch_scores (Tensor): Scores of each 3D box.
                    shape (B, k)
              - batch_topk_labels (Tensor): Categories of each 3D box.
                    shape (B, k)
        """
        img_h, img_w = input_metas[0]['pad_shape'][:2]
        batch_size, _, feat_h, feat_w = cls_score.shape
        downsample_ratio = img_h / feat_h

        center_heatmap_pred = get_local_maximum(cls_score, kernel=kernel)

        *batch_dets, topk_ys, topk_xs = get_topk_from_heatmap(
            center_heatmap_pred, k=topk)
        batch_scores, batch_index, batch_topk_labels = batch_dets

        regression = transpose_and_gather_feat(reg_pred, batch_index)
        regression = regression.view(-1, 8)

        pred_base_centers2d = torch.cat(
            [topk_xs.view(-1, 1), topk_ys.view(-1, 1).float()], dim=1)
        preds = self.bbox_coder.decode(regression, batch_topk_labels,
                                       downsample_ratio, cam2imgs)
        pred_locations = self.bbox_coder.decode_location(
            pred_base_centers2d, preds['offsets2d'], preds['combined_depth'],
            cam2imgs, downsample_ratio)
        pred_yaws = self.bbox_coder.decode_orientation(
            preds['orientations']).unsqueeze(-1)
        pred_dims = preds['dimensions']
        batch_bboxes = torch.cat((pred_locations, pred_dims, pred_yaws),
                                 dim=1)
        batch_bboxes = batch_bboxes.view(batch_size, -1, self.bbox_code_size)
        return batch_bboxes, batch_scores, batch_topk_labels
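    # Editor's note on the starred unpacking above: ``get_topk_from_heatmap``
    # returns (scores, indices, classes, ys, xs), so ``*batch_dets, topk_ys,
    # topk_xs = ...`` gathers the first three into ``batch_dets``. The same
    # pattern in plain Python:
    #
    #     >>> *head, y, x = (1, 2, 3, 4, 5)
    #     >>> head, y, x
    #     ([1, 2, 3], 4, 5)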
    def get_predictions(self, pred_reg, labels3d, centers2d, reg_mask,
                        batch_indices, input_metas, downsample_ratio):
        """Prepare predictions for computing loss.

        Args:
            pred_reg (Tensor): Box regression map,
                shape (B, channel, H, W).
            labels3d (Tensor): Labels of each 3D box.
                shape (B * max_objs, )
            centers2d (Tensor): Coords of each projected 3D box
                center on image. shape (N, 2)
            reg_mask (Tensor): Indexes of the existence of the 3D box.
                shape (B * max_objs, )
            batch_indices (Tensor): Batch indices of the 3D box.
                shape (N, 3)
            input_metas (list[dict]): Meta information of each image,
                e.g., image size, scaling factor, etc.
            downsample_ratio (int): The stride of feature map.

        Returns:
            dict: The predictions for computing loss.
        """
        batch, channel = pred_reg.shape[0], pred_reg.shape[1]
        w = pred_reg.shape[3]
        cam2imgs = torch.stack([
            centers2d.new_tensor(input_meta['cam2img'])
            for input_meta in input_metas
        ])
        # (batch_size, 4, 4) -> (N, 4, 4)
        cam2imgs = cam2imgs[batch_indices, :, :]
        centers2d_inds = centers2d[:, 1] * w + centers2d[:, 0]
        centers2d_inds = centers2d_inds.view(batch, -1)
        pred_regression = transpose_and_gather_feat(pred_reg, centers2d_inds)
        pred_regression_pois = pred_regression.view(-1, channel)[reg_mask]
        preds = self.bbox_coder.decode(pred_regression_pois, labels3d,
                                       downsample_ratio, cam2imgs)

        return preds
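    # Editor's sketch of the index math above: an integer center (x, y) on a
    # feature map of width w maps to the flattened spatial index y * w + x,
    # which is what ``transpose_and_gather_feat`` expects (hypothetical
    # numbers):
    #
    #     >>> w = 96
    #     >>> x, y = 10, 4
    #     >>> y * w + x
    #     394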
    def get_targets(self, gt_bboxes_list, gt_labels_list, gt_bboxes_3d_list,
                    gt_labels_3d_list, centers2d_list, depths_list,
                    feat_shape, img_shape, input_metas):
        """Get training targets for batch images.

        Args:
            gt_bboxes_list (list[Tensor]): Ground truth bboxes of each
                image, shape (num_gt, 4).
            gt_labels_list (list[Tensor]): Ground truth labels of each
                box, shape (num_gt,).
            gt_bboxes_3d_list (list[:obj:`CameraInstance3DBoxes`]): 3D
                Ground truth bboxes of each image,
                shape (num_gt, bbox_code_size).
            gt_labels_3d_list (list[Tensor]): 3D Ground truth labels of
                each box, shape (num_gt,).
            centers2d_list (list[Tensor]): Projected 3D centers onto 2D
                image, shape (num_gt, 2).
            depths_list (list[Tensor]): Depth of projected 3D centers onto 2D
                image, each has shape (num_gt, 1).
            feat_shape (tuple[int]): Feature map shape with value,
                shape (B, _, H, W).
            img_shape (tuple[int]): Image shape in [h, w] format.
            input_metas (list[dict]): Meta information of each image, e.g.,
                image size, scaling factor, etc.

        Returns:
            tuple[Tensor, dict]: The Tensor value is the targets of
                center heatmap, the dict has components below:
              - base_centers2d_target (Tensor): Coords of each projected
                    3D box center on image. shape (B * max_objs, 2),
                    [dtype: int]
              - labels3d (Tensor): Labels of each 3D box.
                    shape (N, )
              - reg_mask (Tensor): Mask of the existence of the 3D box.
                    shape (B * max_objs, )
              - batch_indices (Tensor): Batch id of the 3D box.
                    shape (N, )
              - depth_target (Tensor): Depth target of each 3D box.
                    shape (N, )
              - keypoints2d_target (Tensor): Keypoints of each projected
                    3D box on image. shape (N, 10, 2)
              - keypoints_mask (Tensor): Keypoints mask of each projected 3D
                    box on image. shape (N, 10)
              - keypoints_depth_mask (Tensor): Mask of the depths decoded
                    from keypoints of each 3D box. shape (N, 3)
              - orientations_target (Tensor): Orientation (encoded local yaw)
                    target of each 3D box. shape (N, )
              - offsets2d_target (Tensor): Offsets target of each projected
                    3D box. shape (N, 2)
              - dimensions_target (Tensor): Dimensions target of each 3D box.
                    shape (N, 3)
              - downsample_ratio (int): The stride of feature map.
        """

        img_h, img_w = img_shape[:2]
        batch_size, _, feat_h, feat_w = feat_shape

        width_ratio = float(feat_w / img_w)  # 1/4
        height_ratio = float(feat_h / img_h)  # 1/4

        assert width_ratio == height_ratio

        # Whether to filter the objects which are not in FOV.
        if self.filter_outside_objs:
            filter_outside_objs(gt_bboxes_list, gt_labels_list,
                                gt_bboxes_3d_list, gt_labels_3d_list,
                                centers2d_list, input_metas)

        # transform centers2d to base centers2d for regression and
        # heatmap generation.
        # centers2d = int(base_centers2d) + offsets2d
        base_centers2d_list, offsets2d_list, trunc_mask_list = \
            handle_proj_objs(centers2d_list, gt_bboxes_list, input_metas)

        keypoints2d_list, keypoints_mask_list, keypoints_depth_mask_list = \
            get_keypoints(gt_bboxes_3d_list, centers2d_list, input_metas)

        center_heatmap_target = gt_bboxes_list[-1].new_zeros(
            [batch_size, self.num_classes, feat_h, feat_w])

        for batch_id in range(batch_size):
            # project gt_bboxes from input image to feat map
            gt_bboxes = gt_bboxes_list[batch_id] * width_ratio
            gt_labels = gt_labels_list[batch_id]

            # project base centers2d from input image to feat map
            gt_base_centers2d = base_centers2d_list[batch_id] * width_ratio
            trunc_masks = trunc_mask_list[batch_id]

            for j, base_center2d in enumerate(gt_base_centers2d):
                if trunc_masks[j]:
                    # for outside objects, generate ellipse heatmap
                    base_center2d_x_int, base_center2d_y_int = \
                        base_center2d.int()
                    scale_box_w = min(base_center2d_x_int - gt_bboxes[j][0],
                                      gt_bboxes[j][2] - base_center2d_x_int)
                    scale_box_h = min(base_center2d_y_int - gt_bboxes[j][1],
                                      gt_bboxes[j][3] - base_center2d_y_int)
                    radius_x = scale_box_w * self.edge_heatmap_ratio
                    radius_y = scale_box_h * self.edge_heatmap_ratio
                    radius_x, radius_y = max(0, int(radius_x)), max(
                        0, int(radius_y))
                    assert min(radius_x, radius_y) == 0
                    ind = gt_labels[j]
                    get_ellip_gaussian_2D(
                        center_heatmap_target[batch_id, ind],
                        [base_center2d_x_int, base_center2d_y_int], radius_x,
                        radius_y)
                else:
                    base_center2d_x_int, base_center2d_y_int = \
                        base_center2d.int()
                    scale_box_h = (gt_bboxes[j][3] - gt_bboxes[j][1])
                    scale_box_w = (gt_bboxes[j][2] - gt_bboxes[j][0])
                    radius = gaussian_radius([scale_box_h, scale_box_w],
                                             min_overlap=0.7)
                    radius = max(0, int(radius))
                    ind = gt_labels[j]
                    gen_gaussian_target(
                        center_heatmap_target[batch_id, ind],
                        [base_center2d_x_int, base_center2d_y_int], radius)

        avg_factor = max(1, center_heatmap_target.eq(1).sum())
        num_ctrs = [centers2d.shape[0] for centers2d in centers2d_list]
        max_objs = max(num_ctrs)
        batch_indices = [
            centers2d_list[0].new_full((num_ctrs[i], ), i)
            for i in range(batch_size)
        ]
        batch_indices = torch.cat(batch_indices, dim=0)
        reg_mask = torch.zeros(
            (batch_size, max_objs),
            dtype=torch.bool).to(base_centers2d_list[0].device)
        gt_bboxes_3d = input_metas[0]['box_type_3d'].cat(gt_bboxes_3d_list)
        gt_bboxes_3d = gt_bboxes_3d.to(base_centers2d_list[0].device)

        # encode original local yaw to multibin format
        orientations_target = self.bbox_coder.encode(gt_bboxes_3d)

        batch_base_centers2d = base_centers2d_list[0].new_zeros(
            (batch_size, max_objs, 2))

        for i in range(batch_size):
            reg_mask[i, :num_ctrs[i]] = 1
            batch_base_centers2d[i, :num_ctrs[i]] = base_centers2d_list[i]

        flatten_reg_mask = reg_mask.flatten()

        # transform base centers2d from input scale to output scale
        batch_base_centers2d = batch_base_centers2d.view(-1, 2) * width_ratio

        dimensions_target = gt_bboxes_3d.tensor[:, 3:6]
        labels_3d = torch.cat(gt_labels_3d_list)
        keypoints2d_target = torch.cat(keypoints2d_list)
        keypoints_mask = torch.cat(keypoints_mask_list)
        keypoints_depth_mask = torch.cat(keypoints_depth_mask_list)
        offsets2d_target = torch.cat(offsets2d_list)
        bboxes2d = torch.cat(gt_bboxes_list)

        # transform FCOS style bbox into [x1, y1, x2, y2] format.
        bboxes2d_target = torch.cat([bboxes2d[:, 0:2] * -1, bboxes2d[:, 2:]],
                                    dim=-1)
        depths = torch.cat(depths_list)

        target_labels = dict(
            base_centers2d_target=batch_base_centers2d.int(),
            labels3d=labels_3d,
            reg_mask=flatten_reg_mask,
            batch_indices=batch_indices,
            bboxes2d_target=bboxes2d_target,
            depth_target=depths,
            keypoints2d_target=keypoints2d_target,
            keypoints_mask=keypoints_mask,
            keypoints_depth_mask=keypoints_depth_mask,
            orientations_target=orientations_target,
            offsets2d_target=offsets2d_target,
            dimensions_target=dimensions_target,
            downsample_ratio=1 / width_ratio)

        return center_heatmap_target, avg_factor, target_labels
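    # Editor's sketch of the heatmap target rule above: truncated objects get
    # an elliptical Gaussian whose radii scale with the distance from the
    # clamped base center to the 2D box edges (one radius is always 0 since
    # the center sits on a box edge), while untruncated objects use the usual
    # circular ``gaussian_radius``. Hypothetical numbers:
    #
    #     >>> radius_x = max(0, int(12 * 0.5))   # scale_box_w = 12
    #     >>> radius_y = max(0, int(0 * 0.5))    # center on the box edge
    #     >>> (radius_x, radius_y)
    #     (6, 0)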
    def loss(self,
             cls_scores,
             bbox_preds,
             gt_bboxes,
             gt_labels,
             gt_bboxes_3d,
             gt_labels_3d,
             centers2d,
             depths,
             attr_labels,
             input_metas,
             gt_bboxes_ignore=None):
        """Compute loss of the head.

        Args:
            cls_scores (list[Tensor]): Box scores for each scale level.
                shape (B, num_classes, H, W).
            bbox_preds (list[Tensor]): Box dims is a 4D-tensor, the channel
                number is bbox_code_size.
                shape (B, channel, H, W).
            gt_bboxes (list[Tensor]): Ground truth bboxes for each image.
                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.
            gt_labels (list[Tensor]): Class indices corresponding to each box.
                shape (num_gts, ).
            gt_bboxes_3d (list[:obj:`CameraInstance3DBoxes`]): 3D boxes
                ground truth. It is the flipped gt_bboxes.
            gt_labels_3d (list[Tensor]): Same as gt_labels.
            centers2d (list[Tensor]): 2D centers on the image.
                shape (num_gts, 2).
            depths (list[Tensor]): Depth ground truth.
                shape (num_gts, ).
            attr_labels (list[Tensor]): Attributes indices of each box.
                In KITTI it's None.
            input_metas (list[dict]): Meta information of each image, e.g.,
                image size, scaling factor, etc.
            gt_bboxes_ignore (None | list[Tensor]): Specify which bounding
                boxes can be ignored when computing the loss.
                Default: None.

        Returns:
            dict[str, Tensor]: A dictionary of loss components.
        """
        assert len(cls_scores) == len(bbox_preds) == 1
        assert attr_labels is None
        assert gt_bboxes_ignore is None
        center2d_heatmap = cls_scores[0]
        pred_reg = bbox_preds[0]

        center2d_heatmap_target, avg_factor, target_labels = \
            self.get_targets(gt_bboxes, gt_labels, gt_bboxes_3d,
                             gt_labels_3d, centers2d, depths,
                             center2d_heatmap.shape,
                             input_metas[0]['pad_shape'],
                             input_metas)

        preds = self.get_predictions(
            pred_reg=pred_reg,
            labels3d=target_labels['labels3d'],
            centers2d=target_labels['base_centers2d_target'],
            reg_mask=target_labels['reg_mask'],
            batch_indices=target_labels['batch_indices'],
            input_metas=input_metas,
            downsample_ratio=target_labels['downsample_ratio'])

        # heatmap loss
        loss_cls = self.loss_cls(
            center2d_heatmap, center2d_heatmap_target, avg_factor=avg_factor)

        # bbox2d regression loss
        loss_bbox = self.loss_bbox(preds['bboxes2d'],
                                   target_labels['bboxes2d_target'])

        # keypoints loss, the keypoints in predictions and target are all
        # local coordinates. Check the mask dtype should be bool, not int
        # or float to ensure the indexing is bool index
        keypoints2d_mask = target_labels['keypoints_mask']
        loss_keypoints = self.loss_keypoints(
            preds['keypoints2d'][keypoints2d_mask],
            target_labels['keypoints2d_target'][keypoints2d_mask])

        # orientations loss
        loss_dir = self.loss_dir(preds['orientations'],
                                 target_labels['orientations_target'])

        # dimensions loss
        loss_dims = self.loss_dims(preds['dimensions'],
                                   target_labels['dimensions_target'])

        # offsets for center heatmap
        loss_offsets2d = self.loss_offsets2d(
            preds['offsets2d'], target_labels['offsets2d_target'])

        # directly regressed depth loss with direct depth uncertainty loss
        direct_depth_weights = torch.exp(-preds['direct_depth_uncertainty'])
        loss_weight_1 = self.loss_direct_depth.loss_weight
        loss_direct_depth = self.loss_direct_depth(
            preds['direct_depth'], target_labels['depth_target'],
            direct_depth_weights)
        loss_uncertainty_1 = \
            preds['direct_depth_uncertainty'] * loss_weight_1
        loss_direct_depth = loss_direct_depth + loss_uncertainty_1.mean()

        # keypoints decoded depth loss with keypoints depth uncertainty loss
        depth_mask = target_labels['keypoints_depth_mask']
        depth_target = target_labels['depth_target'].unsqueeze(-1).repeat(
            1, 3)
        valid_keypoints_depth_uncertainty = preds[
            'keypoints_depth_uncertainty'][depth_mask]
        valid_keypoints_depth_weights = torch.exp(
            -valid_keypoints_depth_uncertainty)
        loss_keypoints_depth = self.loss_keypoints_depth(
            preds['keypoints_depth'][depth_mask], depth_target[depth_mask],
            valid_keypoints_depth_weights)
        loss_weight_2 = self.loss_keypoints_depth.loss_weight
        loss_uncertainty_2 = \
            valid_keypoints_depth_uncertainty * loss_weight_2
        loss_keypoints_depth = loss_keypoints_depth + \
            loss_uncertainty_2.mean()

        # combined depth loss for optimizing the uncertainty
        loss_combined_depth = self.loss_combined_depth(
            preds['combined_depth'], target_labels['depth_target'])

        loss_dict = dict(
            loss_cls=loss_cls,
            loss_bbox=loss_bbox,
            loss_keypoints=loss_keypoints,
            loss_dir=loss_dir,
            loss_dims=loss_dims,
            loss_offsets2d=loss_offsets2d,
            loss_direct_depth=loss_direct_depth,
            loss_keypoints_depth=loss_keypoints_depth,
            loss_combined_depth=loss_combined_depth)

        return loss_dict
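    # Editor's sketch of the uncertainty-weighted depth terms above, assuming
    # mmcv L1Loss semantics (elementwise weight, then loss_weight scaling):
    # the residual is damped by exp(-u) and u itself is penalized so the
    # network cannot silence the loss by predicting huge uncertainty.
    #
    #     >>> import torch
    #     >>> pred, target = torch.tensor([21.0]), torch.tensor([20.0])
    #     >>> u = torch.tensor([0.5])   # predicted log-uncertainty
    #     >>> w = 0.1                   # loss_weight
    #     >>> l1 = w * (torch.exp(-u) * (pred - target).abs()).mean()
    #     >>> total = l1 + (w * u).mean()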
mmdet3d/models/dense_heads/parta2_rpn_head.py
View file @
32a4328b
...
@@ -60,15 +60,15 @@ class PartA2RPNHead(Anchor3DHead):
                 type='Anchor3DRangeGenerator',
                 range=[0, -39.68, -1.78, 69.12, 39.68, -1.78],
                 strides=[2],
-                sizes=[[1.6, 3.9, 1.56]],
+                sizes=[[3.9, 1.6, 1.56]],
                 rotations=[0, 1.57],
                 custom_values=[],
                 reshape_out=False),
             assigner_per_size=False,
             assign_per_class=False,
             diff_rad_by_sin=True,
-            dir_offset=0,
-            dir_limit_offset=1,
+            dir_offset=-np.pi / 2,
+            dir_limit_offset=0,
             bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
             loss_cls=dict(
                 type='CrossEntropyLoss',
...
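# Editor's note on the hunk above (my reading of the v1.0.0rc0 coordinate
# refactor, not text taken from this diff): anchor sizes change from
# (w, l, h) to (l, w, h), i.e. (x_size, y_size, z_size) in the refactored
# LiDAR coordinates, and the direction classifier origin moves to
# dir_offset=-np.pi / 2 with dir_limit_offset=0:
#
#     >>> old_size = [1.6, 3.9, 1.56]
#     >>> [old_size[1], old_size[0], old_size[2]]
#     [3.9, 1.6, 1.56]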
@@ -100,20 +100,20 @@ class PartA2RPNHead(Anchor3DHead):
             bbox_preds (list[torch.Tensor]): Multi-level bbox predictions.
             dir_cls_preds (list[torch.Tensor]): Multi-level direction
                 class predictions.
-            gt_bboxes (list[:obj:`BaseInstance3DBoxes`]): Ground truth boxes \
+            gt_bboxes (list[:obj:`BaseInstance3DBoxes`]): Ground truth boxes
                 of each sample.
             gt_labels (list[torch.Tensor]): Labels of each sample.
             input_metas (list[dict]): Point cloud and image's meta info.
-            gt_bboxes_ignore (None | list[torch.Tensor]): Specify
+            gt_bboxes_ignore (list[torch.Tensor]): Specify
                 which bounding.

         Returns:
-            dict[str, list[torch.Tensor]]: Classification, bbox, and \
+            dict[str, list[torch.Tensor]]: Classification, bbox, and
                 direction losses of each level.

             - loss_rpn_cls (list[torch.Tensor]): Classification losses.
             - loss_rpn_bbox (list[torch.Tensor]): Box regression losses.
-            - loss_rpn_dir (list[torch.Tensor]): Direction classification \
+            - loss_rpn_dir (list[torch.Tensor]): Direction classification
                 losses.
         """
         loss_dict = super().loss(cls_scores, bbox_preds, dir_cls_preds,
...
@@ -143,7 +143,7 @@ class PartA2RPNHead(Anchor3DHead):
             mlvl_anchors (List[torch.Tensor]): Multi-level anchors
                 in single batch.
             input_meta (list[dict]): Contain pcd and img's meta info.
-            cfg (None | :obj:`ConfigDict`): Training or testing config.
+            cfg (:obj:`ConfigDict`): Training or testing config.
             rescale (list[torch.Tensor]): whether to rescale bbox.

         Returns:
...
@@ -207,7 +207,7 @@ class PartA2RPNHead(Anchor3DHead):
         mlvl_dir_scores = torch.cat(mlvl_dir_scores)

         # shape [k, num_class] before sigmoid
         # PartA2 needs to keep raw classification score
-        # becase the bbox head in the second stage does not have
+        # because the bbox head in the second stage does not have
         # classification branch,
         # roi head needs this score as classification score
         mlvl_cls_score = torch.cat(mlvl_cls_score)
...
@@ -240,7 +240,7 @@ class PartA2RPNHead(Anchor3DHead):
                 Multi-level bbox.
             score_thr (int): Score threshold.
             max_num (int): Max number of bboxes after nms.
-            cfg (None | :obj:`ConfigDict`): Training or testing config.
+            cfg (:obj:`ConfigDict`): Training or testing config.
             input_meta (dict): Contain pcd and img's meta info.

         Returns:
...