OpenDAS / mmdetection3d / Commit 32a4328b
"examples/vscode:/vscode.git/clone" did not exist on "e5914eb320d2c30bfc84a4da926ad8298e7078fd"
Unverified commit 32a4328b, authored Feb 24, 2022 by Wenwei Zhang; committed by GitHub on Feb 24, 2022.
Parents: 86cc487c, a8817998

Bump version to V1.0.0rc0
Changes: 414
Showing 20 changed files with 2412 additions and 129 deletions.
mmdet3d/models/dense_heads/pgd_head.py            +1229  -0
mmdet3d/models/dense_heads/point_rpn_head.py      +377   -0
mmdet3d/models/dense_heads/shape_aware_head.py    +27    -24
mmdet3d/models/dense_heads/smoke_mono3d_head.py   +516   -0
mmdet3d/models/dense_heads/ssd_3d_head.py         +23    -44
mmdet3d/models/dense_heads/train_mixins.py        +5     -3
mmdet3d/models/dense_heads/vote_head.py           +13    -13
mmdet3d/models/detectors/__init__.py              +3     -1
mmdet3d/models/detectors/base.py                  +3     -2
mmdet3d/models/detectors/centerpoint.py           +6     -5
mmdet3d/models/detectors/groupfree3dnet.py        +3     -3
mmdet3d/models/detectors/h3dnet.py                +3     -3
mmdet3d/models/detectors/imvotenet.py             +12    -11
mmdet3d/models/detectors/mvx_two_stage.py         +11    -11
mmdet3d/models/detectors/point_rcnn.py            +148   -0
mmdet3d/models/detectors/single_stage_mono3d.py   +5     -3
mmdet3d/models/detectors/smoke_mono3d.py          +21    -0
mmdet3d/models/detectors/votenet.py               +3     -3
mmdet3d/models/fusion_layers/coord_transform.py   +2     -1
mmdet3d/models/fusion_layers/point_fusion.py      +2     -2
mmdet3d/models/dense_heads/pgd_head.py (new file, mode 100644)
# Copyright (c) OpenMMLab. All rights reserved.
import numpy as np
import torch
from mmcv.cnn import Scale, bias_init_with_prob, normal_init
from mmcv.runner import force_fp32
from torch import nn as nn
from torch.nn import functional as F

from mmdet3d.core import box3d_multiclass_nms, xywhr2xyxyr
from mmdet3d.core.bbox import points_cam2img, points_img2cam
from mmdet.core import distance2bbox, multi_apply
from mmdet.models.builder import HEADS, build_loss
from .fcos_mono3d_head import FCOSMono3DHead


@HEADS.register_module()
class PGDHead(FCOSMono3DHead):
    r"""Anchor-free head used in `PGD <https://arxiv.org/abs/2107.14160>`_.

    Args:
        use_depth_classifier (bool, optional): Whether to use depth classifier.
            Defaults to True.
        use_onlyreg_proj (bool, optional): Whether to use only direct
            regressed depth in the re-projection (to make the network easier
            to learn). Defaults to False.
        weight_dim (int, optional): Dimension of the location-aware weight
            map. Defaults to -1.
        weight_branch (tuple[tuple[int]], optional): Feature map channels of
            the convolutional branch for weight map. Defaults to ((256, ), ).
        depth_branch (tuple[int], optional): Feature map channels of the
            branch for probabilistic depth estimation. Defaults to (64, ).
        depth_range (tuple[float], optional): Range of depth estimation.
            Defaults to (0, 70).
        depth_unit (int, optional): Unit of depth range division. Defaults to
            10.
        division (str, optional): Depth division method. Options include
            'uniform', 'linear', 'log', 'loguniform'. Defaults to 'uniform'.
        depth_bins (int, optional): Discrete bins of depth division. Defaults
            to 8.
        loss_depth (dict, optional): Depth loss. Defaults to dict(
            type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0).
        loss_bbox2d (dict, optional): Loss for 2D box estimation. Defaults to
            dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0).
        loss_consistency (dict, optional): Consistency loss. Defaults to
            dict(type='GIoULoss', loss_weight=1.0).
        pred_velo (bool, optional): Whether to predict velocity. Defaults to
            False.
        pred_bbox2d (bool, optional): Whether to predict 2D bounding boxes.
            Defaults to True.
        pred_keypoints (bool, optional): Whether to predict keypoints.
            Defaults to False.
        bbox_coder (dict, optional): Bounding box coder. Defaults to
            dict(type='PGDBBoxCoder', base_depths=((28.01, 16.32), ),
            base_dims=((0.8, 1.73, 0.6), (1.76, 1.73, 0.6),
            (3.9, 1.56, 1.6)), code_size=7).
    """

    def __init__(self,
                 use_depth_classifier=True,
                 use_onlyreg_proj=False,
                 weight_dim=-1,
                 weight_branch=((256, ), ),
                 depth_branch=(64, ),
                 depth_range=(0, 70),
                 depth_unit=10,
                 division='uniform',
                 depth_bins=8,
                 loss_depth=dict(
                     type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
                 loss_bbox2d=dict(
                     type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
                 loss_consistency=dict(type='GIoULoss', loss_weight=1.0),
                 pred_bbox2d=True,
                 pred_keypoints=False,
                 bbox_coder=dict(
                     type='PGDBBoxCoder',
                     base_depths=((28.01, 16.32), ),
                     base_dims=((0.8, 1.73, 0.6), (1.76, 1.73, 0.6),
                                (3.9, 1.56, 1.6)),
                     code_size=7),
                 **kwargs):
        self.use_depth_classifier = use_depth_classifier
        self.use_onlyreg_proj = use_onlyreg_proj
        self.depth_branch = depth_branch
        self.pred_keypoints = pred_keypoints
        self.weight_dim = weight_dim
        self.weight_branch = weight_branch
        self.weight_out_channels = []
        for weight_branch_channels in weight_branch:
            if len(weight_branch_channels) > 0:
                self.weight_out_channels.append(weight_branch_channels[-1])
            else:
                self.weight_out_channels.append(-1)
        self.depth_range = depth_range
        self.depth_unit = depth_unit
        self.division = division
        if self.division == 'uniform':
            self.num_depth_cls = int(
                (depth_range[1] - depth_range[0]) / depth_unit) + 1
            if self.num_depth_cls != depth_bins:
                print('Warning: The number of bins computed from ' +
                      'depth_unit is different from given parameter! ' +
                      'Depth_unit will be considered with priority in ' +
                      'Uniform Division.')
        else:
            self.num_depth_cls = depth_bins
        super().__init__(
            pred_bbox2d=pred_bbox2d, bbox_coder=bbox_coder, **kwargs)
        self.loss_depth = build_loss(loss_depth)
        if self.pred_bbox2d:
            self.loss_bbox2d = build_loss(loss_bbox2d)
            self.loss_consistency = build_loss(loss_consistency)
        if self.pred_keypoints:
            self.kpts_start = 9 if self.pred_velo else 7
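    # For example, with the default depth_range=(0, 70) and depth_unit=10,
    # uniform division yields num_depth_cls = int((70 - 0) / 10) + 1 = 8,
    # which matches the default depth_bins=8, so the warning above is not
    # triggered.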
    def _init_layers(self):
        """Initialize layers of the head."""
        super()._init_layers()
        if self.pred_bbox2d:
            self.scale_dim += 1
        if self.pred_keypoints:
            self.scale_dim += 1
        self.scales = nn.ModuleList([
            nn.ModuleList([Scale(1.0) for _ in range(self.scale_dim)])
            for _ in self.strides
        ])

    def _init_predictor(self):
        """Initialize predictor layers of the head."""
        super()._init_predictor()

        if self.use_depth_classifier:
            self.conv_depth_cls_prev = self._init_branch(
                conv_channels=self.depth_branch,
                conv_strides=(1, ) * len(self.depth_branch))
            self.conv_depth_cls = nn.Conv2d(self.depth_branch[-1],
                                            self.num_depth_cls, 1)
            # Data-agnostic single param lambda for local depth fusion
            self.fuse_lambda = nn.Parameter(torch.tensor(10e-5))

        if self.weight_dim != -1:
            self.conv_weight_prevs = nn.ModuleList()
            self.conv_weights = nn.ModuleList()
            for i in range(self.weight_dim):
                weight_branch_channels = self.weight_branch[i]
                weight_out_channel = self.weight_out_channels[i]
                if len(weight_branch_channels) > 0:
                    self.conv_weight_prevs.append(
                        self._init_branch(
                            conv_channels=weight_branch_channels,
                            conv_strides=(1, ) * len(weight_branch_channels)))
                    self.conv_weights.append(
                        nn.Conv2d(weight_out_channel, 1, 1))
                else:
                    self.conv_weight_prevs.append(None)
                    self.conv_weights.append(
                        nn.Conv2d(self.feat_channels, 1, 1))
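    # Note: fuse_lambda is initialized to 10e-5, so the fusion weight
    # sigmoid(fuse_lambda) starts at roughly 0.5, i.e. the direct depth
    # regression and the probabilistic depth estimate contribute about
    # equally at the beginning of training.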
    def init_weights(self):
        """Initialize weights of the head.

        We currently still use the customized init_weights defined here
        because the default init of DCN triggered by the init_cfg would init
        conv_offset.weight, which mistakenly affects the training stability.
        """
        super().init_weights()

        bias_cls = bias_init_with_prob(0.01)
        if self.use_depth_classifier:
            for m in self.conv_depth_cls_prev:
                if isinstance(m.conv, nn.Conv2d):
                    normal_init(m.conv, std=0.01)
            normal_init(self.conv_depth_cls, std=0.01, bias=bias_cls)

        if self.weight_dim != -1:
            for conv_weight_prev in self.conv_weight_prevs:
                if conv_weight_prev is None:
                    continue
                for m in conv_weight_prev:
                    if isinstance(m.conv, nn.Conv2d):
                        normal_init(m.conv, std=0.01)
            for conv_weight in self.conv_weights:
                normal_init(conv_weight, std=0.01)

    def forward(self, feats):
        """Forward features from the upstream network.

        Args:
            feats (tuple[Tensor]): Features from the upstream network, each is
                a 4D-tensor.

        Returns:
            tuple:
                cls_scores (list[Tensor]): Box scores for each scale level,
                    each is a 4D-tensor, the channel number is
                    num_points * num_classes.
                bbox_preds (list[Tensor]): Box energies / deltas for each
                    scale level, each is a 4D-tensor, the channel number is
                    num_points * bbox_code_size.
                dir_cls_preds (list[Tensor]): Box scores for direction class
                    predictions on each scale level, each is a 4D-tensor,
                    the channel number is num_points * 2. (bin = 2).
                weight (list[Tensor]): Location-aware weight maps on each
                    scale level, each is a 4D-tensor, the channel number is
                    num_points * 1.
                depth_cls_preds (list[Tensor]): Box scores for depth class
                    predictions on each scale level, each is a 4D-tensor,
                    the channel number is num_points * self.num_depth_cls.
                attr_preds (list[Tensor]): Attribute scores for each scale
                    level, each is a 4D-tensor, the channel number is
                    num_points * num_attrs.
                centernesses (list[Tensor]): Centerness for each scale level,
                    each is a 4D-tensor, the channel number is num_points * 1.
        """
        return multi_apply(self.forward_single, feats, self.scales,
                           self.strides)
    def forward_single(self, x, scale, stride):
        """Forward features of a single scale level.

        Args:
            x (Tensor): FPN feature maps of the specified stride.
            scale (:obj:`mmcv.cnn.Scale`): Learnable scale module to resize
                the bbox prediction.
            stride (int): The corresponding stride for feature maps, only
                used to normalize the bbox prediction when self.norm_on_bbox
                is True.

        Returns:
            tuple: scores for each class, bbox and direction class
                predictions, depth class predictions, location-aware weights,
                attribute and centerness predictions of input feature maps.
        """
        cls_score, bbox_pred, dir_cls_pred, attr_pred, centerness, cls_feat, \
            reg_feat = super().forward_single(x, scale, stride)

        max_regress_range = stride * self.regress_ranges[0][1] / \
            self.strides[0]
        bbox_pred = self.bbox_coder.decode_2d(bbox_pred, scale, stride,
                                              max_regress_range,
                                              self.training,
                                              self.pred_keypoints,
                                              self.pred_bbox2d)

        depth_cls_pred = None
        if self.use_depth_classifier:
            clone_reg_feat = reg_feat.clone()
            for conv_depth_cls_prev_layer in self.conv_depth_cls_prev:
                clone_reg_feat = conv_depth_cls_prev_layer(clone_reg_feat)
            depth_cls_pred = self.conv_depth_cls(clone_reg_feat)

        weight = None
        if self.weight_dim != -1:
            weight = []
            for i in range(self.weight_dim):
                clone_reg_feat = reg_feat.clone()
                if len(self.weight_branch[i]) > 0:
                    for conv_weight_prev_layer in self.conv_weight_prevs[i]:
                        clone_reg_feat = conv_weight_prev_layer(
                            clone_reg_feat)
                weight.append(self.conv_weights[i](clone_reg_feat))
            weight = torch.cat(weight, dim=1)

        return cls_score, bbox_pred, dir_cls_pred, depth_cls_pred, weight, \
            attr_pred, centerness
    def get_proj_bbox2d(self,
                        bbox_preds,
                        pos_dir_cls_preds,
                        labels_3d,
                        bbox_targets_3d,
                        pos_points,
                        pos_inds,
                        img_metas,
                        pos_depth_cls_preds=None,
                        pos_weights=None,
                        pos_cls_scores=None,
                        with_kpts=False):
        """Decode box predictions and get projected 2D attributes.

        Args:
            bbox_preds (list[Tensor]): Box predictions for each scale
                level, each is a 4D-tensor, the channel number is
                num_points * bbox_code_size.
            pos_dir_cls_preds (Tensor): Box scores for direction class
                predictions of positive boxes on all the scale levels in
                shape (num_pos_points, 2).
            labels_3d (list[Tensor]): 3D box category labels for each scale
                level, each is a 4D-tensor.
            bbox_targets_3d (list[Tensor]): 3D box targets for each scale
                level, each is a 4D-tensor, the channel number is
                num_points * bbox_code_size.
            pos_points (Tensor): Foreground points.
            pos_inds (Tensor): Index of foreground points from flattened
                tensors.
            img_metas (list[dict]): Meta information of each image, e.g.,
                image size, scaling factor, etc.
            pos_depth_cls_preds (Tensor, optional): Probabilistic depth map of
                positive boxes on all the scale levels in shape
                (num_pos_points, self.num_depth_cls). Defaults to None.
            pos_weights (Tensor, optional): Location-aware weights of positive
                boxes in shape (num_pos_points, self.weight_dim). Defaults to
                None.
            pos_cls_scores (Tensor, optional): Classification scores of
                positive boxes in shape (num_pos_points, self.num_classes).
                Defaults to None.
            with_kpts (bool, optional): Whether to output keypoints targets.
                Defaults to False.

        Returns:
            tuple[Tensor]: Exterior 2D boxes from projected 3D boxes,
                predicted 2D boxes and keypoint targets (if necessary).
        """
        views = [np.array(img_meta['cam2img']) for img_meta in img_metas]
        num_imgs = len(img_metas)
        img_idx = []
        for label in labels_3d:
            for idx in range(num_imgs):
                img_idx.append(
                    labels_3d[0].new_ones(int(len(label) / num_imgs)) * idx)
        img_idx = torch.cat(img_idx)
        pos_img_idx = img_idx[pos_inds]

        flatten_strided_bbox_preds = []
        flatten_strided_bbox2d_preds = []
        flatten_bbox_targets_3d = []
        flatten_strides = []

        for stride_idx, bbox_pred in enumerate(bbox_preds):
            flatten_bbox_pred = bbox_pred.permute(0, 2, 3, 1).reshape(
                -1, sum(self.group_reg_dims))
            flatten_bbox_pred[:, :2] *= self.strides[stride_idx]
            flatten_bbox_pred[:, -4:] *= self.strides[stride_idx]
            flatten_strided_bbox_preds.append(
                flatten_bbox_pred[:, :self.bbox_coder.bbox_code_size])
            flatten_strided_bbox2d_preds.append(flatten_bbox_pred[:, -4:])

            bbox_target_3d = bbox_targets_3d[stride_idx].clone()
            bbox_target_3d[:, :2] *= self.strides[stride_idx]
            bbox_target_3d[:, -4:] *= self.strides[stride_idx]
            flatten_bbox_targets_3d.append(bbox_target_3d)

            flatten_stride = flatten_bbox_pred.new_ones(
                *flatten_bbox_pred.shape[:-1], 1) * self.strides[stride_idx]
            flatten_strides.append(flatten_stride)

        flatten_strided_bbox_preds = torch.cat(flatten_strided_bbox_preds)
        flatten_strided_bbox2d_preds = torch.cat(flatten_strided_bbox2d_preds)
        flatten_bbox_targets_3d = torch.cat(flatten_bbox_targets_3d)
        flatten_strides = torch.cat(flatten_strides)
        pos_strided_bbox_preds = flatten_strided_bbox_preds[pos_inds]
        pos_strided_bbox2d_preds = flatten_strided_bbox2d_preds[pos_inds]
        pos_bbox_targets_3d = flatten_bbox_targets_3d[pos_inds]
        pos_strides = flatten_strides[pos_inds]

        pos_decoded_bbox2d_preds = distance2bbox(pos_points,
                                                 pos_strided_bbox2d_preds)

        pos_strided_bbox_preds[:, :2] = \
            pos_points - pos_strided_bbox_preds[:, :2]
        pos_bbox_targets_3d[:, :2] = \
            pos_points - pos_bbox_targets_3d[:, :2]

        if self.use_depth_classifier and (not self.use_onlyreg_proj):
            pos_prob_depth_preds = self.bbox_coder.decode_prob_depth(
                pos_depth_cls_preds, self.depth_range, self.depth_unit,
                self.division, self.num_depth_cls)
            sig_alpha = torch.sigmoid(self.fuse_lambda)
            pos_strided_bbox_preds[:, 2] = \
                sig_alpha * pos_strided_bbox_preds.clone()[:, 2] + \
                (1 - sig_alpha) * pos_prob_depth_preds

        box_corners_in_image = pos_strided_bbox_preds.new_zeros(
            (*pos_strided_bbox_preds.shape[:-1], 8, 2))
        box_corners_in_image_gt = pos_strided_bbox_preds.new_zeros(
            (*pos_strided_bbox_preds.shape[:-1], 8, 2))

        for idx in range(num_imgs):
            mask = (pos_img_idx == idx)
            if pos_strided_bbox_preds[mask].shape[0] == 0:
                continue
            cam2img = torch.eye(
                4,
                dtype=pos_strided_bbox_preds.dtype,
                device=pos_strided_bbox_preds.device)
            view_shape = views[idx].shape
            cam2img[:view_shape[0], :view_shape[1]] = \
                pos_strided_bbox_preds.new_tensor(views[idx])

            centers2d_preds = pos_strided_bbox_preds.clone()[mask, :2]
            centers2d_targets = pos_bbox_targets_3d.clone()[mask, :2]
            centers3d_targets = points_img2cam(pos_bbox_targets_3d[mask, :3],
                                               views[idx])

            # use predicted depth to re-project the 2.5D centers
            pos_strided_bbox_preds[mask, :3] = points_img2cam(
                pos_strided_bbox_preds[mask, :3], views[idx])
            pos_bbox_targets_3d[mask, :3] = centers3d_targets

            # depth fixed when computing re-project 3D bboxes
            pos_strided_bbox_preds[mask, 2] = \
                pos_bbox_targets_3d.clone()[mask, 2]

            # decode yaws
            if self.use_direction_classifier:
                pos_dir_cls_scores = torch.max(
                    pos_dir_cls_preds[mask], dim=-1)[1]
                pos_strided_bbox_preds[mask] = self.bbox_coder.decode_yaw(
                    pos_strided_bbox_preds[mask], centers2d_preds,
                    pos_dir_cls_scores, self.dir_offset, cam2img)
            pos_bbox_targets_3d[mask, 6] = torch.atan2(
                centers2d_targets[:, 0] - cam2img[0, 2],
                cam2img[0, 0]) + pos_bbox_targets_3d[mask, 6]

            corners = img_metas[0]['box_type_3d'](
                pos_strided_bbox_preds[mask],
                box_dim=self.bbox_coder.bbox_code_size,
                origin=(0.5, 0.5, 0.5)).corners
            box_corners_in_image[mask] = points_cam2img(corners, cam2img)

            corners_gt = img_metas[0]['box_type_3d'](
                pos_bbox_targets_3d[mask, :self.bbox_code_size],
                box_dim=self.bbox_coder.bbox_code_size,
                origin=(0.5, 0.5, 0.5)).corners
            box_corners_in_image_gt[mask] = points_cam2img(
                corners_gt, cam2img)

        minxy = torch.min(box_corners_in_image, dim=1)[0]
        maxxy = torch.max(box_corners_in_image, dim=1)[0]
        proj_bbox2d_preds = torch.cat([minxy, maxxy], dim=1)

        outputs = (proj_bbox2d_preds, pos_decoded_bbox2d_preds)

        if with_kpts:
            norm_strides = pos_strides * self.regress_ranges[0][1] / \
                self.strides[0]
            kpts_targets = box_corners_in_image_gt - pos_points[..., None, :]
            kpts_targets = kpts_targets.view(
                (*pos_strided_bbox_preds.shape[:-1], 16))
            kpts_targets /= norm_strides

            outputs += (kpts_targets, )

        return outputs
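    # The two boxes returned above drive the consistency loss: the exterior
    # 2D box is obtained by projecting the decoded 3D corners into the image
    # (min/max over the 8 corners), while the predicted 2D box comes from the
    # plain distance2bbox decoding, and the loss encourages them to agree.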
    def get_pos_predictions(self, bbox_preds, dir_cls_preds, depth_cls_preds,
                            weights, attr_preds, centernesses, pos_inds,
                            img_metas):
        """Flatten predictions and get positive ones.

        Args:
            bbox_preds (list[Tensor]): Box energies / deltas for each scale
                level, each is a 4D-tensor, the channel number is
                num_points * bbox_code_size.
            dir_cls_preds (list[Tensor]): Box scores for direction class
                predictions on each scale level, each is a 4D-tensor,
                the channel number is num_points * 2. (bin = 2)
            depth_cls_preds (list[Tensor]): Box scores for probabilistic depth
                predictions on each scale level, each is a 4D-tensor,
                the channel number is num_points * self.num_depth_cls.
            attr_preds (list[Tensor]): Attribute scores for each scale level,
                each is a 4D-tensor, the channel number is
                num_points * num_attrs.
            centernesses (list[Tensor]): Centerness for each scale level, each
                is a 4D-tensor, the channel number is num_points * 1.
            pos_inds (Tensor): Index of foreground points from flattened
                tensors.
            img_metas (list[dict]): Meta information of each image, e.g.,
                image size, scaling factor, etc.

        Returns:
            tuple[Tensor]: Box predictions, direction classes, probabilistic
                depth maps, location-aware weight maps, attributes and
                centerness predictions.
        """
        flatten_bbox_preds = [
            bbox_pred.permute(0, 2, 3,
                              1).reshape(-1, sum(self.group_reg_dims))
            for bbox_pred in bbox_preds
        ]
        flatten_dir_cls_preds = [
            dir_cls_pred.permute(0, 2, 3, 1).reshape(-1, 2)
            for dir_cls_pred in dir_cls_preds
        ]
        flatten_centerness = [
            centerness.permute(0, 2, 3, 1).reshape(-1)
            for centerness in centernesses
        ]
        flatten_bbox_preds = torch.cat(flatten_bbox_preds)
        flatten_dir_cls_preds = torch.cat(flatten_dir_cls_preds)
        flatten_centerness = torch.cat(flatten_centerness)
        pos_bbox_preds = flatten_bbox_preds[pos_inds]
        pos_dir_cls_preds = flatten_dir_cls_preds[pos_inds]
        pos_centerness = flatten_centerness[pos_inds]

        pos_depth_cls_preds = None
        if self.use_depth_classifier:
            flatten_depth_cls_preds = [
                depth_cls_pred.permute(0, 2, 3,
                                       1).reshape(-1, self.num_depth_cls)
                for depth_cls_pred in depth_cls_preds
            ]
            flatten_depth_cls_preds = torch.cat(flatten_depth_cls_preds)
            pos_depth_cls_preds = flatten_depth_cls_preds[pos_inds]

        pos_weights = None
        if self.weight_dim != -1:
            flatten_weights = [
                weight.permute(0, 2, 3, 1).reshape(-1, self.weight_dim)
                for weight in weights
            ]
            flatten_weights = torch.cat(flatten_weights)
            pos_weights = flatten_weights[pos_inds]

        pos_attr_preds = None
        if self.pred_attrs:
            flatten_attr_preds = [
                attr_pred.permute(0, 2, 3, 1).reshape(-1, self.num_attrs)
                for attr_pred in attr_preds
            ]
            flatten_attr_preds = torch.cat(flatten_attr_preds)
            pos_attr_preds = flatten_attr_preds[pos_inds]

        return pos_bbox_preds, pos_dir_cls_preds, pos_depth_cls_preds, \
            pos_weights, pos_attr_preds, pos_centerness
    @force_fp32(
        apply_to=('cls_scores', 'bbox_preds', 'dir_cls_preds',
                  'depth_cls_preds', 'weights', 'attr_preds', 'centernesses'))
    def loss(self,
             cls_scores,
             bbox_preds,
             dir_cls_preds,
             depth_cls_preds,
             weights,
             attr_preds,
             centernesses,
             gt_bboxes,
             gt_labels,
             gt_bboxes_3d,
             gt_labels_3d,
             centers2d,
             depths,
             attr_labels,
             img_metas,
             gt_bboxes_ignore=None):
        """Compute loss of the head.

        Args:
            cls_scores (list[Tensor]): Box scores for each scale level,
                each is a 4D-tensor, the channel number is
                num_points * num_classes.
            bbox_preds (list[Tensor]): Box energies / deltas for each scale
                level, each is a 4D-tensor, the channel number is
                num_points * bbox_code_size.
            dir_cls_preds (list[Tensor]): Box scores for direction class
                predictions on each scale level, each is a 4D-tensor,
                the channel number is num_points * 2. (bin = 2)
            depth_cls_preds (list[Tensor]): Box scores for probabilistic depth
                predictions on each scale level, each is a 4D-tensor,
                the channel number is num_points * self.num_depth_cls.
            weights (list[Tensor]): Location-aware weights for each scale
                level, each is a 4D-tensor, the channel number is
                num_points * self.weight_dim.
            attr_preds (list[Tensor]): Attribute scores for each scale level,
                each is a 4D-tensor, the channel number is
                num_points * num_attrs.
            centernesses (list[Tensor]): Centerness for each scale level, each
                is a 4D-tensor, the channel number is num_points * 1.
            gt_bboxes (list[Tensor]): Ground truth bboxes for each image with
                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.
            gt_labels (list[Tensor]): Class indices corresponding to each box.
            gt_bboxes_3d (list[Tensor]): 3D boxes ground truth with shape of
                (num_gts, code_size).
            gt_labels_3d (list[Tensor]): Same as gt_labels.
            centers2d (list[Tensor]): 2D centers on the image with shape of
                (num_gts, 2).
            depths (list[Tensor]): Depth ground truth with shape of
                (num_gts, ).
            attr_labels (list[Tensor]): Attributes indices of each box.
            img_metas (list[dict]): Meta information of each image, e.g.,
                image size, scaling factor, etc.
            gt_bboxes_ignore (list[Tensor]): Specify which bounding boxes can
                be ignored when computing the loss. Defaults to None.

        Returns:
            dict[str, Tensor]: A dictionary of loss components.
        """
        assert len(cls_scores) == len(bbox_preds) == len(dir_cls_preds) == \
            len(depth_cls_preds) == len(weights) == len(centernesses) == \
            len(attr_preds), 'The length of cls_scores, bbox_preds, ' \
            'dir_cls_preds, depth_cls_preds, weights, centernesses, and ' \
            f'attr_preds: {len(cls_scores)}, {len(bbox_preds)}, ' \
            f'{len(dir_cls_preds)}, {len(depth_cls_preds)}, ' \
            f'{len(weights)}, {len(centernesses)}, {len(attr_preds)} ' \
            'are inconsistent.'
        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
        all_level_points = self.get_points(featmap_sizes, bbox_preds[0].dtype,
                                           bbox_preds[0].device)
        labels_3d, bbox_targets_3d, centerness_targets, attr_targets = \
            self.get_targets(all_level_points, gt_bboxes, gt_labels,
                             gt_bboxes_3d, gt_labels_3d, centers2d, depths,
                             attr_labels)

        num_imgs = cls_scores[0].size(0)
        # flatten cls_scores and targets
        flatten_cls_scores = [
            cls_score.permute(0, 2, 3, 1).reshape(-1, self.cls_out_channels)
            for cls_score in cls_scores
        ]
        flatten_cls_scores = torch.cat(flatten_cls_scores)
        flatten_labels_3d = torch.cat(labels_3d)
        flatten_bbox_targets_3d = torch.cat(bbox_targets_3d)
        flatten_centerness_targets = torch.cat(centerness_targets)
        flatten_points = torch.cat(
            [points.repeat(num_imgs, 1) for points in all_level_points])
        if self.pred_attrs:
            flatten_attr_targets = torch.cat(attr_targets)

        # FG cat_id: [0, num_classes -1], BG cat_id: num_classes
        bg_class_ind = self.num_classes
        pos_inds = ((flatten_labels_3d >= 0)
                    & (flatten_labels_3d <
                       bg_class_ind)).nonzero().reshape(-1)
        num_pos = len(pos_inds)

        loss_dict = dict()

        loss_dict['loss_cls'] = self.loss_cls(
            flatten_cls_scores,
            flatten_labels_3d,
            avg_factor=num_pos + num_imgs)  # avoid num_pos is 0

        pos_bbox_preds, pos_dir_cls_preds, pos_depth_cls_preds, pos_weights, \
            pos_attr_preds, pos_centerness = self.get_pos_predictions(
                bbox_preds, dir_cls_preds, depth_cls_preds, weights,
                attr_preds, centernesses, pos_inds, img_metas)

        if num_pos > 0:
            pos_bbox_targets_3d = flatten_bbox_targets_3d[pos_inds]
            pos_centerness_targets = flatten_centerness_targets[pos_inds]
            pos_points = flatten_points[pos_inds]
            if self.pred_attrs:
                pos_attr_targets = flatten_attr_targets[pos_inds]
            if self.use_direction_classifier:
                pos_dir_cls_targets = self.get_direction_target(
                    pos_bbox_targets_3d, self.dir_offset, one_hot=False)

            bbox_weights = pos_centerness_targets.new_ones(
                len(pos_centerness_targets), sum(self.group_reg_dims))
            equal_weights = pos_centerness_targets.new_ones(
                pos_centerness_targets.shape)
            code_weight = self.train_cfg.get('code_weight', None)
            if code_weight:
                assert len(code_weight) == sum(self.group_reg_dims)
                bbox_weights = bbox_weights * bbox_weights.new_tensor(
                    code_weight)

            if self.diff_rad_by_sin:
                pos_bbox_preds, pos_bbox_targets_3d = self.add_sin_difference(
                    pos_bbox_preds, pos_bbox_targets_3d)

            loss_dict['loss_offset'] = self.loss_bbox(
                pos_bbox_preds[:, :2],
                pos_bbox_targets_3d[:, :2],
                weight=bbox_weights[:, :2],
                avg_factor=equal_weights.sum())
            loss_dict['loss_size'] = self.loss_bbox(
                pos_bbox_preds[:, 3:6],
                pos_bbox_targets_3d[:, 3:6],
                weight=bbox_weights[:, 3:6],
                avg_factor=equal_weights.sum())
            loss_dict['loss_rotsin'] = self.loss_bbox(
                pos_bbox_preds[:, 6],
                pos_bbox_targets_3d[:, 6],
                weight=bbox_weights[:, 6],
                avg_factor=equal_weights.sum())
            if self.pred_velo:
                loss_dict['loss_velo'] = self.loss_bbox(
                    pos_bbox_preds[:, 7:9],
                    pos_bbox_targets_3d[:, 7:9],
                    weight=bbox_weights[:, 7:9],
                    avg_factor=equal_weights.sum())

            proj_bbox2d_inputs = (bbox_preds, pos_dir_cls_preds, labels_3d,
                                  bbox_targets_3d, pos_points, pos_inds,
                                  img_metas)

            # direction classification loss
            # TODO: add more check for use_direction_classifier
            if self.use_direction_classifier:
                loss_dict['loss_dir'] = self.loss_dir(
                    pos_dir_cls_preds,
                    pos_dir_cls_targets,
                    equal_weights,
                    avg_factor=equal_weights.sum())

            # init depth loss with the one computed from direct regression
            loss_dict['loss_depth'] = self.loss_bbox(
                pos_bbox_preds[:, 2],
                pos_bbox_targets_3d[:, 2],
                weight=bbox_weights[:, 2],
                avg_factor=equal_weights.sum())
            # depth classification loss
            if self.use_depth_classifier:
                pos_prob_depth_preds = self.bbox_coder.decode_prob_depth(
                    pos_depth_cls_preds, self.depth_range, self.depth_unit,
                    self.division, self.num_depth_cls)
                sig_alpha = torch.sigmoid(self.fuse_lambda)
                if self.weight_dim != -1:
                    loss_fuse_depth = self.loss_depth(
                        sig_alpha * pos_bbox_preds[:, 2] +
                        (1 - sig_alpha) * pos_prob_depth_preds,
                        pos_bbox_targets_3d[:, 2],
                        sigma=pos_weights[:, 0],
                        weight=bbox_weights[:, 2],
                        avg_factor=equal_weights.sum())
                else:
                    loss_fuse_depth = self.loss_depth(
                        sig_alpha * pos_bbox_preds[:, 2] +
                        (1 - sig_alpha) * pos_prob_depth_preds,
                        pos_bbox_targets_3d[:, 2],
                        weight=bbox_weights[:, 2],
                        avg_factor=equal_weights.sum())
                loss_dict['loss_depth'] = loss_fuse_depth

                proj_bbox2d_inputs += (pos_depth_cls_preds, )

            if self.pred_keypoints:
                # use smoothL1 to compute consistency loss for keypoints
                # normalize the offsets with strides
                proj_bbox2d_preds, pos_decoded_bbox2d_preds, kpts_targets = \
                    self.get_proj_bbox2d(*proj_bbox2d_inputs, with_kpts=True)
                loss_dict['loss_kpts'] = self.loss_bbox(
                    pos_bbox_preds[:, self.kpts_start:self.kpts_start + 16],
                    kpts_targets,
                    weight=bbox_weights[
                        :, self.kpts_start:self.kpts_start + 16],
                    avg_factor=equal_weights.sum())

            if self.pred_bbox2d:
                loss_dict['loss_bbox2d'] = self.loss_bbox2d(
                    pos_bbox_preds[:, -4:],
                    pos_bbox_targets_3d[:, -4:],
                    weight=bbox_weights[:, -4:],
                    avg_factor=equal_weights.sum())
                if not self.pred_keypoints:
                    proj_bbox2d_preds, pos_decoded_bbox2d_preds = \
                        self.get_proj_bbox2d(*proj_bbox2d_inputs)
                loss_dict['loss_consistency'] = self.loss_consistency(
                    proj_bbox2d_preds,
                    pos_decoded_bbox2d_preds,
                    weight=bbox_weights[:, -4:],
                    avg_factor=equal_weights.sum())

            loss_dict['loss_centerness'] = self.loss_centerness(
                pos_centerness, pos_centerness_targets)

            # attribute classification loss
            if self.pred_attrs:
                loss_dict['loss_attr'] = self.loss_attr(
                    pos_attr_preds,
                    pos_attr_targets,
                    pos_centerness_targets,
                    avg_factor=pos_centerness_targets.sum())

        else:
            # need absolute due to possible negative delta x/y
            loss_dict['loss_offset'] = pos_bbox_preds[:, :2].sum()
            loss_dict['loss_size'] = pos_bbox_preds[:, 3:6].sum()
            loss_dict['loss_rotsin'] = pos_bbox_preds[:, 6].sum()
            loss_dict['loss_depth'] = pos_bbox_preds[:, 2].sum()
            if self.pred_velo:
                loss_dict['loss_velo'] = pos_bbox_preds[:, 7:9].sum()
            if self.pred_keypoints:
                loss_dict['loss_kpts'] = pos_bbox_preds[
                    :, self.kpts_start:self.kpts_start + 16].sum()
            if self.pred_bbox2d:
                loss_dict['loss_bbox2d'] = pos_bbox_preds[:, -4:].sum()
                loss_dict['loss_consistency'] = pos_bbox_preds[:, -4:].sum()
            loss_dict['loss_centerness'] = pos_centerness.sum()
            if self.use_direction_classifier:
                loss_dict['loss_dir'] = pos_dir_cls_preds.sum()
            if self.use_depth_classifier:
                sig_alpha = torch.sigmoid(self.fuse_lambda)
                loss_fuse_depth = \
                    sig_alpha * pos_bbox_preds[:, 2].sum() + \
                    (1 - sig_alpha) * pos_depth_cls_preds.sum()
                if self.weight_dim != -1:
                    loss_fuse_depth *= torch.exp(-pos_weights[:, 0].sum())
                loss_dict['loss_depth'] = loss_fuse_depth
            if self.pred_attrs:
                loss_dict['loss_attr'] = pos_attr_preds.sum()

        return loss_dict
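    # In short, when positive samples exist the fused depth loss above is
    # loss_depth(sigmoid(fuse_lambda) * d_reg + (1 - sigmoid(fuse_lambda)) *
    # d_prob, d_gt), optionally attenuated by the location-aware uncertainty
    # passed through the `sigma` argument when weight_dim != -1.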
    @force_fp32(
        apply_to=('cls_scores', 'bbox_preds', 'dir_cls_preds',
                  'depth_cls_preds', 'weights', 'attr_preds', 'centernesses'))
    def get_bboxes(self,
                   cls_scores,
                   bbox_preds,
                   dir_cls_preds,
                   depth_cls_preds,
                   weights,
                   attr_preds,
                   centernesses,
                   img_metas,
                   cfg=None,
                   rescale=None):
        """Transform network output for a batch into bbox predictions.

        Args:
            cls_scores (list[Tensor]): Box scores for each scale level
                Has shape (N, num_points * num_classes, H, W)
            bbox_preds (list[Tensor]): Box energies / deltas for each scale
                level with shape (N, num_points * 4, H, W)
            dir_cls_preds (list[Tensor]): Box scores for direction class
                predictions on each scale level, each is a 4D-tensor,
                the channel number is num_points * 2. (bin = 2)
            depth_cls_preds (list[Tensor]): Box scores for probabilistic depth
                predictions on each scale level, each is a 4D-tensor,
                the channel number is num_points * self.num_depth_cls.
            weights (list[Tensor]): Location-aware weights for each scale
                level, each is a 4D-tensor, the channel number is
                num_points * self.weight_dim.
            attr_preds (list[Tensor]): Attribute scores for each scale level
                Has shape (N, num_points * num_attrs, H, W)
            centernesses (list[Tensor]): Centerness for each scale level with
                shape (N, num_points * 1, H, W)
            img_metas (list[dict]): Meta information of each image, e.g.,
                image size, scaling factor, etc.
            cfg (mmcv.Config, optional): Test / postprocessing configuration,
                if None, test_cfg would be used. Defaults to None.
            rescale (bool, optional): If True, return boxes in original image
                space. Defaults to None.

        Returns:
            list[tuple[Tensor]]: Each item in result_list is a tuple, which
                consists of predicted 3D boxes, scores, labels, attributes and
                2D boxes (if necessary).
        """
        assert len(cls_scores) == len(bbox_preds) == len(dir_cls_preds) == \
            len(depth_cls_preds) == len(weights) == len(centernesses) == \
            len(attr_preds), 'The length of cls_scores, bbox_preds, ' \
            'dir_cls_preds, depth_cls_preds, weights, centernesses, and ' \
            f'attr_preds: {len(cls_scores)}, {len(bbox_preds)}, ' \
            f'{len(dir_cls_preds)}, {len(depth_cls_preds)}, ' \
            f'{len(weights)}, {len(centernesses)}, {len(attr_preds)} ' \
            'are inconsistent.'
        num_levels = len(cls_scores)

        featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
        mlvl_points = self.get_points(featmap_sizes, bbox_preds[0].dtype,
                                      bbox_preds[0].device)
        result_list = []
        for img_id in range(len(img_metas)):
            cls_score_list = [
                cls_scores[i][img_id].detach() for i in range(num_levels)
            ]
            bbox_pred_list = [
                bbox_preds[i][img_id].detach() for i in range(num_levels)
            ]
            if self.use_direction_classifier:
                dir_cls_pred_list = [
                    dir_cls_preds[i][img_id].detach()
                    for i in range(num_levels)
                ]
            else:
                dir_cls_pred_list = [
                    cls_scores[i][img_id].new_full(
                        [2, *cls_scores[i][img_id].shape[1:]], 0).detach()
                    for i in range(num_levels)
                ]
            if self.use_depth_classifier:
                depth_cls_pred_list = [
                    depth_cls_preds[i][img_id].detach()
                    for i in range(num_levels)
                ]
            else:
                depth_cls_pred_list = [
                    cls_scores[i][img_id].new_full(
                        [self.num_depth_cls,
                         *cls_scores[i][img_id].shape[1:]], 0).detach()
                    for i in range(num_levels)
                ]
            if self.weight_dim != -1:
                weight_list = [
                    weights[i][img_id].detach() for i in range(num_levels)
                ]
            else:
                weight_list = [
                    cls_scores[i][img_id].new_full(
                        [1, *cls_scores[i][img_id].shape[1:]], 0).detach()
                    for i in range(num_levels)
                ]
            if self.pred_attrs:
                attr_pred_list = [
                    attr_preds[i][img_id].detach() for i in range(num_levels)
                ]
            else:
                attr_pred_list = [
                    cls_scores[i][img_id].new_full(
                        [self.num_attrs, *cls_scores[i][img_id].shape[1:]],
                        self.attr_background_label).detach()
                    for i in range(num_levels)
                ]
            centerness_pred_list = [
                centernesses[i][img_id].detach() for i in range(num_levels)
            ]
            input_meta = img_metas[img_id]
            det_bboxes = self._get_bboxes_single(
                cls_score_list, bbox_pred_list, dir_cls_pred_list,
                depth_cls_pred_list, weight_list, attr_pred_list,
                centerness_pred_list, mlvl_points, input_meta, cfg, rescale)
            result_list.append(det_bboxes)
        return result_list
    def _get_bboxes_single(self,
                           cls_scores,
                           bbox_preds,
                           dir_cls_preds,
                           depth_cls_preds,
                           weights,
                           attr_preds,
                           centernesses,
                           mlvl_points,
                           input_meta,
                           cfg,
                           rescale=False):
        """Transform outputs for a single batch item into bbox predictions.

        Args:
            cls_scores (list[Tensor]): Box scores for a single scale level
                Has shape (num_points * num_classes, H, W).
            bbox_preds (list[Tensor]): Box energies / deltas for a single
                scale level with shape (num_points * bbox_code_size, H, W).
            dir_cls_preds (list[Tensor]): Box scores for direction class
                predictions on a single scale level with shape
                (num_points * 2, H, W)
            depth_cls_preds (list[Tensor]): Box scores for probabilistic depth
                predictions on a single scale level with shape
                (num_points * self.num_depth_cls, H, W)
            weights (list[Tensor]): Location-aware weight maps on a single
                scale level with shape (num_points * self.weight_dim, H, W).
            attr_preds (list[Tensor]): Attribute scores for a single scale
                level with shape (num_points * num_attrs, H, W).
            centernesses (list[Tensor]): Centerness for a single scale level
                with shape (num_points, H, W).
            mlvl_points (list[Tensor]): Box reference for a single scale level
                with shape (num_total_points, 2).
            input_meta (dict): Metadata of input image.
            cfg (mmcv.Config): Test / postprocessing configuration,
                if None, test_cfg would be used.
            rescale (bool, optional): If True, return boxes in original image
                space. Defaults to False.

        Returns:
            tuples[Tensor]: Predicted 3D boxes, scores, labels, attributes and
                2D boxes (if necessary).
        """
        view = np.array(input_meta['cam2img'])
        scale_factor = input_meta['scale_factor']
        cfg = self.test_cfg if cfg is None else cfg
        assert len(cls_scores) == len(bbox_preds) == len(mlvl_points)
        mlvl_centers2d = []
        mlvl_bboxes = []
        mlvl_scores = []
        mlvl_dir_scores = []
        mlvl_attr_scores = []
        mlvl_centerness = []
        mlvl_depth_cls_scores = []
        mlvl_depth_uncertainty = []
        mlvl_bboxes2d = None
        if self.pred_bbox2d:
            mlvl_bboxes2d = []

        for cls_score, bbox_pred, dir_cls_pred, depth_cls_pred, weight, \
                attr_pred, centerness, points in zip(
                    cls_scores, bbox_preds, dir_cls_preds, depth_cls_preds,
                    weights, attr_preds, centernesses, mlvl_points):
            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
            scores = cls_score.permute(1, 2, 0).reshape(
                -1, self.cls_out_channels).sigmoid()
            dir_cls_pred = dir_cls_pred.permute(1, 2, 0).reshape(-1, 2)
            dir_cls_score = torch.max(dir_cls_pred, dim=-1)[1]
            depth_cls_pred = depth_cls_pred.permute(1, 2, 0).reshape(
                -1, self.num_depth_cls)
            depth_cls_score = F.softmax(
                depth_cls_pred, dim=-1).topk(
                    k=2, dim=-1)[0].mean(dim=-1)
            if self.weight_dim != -1:
                weight = weight.permute(1, 2, 0).reshape(-1, self.weight_dim)
            else:
                weight = weight.permute(1, 2, 0).reshape(-1, 1)
            depth_uncertainty = torch.exp(-weight[:, -1])
            attr_pred = attr_pred.permute(1, 2, 0).reshape(-1, self.num_attrs)
            attr_score = torch.max(attr_pred, dim=-1)[1]
            centerness = centerness.permute(1, 2, 0).reshape(-1).sigmoid()

            bbox_pred = bbox_pred.permute(1, 2, 0).reshape(
                -1, sum(self.group_reg_dims))
            bbox_pred3d = bbox_pred[:, :self.bbox_coder.bbox_code_size]
            if self.pred_bbox2d:
                bbox_pred2d = bbox_pred[:, -4:]
            nms_pre = cfg.get('nms_pre', -1)
            if nms_pre > 0 and scores.shape[0] > nms_pre:
                merged_scores = scores * centerness[:, None]
                if self.use_depth_classifier:
                    merged_scores *= depth_cls_score[:, None]
                    if self.weight_dim != -1:
                        merged_scores *= depth_uncertainty[:, None]
                max_scores, _ = merged_scores.max(dim=1)
                _, topk_inds = max_scores.topk(nms_pre)
                points = points[topk_inds, :]
                bbox_pred3d = bbox_pred3d[topk_inds, :]
                scores = scores[topk_inds, :]
                dir_cls_pred = dir_cls_pred[topk_inds, :]
                depth_cls_pred = depth_cls_pred[topk_inds, :]
                centerness = centerness[topk_inds]
                dir_cls_score = dir_cls_score[topk_inds]
                depth_cls_score = depth_cls_score[topk_inds]
                depth_uncertainty = depth_uncertainty[topk_inds]
                attr_score = attr_score[topk_inds]
                if self.pred_bbox2d:
                    bbox_pred2d = bbox_pred2d[topk_inds, :]
            # change the offset to actual center predictions
            bbox_pred3d[:, :2] = points - bbox_pred3d[:, :2]
            if rescale:
                bbox_pred3d[:, :2] /= bbox_pred3d[:, :2].new_tensor(
                    scale_factor)
                if self.pred_bbox2d:
                    bbox_pred2d /= bbox_pred2d.new_tensor(scale_factor)
            if self.use_depth_classifier:
                prob_depth_pred = self.bbox_coder.decode_prob_depth(
                    depth_cls_pred, self.depth_range, self.depth_unit,
                    self.division, self.num_depth_cls)
                sig_alpha = torch.sigmoid(self.fuse_lambda)
                bbox_pred3d[:, 2] = sig_alpha * bbox_pred3d[:, 2] + \
                    (1 - sig_alpha) * prob_depth_pred
            pred_center2d = bbox_pred3d[:, :3].clone()
            bbox_pred3d[:, :3] = points_img2cam(bbox_pred3d[:, :3], view)
            mlvl_centers2d.append(pred_center2d)
            mlvl_bboxes.append(bbox_pred3d)
            mlvl_scores.append(scores)
            mlvl_dir_scores.append(dir_cls_score)
            mlvl_depth_cls_scores.append(depth_cls_score)
            mlvl_attr_scores.append(attr_score)
            mlvl_centerness.append(centerness)
            mlvl_depth_uncertainty.append(depth_uncertainty)
            if self.pred_bbox2d:
                bbox_pred2d = distance2bbox(
                    points, bbox_pred2d, max_shape=input_meta['img_shape'])
                mlvl_bboxes2d.append(bbox_pred2d)

        mlvl_centers2d = torch.cat(mlvl_centers2d)
        mlvl_bboxes = torch.cat(mlvl_bboxes)
        mlvl_dir_scores = torch.cat(mlvl_dir_scores)
        if self.pred_bbox2d:
            mlvl_bboxes2d = torch.cat(mlvl_bboxes2d)

        # change local yaw to global yaw for 3D nms
        cam2img = torch.eye(
            4, dtype=mlvl_centers2d.dtype, device=mlvl_centers2d.device)
        cam2img[:view.shape[0], :view.shape[1]] = \
            mlvl_centers2d.new_tensor(view)
        mlvl_bboxes = self.bbox_coder.decode_yaw(mlvl_bboxes, mlvl_centers2d,
                                                 mlvl_dir_scores,
                                                 self.dir_offset, cam2img)

        mlvl_bboxes_for_nms = xywhr2xyxyr(input_meta['box_type_3d'](
            mlvl_bboxes,
            box_dim=self.bbox_coder.bbox_code_size,
            origin=(0.5, 0.5, 0.5)).bev)

        mlvl_scores = torch.cat(mlvl_scores)
        padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
        # remind that we set FG labels to [0, num_class-1] since mmdet v2.0
        # BG cat_id: num_class
        mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)
        mlvl_attr_scores = torch.cat(mlvl_attr_scores)
        mlvl_centerness = torch.cat(mlvl_centerness)
        # no scale_factors in box3d_multiclass_nms
        # Then we multiply it from outside
        mlvl_nms_scores = mlvl_scores * mlvl_centerness[:, None]
        if self.use_depth_classifier:  # multiply the depth confidence
            mlvl_depth_cls_scores = torch.cat(mlvl_depth_cls_scores)
            mlvl_nms_scores *= mlvl_depth_cls_scores[:, None]
            if self.weight_dim != -1:
                mlvl_depth_uncertainty = torch.cat(mlvl_depth_uncertainty)
                mlvl_nms_scores *= mlvl_depth_uncertainty[:, None]
        results = box3d_multiclass_nms(mlvl_bboxes, mlvl_bboxes_for_nms,
                                       mlvl_nms_scores, cfg.score_thr,
                                       cfg.max_per_img, cfg, mlvl_dir_scores,
                                       mlvl_attr_scores, mlvl_bboxes2d)
        bboxes, scores, labels, dir_scores, attrs = results[0:5]
        attrs = attrs.to(labels.dtype)  # change data type to int
        bboxes = input_meta['box_type_3d'](
            bboxes,
            box_dim=self.bbox_coder.bbox_code_size,
            origin=(0.5, 0.5, 0.5))
        # Note that the predictions use origin (0.5, 0.5, 0.5)
        # Due to the ground truth centers2d are the gravity center of objects
        # v0.10.0 fix inplace operation to the input tensor of cam_box3d
        # So here we also need to add origin=(0.5, 0.5, 0.5)
        if not self.pred_attrs:
            attrs = None

        outputs = (bboxes, scores, labels, attrs)
        if self.pred_bbox2d:
            bboxes2d = results[-1]
            bboxes2d = torch.cat([bboxes2d, scores[:, None]], dim=1)
            outputs = outputs + (bboxes2d, )

        return outputs
    def get_targets(self, points, gt_bboxes_list, gt_labels_list,
                    gt_bboxes_3d_list, gt_labels_3d_list, centers2d_list,
                    depths_list, attr_labels_list):
        """Compute regression, classification and centerness targets for
        points in multiple images.

        Args:
            points (list[Tensor]): Points of each fpn level, each has shape
                (num_points, 2).
            gt_bboxes_list (list[Tensor]): Ground truth bboxes of each image,
                each has shape (num_gt, 4).
            gt_labels_list (list[Tensor]): Ground truth labels of each box,
                each has shape (num_gt,).
            gt_bboxes_3d_list (list[Tensor]): 3D Ground truth bboxes of each
                image, each has shape (num_gt, bbox_code_size).
            gt_labels_3d_list (list[Tensor]): 3D Ground truth labels of each
                box, each has shape (num_gt,).
            centers2d_list (list[Tensor]): Projected 3D centers onto 2D image,
                each has shape (num_gt, 2).
            depths_list (list[Tensor]): Depth of projected 3D centers onto 2D
                image, each has shape (num_gt, 1).
            attr_labels_list (list[Tensor]): Attribute labels of each box,
                each has shape (num_gt,).

        Returns:
            tuple:
                concat_lvl_labels (list[Tensor]): Labels of each level.
                concat_lvl_bbox_targets (list[Tensor]): BBox targets of each
                    level.
        """
        assert len(points) == len(self.regress_ranges)
        num_levels = len(points)
        # expand regress ranges to align with points
        expanded_regress_ranges = [
            points[i].new_tensor(self.regress_ranges[i])[None].expand_as(
                points[i]) for i in range(num_levels)
        ]
        # concat all levels points and regress ranges
        concat_regress_ranges = torch.cat(expanded_regress_ranges, dim=0)
        concat_points = torch.cat(points, dim=0)

        # the number of points per img, per lvl
        num_points = [center.size(0) for center in points]

        if attr_labels_list is None:
            attr_labels_list = [
                gt_labels.new_full(gt_labels.shape,
                                   self.attr_background_label)
                for gt_labels in gt_labels_list
            ]

        # get labels and bbox_targets of each image
        _, bbox_targets_list, labels_3d_list, bbox_targets_3d_list, \
            centerness_targets_list, attr_targets_list = multi_apply(
                self._get_target_single,
                gt_bboxes_list,
                gt_labels_list,
                gt_bboxes_3d_list,
                gt_labels_3d_list,
                centers2d_list,
                depths_list,
                attr_labels_list,
                points=concat_points,
                regress_ranges=concat_regress_ranges,
                num_points_per_lvl=num_points)

        # split to per img, per level
        bbox_targets_list = [
            bbox_targets.split(num_points, 0)
            for bbox_targets in bbox_targets_list
        ]
        labels_3d_list = [
            labels_3d.split(num_points, 0) for labels_3d in labels_3d_list
        ]
        bbox_targets_3d_list = [
            bbox_targets_3d.split(num_points, 0)
            for bbox_targets_3d in bbox_targets_3d_list
        ]
        centerness_targets_list = [
            centerness_targets.split(num_points, 0)
            for centerness_targets in centerness_targets_list
        ]
        attr_targets_list = [
            attr_targets.split(num_points, 0)
            for attr_targets in attr_targets_list
        ]

        # concat per level image
        concat_lvl_labels_3d = []
        concat_lvl_bbox_targets_3d = []
        concat_lvl_centerness_targets = []
        concat_lvl_attr_targets = []
        for i in range(num_levels):
            concat_lvl_labels_3d.append(
                torch.cat([labels[i] for labels in labels_3d_list]))
            concat_lvl_centerness_targets.append(
                torch.cat([
                    centerness_targets[i]
                    for centerness_targets in centerness_targets_list
                ]))
            bbox_targets_3d = torch.cat([
                bbox_targets_3d[i]
                for bbox_targets_3d in bbox_targets_3d_list
            ])
            if self.pred_bbox2d:
                bbox_targets = torch.cat(
                    [bbox_targets[i] for bbox_targets in bbox_targets_list])
                bbox_targets_3d = torch.cat([bbox_targets_3d, bbox_targets],
                                            dim=1)
            concat_lvl_attr_targets.append(
                torch.cat(
                    [attr_targets[i] for attr_targets in attr_targets_list]))
            if self.norm_on_bbox:
                bbox_targets_3d[:, :2] = \
                    bbox_targets_3d[:, :2] / self.strides[i]
                if self.pred_bbox2d:
                    bbox_targets_3d[:, -4:] = \
                        bbox_targets_3d[:, -4:] / self.strides[i]
            concat_lvl_bbox_targets_3d.append(bbox_targets_3d)
        return concat_lvl_labels_3d, concat_lvl_bbox_targets_3d, \
            concat_lvl_centerness_targets, concat_lvl_attr_targets
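For reference, the probabilistic depth branch above is consumed through PGDBBoxCoder.decode_prob_depth, which converts per-bin scores into a continuous depth estimate. Below is a minimal stand-alone sketch of that expectation-style decoding for the 'uniform' division only; it is an illustration, not the library's exact implementation, and decode_uniform_depth_sketch / depth_cls_logits are hypothetical names introduced here.

import torch
import torch.nn.functional as F


def decode_uniform_depth_sketch(depth_cls_logits, depth_range=(0, 70),
                                depth_unit=10):
    """Turn per-bin logits into a scalar depth expectation (sketch only).

    depth_cls_logits: (N, num_bins) raw scores, e.g. from conv_depth_cls.
    """
    num_bins = depth_cls_logits.shape[-1]
    # bin centers at depth_range[0], +depth_unit, +2*depth_unit, ...
    bin_centers = depth_range[0] + depth_unit * torch.arange(
        num_bins, dtype=depth_cls_logits.dtype,
        device=depth_cls_logits.device)
    probs = F.softmax(depth_cls_logits, dim=-1)
    # expectation over bins gives a continuous depth estimate
    return (probs * bin_centers).sum(dim=-1)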
mmdet3d/models/dense_heads/point_rpn_head.py (new file, mode 100644)
# Copyright (c) OpenMMLab. All rights reserved.
import torch
from mmcv.runner import BaseModule, force_fp32
from torch import nn as nn

from mmdet3d.core.bbox.structures import (DepthInstance3DBoxes,
                                          LiDARInstance3DBoxes)
from mmdet3d.ops.iou3d.iou3d_utils import nms_gpu, nms_normal_gpu
from mmdet.core import build_bbox_coder, multi_apply
from mmdet.models import HEADS, build_loss


@HEADS.register_module()
class PointRPNHead(BaseModule):
    """RPN module for PointRCNN.

    Args:
        num_classes (int): Number of classes.
        train_cfg (dict): Train configs.
        test_cfg (dict): Test configs.
        pred_layer_cfg (dict, optional): Config of classification and
            regression prediction layers. Defaults to None.
        enlarge_width (float, optional): Enlarge bbox for each side to ignore
            close points. Defaults to 0.1.
        cls_loss (dict, optional): Config of direction classification loss.
            Defaults to None.
        bbox_loss (dict, optional): Config of localization loss.
            Defaults to None.
        bbox_coder (dict, optional): Config dict of box coders.
            Defaults to None.
        init_cfg (dict, optional): Config of initialization. Defaults to None.
    """

    def __init__(self,
                 num_classes,
                 train_cfg,
                 test_cfg,
                 pred_layer_cfg=None,
                 enlarge_width=0.1,
                 cls_loss=None,
                 bbox_loss=None,
                 bbox_coder=None,
                 init_cfg=None):
        super().__init__(init_cfg=init_cfg)
        self.num_classes = num_classes
        self.train_cfg = train_cfg
        self.test_cfg = test_cfg
        self.enlarge_width = enlarge_width

        # build loss function
        self.bbox_loss = build_loss(bbox_loss)
        self.cls_loss = build_loss(cls_loss)

        # build box coder
        self.bbox_coder = build_bbox_coder(bbox_coder)

        # build pred conv
        self.cls_layers = self._make_fc_layers(
            fc_cfg=pred_layer_cfg.cls_linear_channels,
            input_channels=pred_layer_cfg.in_channels,
            output_channels=self._get_cls_out_channels())

        self.reg_layers = self._make_fc_layers(
            fc_cfg=pred_layer_cfg.reg_linear_channels,
            input_channels=pred_layer_cfg.in_channels,
            output_channels=self._get_reg_out_channels())
    def _make_fc_layers(self, fc_cfg, input_channels, output_channels):
        """Make fully connected layers.

        Args:
            fc_cfg (dict): Config of fully connected layers.
            input_channels (int): Input channels for fc_layers.
            output_channels (int): Output channels for fc_layers.

        Returns:
            nn.Sequential: Fully connected layers.
        """
        fc_layers = []
        c_in = input_channels
        for k in range(0, fc_cfg.__len__()):
            fc_layers.extend([
                nn.Linear(c_in, fc_cfg[k], bias=False),
                nn.BatchNorm1d(fc_cfg[k]),
                nn.ReLU(),
            ])
            c_in = fc_cfg[k]
        fc_layers.append(nn.Linear(c_in, output_channels, bias=True))
        return nn.Sequential(*fc_layers)
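    # For instance, with a hypothetical fc_cfg of (256, 256) and
    # input_channels=512, the resulting module is
    # Linear(512, 256, bias=False) -> BatchNorm1d -> ReLU ->
    # Linear(256, 256, bias=False) -> BatchNorm1d -> ReLU ->
    # Linear(256, output_channels, bias=True).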
    def _get_cls_out_channels(self):
        """Return the channel number of classification outputs."""
        # Class numbers (k) + objectness (1)
        return self.num_classes

    def _get_reg_out_channels(self):
        """Return the channel number of regression outputs."""
        # Bbox classification and regression
        # (center residual (3), size regression (3),
        # torch.cos(yaw) (1), torch.sin(yaw) (1))
        return self.bbox_coder.code_size

    def forward(self, feat_dict):
        """Forward pass.

        Args:
            feat_dict (dict): Feature dict from backbone.

        Returns:
            tuple[list[torch.Tensor]]: Predicted boxes and classification
                scores.
        """
        point_features = feat_dict['fp_features']
        point_features = point_features.permute(0, 2, 1).contiguous()
        batch_size = point_features.shape[0]
        feat_cls = point_features.view(-1, point_features.shape[-1])
        feat_reg = point_features.view(-1, point_features.shape[-1])

        point_cls_preds = self.cls_layers(feat_cls).reshape(
            batch_size, -1, self._get_cls_out_channels())
        point_box_preds = self.reg_layers(feat_reg).reshape(
            batch_size, -1, self._get_reg_out_channels())
        return (point_box_preds, point_cls_preds)
    @force_fp32(apply_to=('bbox_preds', ))
    def loss(self,
             bbox_preds,
             cls_preds,
             points,
             gt_bboxes_3d,
             gt_labels_3d,
             img_metas=None):
        """Compute loss.

        Args:
            bbox_preds (dict): Predictions from forward of PointRCNN RPN_Head.
            cls_preds (dict): Classification from forward of PointRCNN
                RPN_Head.
            points (list[torch.Tensor]): Input points.
            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
                bboxes of each sample.
            gt_labels_3d (list[torch.Tensor]): Labels of each sample.
            img_metas (list[dict], Optional): Contain pcd and img's meta info.
                Defaults to None.

        Returns:
            dict: Losses of PointRCNN RPN module.
        """
        targets = self.get_targets(points, gt_bboxes_3d, gt_labels_3d)
        (bbox_targets, mask_targets, positive_mask, negative_mask,
         box_loss_weights, point_targets) = targets

        # bbox loss
        bbox_loss = self.bbox_loss(bbox_preds, bbox_targets,
                                   box_loss_weights.unsqueeze(-1))
        # calculate semantic loss
        semantic_points = cls_preds.reshape(-1, self.num_classes)
        semantic_targets = mask_targets
        semantic_targets[negative_mask] = self.num_classes
        semantic_points_label = semantic_targets
        # for ignore, but now we do not have ignore label
        semantic_loss_weight = negative_mask.float() + positive_mask.float()
        semantic_loss = self.cls_loss(semantic_points,
                                      semantic_points_label.reshape(-1),
                                      semantic_loss_weight.reshape(-1))
        semantic_loss /= positive_mask.float().sum()
        losses = dict(bbox_loss=bbox_loss, semantic_loss=semantic_loss)
        return losses
    def get_targets(self, points, gt_bboxes_3d, gt_labels_3d):
        """Generate targets of PointRCNN RPN head.

        Args:
            points (list[torch.Tensor]): Points of each batch.
            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
                bboxes of each batch.
            gt_labels_3d (list[torch.Tensor]): Labels of each batch.

        Returns:
            tuple[torch.Tensor]: Targets of PointRCNN RPN head.
        """
        # find empty example
        for index in range(len(gt_labels_3d)):
            if len(gt_labels_3d[index]) == 0:
                fake_box = gt_bboxes_3d[index].tensor.new_zeros(
                    1, gt_bboxes_3d[index].tensor.shape[-1])
                gt_bboxes_3d[index] = gt_bboxes_3d[index].new_box(fake_box)
                gt_labels_3d[index] = gt_labels_3d[index].new_zeros(1)

        (bbox_targets, mask_targets, positive_mask, negative_mask,
         point_targets) = multi_apply(self.get_targets_single, points,
                                      gt_bboxes_3d, gt_labels_3d)

        bbox_targets = torch.stack(bbox_targets)
        mask_targets = torch.stack(mask_targets)
        positive_mask = torch.stack(positive_mask)
        negative_mask = torch.stack(negative_mask)
        box_loss_weights = positive_mask / (positive_mask.sum() + 1e-6)

        return (bbox_targets, mask_targets, positive_mask, negative_mask,
                box_loss_weights, point_targets)
    def get_targets_single(self, points, gt_bboxes_3d, gt_labels_3d):
        """Generate targets of PointRCNN RPN head for single batch.

        Args:
            points (torch.Tensor): Points of each batch.
            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth
                boxes of each batch.
            gt_labels_3d (torch.Tensor): Labels of each batch.

        Returns:
            tuple[torch.Tensor]: Targets of PointRCNN RPN head.
        """
        gt_bboxes_3d = gt_bboxes_3d.to(points.device)

        valid_gt = gt_labels_3d != -1
        gt_bboxes_3d = gt_bboxes_3d[valid_gt]
        gt_labels_3d = gt_labels_3d[valid_gt]

        # transform the bbox coordinate to the point cloud coordinate
        gt_bboxes_3d_tensor = gt_bboxes_3d.tensor.clone()
        gt_bboxes_3d_tensor[..., 2] += gt_bboxes_3d_tensor[..., 5] / 2

        points_mask, assignment = self._assign_targets_by_points_inside(
            gt_bboxes_3d, points)
        gt_bboxes_3d_tensor = gt_bboxes_3d_tensor[assignment]
        mask_targets = gt_labels_3d[assignment]

        bbox_targets = self.bbox_coder.encode(gt_bboxes_3d_tensor,
                                              points[..., 0:3], mask_targets)

        positive_mask = (points_mask.max(1)[0] > 0)
        negative_mask = (points_mask.max(1)[0] == 0)
        # add ignore_mask
        extend_gt_bboxes_3d = gt_bboxes_3d.enlarged_box(self.enlarge_width)
        points_mask, _ = self._assign_targets_by_points_inside(
            extend_gt_bboxes_3d, points)
        negative_mask = (points_mask.max(1)[0] == 0)

        point_targets = points[..., 0:3]
        return (bbox_targets, mask_targets, positive_mask, negative_mask,
                point_targets)
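    # Note the effect of the enlarged boxes above: a point that falls inside
    # an enlarged box but outside the original box ends up neither positive
    # nor negative, so its semantic loss weight in `loss` is zero, i.e. it is
    # ignored.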
    def get_bboxes(self,
                   points,
                   bbox_preds,
                   cls_preds,
                   input_metas,
                   rescale=False):
        """Generate bboxes from RPN head predictions.

        Args:
            points (torch.Tensor): Input points.
            bbox_preds (dict): Regression predictions from PointRCNN head.
            cls_preds (dict): Class scores predictions from PointRCNN head.
            input_metas (list[dict]): Point cloud and image's meta info.
            rescale (bool, optional): Whether to rescale bboxes.
                Defaults to False.

        Returns:
            list[tuple[torch.Tensor]]: Bounding boxes, scores and labels.
        """
        sem_scores = cls_preds.sigmoid()
        obj_scores = sem_scores.max(-1)[0]
        object_class = sem_scores.argmax(dim=-1)

        batch_size = sem_scores.shape[0]
        results = list()
        for b in range(batch_size):
            bbox3d = self.bbox_coder.decode(bbox_preds[b],
                                            points[b, ..., :3],
                                            object_class[b])
            bbox_selected, score_selected, labels, cls_preds_selected = \
                self.class_agnostic_nms(obj_scores[b], sem_scores[b], bbox3d,
                                        points[b, ..., :3], input_metas[b])
            bbox = input_metas[b]['box_type_3d'](
                bbox_selected.clone(),
                box_dim=bbox_selected.shape[-1],
                with_yaw=True)
            results.append((bbox, score_selected, labels,
                            cls_preds_selected))
        return results
def
class_agnostic_nms
(
self
,
obj_scores
,
sem_scores
,
bbox
,
points
,
input_meta
):
"""Class agnostic nms.
Args:
obj_scores (torch.Tensor): Objectness score of bounding boxes.
sem_scores (torch.Tensor): Semantic class score of bounding boxes.
bbox (torch.Tensor): Predicted bounding boxes.
Returns:
tuple[torch.Tensor]: Bounding boxes, scores and labels.
"""
nms_cfg
=
self
.
test_cfg
.
nms_cfg
if
not
self
.
training
\
else
self
.
train_cfg
.
nms_cfg
if
nms_cfg
.
use_rotate_nms
:
nms_func
=
nms_gpu
else
:
nms_func
=
nms_normal_gpu
num_bbox
=
bbox
.
shape
[
0
]
bbox
=
input_meta
[
'box_type_3d'
](
bbox
.
clone
(),
box_dim
=
bbox
.
shape
[
-
1
],
with_yaw
=
True
,
origin
=
(
0.5
,
0.5
,
0.5
))
if
isinstance
(
bbox
,
LiDARInstance3DBoxes
):
box_idx
=
bbox
.
points_in_boxes
(
points
)
box_indices
=
box_idx
.
new_zeros
([
num_bbox
+
1
])
box_idx
[
box_idx
==
-
1
]
=
num_bbox
box_indices
.
scatter_add_
(
0
,
box_idx
.
long
(),
box_idx
.
new_ones
(
box_idx
.
shape
))
box_indices
=
box_indices
[:
-
1
]
nonempty_box_mask
=
box_indices
>=
0
elif
isinstance
(
bbox
,
DepthInstance3DBoxes
):
box_indices
=
bbox
.
points_in_boxes
(
points
)
nonempty_box_mask
=
box_indices
.
T
.
sum
(
1
)
>=
0
else
:
raise
NotImplementedError
(
'Unsupported bbox type!'
)
bbox
=
bbox
.
tensor
[
nonempty_box_mask
]
if
self
.
test_cfg
.
score_thr
is
not
None
:
score_thr
=
self
.
test_cfg
.
score_thr
keep
=
(
obj_scores
>=
score_thr
)
obj_scores
=
obj_scores
[
keep
]
sem_scores
=
sem_scores
[
keep
]
bbox
=
bbox
[
keep
]
if
obj_scores
.
shape
[
0
]
>
0
:
topk
=
min
(
nms_cfg
.
nms_pre
,
obj_scores
.
shape
[
0
])
obj_scores_nms
,
indices
=
torch
.
topk
(
obj_scores
,
k
=
topk
)
bbox_for_nms
=
bbox
[
indices
]
sem_scores_nms
=
sem_scores
[
indices
]
keep
=
nms_func
(
bbox_for_nms
[:,
0
:
7
],
obj_scores_nms
,
nms_cfg
.
iou_thr
)
keep
=
keep
[:
nms_cfg
.
nms_post
]
bbox_selected
=
bbox_for_nms
[
keep
]
score_selected
=
obj_scores_nms
[
keep
]
cls_preds
=
sem_scores_nms
[
keep
]
labels
=
torch
.
argmax
(
cls_preds
,
-
1
)
return
bbox_selected
,
score_selected
,
labels
,
cls_preds
def
_assign_targets_by_points_inside
(
self
,
bboxes_3d
,
points
):
"""Compute assignment by checking whether point is inside bbox.
Args:
bboxes_3d (:obj:`BaseInstance3DBoxes`): Instance of bounding boxes.
points (torch.Tensor): Points of a batch.
Returns:
tuple[torch.Tensor]: Flags indicating whether each point is
inside bbox and the index of box where each point are in.
"""
# TODO: align points_in_boxes function in each box_structures
num_bbox
=
bboxes_3d
.
tensor
.
shape
[
0
]
if
isinstance
(
bboxes_3d
,
LiDARInstance3DBoxes
):
assignment
=
bboxes_3d
.
points_in_boxes
(
points
[:,
0
:
3
]).
long
()
points_mask
=
assignment
.
new_zeros
(
[
assignment
.
shape
[
0
],
num_bbox
+
1
])
assignment
[
assignment
==
-
1
]
=
num_bbox
points_mask
.
scatter_
(
1
,
assignment
.
unsqueeze
(
1
),
1
)
points_mask
=
points_mask
[:,
:
-
1
]
assignment
[
assignment
==
num_bbox
]
=
num_bbox
-
1
elif
isinstance
(
bboxes_3d
,
DepthInstance3DBoxes
):
points_mask
=
bboxes_3d
.
points_in_boxes
(
points
)
assignment
=
points_mask
.
argmax
(
dim
=-
1
)
else
:
raise
NotImplementedError
(
'Unsupported bbox type!'
)
return
points_mask
,
assignment
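A note on the scatter trick in `_assign_targets_by_points_inside` above: the per-point box index from `points_in_boxes` is turned into a one-hot points-vs-boxes mask by routing the -1 (background) indices to a throwaway extra column. A minimal pure-PyTorch sketch with made-up indices:

import torch

# Hypothetical output of points_in_boxes: one box index per point,
# -1 for points inside no box (5 points, 3 boxes).
assignment = torch.tensor([0, 2, -1, 1, 2])
num_bbox = 3

# Route -1 entries to an extra "background" column, scatter a one-hot
# row per point, then drop that column again.
points_mask = torch.zeros(assignment.shape[0], num_bbox + 1, dtype=torch.long)
assignment[assignment == -1] = num_bbox
points_mask.scatter_(1, assignment.unsqueeze(1), 1)
points_mask = points_mask[:, :-1]
print(points_mask)
# tensor([[1, 0, 0],
#         [0, 0, 1],
#         [0, 0, 0],
#         [0, 1, 0],
#         [0, 0, 1]])
positive_mask = points_mask.max(1)[0] > 0  # points covered by some gt box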
mmdet3d/models/dense_heads/shape_aware_head.py
View file @ 32a4328b

 # Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
 import numpy as np
 import torch
-import warnings
 from mmcv.cnn import ConvModule
 from mmcv.runner import BaseModule
 from torch import nn as nn
...
@@ -30,15 +31,17 @@ class BaseShapeHead(BaseModule):
         num_base_anchors (int): Number of anchors per location.
         box_code_size (int): The dimension of boxes to be encoded.
         in_channels (int): Input channels for convolutional layers.
-        shared_conv_channels (tuple): Channels for shared convolutional \
-            layers. Default: (64, 64).
-        shared_conv_strides (tuple): Strides for shared convolutional \
-            layers. Default: (1, 1).
-        use_direction_classifier (bool, optional): Whether to use direction \
+        shared_conv_channels (tuple, optional): Channels for shared
+            convolutional layers. Default: (64, 64).
+        shared_conv_strides (tuple, optional): Strides for shared
+            convolutional layers. Default: (1, 1).
+        use_direction_classifier (bool, optional): Whether to use direction
             classifier. Default: True.
-        conv_cfg (dict): Config of conv layer. Default: dict(type='Conv2d')
-        norm_cfg (dict): Config of norm layer. Default: dict(type='BN2d').
-        bias (bool|str, optional): Type of bias. Default: False.
+        conv_cfg (dict, optional): Config of conv layer.
+            Default: dict(type='Conv2d')
+        norm_cfg (dict, optional): Config of norm layer.
+            Default: dict(type='BN2d').
+        bias (bool | str, optional): Type of bias. Default: False.
     """

     def __init__(self,
...
@@ -127,11 +130,11 @@ class BaseShapeHead(BaseModule):
                 [B, C, H, W].

         Returns:
-            dict[torch.Tensor]: Contain score of each class, bbox \
-                regression and direction classification predictions. \
-                Note that all the returned tensors are reshaped as \
-                [bs*num_base_anchors*H*W, num_cls/box_code_size/dir_bins]. \
-                It is more convenient to concat anchors for different \
+            dict[torch.Tensor]: Contain score of each class, bbox
+                regression and direction classification predictions.
+                Note that all the returned tensors are reshaped as
+                [bs*num_base_anchors*H*W, num_cls/box_code_size/dir_bins].
+                It is more convenient to concat anchors for different
                 classes even though they have different feature map sizes.
         """
         x = self.shared_conv(x)
...
@@ -168,9 +171,9 @@ class ShapeAwareHead(Anchor3DHead):
     Args:
         tasks (dict): Shape-aware groups of multi-class objects.
-        assign_per_class (bool, optional): Whether to do assignment for each \
+        assign_per_class (bool, optional): Whether to do assignment for each
             class. Default: True.
-        kwargs (dict): Other arguments are the same as those in \
+        kwargs (dict): Other arguments are the same as those in
             :class:`Anchor3DHead`.
     """
...
@@ -217,7 +220,7 @@ class ShapeAwareHead(Anchor3DHead):
         Args:
             x (torch.Tensor): Input features.

         Returns:
-            tuple[torch.Tensor]: Contain score of each class, bbox \
+            tuple[torch.Tensor]: Contain score of each class, bbox
                 regression and direction classification predictions.
         """
         results = []
...
@@ -263,7 +266,7 @@ class ShapeAwareHead(Anchor3DHead):
             num_total_samples (int): The number of valid samples.

         Returns:
-            tuple[torch.Tensor]: Losses of class, bbox \
+            tuple[torch.Tensor]: Losses of class, bbox
                 and direction, respectively.
         """
         # classification loss
...
@@ -325,16 +328,16 @@ class ShapeAwareHead(Anchor3DHead):
                 of each sample.
             gt_labels (list[torch.Tensor]): Gt labels of each sample.
             input_metas (list[dict]): Contain pcd and img's meta info.
-            gt_bboxes_ignore (None | list[torch.Tensor]): Specify
+            gt_bboxes_ignore (list[torch.Tensor]): Specify
                 which bounding.

         Returns:
-            dict[str, list[torch.Tensor]]: Classification, bbox, and \
+            dict[str, list[torch.Tensor]]: Classification, bbox, and
                 direction losses of each level.
             - loss_cls (list[torch.Tensor]): Classification losses.
             - loss_bbox (list[torch.Tensor]): Box regression losses.
-            - loss_dir (list[torch.Tensor]): Direction classification \
+            - loss_dir (list[torch.Tensor]): Direction classification
                 losses.
         """
         device = cls_scores[0].device
...
@@ -388,7 +391,7 @@ class ShapeAwareHead(Anchor3DHead):
             dir_cls_preds (list[torch.Tensor]): Multi-level direction
                 class predictions.
             input_metas (list[dict]): Contain pcd and img's meta info.
-            cfg (None | :obj:`ConfigDict`): Training or testing config.
+            cfg (:obj:`ConfigDict`, optional): Training or testing config.
                 Default: None.
             rescale (list[torch.Tensor], optional): Whether to rescale bbox.
                 Default: False.
...
@@ -443,8 +446,8 @@ class ShapeAwareHead(Anchor3DHead):
             mlvl_anchors (List[torch.Tensor]): Multi-level anchors
                 in single batch.
             input_meta (list[dict]): Contain pcd and img's meta info.
-            cfg (None | :obj:`ConfigDict`): Training or testing config.
-            rescale (list[torch.Tensor], optional): whether to rescale bbox. \
+            cfg (:obj:`ConfigDict`): Training or testing config.
+            rescale (list[torch.Tensor], optional): whether to rescale bbox.
                 Default: False.

         Returns:
...
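The reshape described in the `BaseShapeHead` docstring above is what lets shape-aware groups with different feature-map sizes be concatenated along a single anchor dimension. A hedged sketch of that flattening with made-up shapes (the real head handles class scores and direction bins the same way):

import torch

bs, num_base_anchors, box_code_size = 2, 2, 7
pred_small = torch.randn(bs, num_base_anchors * box_code_size, 100, 100)
pred_large = torch.randn(bs, num_base_anchors * box_code_size, 50, 50)

def flatten_pred(pred):
    b = pred.shape[0]
    # (B, A*C, H, W) -> (B, H, W, A*C) -> (B, H*W*A, C)
    return pred.permute(0, 2, 3, 1).reshape(b, -1, box_code_size)

# Different H/W no longer matter after flattening, so the per-class
# groups concatenate along the anchor dimension.
merged = torch.cat([flatten_pred(pred_small), flatten_pred(pred_large)], dim=1)
print(merged.shape)  # torch.Size([2, 25000, 7])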
mmdet3d/models/dense_heads/smoke_mono3d_head.py
0 → 100644
View file @ 32a4328b

# Copyright (c) OpenMMLab. All rights reserved.
import torch
from torch.nn import functional as F

from mmdet.core import multi_apply
from mmdet.core.bbox.builder import build_bbox_coder
from mmdet.models.builder import HEADS
from mmdet.models.utils import gaussian_radius, gen_gaussian_target
from mmdet.models.utils.gaussian_target import (get_local_maximum,
                                                get_topk_from_heatmap,
                                                transpose_and_gather_feat)
from .anchor_free_mono3d_head import AnchorFreeMono3DHead


@HEADS.register_module()
class SMOKEMono3DHead(AnchorFreeMono3DHead):
    r"""Anchor-free head used in `SMOKE <https://arxiv.org/abs/2002.10111>`_

    .. code-block:: none

                /-----> 3*3 conv -----> 1*1 conv -----> cls
        feature
                \-----> 3*3 conv -----> 1*1 conv -----> reg

    Args:
        num_classes (int): Number of categories excluding the background
            category.
        in_channels (int): Number of channels in the input feature map.
        dim_channel (list[int]): Indices of dimension offset preds in
            regression heatmap channels.
        ori_channel (list[int]): Indices of orientation offset pred in
            regression heatmap channels.
        bbox_coder (:obj:`CameraInstance3DBoxes`): Bbox coder
            for encoding and decoding boxes.
        loss_cls (dict, optional): Config of classification loss.
            Default: loss_cls=dict(type='GaussionFocalLoss', loss_weight=1.0).
        loss_bbox (dict, optional): Config of localization loss.
            Default: loss_bbox=dict(type='L1Loss', loss_weight=10.0).
        loss_dir (dict, optional): Config of direction classification loss.
            In SMOKE, Default: None.
        loss_attr (dict, optional): Config of attribute classification loss.
            In SMOKE, Default: None.
        loss_centerness (dict): Config of centerness loss.
        norm_cfg (dict): Dictionary to construct and config norm layer.
            Default: norm_cfg=dict(type='GN', num_groups=32,
            requires_grad=True).
        init_cfg (dict): Initialization config dict. Default: None.
    """  # noqa: E501

    def __init__(self,
                 num_classes,
                 in_channels,
                 dim_channel,
                 ori_channel,
                 bbox_coder,
                 loss_cls=dict(type='GaussionFocalLoss', loss_weight=1.0),
                 loss_bbox=dict(type='L1Loss', loss_weight=0.1),
                 loss_dir=None,
                 loss_attr=None,
                 norm_cfg=dict(type='GN', num_groups=32, requires_grad=True),
                 init_cfg=None,
                 **kwargs):
        super().__init__(
            num_classes,
            in_channels,
            loss_cls=loss_cls,
            loss_bbox=loss_bbox,
            loss_dir=loss_dir,
            loss_attr=loss_attr,
            norm_cfg=norm_cfg,
            init_cfg=init_cfg,
            **kwargs)
        self.dim_channel = dim_channel
        self.ori_channel = ori_channel
        self.bbox_coder = build_bbox_coder(bbox_coder)

    def forward(self, feats):
        """Forward features from the upstream network.

        Args:
            feats (tuple[Tensor]): Features from the upstream network, each is
                a 4D-tensor.

        Returns:
            tuple:
                cls_scores (list[Tensor]): Box scores for each scale level,
                    each is a 4D-tensor, the channel number is
                    num_points * num_classes.
                bbox_preds (list[Tensor]): Box energies / deltas for each scale
                    level, each is a 4D-tensor, the channel number is
                    num_points * bbox_code_size.
        """
        return multi_apply(self.forward_single, feats)

    def forward_single(self, x):
        """Forward features of a single scale level.

        Args:
            x (Tensor): Input feature map.

        Returns:
            tuple: Scores for each class, bbox of input feature maps.
        """
        cls_score, bbox_pred, dir_cls_pred, attr_pred, cls_feat, reg_feat = \
            super().forward_single(x)
        cls_score = cls_score.sigmoid()  # turn to 0-1
        cls_score = cls_score.clamp(min=1e-4, max=1 - 1e-4)
        # (N, C, H, W)
        offset_dims = bbox_pred[:, self.dim_channel, ...]
        bbox_pred[:, self.dim_channel, ...] = offset_dims.sigmoid() - 0.5
        # (N, C, H, W)
        vector_ori = bbox_pred[:, self.ori_channel, ...]
        bbox_pred[:, self.ori_channel, ...] = F.normalize(vector_ori)
        return cls_score, bbox_pred

    def get_bboxes(self, cls_scores, bbox_preds, img_metas, rescale=None):
        """Generate bboxes from bbox head predictions.

        Args:
            cls_scores (list[Tensor]): Box scores for each scale level.
            bbox_preds (list[Tensor]): Box regression for each scale.
            img_metas (list[dict]): Meta information of each image, e.g.,
                image size, scaling factor, etc.
            rescale (bool): If True, return boxes in original image space.

        Returns:
            list[tuple[:obj:`CameraInstance3DBoxes`, Tensor, Tensor, None]]:
                Each item in result_list is 4-tuple.
        """
        assert len(cls_scores) == len(bbox_preds) == 1
        cam2imgs = torch.stack([
            cls_scores[0].new_tensor(img_meta['cam2img'])
            for img_meta in img_metas
        ])
        trans_mats = torch.stack([
            cls_scores[0].new_tensor(img_meta['trans_mat'])
            for img_meta in img_metas
        ])
        batch_bboxes, batch_scores, batch_topk_labels = self.decode_heatmap(
            cls_scores[0],
            bbox_preds[0],
            img_metas,
            cam2imgs=cam2imgs,
            trans_mats=trans_mats,
            topk=100,
            kernel=3)

        result_list = []
        for img_id in range(len(img_metas)):
            bboxes = batch_bboxes[img_id]
            scores = batch_scores[img_id]
            labels = batch_topk_labels[img_id]

            keep_idx = scores > 0.25
            bboxes = bboxes[keep_idx]
            scores = scores[keep_idx]
            labels = labels[keep_idx]

            bboxes = img_metas[img_id]['box_type_3d'](
                bboxes, box_dim=self.bbox_code_size, origin=(0.5, 0.5, 0.5))
            attrs = None
            result_list.append((bboxes, scores, labels, attrs))

        return result_list

    def decode_heatmap(self,
                       cls_score,
                       reg_pred,
                       img_metas,
                       cam2imgs,
                       trans_mats,
                       topk=100,
                       kernel=3):
        """Transform outputs into detections raw bbox predictions.

        Args:
            cls_score (Tensor): Center predict heatmap,
                shape (B, num_classes, H, W).
            reg_pred (Tensor): Box regression map,
                shape (B, channel, H, W).
            img_metas (list[dict]): Meta information of each image, e.g.,
                image size, scaling factor, etc.
            cam2imgs (Tensor): Camera intrinsic matrices,
                shape (B, 4, 4).
            trans_mats (Tensor): Transformation matrix from original image
                to feature map, shape (B, 3, 3).
            topk (int): Get top k center keypoints from heatmap. Default 100.
            kernel (int): Max pooling kernel for extract local maximum pixels.
                Default 3.

        Returns:
            tuple[torch.Tensor]: Decoded output of SMOKEHead, containing
                the following Tensors:
              - batch_bboxes (Tensor): Coords of each 3D box.
                    shape (B, k, 7)
              - batch_scores (Tensor): Scores of each 3D box.
                    shape (B, k)
              - batch_topk_labels (Tensor): Categories of each 3D box.
                    shape (B, k)
        """
        img_h, img_w = img_metas[0]['pad_shape'][:2]
        bs, _, feat_h, feat_w = cls_score.shape

        center_heatmap_pred = get_local_maximum(cls_score, kernel=kernel)

        *batch_dets, topk_ys, topk_xs = get_topk_from_heatmap(
            center_heatmap_pred, k=topk)
        batch_scores, batch_index, batch_topk_labels = batch_dets

        regression = transpose_and_gather_feat(reg_pred, batch_index)
        regression = regression.view(-1, 8)

        points = torch.cat([topk_xs.view(-1, 1),
                            topk_ys.view(-1, 1).float()],
                           dim=1)
        locations, dimensions, orientations = self.bbox_coder.decode(
            regression, points, batch_topk_labels, cam2imgs, trans_mats)

        batch_bboxes = torch.cat((locations, dimensions, orientations), dim=1)
        batch_bboxes = batch_bboxes.view(bs, -1, self.bbox_code_size)
        return batch_bboxes, batch_scores, batch_topk_labels

    def get_predictions(self, labels3d, centers2d, gt_locations,
                        gt_dimensions, gt_orientations, indices, img_metas,
                        pred_reg):
        """Prepare predictions for computing loss.

        Args:
            labels3d (Tensor): Labels of each 3D box.
                shape (B, max_objs, )
            centers2d (Tensor): Coords of each projected 3D box
                center on image. shape (B * max_objs, 2)
            gt_locations (Tensor): Coords of each 3D box's location.
                shape (B * max_objs, 3)
            gt_dimensions (Tensor): Dimensions of each 3D box.
                shape (N, 3)
            gt_orientations (Tensor): Orientation(yaw) of each 3D box.
                shape (N, 1)
            indices (Tensor): Indices of the existence of the 3D box.
                shape (B * max_objs, )
            img_metas (list[dict]): Meta information of each image,
                e.g., image size, scaling factor, etc.
            pred_reg (Tensor): Box regression map.
                shape (B, channel, H, W).

        Returns:
            dict: The dict has components below:
              - bbox3d_yaws (:obj:`CameraInstance3DBoxes`):
                    bbox calculated using pred orientations.
              - bbox3d_dims (:obj:`CameraInstance3DBoxes`):
                    bbox calculated using pred dimensions.
              - bbox3d_locs (:obj:`CameraInstance3DBoxes`):
                    bbox calculated using pred locations.
        """
        batch, channel = pred_reg.shape[0], pred_reg.shape[1]
        w = pred_reg.shape[3]
        cam2imgs = torch.stack([
            gt_locations.new_tensor(img_meta['cam2img'])
            for img_meta in img_metas
        ])
        trans_mats = torch.stack([
            gt_locations.new_tensor(img_meta['trans_mat'])
            for img_meta in img_metas
        ])
        centers2d_inds = centers2d[:, 1] * w + centers2d[:, 0]
        centers2d_inds = centers2d_inds.view(batch, -1)
        pred_regression = transpose_and_gather_feat(pred_reg, centers2d_inds)
        pred_regression_pois = pred_regression.view(-1, channel)
        locations, dimensions, orientations = self.bbox_coder.decode(
            pred_regression_pois, centers2d, labels3d, cam2imgs, trans_mats,
            gt_locations)

        locations, dimensions, orientations = locations[indices], dimensions[
            indices], orientations[indices]

        locations[:, 1] += dimensions[:, 1] / 2

        gt_locations = gt_locations[indices]

        assert len(locations) == len(gt_locations)
        assert len(dimensions) == len(gt_dimensions)
        assert len(orientations) == len(gt_orientations)
        bbox3d_yaws = self.bbox_coder.encode(gt_locations, gt_dimensions,
                                             orientations, img_metas)
        bbox3d_dims = self.bbox_coder.encode(gt_locations, dimensions,
                                             gt_orientations, img_metas)
        bbox3d_locs = self.bbox_coder.encode(locations, gt_dimensions,
                                             gt_orientations, img_metas)

        pred_bboxes = dict(ori=bbox3d_yaws, dim=bbox3d_dims, loc=bbox3d_locs)

        return pred_bboxes

    def get_targets(self, gt_bboxes, gt_labels, gt_bboxes_3d, gt_labels_3d,
                    centers2d, feat_shape, img_shape, img_metas):
        """Get training targets for batch images.

        Args:
            gt_bboxes (list[Tensor]): Ground truth bboxes of each image,
                shape (num_gt, 4).
            gt_labels (list[Tensor]): Ground truth labels of each box,
                shape (num_gt,).
            gt_bboxes_3d (list[:obj:`CameraInstance3DBoxes`]): 3D Ground
                truth bboxes of each image, shape (num_gt, bbox_code_size).
            gt_labels_3d (list[Tensor]): 3D Ground truth labels of each
                box, shape (num_gt,).
            centers2d (list[Tensor]): Projected 3D centers onto 2D image,
                shape (num_gt, 2).
            feat_shape (tuple[int]): Feature map shape with value,
                shape (B, _, H, W).
            img_shape (tuple[int]): Image shape in [h, w] format.
            img_metas (list[dict]): Meta information of each image, e.g.,
                image size, scaling factor, etc.

        Returns:
            tuple[Tensor, dict]: The Tensor value is the targets of
                center heatmap, the dict has components below:
              - gt_centers2d (Tensor): Coords of each projected 3D box
                    center on image. shape (B * max_objs, 2)
              - gt_labels3d (Tensor): Labels of each 3D box.
                    shape (B, max_objs, )
              - indices (Tensor): Indices of the existence of the 3D box.
                    shape (B * max_objs, )
              - affine_indices (Tensor): Indices of the affine of the 3D box.
                    shape (N, )
              - gt_locs (Tensor): Coords of each 3D box's location.
                    shape (N, 3)
              - gt_dims (Tensor): Dimensions of each 3D box.
                    shape (N, 3)
              - gt_yaws (Tensor): Orientation(yaw) of each 3D box.
                    shape (N, 1)
              - gt_cors (Tensor): Coords of the corners of each 3D box.
                    shape (N, 8, 3)
        """
        reg_mask = torch.stack([
            gt_bboxes[0].new_tensor(
                not img_meta['affine_aug'], dtype=torch.bool)
            for img_meta in img_metas
        ])

        img_h, img_w = img_shape[:2]
        bs, _, feat_h, feat_w = feat_shape

        width_ratio = float(feat_w / img_w)  # 1/4
        height_ratio = float(feat_h / img_h)  # 1/4

        assert width_ratio == height_ratio

        center_heatmap_target = gt_bboxes[-1].new_zeros(
            [bs, self.num_classes, feat_h, feat_w])

        gt_centers2d = centers2d.copy()

        for batch_id in range(bs):
            gt_bbox = gt_bboxes[batch_id]
            gt_label = gt_labels[batch_id]
            # project centers2d from input image to feat map
            gt_center2d = gt_centers2d[batch_id] * width_ratio

            for j, center in enumerate(gt_center2d):
                center_x_int, center_y_int = center.int()
                scale_box_h = (gt_bbox[j][3] - gt_bbox[j][1]) * height_ratio
                scale_box_w = (gt_bbox[j][2] - gt_bbox[j][0]) * width_ratio
                radius = gaussian_radius([scale_box_h, scale_box_w],
                                         min_overlap=0.7)
                radius = max(0, int(radius))
                ind = gt_label[j]
                gen_gaussian_target(center_heatmap_target[batch_id, ind],
                                    [center_x_int, center_y_int], radius)

        avg_factor = max(1, center_heatmap_target.eq(1).sum())

        num_ctrs = [center2d.shape[0] for center2d in centers2d]
        max_objs = max(num_ctrs)

        reg_inds = torch.cat(
            [reg_mask[i].repeat(num_ctrs[i]) for i in range(bs)])

        inds = torch.zeros((bs, max_objs),
                           dtype=torch.bool).to(centers2d[0].device)

        # put gt 3d bboxes to gpu
        gt_bboxes_3d = [
            gt_bbox_3d.to(centers2d[0].device) for gt_bbox_3d in gt_bboxes_3d
        ]

        batch_centers2d = centers2d[0].new_zeros((bs, max_objs, 2))
        batch_labels_3d = gt_labels_3d[0].new_zeros((bs, max_objs))
        batch_gt_locations = \
            gt_bboxes_3d[0].tensor.new_zeros((bs, max_objs, 3))
        for i in range(bs):
            inds[i, :num_ctrs[i]] = 1
            batch_centers2d[i, :num_ctrs[i]] = centers2d[i]
            batch_labels_3d[i, :num_ctrs[i]] = gt_labels_3d[i]
            batch_gt_locations[i, :num_ctrs[i]] = \
                gt_bboxes_3d[i].tensor[:, :3]

        inds = inds.flatten()
        batch_centers2d = batch_centers2d.view(-1, 2) * width_ratio
        batch_gt_locations = batch_gt_locations.view(-1, 3)

        # filter the empty image, without gt_bboxes_3d
        gt_bboxes_3d = [
            gt_bbox_3d for gt_bbox_3d in gt_bboxes_3d
            if gt_bbox_3d.tensor.shape[0] > 0
        ]

        gt_dimensions = torch.cat(
            [gt_bbox_3d.tensor[:, 3:6] for gt_bbox_3d in gt_bboxes_3d])
        gt_orientations = torch.cat([
            gt_bbox_3d.tensor[:, 6].unsqueeze(-1)
            for gt_bbox_3d in gt_bboxes_3d
        ])
        gt_corners = torch.cat(
            [gt_bbox_3d.corners for gt_bbox_3d in gt_bboxes_3d])

        target_labels = dict(
            gt_centers2d=batch_centers2d.long(),
            gt_labels3d=batch_labels_3d,
            indices=inds,
            reg_indices=reg_inds,
            gt_locs=batch_gt_locations,
            gt_dims=gt_dimensions,
            gt_yaws=gt_orientations,
            gt_cors=gt_corners)

        return center_heatmap_target, avg_factor, target_labels

    def loss(self,
             cls_scores,
             bbox_preds,
             gt_bboxes,
             gt_labels,
             gt_bboxes_3d,
             gt_labels_3d,
             centers2d,
             depths,
             attr_labels,
             img_metas,
             gt_bboxes_ignore=None):
        """Compute loss of the head.

        Args:
            cls_scores (list[Tensor]): Box scores for each scale level,
                shape (num_gt, 4).
            bbox_preds (list[Tensor]): Box dims is a 4D-tensor, the channel
                number is bbox_code_size, shape (B, 7, H, W).
            gt_bboxes (list[Tensor]): Ground truth bboxes for each image,
                shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.
            gt_labels (list[Tensor]): Class indices corresponding to each box,
                shape (num_gts, ).
            gt_bboxes_3d (list[:obj:`CameraInstance3DBoxes`]): 3D boxes ground
                truth. It is the flipped gt_bboxes.
            gt_labels_3d (list[Tensor]): Same as gt_labels.
            centers2d (list[Tensor]): 2D centers on the image,
                shape (num_gts, 2).
            depths (list[Tensor]): Depth ground truth,
                shape (num_gts, ).
            attr_labels (list[Tensor]): Attributes indices of each box.
                In KITTI it's None.
            img_metas (list[dict]): Meta information of each image, e.g.,
                image size, scaling factor, etc.
            gt_bboxes_ignore (None | list[Tensor]): Specify which bounding
                boxes can be ignored when computing the loss.
                Default: None.

        Returns:
            dict[str, Tensor]: A dictionary of loss components.
        """
        assert len(cls_scores) == len(bbox_preds) == 1
        assert attr_labels is None
        assert gt_bboxes_ignore is None
        center2d_heatmap = cls_scores[0]
        pred_reg = bbox_preds[0]

        center2d_heatmap_target, avg_factor, target_labels = \
            self.get_targets(gt_bboxes, gt_labels, gt_bboxes_3d,
                             gt_labels_3d, centers2d,
                             center2d_heatmap.shape,
                             img_metas[0]['pad_shape'],
                             img_metas)

        pred_bboxes = self.get_predictions(
            labels3d=target_labels['gt_labels3d'],
            centers2d=target_labels['gt_centers2d'],
            gt_locations=target_labels['gt_locs'],
            gt_dimensions=target_labels['gt_dims'],
            gt_orientations=target_labels['gt_yaws'],
            indices=target_labels['indices'],
            img_metas=img_metas,
            pred_reg=pred_reg)

        loss_cls = self.loss_cls(
            center2d_heatmap, center2d_heatmap_target, avg_factor=avg_factor)

        reg_inds = target_labels['reg_indices']

        loss_bbox_oris = self.loss_bbox(
            pred_bboxes['ori'].corners[reg_inds, ...],
            target_labels['gt_cors'][reg_inds, ...])

        loss_bbox_dims = self.loss_bbox(
            pred_bboxes['dim'].corners[reg_inds, ...],
            target_labels['gt_cors'][reg_inds, ...])

        loss_bbox_locs = self.loss_bbox(
            pred_bboxes['loc'].corners[reg_inds, ...],
            target_labels['gt_cors'][reg_inds, ...])

        loss_bbox = loss_bbox_dims + loss_bbox_locs + loss_bbox_oris

        loss_dict = dict(loss_cls=loss_cls, loss_bbox=loss_bbox)

        return loss_dict
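For reference, a pure-torch sketch of the center-heatmap target that `get_targets` builds via mmdet's `gaussian_radius`/`gen_gaussian_target`. The stand-in gaussian, map size, center, and radius here are all made up for illustration:

import torch

def gaussian_2d(radius, sigma):
    # Dense 2D gaussian patch of side 2*radius+1, a stand-in for
    # mmdet's gen_gaussian_target internals.
    coords = torch.arange(-radius, radius + 1, dtype=torch.float32)
    y, x = torch.meshgrid(coords, coords)
    return torch.exp(-(x * x + y * y) / (2 * sigma * sigma))

# One class, an 8x8 feature map, one object centered at (x=4, y=3).
heatmap = torch.zeros(1, 8, 8)
cx, cy, radius = 4, 3, 2
patch = gaussian_2d(radius, sigma=radius / 3)

# Paste the patch with torch.max so overlapping objects keep the
# stronger peak, mirroring gen_gaussian_target.
ys = slice(cy - radius, cy + radius + 1)
xs = slice(cx - radius, cx + radius + 1)
heatmap[0, ys, xs] = torch.max(heatmap[0, ys, xs], patch)

# avg_factor counts exact peaks, mirroring the head's
# center_heatmap_target.eq(1).sum() normalization.
avg_factor = max(1, int(heatmap.eq(1).sum()))
print(heatmap[0, cy, cx], avg_factor)  # tensor(1.) 1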
mmdet3d/models/dense_heads/ssd_3d_head.py
View file @ 32a4328b

 # Copyright (c) OpenMMLab. All rights reserved.
-import numpy as np
 import torch
 from mmcv.ops.nms import batched_nms
 from mmcv.runner import force_fp32
...
@@ -128,15 +127,15 @@ class SSD3DHead(VoteHead):
         Args:
             bbox_preds (dict): Predictions from forward of SSD3DHead.
             points (list[torch.Tensor]): Input points.
-            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \
+            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
                 bboxes of each sample.
             gt_labels_3d (list[torch.Tensor]): Labels of each sample.
-            pts_semantic_mask (None | list[torch.Tensor]): Point-wise
+            pts_semantic_mask (list[torch.Tensor]): Point-wise
                 semantic mask.
-            pts_instance_mask (None | list[torch.Tensor]): Point-wise
+            pts_instance_mask (list[torch.Tensor]): Point-wise
                 instance mask.
             img_metas (list[dict]): Contain pcd and img's meta info.
-            gt_bboxes_ignore (None | list[torch.Tensor]): Specify
+            gt_bboxes_ignore (list[torch.Tensor]): Specify
                 which bounding.

         Returns:
...
@@ -231,12 +230,12 @@ class SSD3DHead(VoteHead):
         Args:
             points (list[torch.Tensor]): Points of each batch.
-            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \
+            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
                 bboxes of each batch.
             gt_labels_3d (list[torch.Tensor]): Labels of each batch.
-            pts_semantic_mask (None | list[torch.Tensor]): Point-wise semantic
+            pts_semantic_mask (list[torch.Tensor]): Point-wise semantic
                 label of each batch.
-            pts_instance_mask (None | list[torch.Tensor]): Point-wise instance
+            pts_instance_mask (list[torch.Tensor]): Point-wise instance
                 label of each batch.
             bbox_preds (torch.Tensor): Bounding box predictions of ssd3d head.
...
@@ -320,12 +319,12 @@ class SSD3DHead(VoteHead):
         Args:
             points (torch.Tensor): Points of each batch.
-            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth \
+            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth
                 boxes of each batch.
             gt_labels_3d (torch.Tensor): Labels of each batch.
-            pts_semantic_mask (None | torch.Tensor): Point-wise semantic
+            pts_semantic_mask (torch.Tensor): Point-wise semantic
                 label of each batch.
-            pts_instance_mask (None | torch.Tensor): Point-wise instance
+            pts_instance_mask (torch.Tensor): Point-wise instance
                 label of each batch.
             aggregated_points (torch.Tensor): Aggregated points from
                 candidate points layer.
...
@@ -392,7 +391,8 @@ class SSD3DHead(VoteHead):
         # LiDARInstance3DBoxes and DepthInstance3DBoxes
         canonical_xyz = rotation_3d_in_axis(
             canonical_xyz.unsqueeze(0).transpose(0, 1),
-            -gt_bboxes_3d.yaw[assignment], 2).squeeze(1)
+            -gt_bboxes_3d.yaw[assignment],
+            axis=2).squeeze(1)
         distance_front = torch.clamp(
             size_res_targets[:, 0] - canonical_xyz[:, 0], min=0)
         distance_back = torch.clamp(
...
@@ -441,7 +441,7 @@ class SSD3DHead(VoteHead):
                 negative_mask)

     def get_bboxes(self, points, bbox_preds, input_metas, rescale=False):
-        """Generate bboxes from sdd3d head predictions.
+        """Generate bboxes from 3DSSD head predictions.

         Args:
             points (torch.Tensor): Input points.
...
@@ -464,9 +464,7 @@ class SSD3DHead(VoteHead):
             bbox_selected, score_selected, labels = self.multiclass_nms_single(
                 obj_scores[b], sem_scores[b], bbox3d[b], points[b, ..., :3],
                 input_metas[b])
-            # fix the wrong direction
-            # To do: remove this ops
-            bbox_selected[..., 6] += np.pi
             bbox = input_metas[b]['box_type_3d'](
                 bbox_selected.clone(),
                 box_dim=bbox_selected.shape[-1],
...
@@ -481,7 +479,7 @@ class SSD3DHead(VoteHead):
         Args:
             obj_scores (torch.Tensor): Objectness score of bounding boxes.
-            sem_scores (torch.Tensor): semantic class score of bounding boxes.
+            sem_scores (torch.Tensor): Semantic class score of bounding boxes.
             bbox (torch.Tensor): Predicted bounding boxes.
             points (torch.Tensor): Input points.
             input_meta (dict): Point cloud and image's meta info.
...
@@ -489,23 +487,14 @@ class SSD3DHead(VoteHead):
         Returns:
             tuple[torch.Tensor]: Bounding boxes, scores and labels.
         """
-        num_bbox = bbox.shape[0]
         bbox = input_meta['box_type_3d'](
             bbox.clone(),
             box_dim=bbox.shape[-1],
             with_yaw=self.bbox_coder.with_rot,
             origin=(0.5, 0.5, 0.5))

-        if isinstance(bbox, LiDARInstance3DBoxes):
-            box_idx = bbox.points_in_boxes(points)
-            box_indices = box_idx.new_zeros([num_bbox + 1])
-            box_idx[box_idx == -1] = num_bbox
-            box_indices.scatter_add_(0, box_idx.long(),
-                                     box_idx.new_ones(box_idx.shape))
-            box_indices = box_indices[:-1]
-            nonempty_box_mask = box_indices >= 0
-        elif isinstance(bbox, DepthInstance3DBoxes):
-            box_indices = bbox.points_in_boxes(points)
+        if isinstance(bbox, (LiDARInstance3DBoxes, DepthInstance3DBoxes)):
+            box_indices = bbox.points_in_boxes_all(points)
             nonempty_box_mask = box_indices.T.sum(1) >= 0
         else:
             raise NotImplementedError('Unsupported bbox type!')
...
@@ -516,20 +505,20 @@ class SSD3DHead(VoteHead):
         minmax_box3d[:, 3:] = torch.max(corner3d, dim=1)[0]

         bbox_classes = torch.argmax(sem_scores, -1)
-        nms_selected = batched_nms(
+        nms_keep = batched_nms(
             minmax_box3d[nonempty_box_mask][:, [0, 1, 3, 4]],
             obj_scores[nonempty_box_mask], bbox_classes[nonempty_box_mask],
             self.test_cfg.nms_cfg)[1]

-        if nms_selected.shape[0] > self.test_cfg.max_output_num:
-            nms_selected = nms_selected[:self.test_cfg.max_output_num]
+        if nms_keep.shape[0] > self.test_cfg.max_output_num:
+            nms_keep = nms_keep[:self.test_cfg.max_output_num]

         # filter empty boxes and boxes with low score
         scores_mask = (obj_scores >= self.test_cfg.score_thr)
         nonempty_box_inds = torch.nonzero(
             nonempty_box_mask, as_tuple=False).flatten()
         nonempty_mask = torch.zeros_like(bbox_classes).scatter(
-            0, nonempty_box_inds[nms_selected], 1)
+            0, nonempty_box_inds[nms_keep], 1)
         selected = (nonempty_mask.bool() & scores_mask.bool())

         if self.test_cfg.per_class_proposal:
...
@@ -560,18 +549,8 @@ class SSD3DHead(VoteHead):
             tuple[torch.Tensor]: Flags indicating whether each point is
                 inside bbox and the index of box where each point are in.
         """
-        # TODO: align points_in_boxes function in each box_structures
-        num_bbox = bboxes_3d.tensor.shape[0]
-        if isinstance(bboxes_3d, LiDARInstance3DBoxes):
-            assignment = bboxes_3d.points_in_boxes(points).long()
-            points_mask = assignment.new_zeros(
-                [assignment.shape[0], num_bbox + 1])
-            assignment[assignment == -1] = num_bbox
-            points_mask.scatter_(1, assignment.unsqueeze(1), 1)
-            points_mask = points_mask[:, :-1]
-            assignment[assignment == num_bbox] = num_bbox - 1
-        elif isinstance(bboxes_3d, DepthInstance3DBoxes):
-            points_mask = bboxes_3d.points_in_boxes(points)
+        if isinstance(bboxes_3d,
+                      (LiDARInstance3DBoxes, DepthInstance3DBoxes)):
+            points_mask = bboxes_3d.points_in_boxes_all(points)
             assignment = points_mask.argmax(dim=-1)
         else:
             raise NotImplementedError('Unsupported bbox type!')
...
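The `batched_nms` call above runs on axis-aligned BEV rectangles derived from box corners. A small sketch of that reduction with random corners (assuming the (N, 8, 3) corner layout used by the box structures):

import torch

# Three toy boxes given by their 8 corners, shape (N, 8, 3); values are
# made up. The head derives them from bbox.corners.
corner3d = torch.rand(3, 8, 3) * 10

# Collapse corners to per-box min/max -> (N, 6) as (x1, y1, z1, x2, y2, z2).
minmax_box3d = torch.cat(
    [corner3d.min(dim=1)[0], corner3d.max(dim=1)[0]], dim=1)

# Columns [0, 1, 3, 4] form the BEV rectangle (x1, y1, x2, y2) that
# batched_nms consumes; the z extent is ignored.
bev_boxes = minmax_box3d[:, [0, 1, 3, 4]]
print(bev_boxes.shape)  # torch.Size([3, 4])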
mmdet3d/models/dense_heads/train_mixins.py
View file @ 32a4328b

...
@@ -25,7 +25,7 @@ class AnchorTrainMixin(object):
             gt_bboxes_list (list[:obj:`BaseInstance3DBoxes`]): Ground truth
                 bboxes of each image.
             input_metas (list[dict]): Meta info of each image.
-            gt_bboxes_ignore_list (None | list): Ignore list of gt bboxes.
+            gt_bboxes_ignore_list (list): Ignore list of gt bboxes.
             gt_labels_list (list[torch.Tensor]): Gt labels of batches.
             label_channels (int): The channel of labels.
             num_classes (int): The number of classes.
...
@@ -35,7 +35,7 @@ class AnchorTrainMixin(object):
             tuple (list, list, list, list, list, list, int, int):
                 Anchor targets, including labels, label weights,
                 bbox targets, bbox weights, direction targets,
-                direction weights, number of postive anchors and
+                direction weights, number of positive anchors and
                 number of negative anchors.
         """
         num_imgs = len(input_metas)
...
@@ -293,6 +293,7 @@ class AnchorTrainMixin(object):
                 sampling_result.pos_bboxes,
                 pos_bbox_targets,
                 self.dir_offset,
+                self.dir_limit_offset,
                 one_hot=False)
             bbox_targets[pos_inds, :] = pos_bbox_targets
             bbox_weights[pos_inds, :] = 1.0
...
@@ -318,6 +319,7 @@ class AnchorTrainMixin(object):
 def get_direction_target(anchors,
                          reg_targets,
                          dir_offset=0,
+                         dir_limit_offset=0,
                          num_bins=2,
                          one_hot=True):
     """Encode direction to 0 ~ num_bins-1.
...
@@ -333,7 +335,7 @@ def get_direction_target(anchors,
         torch.Tensor: Encoded direction targets.
     """
     rot_gt = reg_targets[..., 6] + anchors[..., 6]
-    offset_rot = limit_period(rot_gt - dir_offset, 0, 2 * np.pi)
+    offset_rot = limit_period(rot_gt - dir_offset, dir_limit_offset, 2 * np.pi)
     dir_cls_targets = torch.floor(offset_rot / (2 * np.pi / num_bins)).long()
     dir_cls_targets = torch.clamp(dir_cls_targets, min=0, max=num_bins - 1)
     if one_hot:
...
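The `dir_limit_offset` argument threaded through `get_direction_target` above changes where the yaw is wrapped before binning. A worked sketch with toy angles and an assumed `dir_offset`, using a local copy of the `limit_period` formula:

import numpy as np
import torch

def limit_period(val, offset=0.5, period=np.pi):
    # Same formula as mmdet3d.core.bbox.structures.limit_period.
    return val - torch.floor(val / period + offset) * period

# Toy decoded yaws, i.e. reg_targets[..., 6] + anchors[..., 6].
rot_gt = torch.tensor([0.3, np.pi - 0.1, -np.pi / 2, 2.5 * np.pi])
dir_offset, dir_limit_offset, num_bins = np.pi / 4, 0.0, 2

# Shift by dir_offset, wrap into [dir_limit_offset, 2*pi), then bin.
offset_rot = limit_period(rot_gt - dir_offset, dir_limit_offset, 2 * np.pi)
dir_cls_targets = torch.floor(offset_rot / (2 * np.pi / num_bins)).long()
dir_cls_targets = torch.clamp(dir_cls_targets, min=0, max=num_bins - 1)
print(dir_cls_targets)  # tensor([1, 0, 1, 0]) for these toy angles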
mmdet3d/models/dense_heads/vote_head.py
View file @ 32a4328b

...
@@ -136,7 +136,7 @@ class VoteHead(BaseModule):
         """Forward pass.

         Note:
-            The forward of VoteHead is devided into 4 steps:
+            The forward of VoteHead is divided into 4 steps:

                 1. Generate vote_points from seed_points.
                 2. Aggregate vote_points.
...
@@ -234,15 +234,15 @@ class VoteHead(BaseModule):
         Args:
             bbox_preds (dict): Predictions from forward of vote head.
             points (list[torch.Tensor]): Input points.
-            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \
+            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
                 bboxes of each sample.
             gt_labels_3d (list[torch.Tensor]): Labels of each sample.
-            pts_semantic_mask (None | list[torch.Tensor]): Point-wise
+            pts_semantic_mask (list[torch.Tensor]): Point-wise
                 semantic mask.
-            pts_instance_mask (None | list[torch.Tensor]): Point-wise
+            pts_instance_mask (list[torch.Tensor]): Point-wise
                 instance mask.
             img_metas (list[dict]): Contain pcd and img's meta info.
-            gt_bboxes_ignore (None | list[torch.Tensor]): Specify
+            gt_bboxes_ignore (list[torch.Tensor]): Specify
                 which bounding.
             ret_target (Bool): Return targets or not.
...
@@ -358,12 +358,12 @@ class VoteHead(BaseModule):
         Args:
             points (list[torch.Tensor]): Points of each batch.
-            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth \
+            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
                 bboxes of each batch.
             gt_labels_3d (list[torch.Tensor]): Labels of each batch.
-            pts_semantic_mask (None | list[torch.Tensor]): Point-wise semantic
+            pts_semantic_mask (list[torch.Tensor]): Point-wise semantic
                 label of each batch.
-            pts_instance_mask (None | list[torch.Tensor]): Point-wise instance
+            pts_instance_mask (list[torch.Tensor]): Point-wise instance
                 label of each batch.
             bbox_preds (torch.Tensor): Bounding box predictions of vote head.
...
@@ -447,12 +447,12 @@ class VoteHead(BaseModule):
         Args:
             points (torch.Tensor): Points of each batch.
-            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth \
+            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): Ground truth
                 boxes of each batch.
             gt_labels_3d (torch.Tensor): Labels of each batch.
-            pts_semantic_mask (None | torch.Tensor): Point-wise semantic
+            pts_semantic_mask (torch.Tensor): Point-wise semantic
                 label of each batch.
-            pts_instance_mask (None | torch.Tensor): Point-wise instance
+            pts_instance_mask (torch.Tensor): Point-wise instance
                 label of each batch.
             aggregated_points (torch.Tensor): Aggregated points from
                 vote aggregation layer.
...
@@ -471,7 +471,7 @@ class VoteHead(BaseModule):
             vote_target_masks = points.new_zeros([num_points],
                                                  dtype=torch.long)
             vote_target_idx = points.new_zeros([num_points], dtype=torch.long)
-            box_indices_all = gt_bboxes_3d.points_in_boxes(points)
+            box_indices_all = gt_bboxes_3d.points_in_boxes_all(points)
             for i in range(gt_labels_3d.shape[0]):
                 box_indices = box_indices_all[:, i]
                 indices = torch.nonzero(
...
@@ -621,7 +621,7 @@ class VoteHead(BaseModule):
             box_dim=bbox.shape[-1],
             with_yaw=self.bbox_coder.with_rot,
             origin=(0.5, 0.5, 0.5))
-        box_indices = bbox.points_in_boxes(points)
+        box_indices = bbox.points_in_boxes_all(points)
         corner3d = bbox.corners
         minmax_box3d = corner3d.new(torch.Size((corner3d.shape[0], 6)))
...
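The `points_in_boxes_all` rename above matters because vote targets are built from a dense (num_points, num_gt) inside-mask rather than one index per point. A simplified sketch with a hypothetical mask (the real head votes toward gravity centers and keeps multiple votes per point):

import torch

# Hypothetical (num_points, num_gt) inside-mask, the shape that
# points_in_boxes_all returns (1 = point lies inside box i).
box_indices_all = torch.tensor([[1, 0],
                                [0, 0],
                                [0, 1],
                                [1, 1]])

points = torch.tensor([[0.0, 0.0, 0.0],
                       [5.0, 5.0, 5.0],
                       [1.0, 1.0, 1.0],
                       [0.5, 0.5, 0.5]])
gt_centers = torch.tensor([[0.2, 0.2, 0.2],
                           [0.8, 0.8, 0.8]])

vote_targets = torch.zeros_like(points)
vote_target_masks = torch.zeros(points.shape[0], dtype=torch.long)
for i in range(box_indices_all.shape[1]):
    indices = torch.nonzero(box_indices_all[:, i], as_tuple=False).squeeze(-1)
    # Each selected point votes for the offset to its box center
    # (kept simple here; a point in two boxes keeps the later vote).
    vote_targets[indices] = gt_centers[i] - points[indices]
    vote_target_masks[indices] = 1
print(vote_target_masks)  # tensor([1, 0, 1, 1])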
mmdet3d/models/detectors/__init__.py
View file @ 32a4328b

...
@@ -10,7 +10,9 @@ from .imvoxelnet import ImVoxelNet
 from .mvx_faster_rcnn import DynamicMVXFasterRCNN, MVXFasterRCNN
 from .mvx_two_stage import MVXTwoStageDetector
 from .parta2 import PartA2
+from .point_rcnn import PointRCNN
 from .single_stage_mono3d import SingleStageMono3DDetector
+from .smoke_mono3d import SMOKEMono3D
 from .ssd3dnet import SSD3DNet
 from .votenet import VoteNet
 from .voxelnet import VoxelNet
...
@@ -19,5 +21,5 @@ __all__ = [
     'Base3DDetector', 'VoxelNet', 'DynamicVoxelNet', 'MVXTwoStageDetector',
     'DynamicMVXFasterRCNN', 'MVXFasterRCNN', 'PartA2', 'VoteNet', 'H3DNet',
     'CenterPoint', 'SSD3DNet', 'ImVoteNet', 'SingleStageMono3DDetector',
-    'FCOSMono3D', 'ImVoxelNet', 'GroupFree3DNet'
+    'FCOSMono3D', 'ImVoxelNet', 'GroupFree3DNet', 'PointRCNN', 'SMOKEMono3D'
 ]
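A trivial check that the two new exports are wired up after this change (illustrative only; requires an mmdet3d install at this version):

from mmdet3d.models.detectors import PointRCNN, SMOKEMono3D

print(PointRCNN.__name__, SMOKEMono3D.__name__)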
mmdet3d/models/detectors/base.py
View file @ 32a4328b

 # Copyright (c) OpenMMLab. All rights reserved.
+from os import path as osp
+
 import mmcv
 import torch
 from mmcv.parallel import DataContainer as DC
 from mmcv.runner import auto_fp16
-from os import path as osp

 from mmdet3d.core import Box3DMode, Coord3DMode, show_result
 from mmdet.models.detectors import BaseDetector
...
@@ -114,7 +115,7 @@ class Base3DDetector(BaseDetector):
                                         Box3DMode.DEPTH)
             elif box_mode_3d != Box3DMode.DEPTH:
                 ValueError(
-                    f'Unsupported box_mode_3d {box_mode_3d} for convertion!')
+                    f'Unsupported box_mode_3d {box_mode_3d} for conversion!')

             pred_bboxes = pred_bboxes.tensor.cpu().numpy()
             show_result(
                 points,
...
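The `Box3DMode.DEPTH` branch above is part of the conversion that visualization goes through. A hedged sketch of the same conversion call with a made-up camera-frame box:

import torch
from mmdet3d.core.bbox import Box3DMode, CameraInstance3DBoxes

# Boxes are brought into DEPTH mode before show_result draws them.
# The tensor values here are made up for illustration.
cam_boxes = CameraInstance3DBoxes(
    torch.tensor([[0.0, 1.5, 10.0, 1.6, 1.5, 3.9, 0.1]]))
depth_boxes = Box3DMode.convert(cam_boxes, Box3DMode.CAM, Box3DMode.DEPTH)
print(depth_boxes.tensor.shape)  # torch.Size([1, 7])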
mmdet3d/models/detectors/centerpoint.py
View file @ 32a4328b

...
@@ -97,7 +97,8 @@ class CenterPoint(MVXTwoStageDetector):
         Args:
             feats (list[torch.Tensor]): Feature of point cloud.
             img_metas (list[dict]): Meta information of samples.
-            rescale (bool): Whether to rescale bboxes. Default: False.
+            rescale (bool, optional): Whether to rescale bboxes.
+                Default: False.

         Returns:
             dict: Returned bboxes consists of the following keys:
...
@@ -121,8 +122,8 @@ class CenterPoint(MVXTwoStageDetector):
                             task_id][0][key][:, 1, ...]
                     elif key == 'rot':
                         outs[task_id][0][
-                            key][:, 1,
-                                 ...] = -outs[task_id][0][key][:, 1, ...]
+                            key][:, 0,
+                                 ...] = -outs[task_id][0][key][:, 0, ...]
                     elif key == 'vel':
                         outs[task_id][0][
                             key][:, 1,
...
@@ -135,8 +136,8 @@ class CenterPoint(MVXTwoStageDetector):
                             task_id][0][key][:, 0, ...]
                     elif key == 'rot':
                         outs[task_id][0][
-                            key][:, 0,
-                                 ...] = -outs[task_id][0][key][:, 0, ...]
+                            key][:, 1,
+                                 ...] = -outs[task_id][0][key][:, 1, ...]
                     elif key == 'vel':
                         outs[task_id][0][
                             key][:, 0,
...
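A pure-torch sketch of the flip test-time-augmentation fix above: for a flipped input, the yaw-encoding channel affected by the flip (and the matching velocity component) must be negated before merging with the unflipped prediction. Which rot channel corresponds to which flip axis follows this commit and should be treated as an assumption if you port the idea:

import torch

rot_plain = torch.randn(1, 2, 4, 4)    # two yaw-encoding channels per location
rot_flipped = torch.randn(1, 2, 4, 4)  # prediction on the flipped input

rot_flipped[:, 0, ...] = -rot_flipped[:, 0, ...]  # undo the flip's sign change
rot_tta = (rot_plain + rot_flipped) / 2           # average the two passes
print(rot_tta.shape)  # torch.Size([1, 2, 4, 4])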
mmdet3d/models/detectors/groupfree3dnet.py
View file @ 32a4328b

...
@@ -38,11 +38,11 @@ class GroupFree3DNet(SingleStage3DDetector):
             img_metas (list): Image metas.
             gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): gt bboxes of each batch.
             gt_labels_3d (list[torch.Tensor]): gt class labels of each batch.
-            pts_semantic_mask (None | list[torch.Tensor]): point-wise semantic
+            pts_semantic_mask (list[torch.Tensor]): point-wise semantic
                 label of each batch.
-            pts_instance_mask (None | list[torch.Tensor]): point-wise instance
+            pts_instance_mask (list[torch.Tensor]): point-wise instance
                 label of each batch.
-            gt_bboxes_ignore (None | list[torch.Tensor]): Specify
+            gt_bboxes_ignore (list[torch.Tensor]): Specify
                 which bounding.

         Returns:
...
mmdet3d/models/detectors/h3dnet.py
View file @ 32a4328b

...
@@ -47,11 +47,11 @@ class H3DNet(TwoStage3DDetector):
             img_metas (list): Image metas.
             gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): gt bboxes of each batch.
             gt_labels_3d (list[torch.Tensor]): gt class labels of each batch.
-            pts_semantic_mask (None | list[torch.Tensor]): point-wise semantic
+            pts_semantic_mask (list[torch.Tensor]): point-wise semantic
                 label of each batch.
-            pts_instance_mask (None | list[torch.Tensor]): point-wise instance
+            pts_instance_mask (list[torch.Tensor]): point-wise instance
                 label of each batch.
-            gt_bboxes_ignore (None | list[torch.Tensor]): Specify
+            gt_bboxes_ignore (list[torch.Tensor]): Specify
                 which bounding.

         Returns:
...
mmdet3d/models/detectors/imvotenet.py
View file @ 32a4328b

 # Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+
 import numpy as np
 import torch
-import warnings

 from mmdet3d.core import bbox3d2result, merge_aug_bboxes_3d
 from mmdet3d.models.utils import MLP
...
@@ -149,21 +150,21 @@ class ImVoteNet(Base3DDetector):
         if self.with_img_backbone:
             if img_pretrained is not None:
-                warnings.warn('DeprecationWarning: pretrained is a deprecated \
-                    key, please consider using init_cfg')
+                warnings.warn('DeprecationWarning: pretrained is a deprecated '
+                              'key, please consider using init_cfg.')
                 self.img_backbone.init_cfg = dict(
                     type='Pretrained', checkpoint=img_pretrained)
         if self.with_img_roi_head:
             if img_pretrained is not None:
-                warnings.warn('DeprecationWarning: pretrained is a deprecated \
-                    key, please consider using init_cfg')
+                warnings.warn('DeprecationWarning: pretrained is a deprecated '
+                              'key, please consider using init_cfg.')
                 self.img_roi_head.init_cfg = dict(
                     type='Pretrained', checkpoint=img_pretrained)
         if self.with_pts_backbone:
             if img_pretrained is not None:
-                warnings.warn('DeprecationWarning: pretrained is a deprecated \
-                    key, please consider using init_cfg')
+                warnings.warn('DeprecationWarning: pretrained is a deprecated '
+                              'key, please consider using init_cfg.')
                 self.pts_backbone.init_cfg = dict(
                     type='Pretrained', checkpoint=pts_pretrained)
...
@@ -393,9 +394,9 @@ class ImVoteNet(Base3DDetector):
                 with shape (num_gts, 4) in [tl_x, tl_y, br_x, br_y] format.
             gt_labels (list[torch.Tensor]): class indices for each
                 2d bounding box.
-            gt_bboxes_ignore (None | list[torch.Tensor]): specify which
+            gt_bboxes_ignore (list[torch.Tensor]): specify which
                 2d bounding boxes can be ignored when computing the loss.
-            gt_masks (None | torch.Tensor): true segmentation masks for each
+            gt_masks (torch.Tensor): true segmentation masks for each
                 2d bbox, used if the architecture supports a segmentation task.
             proposals: override rpn proposals (2d) with custom proposals.
                 Use when `with_rpn` is False.
...
@@ -403,9 +404,9 @@ class ImVoteNet(Base3DDetector):
                 not supported yet.
             gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): 3d gt bboxes.
             gt_labels_3d (list[torch.Tensor]): gt class labels for 3d bboxes.
-            pts_semantic_mask (None | list[torch.Tensor]): point-wise semantic
+            pts_semantic_mask (list[torch.Tensor]): point-wise semantic
                 label of each batch.
-            pts_instance_mask (None | list[torch.Tensor]): point-wise instance
+            pts_instance_mask (list[torch.Tensor]): point-wise instance
                 label of each batch.

         Returns:
...
mmdet3d/models/detectors/mvx_two_stage.py
View file @ 32a4328b
 # Copyright (c) OpenMMLab. All rights reserved.
+import warnings
+from os import path as osp
 import mmcv
 import torch
-import warnings
 from mmcv.parallel import DataContainer as DC
 from mmcv.runner import force_fp32
-from os import path as osp
 from torch.nn import functional as F
 from mmdet3d.core import (Box3DMode, Coord3DMode, bbox3d2result,
...
@@ -84,21 +85,20 @@ class MVXTwoStageDetector(Base3DDetector):
         if self.with_img_backbone:
             if img_pretrained is not None:
-                warnings.warn('DeprecationWarning: pretrained is a deprecated \
-                    key, please consider using init_cfg')
+                warnings.warn('DeprecationWarning: pretrained is a deprecated '
+                              'key, please consider using init_cfg.')
                 self.img_backbone.init_cfg = dict(
                     type='Pretrained', checkpoint=img_pretrained)
         if self.with_img_roi_head:
             if img_pretrained is not None:
-                warnings.warn('DeprecationWarning: pretrained is a deprecated \
-                    key, please consider using init_cfg')
+                warnings.warn('DeprecationWarning: pretrained is a deprecated '
+                              'key, please consider using init_cfg.')
                 self.img_roi_head.init_cfg = dict(
                     type='Pretrained', checkpoint=img_pretrained)
         if self.with_pts_backbone:
             if pts_pretrained is not None:
-                warnings.warn('DeprecationWarning: pretrained is a deprecated \
-                    key, please consider using init_cfg')
+                warnings.warn('DeprecationWarning: pretrained is a deprecated '
+                              'key, please consider using init_cfg')
                 self.pts_backbone.init_cfg = dict(
                     type='Pretrained', checkpoint=pts_pretrained)
...
@@ -260,7 +260,7 @@ class MVXTwoStageDetector(Base3DDetector):
                 of 2D boxes in images. Defaults to None.
             gt_bboxes (list[torch.Tensor], optional): Ground truth 2D boxes in
                 images. Defaults to None.
-            img (torch.Tensor optional): Images of each sample with shape
+            img (torch.Tensor, optional): Images of each sample with shape
                 (N, C, H, W). Defaults to None.
             proposals ([list[torch.Tensor], optional): Predicted proposals
                 used for training Fast RCNN. Defaults to None.
...
@@ -497,7 +497,7 @@ class MVXTwoStageDetector(Base3DDetector):
                                           Box3DMode.DEPTH)
             elif box_mode_3d != Box3DMode.DEPTH:
                 ValueError(
-                    f'Unsupported box_mode_3d {box_mode_3d} for convertion!')
+                    f'Unsupported box_mode_3d {box_mode_3d} for conversion!')
             pred_bboxes = pred_bboxes.tensor.cpu().numpy()
             show_result(points, None, pred_bboxes, out_dir, file_name)
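
This hunk fixes a typo in the error message ('convertion' to 'conversion') on the visualization path, which needs boxes in depth coordinates before calling show_result. (Note that the code constructs the ValueError without raising it, both before and after.) A hedged sketch of the conversion step this branch guards, assuming the public box conversion API:

# Hedged sketch of the box-mode normalization performed before
# visualization; assumes the public convert_to()/Box3DMode API.
from mmdet3d.core import Box3DMode


def to_depth_boxes(pred_bboxes, box_mode_3d):
    """Return boxes in depth coordinates, converting if necessary."""
    if box_mode_3d == Box3DMode.DEPTH:
        return pred_bboxes
    if box_mode_3d in (Box3DMode.CAM, Box3DMode.LIDAR):
        return pred_bboxes.convert_to(Box3DMode.DEPTH)
    raise ValueError(f'Unsupported box_mode_3d {box_mode_3d} for conversion!')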
mmdet3d/models/detectors/point_rcnn.py
0 → 100644
View file @ 32a4328b
# Copyright (c) OpenMMLab. All rights reserved.
import torch

from mmdet.models import DETECTORS
from .two_stage import TwoStage3DDetector


@DETECTORS.register_module()
class PointRCNN(TwoStage3DDetector):
    r"""PointRCNN detector.

    Please refer to the `PointRCNN <https://arxiv.org/abs/1812.04244>`_

    Args:
        backbone (dict): Config dict of detector's backbone.
        neck (dict, optional): Config dict of neck. Defaults to None.
        rpn_head (dict, optional): Config of RPN head. Defaults to None.
        roi_head (dict, optional): Config of ROI head. Defaults to None.
        train_cfg (dict, optional): Train configs. Defaults to None.
        test_cfg (dict, optional): Test configs. Defaults to None.
        pretrained (str, optional): Model pretrained path. Defaults to None.
        init_cfg (dict, optional): Config of initialization. Defaults to None.
    """

    def __init__(self,
                 backbone,
                 neck=None,
                 rpn_head=None,
                 roi_head=None,
                 train_cfg=None,
                 test_cfg=None,
                 pretrained=None,
                 init_cfg=None):
        super(PointRCNN, self).__init__(
            backbone=backbone,
            neck=neck,
            rpn_head=rpn_head,
            roi_head=roi_head,
            train_cfg=train_cfg,
            test_cfg=test_cfg,
            pretrained=pretrained,
            init_cfg=init_cfg)

    def extract_feat(self, points):
        """Directly extract features from the backbone+neck.

        Args:
            points (torch.Tensor): Input points.

        Returns:
            dict: Features from the backbone+neck
        """
        x = self.backbone(points)
        if self.with_neck:
            x = self.neck(x)
        return x

    def forward_train(self, points, img_metas, gt_bboxes_3d, gt_labels_3d):
        """Forward of training.

        Args:
            points (list[torch.Tensor]): Points of each batch.
            img_metas (list[dict]): Meta information of each sample.
            gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): gt bboxes of each batch.
            gt_labels_3d (list[torch.Tensor]): gt class labels of each batch.

        Returns:
            dict: Losses.
        """
        losses = dict()
        points_cat = torch.stack(points)
        x = self.extract_feat(points_cat)

        # features for rcnn
        backbone_feats = x['fp_features'].clone()
        backbone_xyz = x['fp_xyz'].clone()
        rcnn_feats = {'features': backbone_feats, 'points': backbone_xyz}

        bbox_preds, cls_preds = self.rpn_head(x)
        rpn_loss = self.rpn_head.loss(
            bbox_preds=bbox_preds,
            cls_preds=cls_preds,
            points=points,
            gt_bboxes_3d=gt_bboxes_3d,
            gt_labels_3d=gt_labels_3d,
            img_metas=img_metas)
        losses.update(rpn_loss)

        bbox_list = self.rpn_head.get_bboxes(points_cat, bbox_preds,
                                             cls_preds, img_metas)
        proposal_list = [
            dict(
                boxes_3d=bboxes,
                scores_3d=scores,
                labels_3d=labels,
                cls_preds=preds_cls)
            for bboxes, scores, labels, preds_cls in bbox_list
        ]
        rcnn_feats.update({'points_cls_preds': cls_preds})

        roi_losses = self.roi_head.forward_train(rcnn_feats, img_metas,
                                                 proposal_list, gt_bboxes_3d,
                                                 gt_labels_3d)
        losses.update(roi_losses)
        return losses

    def simple_test(self, points, img_metas, imgs=None, rescale=False):
        """Forward of testing.

        Args:
            points (list[torch.Tensor]): Points of each sample.
            img_metas (list[dict]): Image metas.
            imgs (list[torch.Tensor], optional): Images of each sample.
                Defaults to None.
            rescale (bool, optional): Whether to rescale results.
                Defaults to False.

        Returns:
            list: Predicted 3d boxes.
        """
        points_cat = torch.stack(points)

        x = self.extract_feat(points_cat)
        # features for rcnn
        backbone_feats = x['fp_features'].clone()
        backbone_xyz = x['fp_xyz'].clone()
        rcnn_feats = {'features': backbone_feats, 'points': backbone_xyz}
        bbox_preds, cls_preds = self.rpn_head(x)
        rcnn_feats.update({'points_cls_preds': cls_preds})

        bbox_list = self.rpn_head.get_bboxes(
            points_cat, bbox_preds, cls_preds, img_metas, rescale=rescale)
        proposal_list = [
            dict(
                boxes_3d=bboxes,
                scores_3d=scores,
                labels_3d=labels,
                cls_preds=preds_cls)
            for bboxes, scores, labels, preds_cls in bbox_list
        ]
        bbox_results = self.roi_head.simple_test(rcnn_feats, img_metas,
                                                 proposal_list)
        return bbox_results
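
For context, a hedged usage sketch of the new detector through the high-level inference API; the checkpoint path below is hypothetical and the config path assumes the PointRCNN config shipped with this release:

# Hedged usage sketch for the new PointRCNN detector. Paths are
# illustrative; substitute files that exist locally.
from mmdet3d.apis import inference_detector, init_model

config_file = 'configs/point_rcnn/point_rcnn_2x8_kitti-3d-3classes.py'
checkpoint_file = 'checkpoints/point_rcnn_kitti.pth'  # hypothetical path

model = init_model(config_file, checkpoint_file, device='cuda:0')
# Runs the RPN + RoI pipeline of simple_test() on one point cloud file.
result, data = inference_detector(model, 'demo/data/kitti/kitti_000008.bin')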
mmdet3d/models/detectors/single_stage_mono3d.py
View file @ 32a4328b
 # Copyright (c) OpenMMLab. All rights reserved.
+from os import path as osp
 import mmcv
 import numpy as np
 import torch
 from mmcv.parallel import DataContainer as DC
-from os import path as osp
 from mmdet3d.core import (CameraInstance3DBoxes, bbox3d2result,
                           show_multi_modality_result)
...
@@ -48,14 +49,15 @@ class SingleStageMono3DDetector(SingleStageDetector):
                 image in [tl_x, tl_y, br_x, br_y] format.
             gt_labels (list[Tensor]): Class indices corresponding to each box
             gt_bboxes_3d (list[Tensor]): Each item are the 3D truth boxes for
-                each image in [x, y, z, w, l, h, theta, vx, vy] format.
+                each image in [x, y, z, x_size, y_size, z_size, yaw, vx, vy]
+                format.
             gt_labels_3d (list[Tensor]): 3D class indices corresponding to
                 each box.
             centers2d (list[Tensor]): Projected 3D centers onto 2D images.
             depths (list[Tensor]): Depth of projected centers on 2D images.
             attr_labels (list[Tensor], optional): Attribute indices
                 corresponding to each box
-            gt_bboxes_ignore (None | list[Tensor]): Specify which bounding
+            gt_bboxes_ignore (list[Tensor]): Specify which bounding
                 boxes can be ignored when computing the loss.

         Returns:
...
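
The corrected docstring spells out the 9-dimensional box encoding: translation, size, yaw, and (for nuScenes-style boxes) planar velocity. For illustration, one such ground-truth row with made-up values:

# Illustrative 9-dim 3D box row matching the corrected docstring layout:
# [x, y, z, x_size, y_size, z_size, yaw, vx, vy]; values are made up.
import torch

gt_box_3d = torch.tensor(
    [[1.5, 0.2, -1.0, 4.2, 1.8, 1.6, 0.31, 0.0, 0.0]])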
mmdet3d/models/detectors/smoke_mono3d.py
0 → 100644
View file @ 32a4328b
# Copyright (c) OpenMMLab. All rights reserved.
from mmdet.models.builder import DETECTORS
from .single_stage_mono3d import SingleStageMono3DDetector


@DETECTORS.register_module()
class SMOKEMono3D(SingleStageMono3DDetector):
    r"""SMOKE <https://arxiv.org/abs/2002.10111>`_ for monocular 3D object
    detection.

    """

    def __init__(self,
                 backbone,
                 neck,
                 bbox_head,
                 train_cfg=None,
                 test_cfg=None,
                 pretrained=None):
        super(SMOKEMono3D, self).__init__(backbone, neck, bbox_head,
                                          train_cfg, test_cfg, pretrained)
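
Because the class is registered via @DETECTORS.register_module(), configs can instantiate it by type name. A hedged construction sketch, assuming the SMOKE config shipped with this release is available locally:

# Hedged sketch: building SMOKEMono3D from a config through the registry.
from mmcv import Config
from mmdet3d.models import build_detector

cfg = Config.fromfile(
    'configs/smoke/smoke_dla34_pytorch_dlaneck_gn-all_8x4_6x_kitti-mono3d.py')
model = build_detector(cfg.model)  # dispatches on type='SMOKEMono3D'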
mmdet3d/models/detectors/votenet.py
View file @ 32a4328b
...
@@ -40,11 +40,11 @@ class VoteNet(SingleStage3DDetector):
             img_metas (list): Image metas.
             gt_bboxes_3d (:obj:`BaseInstance3DBoxes`): gt bboxes of each batch.
             gt_labels_3d (list[torch.Tensor]): gt class labels of each batch.
-            pts_semantic_mask (None | list[torch.Tensor]): point-wise semantic
+            pts_semantic_mask (list[torch.Tensor]): point-wise semantic
                 label of each batch.
-            pts_instance_mask (None | list[torch.Tensor]): point-wise instance
+            pts_instance_mask (list[torch.Tensor]): point-wise instance
                 label of each batch.
-            gt_bboxes_ignore (None | list[torch.Tensor]): Specify
+            gt_bboxes_ignore (list[torch.Tensor]): Specify
                 which bounding.

         Returns:
...
mmdet3d/models/fusion_layers/coord_transform.py
View file @ 32a4328b
 # Copyright (c) OpenMMLab. All rights reserved.
-import torch
 from functools import partial
+import torch
 from mmdet3d.core.points import get_points_type
...
mmdet3d/models/fusion_layers/point_fusion.py
View file @ 32a4328b
...
@@ -32,9 +32,9 @@ def point_sample(img_meta,
         points (torch.Tensor): Nx3 point cloud in LiDAR coordinates.
         proj_mat (torch.Tensor): 4x4 transformation matrix.
         coord_type (str): 'DEPTH' or 'CAMERA' or 'LIDAR'.
-        img_scale_factor (torch.Tensor): Scale factor with shape of \
+        img_scale_factor (torch.Tensor): Scale factor with shape of
             (w_scale, h_scale).
-        img_crop_offset (torch.Tensor): Crop offset used to crop \
+        img_crop_offset (torch.Tensor): Crop offset used to crop
             image during data augmentation with shape of (w_offset, h_offset).
         img_flip (bool): Whether the image is flipped.
         img_pad_shape (tuple[int]): int tuple indicates the h & w after
...
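
The docstring change above only removes stray line-continuation backslashes; the projection the 4x4 proj_mat implies is the usual homogeneous transform. A minimal sketch (the project_points helper is illustrative, not part of point_fusion):

# Minimal sketch of the projection point_sample's proj_mat argument implies:
# lift Nx3 points to homogeneous coordinates, apply the 4x4 matrix, then
# divide by depth. `project_points` is illustrative, not an mmdet3d API.
import torch


def project_points(points, proj_mat):
    """points: (N, 3) in LiDAR coords; proj_mat: (4, 4) lidar-to-image."""
    ones = points.new_ones(points.shape[0], 1)
    pts_hom = torch.cat([points, ones], dim=-1)   # (N, 4) homogeneous
    pts_img = pts_hom @ proj_mat.t()              # (N, 4) image-plane coords
    depth = pts_img[:, 2:3].clamp(min=1e-5)       # guard against division by 0
    return pts_img[:, :2] / depth                 # (N, 2) pixel (u, v)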