OpenDAS / mmdetection3d
Commit db44cc50

[Refactor] Refactor the model of VoxelNet and DynamicVoxelNet

Authored by ZCMax on Jun 21, 2022; committed by ChaimZhu on Jul 20, 2022.
Parent: 7fda1f66

Showing 8 changed files with 732 additions and 442 deletions:
- mmdet3d/models/__init__.py (+1, -0)
- mmdet3d/models/dense_heads/__init__.py (+2, -1)
- mmdet3d/models/dense_heads/anchor3d_head.py (+82, -314)
- mmdet3d/models/dense_heads/base_3d_dense_head.py (+380, -0)
- mmdet3d/models/detectors/dynamic_voxelnet.py (+32, -26)
- mmdet3d/models/detectors/single_stage.py (+11, -3)
- mmdet3d/models/detectors/voxelnet.py (+28, -98)
- tests/test_models/test_dense_heads/test_anchor3d_head.py (+196, -0)
mmdet3d/models/__init__.py:

```diff
@@ -7,6 +7,7 @@ from .builder import (BACKBONES, DETECTORS, FUSION_LAYERS, HEADS, LOSSES,
                       build_loss, build_middle_encoder, build_model,
                       build_neck, build_roi_extractor, build_shared_head,
                       build_voxel_encoder)
+from .data_preprocessors import *  # noqa: F401,F403
 from .decode_heads import *  # noqa: F401,F403
 from .dense_heads import *  # noqa: F401,F403
 from .detectors import *  # noqa: F401,F403
 ...
```
mmdet3d/models/dense_heads/__init__.py:

```diff
 # Copyright (c) OpenMMLab. All rights reserved.
 from .anchor3d_head import Anchor3DHead
 from .anchor_free_mono3d_head import AnchorFreeMono3DHead
+from .base_3d_dense_head import Base3DDenseHead
 from .base_conv_bbox_head import BaseConvBboxHead
 from .base_mono3d_dense_head import BaseMono3DDenseHead
 from .centerpoint_head import CenterHead
 ...
@@ -21,5 +22,5 @@ __all__ = [
     'SSD3DHead', 'BaseConvBboxHead', 'CenterHead', 'ShapeAwareHead',
     'BaseMono3DDenseHead', 'AnchorFreeMono3DHead', 'FCOSMono3DHead',
     'GroupFree3DHead', 'PointRPNHead', 'SMOKEMono3DHead', 'PGDHead',
-    'MonoFlexHead'
+    'MonoFlexHead', 'Base3DDenseHead'
 ]
```
mmdet3d/models/dense_heads/anchor3d_head.py:

(This diff is collapsed in the original view.)
mmdet3d/models/dense_heads/base_3d_dense_head.py (new file):

```python
# Copyright (c) OpenMMLab. All rights reserved.
from abc import ABCMeta, abstractmethod
from typing import List, Optional, Tuple

import numpy as np
import torch
from mmcv.cnn.utils.weight_init import constant_init
from mmengine.config import ConfigDict
from mmengine.data import InstanceData
from mmengine.model import BaseModule
from torch import Tensor

from mmdet3d.core import box3d_multiclass_nms, limit_period, xywhr2xyxyr
from mmdet3d.core.utils import InstanceList, OptMultiConfig, SampleList
from mmdet.core.utils import select_single_mlvl


class Base3DDenseHead(BaseModule, metaclass=ABCMeta):
    """Base class for 3D DenseHeads.

    1. The ``init_weights`` method is used to initialize the densehead's
    model parameters. After detector initialization, ``init_weights``
    is triggered when ``detector.init_weights()`` is called externally.

    2. The ``loss`` method is used to calculate the loss of the densehead,
    which includes two steps: (1) the densehead model performs forward
    propagation to obtain the feature maps; (2) the ``loss_by_feat`` method
    is called based on the feature maps to calculate the loss.

    .. code:: text

        loss(): forward() -> loss_by_feat()

    3. The ``predict`` method is used to predict detection results,
    which includes two steps: (1) the densehead model performs forward
    propagation to obtain the feature maps; (2) the ``predict_by_feat``
    method is called based on the feature maps to predict detection
    results including post-processing.

    .. code:: text

        predict(): forward() -> predict_by_feat()

    4. The ``loss_and_predict`` method is used to return loss and detection
    results at the same time. It calls the densehead's ``forward``,
    ``loss_by_feat`` and ``predict_by_feat`` methods in order. If a
    one-stage head is used as an RPN, the densehead needs to return both
    losses and predictions; the predictions are used as the proposals of
    the roihead.

    .. code:: text

        loss_and_predict(): forward() -> loss_by_feat() -> predict_by_feat()
    """

    def __init__(self, init_cfg: OptMultiConfig = None) -> None:
        super().__init__(init_cfg=init_cfg)

    def init_weights(self) -> None:
        """Initialize the weights."""
        super().init_weights()
        # avoid init_cfg overwriting the initialization of `conv_offset`
        for m in self.modules():
            # DeformConv2dPack, ModulatedDeformConv2dPack
            if hasattr(m, 'conv_offset'):
                constant_init(m.conv_offset, 0)

    def loss(self, x: Tuple[Tensor], batch_data_samples: SampleList,
             **kwargs) -> dict:
        """Perform forward propagation and loss calculation of the detection
        head on the features of the upstream network.

        Args:
            x (tuple[Tensor]): Features from the upstream network, each is
                a 4D-tensor.
            batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
                Samples. It usually includes information such as
                `gt_instance`, `gt_panoptic_seg` and `gt_sem_seg`.

        Returns:
            dict: A dictionary of loss components.
        """
        outs = self(x)
        batch_gt_instances_3d = []
        batch_gt_instances_ignore = []
        batch_input_metas = []
        for data_sample in batch_data_samples:
            batch_input_metas.append(data_sample.metainfo)
            batch_gt_instances_3d.append(data_sample.gt_instances_3d)
            batch_gt_instances_ignore.append(
                data_sample.get('ignored_instances', None))
        loss_inputs = outs + (batch_gt_instances_3d, batch_input_metas,
                              batch_gt_instances_ignore)
        losses = self.loss_by_feat(*loss_inputs)
        return losses

    @abstractmethod
    def loss_by_feat(self, **kwargs) -> dict:
        """Calculate the loss based on the features extracted by the
        detection head."""
        pass

    def loss_and_predict(self,
                         x: Tuple[Tensor],
                         batch_data_samples: SampleList,
                         proposal_cfg: Optional[ConfigDict] = None,
                         **kwargs) -> Tuple[dict, InstanceList]:
        """Perform forward propagation of the head, then calculate loss and
        predictions from the features and data samples.

        Args:
            x (tuple[Tensor]): Features from FPN.
            batch_data_samples (list[:obj:`Det3DDataSample`]): Each item
                contains the meta information of each sample and
                corresponding annotations.
            proposal_cfg (ConfigDict, optional): Test / postprocessing
                configuration, if None, test_cfg would be used.
                Defaults to None.

        Returns:
            tuple: The return value is a tuple that contains:

                - losses (dict[str, Tensor]): A dictionary of loss
                  components.
                - predictions (list[:obj:`InstanceData`]): Detection
                  results of each sample after the post process.
        """
        batch_gt_instances = []
        batch_gt_instances_ignore = []
        batch_input_metas = []
        for data_sample in batch_data_samples:
            batch_input_metas.append(data_sample.metainfo)
            batch_gt_instances.append(data_sample.gt_instances_3d)
            batch_gt_instances_ignore.append(
                data_sample.get('ignored_instances', None))

        outs = self(x)

        loss_inputs = outs + (batch_gt_instances, batch_input_metas,
                              batch_gt_instances_ignore)
        losses = self.loss_by_feat(*loss_inputs)

        predictions = self.predict_by_feat(
            *outs, batch_input_metas=batch_input_metas, cfg=proposal_cfg)
        return losses, predictions

    def predict(self,
                x: Tuple[Tensor],
                batch_data_samples: SampleList,
                rescale: bool = False) -> InstanceList:
        """Perform forward propagation of the 3D detection head and predict
        detection results on the features of the upstream network.

        Args:
            x (tuple[Tensor]): Multi-level features from the
                upstream network, each is a 4D-tensor.
            batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
                Samples. It usually includes information such as
                `gt_instance_3d`, `gt_pts_panoptic_seg` and
                `gt_pts_sem_seg`.
            rescale (bool, optional): Whether to rescale the results.
                Defaults to False.

        Returns:
            list[:obj:`InstanceData`]: Detection results of each sample
            after the post process. Each item usually contains the
            following keys.

                - scores_3d (Tensor): Classification scores, has a shape
                  (num_instances, )
                - labels_3d (Tensor): Labels of bboxes, has a shape
                  (num_instances, ).
                - bboxes_3d (BaseInstance3DBoxes): Prediction of bboxes,
                  contains a tensor with shape (num_instances, C), where
                  C >= 7.
        """
        batch_input_metas = [
            data_samples.metainfo for data_samples in batch_data_samples
        ]
        outs = self(x)
        predictions = self.predict_by_feat(
            *outs, batch_input_metas=batch_input_metas, rescale=rescale)
        return predictions

    def predict_by_feat(self,
                        cls_scores: List[Tensor],
                        bbox_preds: List[Tensor],
                        dir_cls_preds: List[Tensor],
                        batch_input_metas: Optional[List[dict]] = None,
                        cfg: Optional[ConfigDict] = None,
                        rescale: bool = False,
                        **kwargs) -> InstanceList:
        """Transform a batch of output features extracted from the head into
        bbox results.

        Args:
            cls_scores (list[Tensor]): Classification scores for all
                scale levels, each is a 4D-tensor, has shape
                (batch_size, num_priors * num_classes, H, W).
            bbox_preds (list[Tensor]): Box energies / deltas for all
                scale levels, each is a 4D-tensor, has shape
                (batch_size, num_priors * 4, H, W).
            dir_cls_preds (list[Tensor]): Predictions of direction class
                for all scale levels, each is a 4D-tensor, has shape
                (batch_size, num_priors * 2, H, W).
            batch_input_metas (list[dict], optional): Batch input meta
                info. Defaults to None.
            cfg (ConfigDict, optional): Test / postprocessing
                configuration, if None, test_cfg would be used.
                Defaults to None.
            rescale (bool): If True, return boxes in original image space.
                Defaults to False.

        Returns:
            list[:obj:`InstanceData`]: Detection results of each sample
            after the post process. Each item usually contains the
            following keys.

                - scores_3d (Tensor): Classification scores, has a shape
                  (num_instances, )
                - labels_3d (Tensor): Labels of bboxes, has a shape
                  (num_instances, ).
                - bboxes_3d (BaseInstance3DBoxes): Prediction of bboxes,
                  contains a tensor with shape (num_instances, C), where
                  C >= 7.
        """
        assert len(cls_scores) == len(bbox_preds)
        assert len(cls_scores) == len(dir_cls_preds)
        num_levels = len(cls_scores)
        featmap_sizes = [cls_scores[i].shape[-2:] for i in range(num_levels)]
        mlvl_priors = self.prior_generator.grid_anchors(
            featmap_sizes, device=cls_scores[0].device)
        mlvl_priors = [
            prior.reshape(-1, self.box_code_size) for prior in mlvl_priors
        ]

        result_list = []
        for input_id in range(len(batch_input_metas)):

            input_meta = batch_input_metas[input_id]
            cls_score_list = select_single_mlvl(cls_scores, input_id)
            bbox_pred_list = select_single_mlvl(bbox_preds, input_id)
            dir_cls_pred_list = select_single_mlvl(dir_cls_preds, input_id)

            results = self._predict_by_feat_single(
                cls_score_list=cls_score_list,
                bbox_pred_list=bbox_pred_list,
                dir_cls_pred_list=dir_cls_pred_list,
                mlvl_priors=mlvl_priors,
                input_meta=input_meta,
                cfg=cfg,
                rescale=rescale,
                **kwargs)
            result_list.append(results)
        return result_list

    def _predict_by_feat_single(self,
                                cls_score_list: List[Tensor],
                                bbox_pred_list: List[Tensor],
                                dir_cls_pred_list: List[Tensor],
                                mlvl_priors: List[Tensor],
                                input_meta: dict,
                                cfg: ConfigDict,
                                rescale: bool = False,
                                **kwargs) -> InstanceData:
        """Transform the features of a single sample extracted from the head
        into bbox results.

        Args:
            cls_score_list (list[Tensor]): Box scores from all scale
                levels of a single point cloud sample, each item has shape
                (num_priors * num_classes, H, W).
            bbox_pred_list (list[Tensor]): Box energies / deltas from
                all scale levels of a single point cloud sample, each item
                has shape (num_priors * C, H, W).
            dir_cls_pred_list (list[Tensor]): Predictions of direction
                class from all scale levels of a single point cloud
                sample, each item has shape (num_priors * 2, H, W).
            mlvl_priors (list[Tensor]): Each element in the list is
                the priors of a single level in the feature pyramid, has
                shape (num_priors, C), where C is the box code size.
            input_meta (dict): Contains point cloud and image meta info.
            cfg (:obj:`ConfigDict`): Test / postprocessing configuration,
                if None, test_cfg would be used.
            rescale (bool): If True, return boxes in original image space.
                Defaults to False.

        Returns:
            :obj:`InstanceData`: Detection results of the sample after the
            post process. It usually contains the following keys.

                - scores_3d (Tensor): Classification scores, has a shape
                  (num_instances, )
                - labels_3d (Tensor): Labels of bboxes, has a shape
                  (num_instances, ).
                - bboxes_3d (BaseInstance3DBoxes): Prediction of bboxes,
                  contains a tensor with shape (num_instances, C), where
                  C >= 7.
        """
        cfg = self.test_cfg if cfg is None else cfg
        assert len(cls_score_list) == len(bbox_pred_list) == len(mlvl_priors)
        mlvl_bboxes = []
        mlvl_scores = []
        mlvl_dir_scores = []
        for cls_score, bbox_pred, dir_cls_pred, priors in zip(
                cls_score_list, bbox_pred_list, dir_cls_pred_list,
                mlvl_priors):
            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
            assert cls_score.size()[-2:] == dir_cls_pred.size()[-2:]
            dir_cls_pred = dir_cls_pred.permute(1, 2, 0).reshape(-1, 2)
            dir_cls_score = torch.max(dir_cls_pred, dim=-1)[1]

            cls_score = cls_score.permute(1, 2,
                                          0).reshape(-1, self.num_classes)
            if self.use_sigmoid_cls:
                scores = cls_score.sigmoid()
            else:
                scores = cls_score.softmax(-1)
            bbox_pred = bbox_pred.permute(1, 2,
                                          0).reshape(-1, self.box_code_size)

            nms_pre = cfg.get('nms_pre', -1)
            if nms_pre > 0 and scores.shape[0] > nms_pre:
                if self.use_sigmoid_cls:
                    max_scores, _ = scores.max(dim=1)
                else:
                    max_scores, _ = scores[:, :-1].max(dim=1)
                _, topk_inds = max_scores.topk(nms_pre)
                priors = priors[topk_inds, :]
                bbox_pred = bbox_pred[topk_inds, :]
                scores = scores[topk_inds, :]
                dir_cls_score = dir_cls_score[topk_inds]

            bboxes = self.bbox_coder.decode(priors, bbox_pred)
            mlvl_bboxes.append(bboxes)
            mlvl_scores.append(scores)
            mlvl_dir_scores.append(dir_cls_score)

        mlvl_bboxes = torch.cat(mlvl_bboxes)
        mlvl_bboxes_for_nms = xywhr2xyxyr(input_meta['box_type_3d'](
            mlvl_bboxes, box_dim=self.box_code_size).bev)
        mlvl_scores = torch.cat(mlvl_scores)
        mlvl_dir_scores = torch.cat(mlvl_dir_scores)

        if self.use_sigmoid_cls:
            # Append a dummy background class column when using sigmoid
            padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
            mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)

        score_thr = cfg.get('score_thr', 0)
        results = box3d_multiclass_nms(mlvl_bboxes, mlvl_bboxes_for_nms,
                                       mlvl_scores, score_thr, cfg.max_num,
                                       cfg, mlvl_dir_scores)
        bboxes, scores, labels, dir_scores = results
        if bboxes.shape[0] > 0:
            dir_rot = limit_period(bboxes[..., 6] - self.dir_offset,
                                   self.dir_limit_offset, np.pi)
            bboxes[..., 6] = (
                dir_rot + self.dir_offset +
                np.pi * dir_scores.to(bboxes.dtype))
        bboxes = input_meta['box_type_3d'](bboxes, box_dim=self.box_code_size)
        results = InstanceData()
        results.bboxes_3d = bboxes
        results.scores_3d = scores
        results.labels_3d = labels
        return results

    # TODO: Support augmentation test
    def aug_test(self,
                 aug_batch_feats,
                 aug_batch_input_metas,
                 rescale=False,
                 with_ori_nms=False,
                 **kwargs):
        pass
```
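The contract this base class imposes is easiest to see in a toy subclass. The sketch below is illustrative only: `ToyHead` and its layer sizes are hypothetical, and a real head (e.g. `Anchor3DHead`) also defines the attributes that `predict_by_feat()` relies on (`prior_generator`, `bbox_coder`, `box_code_size`, `use_sigmoid_cls`, `test_cfg`).

```python
# A minimal sketch of the subclass contract, assuming Base3DDenseHead from
# the new file above is importable. ToyHead and its layer sizes are made up.
from typing import List, Tuple

from torch import Tensor
from torch.nn import Conv2d

from mmdet3d.models.dense_heads import Base3DDenseHead


class ToyHead(Base3DDenseHead):

    def __init__(self, in_channels: int = 64, num_classes: int = 3):
        super().__init__()
        self.conv_cls = Conv2d(in_channels, num_classes, 1)
        self.conv_reg = Conv2d(in_channels, 7, 1)
        self.conv_dir = Conv2d(in_channels, 2, 1)

    def forward(self, x: Tuple[Tensor]) -> tuple:
        # One entry per feature level, matching the
        # (cls_scores, bbox_preds, dir_cls_preds) layout consumed by
        # loss_by_feat() and predict_by_feat().
        cls_scores = [self.conv_cls(feat) for feat in x]
        bbox_preds = [self.conv_reg(feat) for feat in x]
        dir_cls_preds = [self.conv_dir(feat) for feat in x]
        return cls_scores, bbox_preds, dir_cls_preds

    def loss_by_feat(self, cls_scores: List[Tensor],
                     bbox_preds: List[Tensor], dir_cls_preds: List[Tensor],
                     batch_gt_instances_3d: list, batch_input_metas: list,
                     batch_gt_instances_ignore: list = None) -> dict:
        # A real head matches priors to ground truth here; this stub only
        # shows the dict-of-losses return contract used by loss().
        return dict(loss_cls=sum(s.sigmoid().mean() for s in cls_scores))
```

With this in place, `head.loss(feats, batch_data_samples)` unpacks the ground truth from the data samples and passes `outs + (batch_gt_instances_3d, batch_input_metas, batch_gt_instances_ignore)` positionally into `loss_by_feat`, which is why the stub's parameter order matches.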
mmdet3d/models/detectors/dynamic_voxelnet.py:

```diff
 # Copyright (c) OpenMMLab. All rights reserved.
+from typing import List, Tuple
+
 import torch
 from mmcv.runner import force_fp32
+from torch import Tensor
 from torch.nn import functional as F
 
+from mmdet3d.core.utils import ConfigType, OptConfigType, OptMultiConfig
 from mmdet3d.registry import MODELS
 from .voxelnet import VoxelNet
 ...
@@ -13,17 +17,17 @@ class DynamicVoxelNet(VoxelNet):
     """
 
     def __init__(self,
-                 voxel_layer,
-                 voxel_encoder,
-                 middle_encoder,
-                 backbone,
-                 neck=None,
-                 bbox_head=None,
-                 train_cfg=None,
-                 test_cfg=None,
-                 pretrained=None,
-                 init_cfg=None):
-        super(DynamicVoxelNet, self).__init__(
+                 voxel_layer: ConfigType,
+                 voxel_encoder: ConfigType,
+                 middle_encoder: ConfigType,
+                 backbone: ConfigType,
+                 neck: OptConfigType = None,
+                 bbox_head: OptConfigType = None,
+                 train_cfg: OptConfigType = None,
+                 test_cfg: OptConfigType = None,
+                 data_preprocessor: OptConfigType = None,
+                 init_cfg: OptMultiConfig = None) -> None:
+        super().__init__(
             voxel_layer=voxel_layer,
             voxel_encoder=voxel_encoder,
             middle_encoder=middle_encoder,
 ...
@@ -32,30 +36,19 @@ class DynamicVoxelNet(VoxelNet):
             bbox_head=bbox_head,
             train_cfg=train_cfg,
             test_cfg=test_cfg,
-            pretrained=pretrained,
+            data_preprocessor=data_preprocessor,
             init_cfg=init_cfg)
 
-    def extract_feat(self, points, img_metas):
-        """Extract features from points."""
-        voxels, coors = self.voxelize(points)
-        voxel_features, feature_coors = self.voxel_encoder(voxels, coors)
-        batch_size = coors[-1, 0].item() + 1
-        x = self.middle_encoder(voxel_features, feature_coors, batch_size)
-        x = self.backbone(x)
-        if self.with_neck:
-            x = self.neck(x)
-        return x
-
     @torch.no_grad()
     @force_fp32()
-    def voxelize(self, points):
+    def voxelize(self, points: List[torch.Tensor]) -> tuple:
         """Apply dynamic voxelization to points.
 
         Args:
-            points (list[torch.Tensor]): Points of each sample.
+            points (list[Tensor]): Points of each sample.
 
         Returns:
-            tuple[torch.Tensor]: Concatenated points and coordinates.
+            tuple[Tensor]: Concatenated points and coordinates.
         """
         coors = []
         # dynamic voxelization only provides a coors mapping
 ...
@@ -69,3 +62,16 @@ class DynamicVoxelNet(VoxelNet):
             coors_batch.append(coor_pad)
         coors_batch = torch.cat(coors_batch, dim=0)
         return points, coors_batch
+
+    def extract_feat(self, batch_inputs_dict: dict) -> Tuple[Tensor]:
+        """Extract features from points."""
+        # TODO: Remove voxelization to datapreprocessor
+        points = batch_inputs_dict['points']
+        voxels, coors = self.voxelize(points)
+        voxel_features, feature_coors = self.voxel_encoder(voxels, coors)
+        batch_size = coors[-1, 0].item() + 1
+        x = self.middle_encoder(voxel_features, feature_coors, batch_size)
+        x = self.backbone(x)
+        if self.with_neck:
+            x = self.neck(x)
+        return x
```
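The middle of `voxelize()` is elided in the hunks above. A minimal standalone sketch of the batching trick it uses (made-up coordinate tensors, assuming the `F.pad` call from the surrounding implementation) makes the `coors_batch[-1, 0].item() + 1` batch-size recovery in `extract_feat()` easier to follow:

```python
# Standalone sketch of the coordinate batching used by dynamic voxelization:
# each sample's (z, y, x) voxel coordinates get the sample index prepended
# with F.pad, so all samples concatenate into one (N, 4) tensor whose first
# column is the batch index. The tensors are made up for illustration.
import torch
import torch.nn.functional as F

coors = [
    torch.tensor([[0, 1, 2], [0, 1, 3]]),  # sample 0
    torch.tensor([[1, 0, 0]]),             # sample 1
]
coors_batch = []
for i, coor in enumerate(coors):
    # pad one column on the left, filled with the sample index i
    coor_pad = F.pad(coor, (1, 0), mode='constant', value=i)
    coors_batch.append(coor_pad)
coors_batch = torch.cat(coors_batch, dim=0)
# tensor([[0, 0, 1, 2],
#         [0, 0, 1, 3],
#         [1, 1, 0, 0]])
# extract_feat() recovers the batch size from the last row's first column:
assert coors_batch[-1, 0].item() + 1 == 2
```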
mmdet3d/models/detectors/single_stage.py:

```diff
 # Copyright (c) OpenMMLab. All rights reserved.
-from typing import List, Tuple, Union
+from typing import Dict, List, Tuple, Union
 
 import torch
+from torch import Tensor
 
 from mmdet3d.core.utils import (ConfigType, OptConfigType, OptMultiConfig,
                                 OptSampleList, SampleList)
 ...
@@ -134,12 +135,19 @@ class SingleStage3DDetector(Base3DDetector):
         results = self.bbox_head.forward(x)
         return results
 
-    def extract_feat(self,
-                     batch_inputs_dict: torch.Tensor) -> Tuple[torch.Tensor]:
+    def extract_feat(
+        self, batch_inputs_dict: torch.Tensor
+    ) -> Union[Tuple[torch.Tensor], Dict[str, Tensor]]:
         """Directly extract features from the backbone+neck.
 
         Args:
             points (torch.Tensor): Input points.
+
+        Returns:
+            tuple[Tensor] | dict: For outdoor 3D object detection, we
+            typically obtain a tuple of features from the backbone + neck,
+            while for indoor 3D object detection a dict containing
+            features is usually obtained.
         """
         points = batch_inputs_dict['points']
         stack_points = torch.stack(points)
 ...
```
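Note the assumption behind `torch.stack(points)` in the context lines: every sample in the batch must carry the same number of points, e.g. after a fixed-size point-sampling transform. A quick sketch with made-up shapes:

```python
# Illustrative only: torch.stack requires equally sized point tensors.
import torch

points = [torch.rand(1024, 4), torch.rand(1024, 4)]  # (num_points, xyz+feat)
stack_points = torch.stack(points)
assert stack_points.shape == (2, 1024, 4)  # (batch, num_points, channels)
```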
mmdet3d/models/detectors/voxelnet.py:

```diff
 # Copyright (c) OpenMMLab. All rights reserved.
-from typing import Dict, List, Optional
+from typing import List, Tuple
 
 import torch
 from mmcv.ops import Voxelization
 from mmcv.runner import force_fp32
+from torch import Tensor
 from torch.nn import functional as F
 
-from mmdet3d.core import Det3DDataSample
+from mmdet3d.core.utils import ConfigType, OptConfigType, OptMultiConfig
 from mmdet3d.registry import MODELS
 from .single_stage import SingleStage3DDetector
 ...
@@ -16,39 +17,28 @@ class VoxelNet(SingleStage3DDetector):
     r"""`VoxelNet <https://arxiv.org/abs/1711.06396>`_ for 3D detection."""
 
     def __init__(self,
-                 voxel_layer: dict,
-                 voxel_encoder: dict,
-                 middle_encoder: dict,
-                 backbone: dict,
-                 neck: Optional[dict] = None,
-                 bbox_head: Optional[dict] = None,
-                 train_cfg: Optional[dict] = None,
-                 test_cfg: Optional[dict] = None,
-                 init_cfg: Optional[dict] = None,
-                 pretrained: Optional[str] = None) -> None:
-        super(VoxelNet, self).__init__(
+                 voxel_layer: ConfigType,
+                 voxel_encoder: ConfigType,
+                 middle_encoder: ConfigType,
+                 backbone: ConfigType,
+                 neck: OptConfigType = None,
+                 bbox_head: OptConfigType = None,
+                 train_cfg: OptConfigType = None,
+                 test_cfg: OptConfigType = None,
+                 data_preprocessor: OptConfigType = None,
+                 init_cfg: OptMultiConfig = None) -> None:
+        super().__init__(
             backbone=backbone,
             neck=neck,
             bbox_head=bbox_head,
             train_cfg=train_cfg,
             test_cfg=test_cfg,
-            init_cfg=init_cfg,
-            pretrained=pretrained)
+            data_preprocessor=data_preprocessor,
+            init_cfg=init_cfg)
         self.voxel_layer = Voxelization(**voxel_layer)
         self.voxel_encoder = MODELS.build(voxel_encoder)
         self.middle_encoder = MODELS.build(middle_encoder)
 
-    def extract_feat(self, points: List[torch.Tensor]) -> list:
-        """Extract features from points."""
-        voxels, num_points, coors = self.voxelize(points)
-        voxel_features = self.voxel_encoder(voxels, num_points, coors)
-        batch_size = coors[-1, 0].item() + 1
-        x = self.middle_encoder(voxel_features, coors, batch_size)
-        x = self.backbone(x)
-        if self.with_neck:
-            x = self.neck(x)
-        return x
-
     @torch.no_grad()
     @force_fp32()
     def voxelize(self, points: List[torch.Tensor]) -> tuple:
 ...
@@ -68,75 +58,15 @@ class VoxelNet(SingleStage3DDetector):
         coors_batch = torch.cat(coors_batch, dim=0)
         return voxels, num_points, coors_batch
 
-    def forward_train(self, batch_inputs_dict: Dict[list, torch.Tensor],
-                      batch_data_samples: List[Det3DDataSample],
-                      **kwargs) -> dict:
-        """
-        Args:
-            batch_inputs_dict (dict): The model input dict. It should
-                contain ``points`` and ``img`` keys.
-
-                - points (list[torch.Tensor]): Point cloud of each sample.
-                - imgs (torch.Tensor, optional): Image of each sample.
-
-            batch_data_samples (list[:obj:`Det3DDataSample`]): The batch
-                data samples. It usually includes information such as
-                `gt_instance_3d` or `gt_panoptic_seg_3d` or `gt_sem_seg_3d`.
-
-        Returns:
-            dict[str, Tensor]: A dictionary of loss components.
-        """
-        x = self.extract_feat(batch_inputs_dict['points'])
-        losses = self.bbox_head.forward_train(x, batch_data_samples,
-                                              **kwargs)
-        return losses
-
-    def simple_test(self, batch_inputs_dict: Dict[list, torch.Tensor],
-                    batch_input_metas: List[dict],
-                    rescale: bool = False) -> list:
-        """Test function without test-time augmentation.
-
-        Args:
-            batch_inputs_dict (dict): The model input dict. It should
-                contain ``points`` and ``img`` keys.
-
-                - points (list[torch.Tensor]): Point cloud of a single
-                  sample.
-                - imgs (torch.Tensor, optional): Image of a single sample.
-
-            batch_input_metas (list[dict]): List of input information.
-            rescale (bool, optional): Whether to rescale the results.
-                Defaults to False.
-
-        Returns:
-            list[:obj:`Det3DDataSample`]: Detection results of the
-            inputs. Each Det3DDataSample usually contains
-            'pred_instances_3d', and the ``pred_instances_3d`` usually
-            contains the following keys.
-
-            - scores_3d (Tensor): Classification scores, has a shape
-              (num_instances, )
-            - labels_3d (Tensor): Labels of bboxes, has a shape
-              (num_instances, ).
-            - bboxes_3d (:obj:`BaseInstance3DBoxes`): Prediction of bboxes,
-              contains a tensor with shape (num_instances, 7).
-        """
-        x = self.extract_feat(batch_inputs_dict['points'])
-        bboxes_list = self.bbox_head.simple_test(
-            x, batch_input_metas, rescale=rescale)
-        # convert to Det3DDataSample
-        results_list = self.postprocess_result(bboxes_list)
-        return results_list
-
-    def aug_test(self, aug_batch_inputs_dict: Dict[list, torch.Tensor],
-                 aug_batch_input_metas: List[dict],
-                 rescale: bool = False) -> list:
-        """Test function with augmentation."""
-        # TODO: Refactor this after mmdet update
-        feats = self.extract_feats(aug_batch_inputs_dict)
-        aug_bboxes = self.bbox_head.aug_test(
-            feats, aug_batch_input_metas, rescale=rescale)
-        return aug_bboxes
+    def extract_feat(self, batch_inputs_dict: dict) -> Tuple[Tensor]:
+        """Extract features from points."""
+        # TODO: Remove voxelization to datapreprocessor
+        points = batch_inputs_dict['points']
+        voxels, num_points, coors = self.voxelize(points)
+        voxel_features = self.voxel_encoder(voxels, num_points, coors)
+        batch_size = coors[-1, 0].item() + 1
+        x = self.middle_encoder(voxel_features, coors, batch_size)
+        x = self.backbone(x)
+        if self.with_neck:
+            x = self.neck(x)
+        return x
```
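For contrast with the dynamic variant above, here is a hedged sketch of what `self.voxel_layer` does in hard-voxelization mode; it requires mmcv-full with compiled ops, and the parameter values are illustrative only. The fixed per-voxel capacity is why `VoxelNet.voxelize()` returns the `(voxels, num_points, coors)` triple consumed by the voxel encoder.

```python
# Hedged sketch of mmcv.ops.Voxelization in hard mode; all parameter values
# here are made up for illustration.
import torch
from mmcv.ops import Voxelization

voxel_layer = Voxelization(
    voxel_size=[0.05, 0.05, 0.1],
    point_cloud_range=[0, -40, -3, 70.4, 40, 1],
    max_num_points=5,   # hard cap on points kept per voxel
    max_voxels=16000)
points = torch.rand(1024, 4) * 10  # fake (x, y, z, reflectance) cloud
voxels, coors, num_points = voxel_layer(points)
# voxels:     (num_voxels, 5, 4) padded per-voxel point features
# coors:      (num_voxels, 3) integer (z, y, x) voxel coordinates
# num_points: (num_voxels,) count of valid points in each voxel
```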
tests/test_models/test_dense_heads/test_anchor3d_head.py (new file):

```python
# Copyright (c) OpenMMLab. All rights reserved.
from unittest import TestCase

import torch
from mmengine import Config
from mmengine.data import InstanceData

from mmdet3d import *  # noqa
from mmdet3d.core.bbox import Box3DMode, LiDARInstance3DBoxes
from mmdet3d.models.dense_heads import Anchor3DHead


class TestAnchor3DHead(TestCase):

    def test_anchor3d_head_loss(self):
        """Test anchor head loss when truth is empty and non-empty."""
        cfg = Config(
            dict(
                assigner=[
                    dict(  # for Pedestrian
                        type='Max3DIoUAssigner',
                        iou_calculator=dict(type='BboxOverlapsNearest3D'),
                        pos_iou_thr=0.35,
                        neg_iou_thr=0.2,
                        min_pos_iou=0.2,
                        ignore_iof_thr=-1),
                    dict(  # for Cyclist
                        type='Max3DIoUAssigner',
                        iou_calculator=dict(type='BboxOverlapsNearest3D'),
                        pos_iou_thr=0.35,
                        neg_iou_thr=0.2,
                        min_pos_iou=0.2,
                        ignore_iof_thr=-1),
                    dict(  # for Car
                        type='Max3DIoUAssigner',
                        iou_calculator=dict(type='BboxOverlapsNearest3D'),
                        pos_iou_thr=0.6,
                        neg_iou_thr=0.45,
                        min_pos_iou=0.45,
                        ignore_iof_thr=-1),
                ],
                allowed_border=0,
                pos_weight=-1,
                debug=False))

        anchor3d_head = Anchor3DHead(
            num_classes=3,
            in_channels=512,
            feat_channels=512,
            use_direction_classifier=True,
            anchor_generator=dict(
                type='Anchor3DRangeGenerator',
                ranges=[
                    [0, -40.0, -0.6, 70.4, 40.0, -0.6],
                    [0, -40.0, -0.6, 70.4, 40.0, -0.6],
                    [0, -40.0, -1.78, 70.4, 40.0, -1.78],
                ],
                sizes=[[0.8, 0.6, 1.73], [1.76, 0.6, 1.73],
                       [3.9, 1.6, 1.56]],
                rotations=[0, 1.57],
                reshape_out=False),
            diff_rad_by_sin=True,
            bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
            loss_cls=dict(
                type='mmdet.FocalLoss',
                use_sigmoid=True,
                gamma=2.0,
                alpha=0.25,
                loss_weight=1.0),
            loss_bbox=dict(
                type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
            loss_dir=dict(
                type='mmdet.CrossEntropyLoss',
                use_sigmoid=False,
                loss_weight=0.2),
            train_cfg=cfg)

        # The anchor head expects multiple levels of features per sample
        feats = (torch.rand([1, 512, 200, 176], dtype=torch.float32), )
        (cls_scores, bbox_preds,
         dir_cls_preds) = anchor3d_head.forward(feats)
        self.assertEqual(cls_scores[0].shape, torch.Size([1, 18, 200, 176]))
        self.assertEqual(bbox_preds[0].shape, torch.Size([1, 42, 200, 176]))
        self.assertEqual(dir_cls_preds[0].shape,
                         torch.Size([1, 12, 200, 176]))

        # Test that empty ground truth encourages the network to
        # predict background
        gt_instances = InstanceData()
        gt_bboxes_3d = LiDARInstance3DBoxes(torch.empty((0, 7)))
        gt_labels_3d = torch.tensor([])
        input_metas = dict(sample_idx=1234)  # fake input_metas
        gt_instances.bboxes_3d = gt_bboxes_3d
        gt_instances.labels_3d = gt_labels_3d

        empty_gt_losses = anchor3d_head.loss_by_feat(cls_scores, bbox_preds,
                                                     dir_cls_preds,
                                                     [gt_instances],
                                                     [input_metas])
        # When there is no truth, the cls loss should be nonzero but there
        # should be no box and dir loss.
        self.assertGreater(empty_gt_losses['loss_cls'][0], 0,
                           'cls loss should be non-zero')
        self.assertEqual(
            empty_gt_losses['loss_bbox'][0], 0,
            'there should be no box loss when there are no true boxes')
        self.assertEqual(
            empty_gt_losses['loss_dir'][0], 0,
            'there should be no dir loss when there are no true dirs')

        # When truth is non-empty, both cls and box loss should be nonzero
        # for random inputs
        gt_instances = InstanceData()
        gt_bboxes_3d = LiDARInstance3DBoxes(
            torch.tensor(
                [[6.4118, -3.4305, -1.7291, 1.7033, 3.4693, 1.6197,
                  -0.9091]],
                dtype=torch.float32))
        gt_labels_3d = torch.tensor([1], dtype=torch.int64)
        gt_instances.bboxes_3d = gt_bboxes_3d
        gt_instances.labels_3d = gt_labels_3d

        gt_losses = anchor3d_head.loss_by_feat(cls_scores, bbox_preds,
                                               dir_cls_preds, [gt_instances],
                                               [input_metas])
        self.assertGreater(gt_losses['loss_cls'][0], 0,
                           'cls loss should be non-zero')
        self.assertGreater(gt_losses['loss_bbox'][0], 0,
                           'box loss should be non-zero')
        self.assertGreater(gt_losses['loss_dir'][0], 0,
                           'dir loss should be non-zero')

    def test_anchor3d_head_predict(self):
        cfg = Config(
            dict(
                use_rotate_nms=True,
                nms_across_levels=False,
                nms_thr=0.01,
                score_thr=0.1,
                min_bbox_size=0,
                nms_pre=100,
                max_num=50))

        anchor3d_head = Anchor3DHead(
            num_classes=3,
            in_channels=512,
            feat_channels=512,
            use_direction_classifier=True,
            anchor_generator=dict(
                type='Anchor3DRangeGenerator',
                ranges=[
                    [0, -40.0, -0.6, 70.4, 40.0, -0.6],
                    [0, -40.0, -0.6, 70.4, 40.0, -0.6],
                    [0, -40.0, -1.78, 70.4, 40.0, -1.78],
                ],
                sizes=[[0.8, 0.6, 1.73], [1.76, 0.6, 1.73],
                       [3.9, 1.6, 1.56]],
                rotations=[0, 1.57],
                reshape_out=False),
            diff_rad_by_sin=True,
            bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
            loss_cls=dict(
                type='mmdet.FocalLoss',
                use_sigmoid=True,
                gamma=2.0,
                alpha=0.25,
                loss_weight=1.0),
            loss_bbox=dict(
                type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
            loss_dir=dict(
                type='mmdet.CrossEntropyLoss',
                use_sigmoid=False,
                loss_weight=0.2),
            test_cfg=cfg)

        feats = (torch.rand([2, 512, 200, 176], dtype=torch.float32), )
        (cls_scores, bbox_preds,
         dir_cls_preds) = anchor3d_head.forward(feats)

        # fake input_metas
        input_metas = [{
            'sample_idx': 1234,
            'box_type_3d': LiDARInstance3DBoxes,
            'box_mode_3d': Box3DMode.LIDAR
        }, {
            'sample_idx': 2345,
            'box_type_3d': LiDARInstance3DBoxes,
            'box_mode_3d': Box3DMode.LIDAR
        }]

        # test predict_by_feat
        cls_scores[0] -= 1.5  # too many positive samples may cause cuda oom
        results = anchor3d_head.predict_by_feat(cls_scores, bbox_preds,
                                                dir_cls_preds, input_metas)
        pred_instances = results[0]
        scores_3d = pred_instances.scores_3d
        assert (scores_3d > 0.3).all()
```
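As a sanity check on the shape assertions in `test_anchor3d_head_loss`, the channel counts follow directly from the anchor configuration:

```python
# Worked check of the asserted channel counts: the anchor generator config
# defines 3 anchor settings x 2 rotations = 6 anchors per BEV location,
# and DeltaXYZWLHRBBoxCoder uses a 7-dim box code.
num_anchors = 3 * 2  # (ranges/sizes) x rotations
num_classes = 3
box_code_size = 7
assert num_anchors * num_classes == 18    # cls_scores:    (1, 18, 200, 176)
assert num_anchors * box_code_size == 42  # bbox_preds:    (1, 42, 200, 176)
assert num_anchors * 2 == 12              # dir_cls_preds: (1, 12, 200, 176)
```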