OpenDAS / mmdetection3d · Commits

Commit bd73d3b9
[refactor] MVXTwoStage & CenterPoint
Authored Jul 15, 2022 by jshilong; committed Jul 20, 2022 by ChaimZhu
Parent: 360c27f9

Changes: 26 files in total; this page shows 20 changed files, with 717 additions and 617 deletions (+717, -617).
configs/_base_/models/centerpoint_01voxel_second_secfpn_nus.py  (+4, -2)
configs/_base_/models/centerpoint_02pillar_second_secfpn_nus.py  (+4, -2)
configs/_base_/schedules/cosine.py  (+20, -16)
configs/centerpoint/centerpoint_0075voxel_second_secfpn_4x8_cyclic_20e_nus.py  (+15, -14)
configs/centerpoint/centerpoint_01voxel_second_secfpn_4x8_cyclic_20e_nus.py  (+24, -24)
configs/centerpoint/centerpoint_02pillar_second_secfpn_4x8_cyclic_20e_nus.py  (+24, -24)
configs/mvxnet/dv_mvx-fpn_second_secfpn_adamw_2x8_80e_kitti-3d-3class.py  (+71, -70)
mmdet3d/core/bbox/__init__.py  (+2, -2)
mmdet3d/core/bbox/assigners/__init__.py  (+2, -2)
mmdet3d/datasets/det3d_dataset.py  (+18, -0)
mmdet3d/datasets/kitti_dataset.py  (+2, -0)
mmdet3d/models/dense_heads/base_3d_dense_head.py  (+3, -3)
mmdet3d/models/dense_heads/centerpoint_head.py  (+159, -62)
mmdet3d/models/dense_heads/vote_head.py  (+1, -2)
mmdet3d/models/detectors/base.py  (+39, -17)
mmdet3d/models/detectors/centerpoint.py  (+60, -69)
mmdet3d/models/detectors/mvx_faster_rcnn.py  (+23, -5)
mmdet3d/models/detectors/mvx_two_stage.py  (+225, -293)
mmdet3d/models/voxel_encoders/voxel_encoder.py  (+11, -7)
tests/test_core/test_bbox/test_samplers.py  (+10, -3)
configs/_base_/models/centerpoint_01voxel_second_secfpn_nus.py

 voxel_size = [0.1, 0.1, 0.2]
 model = dict(
     type='CenterPoint',
+    data_preprocessor=dict(type='Det3DDataPreprocessor'),
     pts_voxel_layer=dict(
         max_num_points=10, voxel_size=voxel_size, max_voxels=(90000, 120000)),
     pts_voxel_encoder=dict(type='HardSimpleVFE', num_features=5),
@@ -54,8 +55,9 @@ model = dict(
             code_size=9),
         separate_head=dict(
             type='SeparateHead', init_bias=-2.19, final_kernel=3),
-        loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),
-        loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25),
+        loss_cls=dict(type='mmdet.GaussianFocalLoss', reduction='mean'),
+        loss_bbox=dict(
+            type='mmdet.L1Loss', reduction='mean', loss_weight=0.25),
         norm_bbox=True),
     # model training and testing settings
     train_cfg=dict(
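The `mmdet.`-prefixed types above come from the MMEngine-style scoped registry: when mmdet3d's MODELS registry is asked to build a config whose type carries another repository's scope prefix, it resolves the class in that repository's registry instead of its own. A minimal sketch of the idea (illustrative only; the real resolution logic lives in MMEngine's Registry, and the names below are simplified stand-ins):

    # Simplified sketch of scope-prefixed registry lookup; not the real
    # MMEngine implementation.
    class Registry:
        _scopes = {}  # scope name -> Registry instance

        def __init__(self, scope):
            self.scope = scope
            self._modules = {}  # type name -> class
            Registry._scopes[scope] = self

        def register(self, cls):
            self._modules[cls.__name__] = cls
            return cls

        def build(self, cfg):
            cfg = dict(cfg)
            name = cfg.pop('type')
            if '.' in name:  # e.g. 'mmdet.L1Loss'
                scope, name = name.split('.', 1)
                registry = Registry._scopes[scope]
            else:
                registry = self
            return registry._modules[name](**cfg)

    MMDET_MODELS = Registry('mmdet')
    MMDET3D_MODELS = Registry('mmdet3d')

    @MMDET_MODELS.register
    class L1Loss:
        def __init__(self, reduction='mean', loss_weight=1.0):
            self.reduction, self.loss_weight = reduction, loss_weight

    # A config written in mmdet3d can borrow mmdet's loss via the prefix:
    loss = MMDET3D_MODELS.build(
        dict(type='mmdet.L1Loss', reduction='mean', loss_weight=0.25))
    print(type(loss).__name__)  # -> L1Loss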
configs/_base_/models/centerpoint_02pillar_second_secfpn_nus.py

 voxel_size = [0.2, 0.2, 8]
 model = dict(
     type='CenterPoint',
+    data_preprocessor=dict(type='Det3DDataPreprocessor'),
     pts_voxel_layer=dict(
         max_num_points=20, voxel_size=voxel_size, max_voxels=(30000, 40000)),
     pts_voxel_encoder=dict(
@@ -53,8 +54,9 @@ model = dict(
             code_size=9),
         separate_head=dict(
             type='SeparateHead', init_bias=-2.19, final_kernel=3),
-        loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),
-        loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25),
+        loss_cls=dict(type='mmdet.GaussianFocalLoss', reduction='mean'),
+        loss_bbox=dict(
+            type='mmdet.L1Loss', reduction='mean', loss_weight=0.25),
         norm_bbox=True),
     # model training and testing settings
     train_cfg=dict(
configs/_base_/schedules/cosine.py

 # This schedule is mainly used by models with dynamic voxelization
-# optimizer
 lr = 0.003  # max learning rate
-optimizer = dict(
-    type='AdamW',
-    lr=lr,
-    betas=(0.95, 0.99),  # the momentum is change during training
-    weight_decay=0.001)
-optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
-lr_config = dict(
-    policy='CosineAnnealing',
-    warmup='linear',
-    warmup_iters=1000,
-    warmup_ratio=1.0 / 10,
-    min_lr_ratio=1e-5)
-momentum_config = None
-runner = dict(type='EpochBasedRunner', max_epochs=40)
+optim_wrapper = dict(
+    type='OptimWrapper',
+    optimizer=dict(
+        type='AdamW', lr=lr, weight_decay=0.001, betas=(0.95, 0.99)),
+    clip_grad=dict(max_norm=10, norm_type=2),
+)
+param_scheduler = [
+    dict(type='LinearLR', start_factor=0.1, by_epoch=False, begin=0, end=1000),
+    dict(
+        type='CosineAnnealingLR',
+        begin=0,
+        T_max=40,
+        end=40,
+        by_epoch=True,
+        eta_min=1e-5)
+]
+# training schedule for 1x
+train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=40, val_interval=1)
+val_cfg = dict(type='ValLoop')
+test_cfg = dict(type='TestLoop')
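Read together, the new `param_scheduler` reproduces the old warmup-plus-cosine policy: `LinearLR` ramps the rate from 0.1 * lr up to lr over the first 1000 iterations, and `CosineAnnealingLR` decays it toward `eta_min` over the 40 training epochs. A rough sketch of the resulting curve, treating the two phases as disjoint for simplicity and assuming a hypothetical 100 iterations per epoch (the real value depends on dataset size and batch size):

    import math

    lr, max_epochs, warmup_iters, eta_min = 0.003, 40, 1000, 1e-5
    iters_per_epoch = 100  # hypothetical value, for illustration only

    def lr_at(it):
        if it < warmup_iters:
            # LinearLR: factor grows linearly from start_factor=0.1 to 1.0
            return lr * (0.1 + 0.9 * it / warmup_iters)
        # CosineAnnealingLR (by_epoch=True): cosine decay toward eta_min
        epoch = min(it / iters_per_epoch, max_epochs)
        return eta_min + (lr - eta_min) * 0.5 * (
            1 + math.cos(math.pi * epoch / max_epochs))

    for it in (0, 500, 1000, 2000, 4000):
        print(it, round(lr_at(it), 6))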
configs/centerpoint/centerpoint_0075voxel_second_secfpn_4x8_cyclic_20e_nus.py

@@ -9,7 +9,7 @@ class_names = [
     'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
     'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
 ]
+data_prefix = dict(pts='samples/LIDAR_TOP', img='')
 model = dict(
     pts_voxel_layer=dict(
         voxel_size=voxel_size, point_cloud_range=point_cloud_range),
@@ -96,7 +96,9 @@ train_pipeline = [
     dict(type='ObjectNameFilter', classes=class_names),
     dict(type='PointShuffle'),
-    dict(type='DefaultFormatBundle3D', class_names=class_names),
-    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
+    dict(
+        type='Pack3DDetInputs',
+        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
 ]
 test_pipeline = [
     dict(
@@ -125,16 +127,15 @@ test_pipeline = [
             translation_std=[0, 0, 0]),
         dict(type='RandomFlip3D'),
         dict(
-            type='PointsRangeFilter', point_cloud_range=point_cloud_range),
-        dict(
-            type='DefaultFormatBundle3D',
-            class_names=class_names,
-            with_label=False),
-        dict(type='Collect3D', keys=['points'])
-    ])
+            type='PointsRangeFilter', point_cloud_range=point_cloud_range)
+    ]),
+    dict(type='Pack3DDetInputs', keys=['points'])
 ]
-data = dict(
-    train=dict(dataset=dict(pipeline=train_pipeline)),
-    val=dict(pipeline=test_pipeline),
-    test=dict(pipeline=test_pipeline))
+train_dataloader = dict(
+    dataset=dict(
+        dataset=dict(
+            pipeline=train_pipeline, metainfo=dict(CLASSES=class_names))))
+test_dataloader = dict(
+    dataset=dict(
+        pipeline=test_pipeline, metainfo=dict(CLASSES=class_names)))
+val_dataloader = dict(
+    dataset=dict(
+        pipeline=test_pipeline, metainfo=dict(CLASSES=class_names)))
configs/centerpoint/centerpoint_01voxel_second_secfpn_4x8_cyclic_20e_nus.py

@@ -12,7 +12,7 @@ class_names = [
     'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
     'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
 ]
+data_prefix = dict(pts='samples/LIDAR_TOP', img='')
 model = dict(
     pts_voxel_layer=dict(point_cloud_range=point_cloud_range),
     pts_bbox_head=dict(bbox_coder=dict(pc_range=point_cloud_range[:2])),
@@ -90,8 +90,9 @@ train_pipeline = [
     dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
     dict(type='ObjectNameFilter', classes=class_names),
     dict(type='PointShuffle'),
-    dict(type='DefaultFormatBundle3D', class_names=class_names),
-    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
+    dict(
+        type='Pack3DDetInputs',
+        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
 ]
 test_pipeline = [
     dict(
@@ -120,13 +121,9 @@ test_pipeline = [
             translation_std=[0, 0, 0]),
         dict(type='RandomFlip3D'),
         dict(
-            type='PointsRangeFilter', point_cloud_range=point_cloud_range),
-        dict(
-            type='DefaultFormatBundle3D',
-            class_names=class_names,
-            with_label=False),
-        dict(type='Collect3D', keys=['points'])
-    ])
+            type='PointsRangeFilter', point_cloud_range=point_cloud_range)
+    ]),
+    dict(type='Pack3DDetInputs', keys=['points'])
 ]
 # construct a pipeline for data and gt loading in show function
 # please keep its loading function consistent with test_pipeline (e.g. client)
@@ -144,28 +141,31 @@ eval_pipeline = [
         file_client_args=file_client_args,
         pad_empty_sweeps=True,
         remove_close=True),
-    dict(
-        type='DefaultFormatBundle3D',
-        class_names=class_names,
-        with_label=False),
-    dict(type='Collect3D', keys=['points'])
+    dict(type='Pack3DDetInputs', keys=['points'])
 ]
-data = dict(
-    train=dict(
+train_dataloader = dict(
+    _delete_=True,
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
         type='CBGSDataset',
         dataset=dict(
             type=dataset_type,
             data_root=data_root,
-            ann_file=data_root + 'nuscenes_infos_train.pkl',
+            ann_file='nuscenes_infos_train.pkl',
             pipeline=train_pipeline,
-            classes=class_names,
+            metainfo=dict(CLASSES=class_names),
             test_mode=False,
+            data_prefix=data_prefix,
             use_valid_flag=True,
             # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
             # and box_type_3d='Depth' in sunrgbd and scannet dataset.
-            box_type_3d='LiDAR')),
-    val=dict(pipeline=test_pipeline, classes=class_names),
-    test=dict(pipeline=test_pipeline, classes=class_names))
+            box_type_3d='LiDAR')))
+test_dataloader = dict(
+    dataset=dict(pipeline=test_pipeline, metainfo=dict(CLASSES=class_names)))
+val_dataloader = dict(
+    dataset=dict(pipeline=test_pipeline, metainfo=dict(CLASSES=class_names)))
-evaluation = dict(interval=20, pipeline=eval_pipeline)
+train_cfg = dict(val_interval=20)
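The `_delete_=True` key in the new `train_dataloader` is doing real work here: under mm-style config inheritance an override is normally merged field-by-field into the base config's value, whereas `_delete_=True` discards the inherited dict and keeps only the override. A toy illustration of the merge rule (simplified; the actual logic lives in mmengine's Config):

    # Simplified sketch of the _delete_ merge rule used by mm-style configs.
    def merge(base, override):
        if override.pop('_delete_', False):
            return override  # drop the inherited value entirely
        merged = dict(base)
        for key, value in override.items():
            if isinstance(value, dict) and isinstance(merged.get(key), dict):
                merged[key] = merge(merged[key], value)
            else:
                merged[key] = value
        return merged

    base = dict(train_dataloader=dict(batch_size=2, num_workers=8))
    override = dict(train_dataloader=dict(_delete_=True, batch_size=4))
    print(merge(base, override))  # {'train_dataloader': {'batch_size': 4}}
    # Without _delete_, num_workers=8 would survive the merge.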
configs/centerpoint/centerpoint_02pillar_second_secfpn_4x8_cyclic_20e_nus.py

@@ -12,7 +12,7 @@ class_names = [
     'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
     'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
 ]
+data_prefix = dict(pts='samples/LIDAR_TOP', img='')
 model = dict(
     pts_voxel_layer=dict(point_cloud_range=point_cloud_range),
     pts_voxel_encoder=dict(point_cloud_range=point_cloud_range),
@@ -91,8 +91,9 @@ train_pipeline = [
     dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
     dict(type='ObjectNameFilter', classes=class_names),
     dict(type='PointShuffle'),
-    dict(type='DefaultFormatBundle3D', class_names=class_names),
-    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
+    dict(
+        type='Pack3DDetInputs',
+        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
 ]
 test_pipeline = [
     dict(
@@ -119,13 +120,9 @@ test_pipeline = [
             rot_range=[0, 0],
             scale_ratio_range=[1., 1.],
             translation_std=[0, 0, 0]),
-        dict(type='RandomFlip3D'),
-        dict(
-            type='DefaultFormatBundle3D',
-            class_names=class_names,
-            with_label=False),
-        dict(type='Collect3D', keys=['points'])
-    ])
+        dict(type='RandomFlip3D')
+    ]),
+    dict(type='Pack3DDetInputs', keys=['points'])
 ]
 # construct a pipeline for data and gt loading in show function
 # please keep its loading function consistent with test_pipeline (e.g. client)
@@ -143,28 +140,31 @@ eval_pipeline = [
         file_client_args=file_client_args,
         pad_empty_sweeps=True,
         remove_close=True),
-    dict(
-        type='DefaultFormatBundle3D',
-        class_names=class_names,
-        with_label=False),
-    dict(type='Collect3D', keys=['points'])
+    dict(type='Pack3DDetInputs', keys=['points'])
 ]
-data = dict(
-    train=dict(
+train_dataloader = dict(
+    _delete_=True,
+    batch_size=4,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
         type='CBGSDataset',
         dataset=dict(
             type=dataset_type,
             data_root=data_root,
-            ann_file=data_root + 'nuscenes_infos_train.pkl',
+            ann_file='nuscenes_infos_train.pkl',
            pipeline=train_pipeline,
-            classes=class_names,
+            metainfo=dict(CLASSES=class_names),
             test_mode=False,
+            data_prefix=data_prefix,
             use_valid_flag=True,
             # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
             # and box_type_3d='Depth' in sunrgbd and scannet dataset.
-            box_type_3d='LiDAR')),
-    val=dict(pipeline=test_pipeline, classes=class_names),
-    test=dict(pipeline=test_pipeline, classes=class_names))
+            box_type_3d='LiDAR')))
+test_dataloader = dict(
+    dataset=dict(pipeline=test_pipeline, metainfo=dict(CLASSES=class_names)))
+val_dataloader = dict(
+    dataset=dict(pipeline=test_pipeline, metainfo=dict(CLASSES=class_names)))
-evaluation = dict(interval=20, pipeline=eval_pipeline)
+train_cfg = dict(val_interval=20)
configs/mvxnet/dv_mvx-fpn_second_secfpn_adamw_2x8_80e_kitti-3d-3class.py

@@ -6,8 +6,14 @@ point_cloud_range = [0, -40, -3, 70.4, 40, 1]
 model = dict(
     type='DynamicMVXFasterRCNN',
+    data_preprocessor=dict(
+        type='Det3DDataPreprocessor',
+        mean=[102.9801, 115.9465, 122.7717],
+        std=[1.0, 1.0, 1.0],
+        bgr_to_rgb=False,
+        pad_size_divisor=32),
     img_backbone=dict(
-        type='ResNet',
+        type='mmdet.ResNet',
         depth=50,
         num_stages=4,
         out_indices=(0, 1, 2, 3),
@@ -16,7 +22,7 @@ model = dict(
         norm_eval=True,
         style='caffe'),
     img_neck=dict(
-        type='FPN',
+        type='mmdet.FPN',
         in_channels=[256, 512, 1024, 2048],
         out_channels=256,
         num_outs=5),
@@ -82,34 +88,36 @@ model = dict(
         assign_per_class=True,
         bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
         loss_cls=dict(
-            type='FocalLoss',
+            type='mmdet.FocalLoss',
             use_sigmoid=True,
             gamma=2.0,
             alpha=0.25,
             loss_weight=1.0),
-        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
+        loss_bbox=dict(
+            type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
         loss_dir=dict(
-            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
+            type='mmdet.CrossEntropyLoss', use_sigmoid=False,
+            loss_weight=0.2)),
     # model training and testing settings
     train_cfg=dict(
         pts=dict(
             assigner=[
                 dict(  # for Pedestrian
-                    type='MaxIoUAssigner',
+                    type='Max3DIoUAssigner',
                     iou_calculator=dict(type='BboxOverlapsNearest3D'),
                     pos_iou_thr=0.35,
                     neg_iou_thr=0.2,
                     min_pos_iou=0.2,
                     ignore_iof_thr=-1),
                 dict(  # for Cyclist
-                    type='MaxIoUAssigner',
+                    type='Max3DIoUAssigner',
                     iou_calculator=dict(type='BboxOverlapsNearest3D'),
                     pos_iou_thr=0.35,
                     neg_iou_thr=0.2,
                     min_pos_iou=0.2,
                     ignore_iof_thr=-1),
                 dict(  # for Car
-                    type='MaxIoUAssigner',
+                    type='Max3DIoUAssigner',
                     iou_calculator=dict(type='BboxOverlapsNearest3D'),
                     pos_iou_thr=0.6,
                     neg_iou_thr=0.45,
@@ -133,18 +141,14 @@ model = dict(
 dataset_type = 'KittiDataset'
 data_root = 'data/kitti/'
 class_names = ['Pedestrian', 'Cyclist', 'Car']
-img_norm_cfg = dict(
-    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
+metainfo = dict(CLASSES=class_names)
 input_modality = dict(use_lidar=True, use_camera=True)
 train_pipeline = [
     dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4),
     dict(type='LoadImageFromFile'),
     dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
     dict(
-        type='Resize',
-        img_scale=[(640, 192), (2560, 768)],
-        multiscale_mode='range',
+        type='RandomResize',
+        scale=[(640, 192), (2560, 768)],
         keep_ratio=True),
     dict(
         type='GlobalRotScaleTrans',
         rot_range=[-0.78539816, 0.78539816],
@@ -154,12 +158,12 @@ train_pipeline = [
     dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
     dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
     dict(type='PointShuffle'),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size_divisor=32),
-    dict(type='DefaultFormatBundle3D', class_names=class_names),
     dict(
-        type='Collect3D',
-        keys=['points', 'img', 'gt_bboxes_3d', 'gt_labels_3d']),
+        type='Pack3DDetInputs',
+        keys=[
+            'points', 'img', 'gt_bboxes_3d', 'gt_labels_3d', 'gt_bboxes',
+            'gt_labels'
+        ])
 ]
 test_pipeline = [
     dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4),
@@ -170,82 +174,79 @@ test_pipeline = [
         pts_scale_ratio=1,
         flip=False,
         transforms=[
-            dict(
-                type='Resize',
-                multiscale_mode='value',
-                keep_ratio=True),
+            # Temporary solution, fix this after refactor the augtest
+            dict(type='Resize', scale=0, keep_ratio=True),
             dict(
                 type='GlobalRotScaleTrans',
                 rot_range=[0, 0],
                 scale_ratio_range=[1., 1.],
                 translation_std=[0, 0, 0]),
             dict(type='RandomFlip3D'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='Pad', size_divisor=32),
             dict(
                 type='PointsRangeFilter', point_cloud_range=point_cloud_range),
-            dict(
-                type='DefaultFormatBundle3D',
-                class_names=class_names,
-                with_label=False),
-            dict(type='Collect3D', keys=['points', 'img'])
-        ])
+        ]),
+    dict(type='Pack3DDetInputs', keys=['points', 'img'])
 ]
-# construct a pipeline for data and gt loading in show function
-# please keep its loading function consistent with test_pipeline (e.g. client)
-eval_pipeline = [
-    dict(type='LoadPointsFromFile', coord_type='LIDAR', load_dim=4, use_dim=4),
-    dict(type='LoadImageFromFile'),
-    dict(
-        type='DefaultFormatBundle3D',
-        class_names=class_names,
-        with_label=False),
-    dict(type='Collect3D', keys=['points', 'img'])
-]
-data = dict(
-    samples_per_gpu=2,
-    workers_per_gpu=2,
-    train=dict(
-        type='RepeatDataset',
-        times=2,
-        dataset=dict(
-            type=dataset_type,
-            data_root=data_root,
-            ann_file=data_root + 'kitti_infos_train.pkl',
-            split='training',
-            pts_prefix='velodyne_reduced',
-            pipeline=train_pipeline,
-            modality=input_modality,
-            classes=class_names,
-            test_mode=False,
-            box_type_3d='LiDAR')),
-    val=dict(
-        ann_file=data_root + 'kitti_infos_val.pkl',
-        split='training',
-        pts_prefix='velodyne_reduced',
-        pipeline=test_pipeline,
-        modality=input_modality,
-        classes=class_names,
-        test_mode=True,
-        box_type_3d='LiDAR'),
-    test=dict(
-        box_type_3d='LiDAR'))
+modality = dict(use_lidar=True, use_camera=True)
+train_dataloader = dict(
+    batch_size=2,
+    num_workers=2,
+    sampler=dict(type='DefaultSampler', shuffle=True),
+    dataset=dict(
+        type='RepeatDataset',
+        times=2,
+        dataset=dict(
+            type=dataset_type,
+            data_root=data_root,
+            modality=modality,
+            ann_file='kitti_infos_train.pkl',
+            data_prefix=dict(
+                pts='training/velodyne_reduced', img='training/image_2'),
+            pipeline=train_pipeline,
+            filter_empty_gt=False,
+            metainfo=metainfo,
+            # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
+            # and box_type_3d='Depth' in sunrgbd and scannet dataset.
+            box_type_3d='LiDAR')))
+val_dataloader = dict(
+    batch_size=1,
+    num_workers=1,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        modality=modality,
+        ann_file='kitti_infos_val.pkl',
+        data_prefix=dict(
+            pts='training/velodyne_reduced', img='training/image_2'),
+        pipeline=test_pipeline,
+        metainfo=metainfo,
+        test_mode=True,
+        box_type_3d='LiDAR'))
+test_dataloader = dict(
+    batch_size=1,
+    num_workers=1,
+    sampler=dict(type='DefaultSampler', shuffle=False),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        modality=modality,
+        ann_file='kitti_infos_val.pkl',
+        data_prefix=dict(
+            pts='training/velodyne_reduced', img='training/image_2'),
+        pipeline=test_pipeline,
+        metainfo=metainfo,
+        test_mode=True,
+        box_type_3d='LiDAR'))
 # Training settings
-optimizer = dict(weight_decay=0.01)
-# max_norm=10 is better for SECOND
-optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
-evaluation = dict(interval=1, pipeline=eval_pipeline)
+optim_wrapper = dict(
+    optimizer=dict(weight_decay=0.01),
+    clip_grad=dict(max_norm=35, norm_type=2),
+)
+# max_norm=10 is better for SECOND
+val_evaluator = dict(
+    type='KittiMetric', ann_file='data/kitti/kitti_infos_val.pkl')
+test_evaluator = val_evaluator
 # You may need to download the model first is the network is unstable
 load_from = 'https://download.openmmlab.com/mmdetection3d/pretrain_models/mvx_faster_rcnn_detectron2-caffe_20e_coco-pretrain_gt-sample_kitti-3-class_moderate-79.3_20200207-a4a6a3c7.pth'  # noqa
mmdet3d/core/bbox/__init__.py

 # Copyright (c) OpenMMLab. All rights reserved.
-from .assigners import AssignResult, BaseAssigner, MaxIoUAssigner
+from .assigners import AssignResult, BaseAssigner, Max3DIoUAssigner
 # from .bbox_target import bbox_target
 from .builder import build_assigner, build_bbox_coder, build_sampler
 from .coders import DeltaXYZWLHRBBoxCoder
@@ -18,7 +18,7 @@ from .structures import (BaseInstance3DBoxes, Box3DMode, CameraInstance3DBoxes,
 from .transforms import bbox3d2result, bbox3d2roi, bbox3d_mapping_back

 __all__ = [
-    'BaseSampler', 'AssignResult', 'BaseAssigner', 'MaxIoUAssigner',
+    'BaseSampler', 'AssignResult', 'BaseAssigner', 'Max3DIoUAssigner',
     'PseudoSampler', 'RandomSampler', 'InstanceBalancedPosSampler',
     'IoUBalancedNegSampler', 'CombinedSampler', 'SamplingResult',
     'DeltaXYZWLHRBBoxCoder', 'BboxOverlapsNearest3D', 'BboxOverlaps3D',
mmdet3d/core/bbox/assigners/__init__.py

 # Copyright (c) OpenMMLab. All rights reserved.
 from mmdet.core.bbox import AssignResult, BaseAssigner
-from .max_3d_iou_assigner import MaxIoUAssigner
+from .max_3d_iou_assigner import Max3DIoUAssigner

-__all__ = ['BaseAssigner', 'MaxIoUAssigner', 'AssignResult']
+__all__ = ['BaseAssigner', 'Max3DIoUAssigner', 'AssignResult']
mmdet3d/datasets/det3d_dataset.py

@@ -35,6 +35,8 @@ class Det3DDataset(BaseDataset):
             - use_camera: bool
             - use_lidar: bool
             Defaults to `dict(use_lidar=True, use_camera=False)`
+        default_cam_key (str, optional): The default camera name adopted.
+            Defaults to None.
         box_type_3d (str, optional): Type of 3D box of this dataset.
             Based on the `box_type_3d`, the dataset will encapsulate the box
             to its original format then converted them to `box_type_3d`.
@@ -65,6 +67,7 @@ class Det3DDataset(BaseDataset):
                  data_prefix: dict = dict(pts='velodyne', img=''),
                  pipeline: List[Union[dict, Callable]] = [],
                  modality: dict = dict(use_lidar=True, use_camera=False),
+                 default_cam_key: str = None,
                  box_type_3d: dict = 'LiDAR',
                  filter_empty_gt: bool = True,
                  test_mode: bool = False,
@@ -84,6 +87,7 @@ class Det3DDataset(BaseDataset):
             if key not in modality:
                 modality[key] = False
         self.modality = modality
+        self.default_cam_key = default_cam_key
         assert self.modality['use_lidar'] or self.modality['use_camera'], (
             'Please specify the `modality` (`use_lidar` '
             f', `use_camera`) for {self.__class__.__name__}')
@@ -233,6 +237,20 @@ class Det3DDataset(BaseDataset):
                 cam_prefix = self.data_prefix.get('img', '')
                 img_info['img_path'] = osp.join(cam_prefix,
                                                 img_info['img_path'])
+        if self.default_cam_key is not None:
+            info['img_path'] = info['images'][self.default_cam_key]['img_path']
+            if 'lidar2cam' in info['images'][self.default_cam_key]:
+                info['lidar2cam'] = np.array(
+                    info['images'][self.default_cam_key]['lidar2cam'])
+            if 'cam2img' in info['images'][self.default_cam_key]:
+                info['cam2img'] = np.array(
+                    info['images'][self.default_cam_key]['cam2img'])
+            if 'lidar2img' in info['images'][self.default_cam_key]:
+                info['lidar2img'] = np.array(
+                    info['images'][self.default_cam_key]['lidar2img'])
+            else:
+                info['lidar2img'] = info['cam2img'] @ info['lidar2cam']

         if not self.test_mode:
             # used in traing
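The fallback added at the end of this hunk composes the image projection from the stored calibration when `lidar2img` is absent: a LiDAR point is first mapped into the camera frame by `lidar2cam`, then projected to pixels by `cam2img`. A small self-contained illustration with made-up calibration values:

    import numpy as np

    # Hypothetical calibration, padded to homogeneous 4x4 matrices.
    lidar2cam = np.eye(4)  # extrinsics (identity for this toy example)
    cam2img = np.eye(4)    # intrinsics
    cam2img[:3, :3] = [[720., 0., 640.],
                       [0., 720., 360.],
                       [0., 0., 1.]]

    # The fallback in the diff: lidar2img = cam2img @ lidar2cam
    lidar2img = cam2img @ lidar2cam

    pt = np.array([2., 1., 10., 1.])  # homogeneous point, 10 m in front
    u, v, w = (lidar2img @ pt)[:3]
    print(u / w, v / w)  # pixel coordinates -> 784.0 432.0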
mmdet3d/datasets/kitti_dataset.py

@@ -49,6 +49,7 @@ class KittiDataset(Det3DDataset):
                  ann_file: str,
                  pipeline: List[Union[dict, Callable]] = [],
                  modality: Optional[dict] = dict(use_lidar=True),
+                 default_cam_key='CAM2',
                  box_type_3d: str = 'LiDAR',
                  filter_empty_gt: bool = True,
                  test_mode: bool = False,
@@ -61,6 +62,7 @@ class KittiDataset(Det3DDataset):
             ann_file=ann_file,
             pipeline=pipeline,
             modality=modality,
+            default_cam_key=default_cam_key,
             box_type_3d=box_type_3d,
             filter_empty_gt=filter_empty_gt,
             test_mode=test_mode,
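The new argument only needs to be touched when a non-default camera is wanted. A hedged, config-style sketch (paths and file names are placeholders):

    # KittiDataset now accepts default_cam_key, which selects the camera
    # whose calibration populates lidar2cam / cam2img (and, via the
    # fallback in det3d_dataset.py above, lidar2img).
    dataset = dict(
        type='KittiDataset',
        data_root='data/kitti/',
        ann_file='kitti_infos_train.pkl',
        default_cam_key='CAM2',  # KITTI's left color camera; the default
    )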
mmdet3d/models/dense_heads/base_3d_dense_head.py

@@ -111,9 +111,9 @@ class Base3DDenseHead(BaseModule, metaclass=ABCMeta):
         Args:
             x (tuple[Tensor]): Features from FPN.
-            batch_data_samples (list[:obj:`DetDataSample`]): Each item
+            batch_data_samples (list[:obj:`Det3DDataSample`]): Each item
                 contains the meta information of each image and
                 corresponding annotations.
             proposal_cfg (ConfigDict, optional): Test / postprocessing
                 configuration, if None, test_cfg would be used.
                 Defaults to None.
mmdet3d/models/dense_heads/centerpoint_head.py

 # Copyright (c) OpenMMLab. All rights reserved.
 import copy
+from typing import Dict, List, Optional, Tuple, Union

 import torch
 from mmcv.cnn import ConvModule, build_conv_layer
-from mmcv.runner import BaseModule, force_fp32
-from torch import nn
+from mmcv.runner import BaseModule
+from mmengine import InstanceData
+from torch import Tensor, nn

-from mmdet3d.core import (circle_nms, draw_heatmap_gaussian, gaussian_radius,
-                          xywhr2xyxyr)
+from mmdet3d.core import (Det3DDataSample, circle_nms, draw_heatmap_gaussian,
+                          gaussian_radius, xywhr2xyxyr)
 from mmdet3d.core.post_processing import nms_bev
 from mmdet3d.models import builder
-from mmdet3d.models.builder import build_loss
 from mmdet3d.models.utils import clip_sigmoid
-from mmdet3d.registry import MODELS
-from mmdet.core import build_bbox_coder, multi_apply
+from mmdet3d.registry import MODELS, TASK_UTILS
+from mmdet.core import multi_apply


 @MODELS.register_module()
@@ -53,7 +54,6 @@ class SeparateHead(BaseModule):
         self.init_bias = init_bias
         for head in self.heads:
             classes, num_conv = self.heads[head]
-
             conv_layers = []
             c_in = in_channels
             for i in range(num_conv - 1):
@@ -250,8 +250,6 @@ class CenterHead(BaseModule):
             feature map. Default: [128].
         tasks (list[dict], optional): Task information including class number
             and class names. Default: None.
-        train_cfg (dict, optional): Train-time configs. Default: None.
-        test_cfg (dict, optional): Test-time configs. Default: None.
         bbox_coder (dict, optional): Bbox coder configs. Default: None.
         common_heads (dict, optional): Conv information for common heads.
             Default: dict().
@@ -269,32 +267,45 @@ class CenterHead(BaseModule):
             Default: dict(type='Conv2d')
         norm_cfg (dict, optional): Config of norm layer.
             Default: dict(type='BN2d').
-        bias (str, optional): Type of bias. Default: 'auto'.
+        bias (str): Type of bias. Default: 'auto'.
+        norm_bbox (bool): Whether normalize the bbox predictions.
+            Defaults to True.
+        train_cfg (dict, optional): Train-time configs. Default: None.
+        test_cfg (dict, optional): Test-time configs. Default: None.
         init_cfg (dict, optional): Config for initialization.
     """

     def __init__(self,
-                 in_channels=[128],
-                 tasks=None,
-                 train_cfg=None,
-                 test_cfg=None,
-                 bbox_coder=None,
-                 common_heads=dict(),
-                 loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),
-                 loss_bbox=dict(
-                     type='L1Loss', reduction='none', loss_weight=0.25),
-                 separate_head=dict(
-                     type='SeparateHead', init_bias=-2.19, final_kernel=3),
-                 share_conv_channel=64,
-                 num_heatmap_convs=2,
-                 conv_cfg=dict(type='Conv2d'),
-                 norm_cfg=dict(type='BN2d'),
-                 bias='auto',
-                 norm_bbox=True,
-                 init_cfg=None):
+                 in_channels: Union[List[int], int] = [128],
+                 tasks: Optional[List[dict]] = None,
+                 bbox_coder: Optional[dict] = None,
+                 common_heads: dict = dict(),
+                 loss_cls: dict = dict(
+                     type='mmdet.GaussianFocalLoss', reduction='mean'),
+                 loss_bbox: dict = dict(
+                     type='mmdet.L1Loss', reduction='none', loss_weight=0.25),
+                 separate_head: dict = dict(
+                     type='mmdet.SeparateHead', init_bias=-2.19,
+                     final_kernel=3),
+                 share_conv_channel: int = 64,
+                 num_heatmap_convs: int = 2,
+                 conv_cfg: dict = dict(type='Conv2d'),
+                 norm_cfg: dict = dict(type='BN2d'),
+                 bias: str = 'auto',
+                 norm_bbox: bool = True,
+                 train_cfg: Optional[dict] = None,
+                 test_cfg: Optional[dict] = None,
+                 init_cfg: Optional[dict] = None,
+                 **kwargs):
+        assert init_cfg is None, 'To prevent abnormal initialization ' \
+            'behavior, init_cfg is not allowed to be set'
-        super(CenterHead, self).__init__(init_cfg=init_cfg)
+        super(CenterHead, self).__init__(init_cfg=init_cfg, **kwargs)

+        # TODO we should rename this variable,
+        # for example num_classes_per_task ?
+        # {'num_class': 2, 'class_names': ['pedestrian', 'traffic_cone']}]
+        # TODO seems num_classes is useless
         num_classes = [len(t['class_names']) for t in tasks]
         self.class_names = [t['class_names'] for t in tasks]
         self.train_cfg = train_cfg
@@ -303,9 +314,9 @@ class CenterHead(BaseModule):
         self.num_classes = num_classes
         self.norm_bbox = norm_bbox
-        self.loss_cls = build_loss(loss_cls)
-        self.loss_bbox = build_loss(loss_bbox)
-        self.bbox_coder = build_bbox_coder(bbox_coder)
+        self.loss_cls = MODELS.build(loss_cls)
+        self.loss_bbox = MODELS.build(loss_bbox)
+        self.bbox_coder = TASK_UTILS.build(bbox_coder)
         self.num_anchor_per_locs = [n for n in num_classes]
         self.fp16_enabled = False
@@ -328,7 +339,7 @@ class CenterHead(BaseModule):
                 in_channels=share_conv_channel,
                 heads=heads,
                 num_cls=num_cls)
             self.task_heads.append(builder.build_head(separate_head))

-    def forward_single(self, x):
+    def forward_single(self, x: Tensor) -> dict:
         """Forward function for CenterPoint.

         Args:
@@ -347,7 +358,7 @@ class CenterHead(BaseModule):
         return ret_dicts

-    def forward(self, feats):
+    def forward(self, feats: List[Tensor]) -> Tuple[List[Tensor]]:
         """Forward pass.

         Args:
@@ -384,7 +395,10 @@ class CenterHead(BaseModule):
             feat = feat.view(-1, dim)
         return feat

-    def get_targets(self, gt_bboxes_3d, gt_labels_3d):
+    def get_targets(
+        self,
+        batch_gt_instances_3d: List[InstanceData],
+    ) -> Tuple[List[Tensor]]:
         """Generate targets.

         How each output is transformed:
@@ -399,24 +413,24 @@ class CenterHead(BaseModule):
                 [ tensor0, tensor1, tensor2, ... ]

         Args:
-            gt_bboxes_3d (list[:obj:`LiDARInstance3DBoxes`]): Ground
-                truth gt boxes.
-            gt_labels_3d (list[torch.Tensor]): Labels of boxes.
+            batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
+                gt_instances. It usually includes ``bboxes_3d`` and
+                ``labels_3d`` attributes.

         Returns:
             tuple[list[torch.Tensor]]: Tuple of target including
                 the following results in order.
-                - list[torch.Tensor]: Heatmap scores.
-                - list[torch.Tensor]: Ground truth boxes.
-                - list[torch.Tensor]: Indexes indicating the
-                    position of the valid boxes.
-                - list[torch.Tensor]: Masks indicating which
-                    boxes are valid.
+
+            - list[torch.Tensor]: Heatmap scores.
+            - list[torch.Tensor]: Ground truth boxes.
+            - list[torch.Tensor]: Indexes indicating the
+              position of the valid boxes.
+            - list[torch.Tensor]: Masks indicating which
+              boxes are valid.
         """
         heatmaps, anno_boxes, inds, masks = multi_apply(
-            self.get_targets_single, gt_bboxes_3d, gt_labels_3d)
+            self.get_targets_single, batch_gt_instances_3d)
         # Transpose heatmaps
         heatmaps = list(map(list, zip(*heatmaps)))
         heatmaps = [torch.stack(hms_) for hms_ in heatmaps]
@@ -431,12 +445,14 @@ class CenterHead(BaseModule):
         masks = [torch.stack(masks_) for masks_ in masks]
         return heatmaps, anno_boxes, inds, masks

-    def get_targets_single(self, gt_bboxes_3d, gt_labels_3d):
+    def get_targets_single(self,
+                           gt_instances_3d: InstanceData) -> Tuple[Tensor]:
         """Generate training targets for a single sample.

         Args:
-            gt_bboxes_3d (:obj:`LiDARInstance3DBoxes`): Ground truth gt boxes.
-            gt_labels_3d (torch.Tensor): Labels of boxes.
+            gt_instances_3d (:obj:`InstanceData`): Gt_instances of
+                single data sample. It usually includes
+                ``bboxes_3d`` and ``labels_3d`` attributes.

         Returns:
             tuple[list[torch.Tensor]]: Tuple of target including
@@ -449,6 +465,8 @@ class CenterHead(BaseModule):
             - list[torch.Tensor]: Masks indicating which boxes
                 are valid.
         """
+        gt_labels_3d = gt_instances_3d.labels_3d
+        gt_bboxes_3d = gt_instances_3d.bboxes_3d
         device = gt_labels_3d.device
         gt_bboxes_3d = torch.cat(
             (gt_bboxes_3d.gravity_center, gt_bboxes_3d.tensor[:, 3:]),
@@ -569,21 +587,48 @@ class CenterHead(BaseModule):
             inds.append(ind)
         return heatmaps, anno_boxes, inds, masks

+    def loss(self, pts_feats: List[Tensor],
+             batch_data_samples: List[Det3DDataSample], *args,
+             **kwargs) -> Dict[str, Tensor]:
+        """Forward function for point cloud branch.
+
+        Args:
+            pts_feats (list[torch.Tensor]): Features of point cloud branch
+            batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
+                Samples. It usually includes information such as
+                `gt_instance_3d`, .
+
+        Returns:
+            dict: Losses of each branch.
+        """
+        outs = self(pts_feats)
+        batch_gt_instance_3d = []
+        for data_sample in batch_data_samples:
+            batch_gt_instance_3d.append(data_sample.gt_instances_3d)
+        losses = self.loss_by_feat(outs, batch_gt_instance_3d)
+        return losses

-    @force_fp32(apply_to=('preds_dicts', ))
-    def loss(self, gt_bboxes_3d, gt_labels_3d, preds_dicts, **kwargs):
+    def loss_by_feat(self, preds_dicts: Tuple[List[dict]],
+                     batch_gt_instances_3d: List[InstanceData], *args,
+                     **kwargs):
         """Loss function for CenterHead.

         Args:
-            gt_bboxes_3d (list[:obj:`LiDARInstance3DBoxes`]): Ground
-                truth gt boxes.
-            gt_labels_3d (list[torch.Tensor]): Labels of boxes.
-            preds_dicts (dict): Output of forward function.
+            preds_dicts (tuple[list[dict]]): Prediction results of
+                multiple tasks. The outer tuple indicate different
+                tasks head, and the internal list indicate different
+                FPN level.
+            batch_gt_instances_3d (list[:obj:`InstanceData`]): Batch of
+                gt_instances. It usually includes ``bboxes_3d`` and
+                ``labels_3d`` attributes.

         Returns:
-            dict[str:torch.Tensor]: Loss of heatmap and bbox of each task.
+            dict[str,torch.Tensor]: Loss of heatmap and bbox of each task.
         """
         heatmaps, anno_boxes, inds, masks = self.get_targets(
-            gt_bboxes_3d, gt_labels_3d)
+            batch_gt_instances_3d)
         loss_dict = dict()
         for task_id, preds_dict in enumerate(preds_dicts):
             # heatmap focal loss
@@ -619,15 +664,62 @@ class CenterHead(BaseModule):
             loss_dict[f'task{task_id}.loss_bbox'] = loss_bbox
         return loss_dict

-    def get_bboxes(self, preds_dicts, img_metas, img=None, rescale=False):
+    def predict(self,
+                pts_feats: Dict[str, torch.Tensor],
+                batch_data_samples: List[Det3DDataSample],
+                rescale=True,
+                **kwargs) -> List[InstanceData]:
+        """
+        Args:
+            pts_feats (dict): Point features..
+            batch_data_samples (List[:obj:`Det3DDataSample`]): The Data
+                Samples. It usually includes meta information of data.
+            rescale (bool): Whether rescale the resutls to
+                the original scale.
+
+        Returns:
+            list[:obj:`InstanceData`]: List of processed predictions. Each
+            InstanceData contains 3d Bounding boxes and corresponding
+            scores and labels.
+        """
+        preds_dict = self(pts_feats)
+        batch_size = len(batch_data_samples)
+        batch_input_metas = []
+        for batch_index in range(batch_size):
+            metainfo = batch_data_samples[batch_index].metainfo
+            batch_input_metas.append(metainfo)
+
+        results_list = self.predict_by_feat(
+            preds_dict, batch_input_metas, rescale=rescale, **kwargs)
+        return results_list
+
+    def predict_by_feat(self, preds_dicts: Tuple[List[dict]],
+                        batch_input_metas: List[dict], *args,
+                        **kwargs) -> List[InstanceData]:
         """Generate bboxes from bbox head predictions.

         Args:
-            preds_dicts (tuple[list[dict]]): Prediction results.
-            img_metas (list[dict]): Point cloud and image's meta info.
+            preds_dicts (tuple[list[dict]]): Prediction results of
+                multiple tasks. The outer tuple indicate different
+                tasks head, and the internal list indicate different
+                FPN level.
+            batch_input_metas (list[dict]): Meta info of multiple
+                inputs.

         Returns:
-            list[dict]: Decoded bbox, scores and labels after nms.
+            list[:obj:`InstanceData`]: Instance prediction
+            results of each sample after the post process.
+            Each item usually contains following keys.
+
+            - scores_3d (Tensor): Classification scores, has a shape
+              (num_instance, )
+            - labels_3d (Tensor): Labels of bboxes, has a shape
+              (num_instances, ).
+            - bboxes_3d (:obj:`LiDARInstance3DBoxes`): Prediction
+              of bboxes, contains a tensor with shape
+              (num_instances, 7) or (num_instances, 9), and
+              the last 2 dimensions of 9 is velocity.
         """
         rets = []
         for task_id, preds_dict in enumerate(preds_dicts):
@@ -689,18 +781,20 @@ class CenterHead(BaseModule):
             rets.append(
                 self.get_task_detections(num_class_with_bg,
                                          batch_cls_preds, batch_reg_preds,
-                                         batch_cls_labels, img_metas))
+                                         batch_cls_labels,
+                                         batch_input_metas))

         # Merge branches results
         num_samples = len(rets[0])

         ret_list = []
         for i in range(num_samples):
+            temp_instances = InstanceData()
             for k in rets[0][i].keys():
                 if k == 'bboxes':
                     bboxes = torch.cat([ret[i][k] for ret in rets])
                     bboxes[:, 2] = bboxes[:, 2] - bboxes[:, 5] * 0.5
-                    bboxes = img_metas[i]['box_type_3d'](
+                    bboxes = batch_input_metas[i]['box_type_3d'](
                         bboxes, self.bbox_coder.code_size)
                 elif k == 'scores':
                     scores = torch.cat([ret[i][k] for ret in rets])
@@ -710,7 +804,10 @@ class CenterHead(BaseModule):
                         rets[j][i][k] += flag
                     flag += num_class
                     labels = torch.cat([ret[i][k].int() for ret in rets])
-            ret_list.append([bboxes, scores, labels])
+            temp_instances.bboxes_3d = bboxes
+            temp_instances.scores_3d = scores
+            temp_instances.labels_3d = labels
+            ret_list.append(temp_instances)
         return ret_list

     def get_task_detections(self, num_class_with_bg, batch_cls_preds,
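In short, the head's public surface moves from `loss(gt_bboxes_3d, gt_labels_3d, preds_dicts)` and `get_bboxes(...)` to the entry-point/`*_by_feat` split. A hedged usage sketch, assuming an already-built CenterHead instance `head`, backbone features `pts_feats`, and a list of `Det3DDataSample`s (`batch_data_samples`) carrying `gt_instances_3d`:

    # Training: loss() unpacks gt_instances_3d from the data samples, runs
    # the head on the features, and dispatches to loss_by_feat.
    losses = head.loss(pts_feats, batch_data_samples)

    # Inference: predict() gathers each sample's metainfo and dispatches to
    # predict_by_feat; the result is one InstanceData per input sample.
    results_list = head.predict(pts_feats, batch_data_samples)
    for results in results_list:
        # bboxes_3d / scores_3d / labels_3d, as packed at the end of the diff
        print(results.bboxes_3d, results.scores_3d, results.labels_3d)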
mmdet3d/models/dense_heads/vote_head.py

@@ -4,7 +4,7 @@ from typing import Dict, List, Optional, Union
 import numpy as np
 import torch
 from mmcv.ops import furthest_point_sample
-from mmcv.runner import BaseModule, force_fp32
+from mmcv.runner import BaseModule
 from mmengine import ConfigDict, InstanceData
 from torch.nn import functional as F
@@ -308,7 +308,6 @@ class VoteHead(BaseModule):
         results.update(decode_res)
         return results

-    @force_fp32(apply_to=('bbox_preds', ))
     def loss_by_feat(
             self,
             points: List[torch.Tensor],
mmdet3d/models/detectors/base.py

 # Copyright (c) OpenMMLab. All rights reserved.
 from typing import List, Union

+from mmengine import InstanceData
+
 from mmdet3d.core import Det3DDataSample
 from mmdet3d.core.utils import (ForwardResults, InstanceList, OptConfigType,
                                 OptMultiConfig, OptSampleList, SampleList)
@@ -38,7 +40,7 @@ class Base3DDetector(BaseDetector):
         - "tensor": Forward the whole network and return tensor or tuple of
           tensor without any post-processing, same as a common nn.Module.
         - "predict": Forward and return the predictions, which are fully
-          processed to a list of :obj:`DetDataSample`.
+          processed to a list of :obj:`Det3DDataSample`.
         - "loss": Forward and return a dict of losses according to the given
           inputs and data samples.
@@ -53,8 +55,8 @@ class Base3DDetector(BaseDetector):
                 - points (list[torch.Tensor]): Point cloud of each sample.
                 - imgs (torch.Tensor): Image tensor has shape (B, C, H, W).
-            data_samples (list[:obj:`DetDataSample`],
-                list[list[:obj:`DetDataSample`]], optional): The
+            data_samples (list[:obj:`Det3DDataSample`],
+                list[list[:obj:`Det3DDataSample`]], optional): The
                 annotation data of every samples. When it is a list[list], the
                 outer list indicate the test time augmentation, and the
                 inter list indicate the batch. Otherwise, the list simply
@@ -65,7 +67,7 @@ class Base3DDetector(BaseDetector):
             The return type depends on ``mode``.

             - If ``mode="tensor"``, return a tensor or a tuple of tensor.
-            - If ``mode="predict"``, return a list of :obj:`DetDataSample`.
+            - If ``mode="predict"``, return a list of :obj:`Det3DDataSample`.
             - If ``mode="loss"``, return a dict of tensor.
         """
         if mode == 'loss':
@@ -87,7 +89,11 @@ class Base3DDetector(BaseDetector):
             raise RuntimeError(f'Invalid mode "{mode}". '
                                'Only supports loss, predict and tensor mode')

-    def convert_to_datasample(self, results_list: InstanceList) -> SampleList:
+    def convert_to_datasample(
+        self,
+        results_list_3d: InstanceList,
+        results_list_2d: InstanceList = None,
+    ) -> SampleList:
         """Convert results list to `Det3DDataSample`.

         Subclasses could override it to be compatible for some multi-modality
@@ -100,19 +106,35 @@ class Base3DDetector(BaseDetector):
         Returns:
             list[:obj:`Det3DDataSample`]: Detection results of the
             input. Each Det3DDataSample usually contains
-            'pred_instances_3d'. And the ``pred_instances_3d`` usually
+            'pred_instances_3d'. And the ``pred_instances_3d`` normally
             contains following keys.

             - scores_3d (Tensor): Classification scores, has a shape
               (num_instance, )
             - labels_3d (Tensor): Labels of 3D bboxes, has a shape
               (num_instances, ).
             - bboxes_3d (Tensor): Contains a tensor with shape
               (num_instances, C) where C >=7.
+
+            When there are image prediction in some models, it should
+            contains `pred_instances`, And the ``pred_instances`` normally
+            contains following keys.
+
+            - scores (Tensor): Classification scores of image, has a shape
+              (num_instance, )
+            - labels (Tensor): Predict Labels of 2D bboxes, has a shape
+              (num_instances, ).
+            - bboxes (Tensor): Contains a tensor with shape
+              (num_instances, 4).
         """
-        out_results_list = []
-        for i in range(len(results_list)):
+        data_sample_list = []
+        if results_list_2d is None:
+            results_list_2d = [
+                InstanceData() for _ in range(len(results_list_3d))
+            ]
+        for i in range(len(results_list_3d)):
             result = Det3DDataSample()
-            result.pred_instances_3d = results_list[i]
-            out_results_list.append(result)
-        return out_results_list
+            result.pred_instances_3d = results_list_3d[i]
+            result.pred_instances = results_list_2d[i]
+            data_sample_list.append(result)
+        return data_sample_list
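A brief usage sketch of the widened signature (assuming a detector instance `detector` and InstanceData lists produced by the 3D branch and, optionally, a 2D image branch):

    # 3D-only models: the 2D list defaults to an empty InstanceData per
    # sample, so existing callers keep working unchanged.
    samples = detector.convert_to_datasample(results_list_3d)

    # Multi-modality models can now attach image predictions alongside:
    samples = detector.convert_to_datasample(results_list_3d, results_list_2d)

    print(samples[0].pred_instances_3d)  # bboxes_3d, scores_3d, labels_3d
    print(samples[0].pred_instances)     # 2D boxes/scores/labels when present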
mmdet3d/models/detectors/centerpoint.py

 # Copyright (c) OpenMMLab. All rights reserved.
+from typing import Optional
+
 import torch

-from mmdet3d.core import bbox3d2result, merge_aug_bboxes_3d
+from mmdet3d.core import merge_aug_bboxes_3d
 from mmdet3d.registry import MODELS
 from .mvx_two_stage import MVXTwoStageDetector


 @MODELS.register_module()
 class CenterPoint(MVXTwoStageDetector):
-    """Base class of Multi-modality VoxelNet."""
+    """Base class of Multi-modality VoxelNet.
+
+    Args:
+        pts_voxel_layer (dict, optional): Point cloud voxelization
+            layer. Defaults to None.
+        pts_voxel_encoder (dict, optional): Point voxelization
+            encoder layer. Defaults to None.
+        pts_middle_encoder (dict, optional): Middle encoder layer
+            of points cloud modality. Defaults to None.
+        pts_fusion_layer (dict, optional): Fusion layer.
+            Defaults to None.
+        img_backbone (dict, optional): Backbone of extracting
+            images feature. Defaults to None.
+        pts_backbone (dict, optional): Backbone of extracting
+            points features. Defaults to None.
+        img_neck (dict, optional): Neck of extracting
+            image features. Defaults to None.
+        pts_neck (dict, optional): Neck of extracting
+            points features. Defaults to None.
+        pts_bbox_head (dict, optional): Bboxes head of
+            point cloud modality. Defaults to None.
+        img_roi_head (dict, optional): RoI head of image
+            modality. Defaults to None.
+        img_rpn_head (dict, optional): RPN head of image
+            modality. Defaults to None.
+        train_cfg (dict, optional): Train config of model.
+            Defaults to None.
+        test_cfg (dict, optional): Train config of model.
+            Defaults to None.
+        init_cfg (dict, optional): Initialize config of
+            model. Defaults to None.
+        data_preprocessor (dict or ConfigDict, optional): The pre-process
+            config of :class:`Det3DDataPreprocessor`. Defaults to None.
+    """

     def __init__(self,
-                 pts_voxel_layer=None,
-                 pts_voxel_encoder=None,
-                 pts_middle_encoder=None,
-                 pts_fusion_layer=None,
-                 img_backbone=None,
-                 pts_backbone=None,
-                 img_neck=None,
-                 pts_neck=None,
-                 pts_bbox_head=None,
-                 img_roi_head=None,
-                 img_rpn_head=None,
-                 train_cfg=None,
-                 test_cfg=None,
-                 pretrained=None,
-                 init_cfg=None):
+                 pts_voxel_layer: Optional[dict] = None,
+                 pts_voxel_encoder: Optional[dict] = None,
+                 pts_middle_encoder: Optional[dict] = None,
+                 pts_fusion_layer: Optional[dict] = None,
+                 img_backbone: Optional[dict] = None,
+                 pts_backbone: Optional[dict] = None,
+                 img_neck: Optional[dict] = None,
+                 pts_neck: Optional[dict] = None,
+                 pts_bbox_head: Optional[dict] = None,
+                 img_roi_head: Optional[dict] = None,
+                 img_rpn_head: Optional[dict] = None,
+                 train_cfg: Optional[dict] = None,
+                 test_cfg: Optional[dict] = None,
+                 init_cfg: Optional[dict] = None,
+                 data_preprocessor: Optional[dict] = None,
+                 **kwargs):
         super(CenterPoint,
               self).__init__(pts_voxel_layer, pts_voxel_encoder,
                              pts_middle_encoder, pts_fusion_layer,
                              img_backbone, pts_backbone, img_neck, pts_neck,
                              pts_bbox_head, img_roi_head, img_rpn_head,
-                             train_cfg, test_cfg, pretrained, init_cfg)
+                             train_cfg, test_cfg, init_cfg,
+                             data_preprocessor, **kwargs)

-    def extract_pts_feat(self, pts, img_feats, img_metas):
-        """Extract features of points."""
-        if not self.with_pts_bbox:
-            return None
-        voxels, num_points, coors = self.voxelize(pts)
-        voxel_features = self.pts_voxel_encoder(voxels, num_points, coors)
-        batch_size = coors[-1, 0] + 1
-        x = self.pts_middle_encoder(voxel_features, coors, batch_size)
-        x = self.pts_backbone(x)
-        if self.with_pts_neck:
-            x = self.pts_neck(x)
-        return x
-
-    def forward_pts_train(self,
-                          pts_feats,
-                          gt_bboxes_3d,
-                          gt_labels_3d,
-                          img_metas,
-                          gt_bboxes_ignore=None):
-        """Forward function for point cloud branch.
-
-        Args:
-            pts_feats (list[torch.Tensor]): Features of point cloud branch
-            gt_bboxes_3d (list[:obj:`BaseInstance3DBoxes`]): Ground truth
-                boxes for each sample.
-            gt_labels_3d (list[torch.Tensor]): Ground truth labels for
-                boxes of each sampole
-            img_metas (list[dict]): Meta information of samples.
-            gt_bboxes_ignore (list[torch.Tensor], optional): Ground truth
-                boxes to be ignored. Defaults to None.
-
-        Returns:
-            dict: Losses of each branch.
-        """
-        outs = self.pts_bbox_head(pts_feats)
-        loss_inputs = [gt_bboxes_3d, gt_labels_3d, outs]
-        losses = self.pts_bbox_head.loss(*loss_inputs)
-        return losses
-
-    def simple_test_pts(self, x, img_metas, rescale=False):
-        """Test function of point cloud branch."""
-        outs = self.pts_bbox_head(x)
-        bbox_list = self.pts_bbox_head.get_bboxes(
-            outs, img_metas, rescale=rescale)
-        bbox_results = [
-            bbox3d2result(bboxes, scores, labels)
-            for bboxes, scores, labels in bbox_list
-        ]
-        return bbox_results
+    # TODO support this
     def aug_test_pts(self, feats, img_metas, rescale=False):
         """Test function of point cloud branch with augmentaiton.
@@ -107,6 +95,7 @@ class CenterPoint(MVXTwoStageDetector):
             - scores_3d (torch.Tensor): Scores of predicted boxes.
             - labels_3d (torch.Tensor): Labels of predicted boxes.
         """
+        raise NotImplementedError
         # only support aug_test for one sample
         outs_list = []
         for x, img_meta in zip(feats, img_metas):
@@ -186,7 +175,9 @@ class CenterPoint(MVXTwoStageDetector):
                 bbox_list[0][key] = bbox_list[0][key].to('cpu')
         return bbox_list[0]

+    # TODO support this
     def aug_test(self, points, img_metas, imgs=None, rescale=False):
+        raise NotImplementedError
         """Test function with augmentaiton."""
         img_feats, pts_feats = self.extract_feats(points, img_metas, imgs)
         bbox_list = dict()
mmdet3d/models/detectors/mvx_faster_rcnn.py

 # Copyright (c) OpenMMLab. All rights reserved.
+from typing import List, Optional, Sequence
+
 import torch
-from mmcv.runner import force_fp32
+from torch import Tensor
 from torch.nn import functional as F

 from mmdet3d.registry import MODELS
@@ -23,7 +25,6 @@ class DynamicMVXFasterRCNN(MVXTwoStageDetector):
         super(DynamicMVXFasterRCNN, self).__init__(**kwargs)

     @torch.no_grad()
-    @force_fp32()
     def voxelize(self, points):
         """Apply dynamic voxelization to points.
@@ -46,13 +47,30 @@ class DynamicMVXFasterRCNN(MVXTwoStageDetector):
         coors_batch = torch.cat(coors_batch, dim=0)
         return points, coors_batch

-    def extract_pts_feat(self, points, img_feats, img_metas):
-        """Extract point features."""
+    def extract_pts_feat(
+            self,
+            points: List[Tensor],
+            img_feats: Optional[Sequence[Tensor]] = None,
+            batch_input_metas: Optional[List[dict]] = None
+    ) -> Sequence[Tensor]:
+        """Extract features of points.
+
+        Args:
+            points (List[tensor]): Point cloud of multiple inputs.
+            img_feats (list[Tensor], tuple[tensor], optional): Features from
+                image backbone.
+            batch_input_metas (list[dict], optional): The meta information
+                of multiple samples. Defaults to True.
+
+        Returns:
+            Sequence[tensor]: points features of multiple inputs
+            from backbone or neck.
+        """
         if not self.with_pts_bbox:
             return None
         voxels, coors = self.voxelize(points)
         voxel_features, feature_coors = self.pts_voxel_encoder(
-            voxels, coors, points, img_feats, img_metas)
+            voxels, coors, points, img_feats, batch_input_metas)
         batch_size = coors[-1, 0] + 1
         x = self.pts_middle_encoder(voxel_features, feature_coors, batch_size)
         x = self.pts_backbone(x)
mmdet3d/models/detectors/mvx_two_stage.py

(diff collapsed on this page; +225, -293)
mmdet3d/models/voxel_encoders/voxel_encoder.py

@@ -3,7 +3,7 @@ import torch
 from mmcv.cnn import build_norm_layer
 from mmcv.ops import DynamicScatter
 from mmcv.runner import force_fp32
-from torch import nn
+from torch import Tensor, nn

 from mmdet3d.registry import MODELS
 from .. import builder
@@ -20,13 +20,14 @@ class HardSimpleVFE(nn.Module):
         num_features (int, optional): Number of features to use. Default: 4.
     """

-    def __init__(self, num_features=4):
+    def __init__(self, num_features: int = 4) -> None:
         super(HardSimpleVFE, self).__init__()
         self.num_features = num_features
         self.fp16_enabled = False

     @force_fp32(out_fp16=True)
-    def forward(self, features, num_points, coors):
+    def forward(self, features: Tensor, num_points: Tensor, coors: Tensor,
+                *args, **kwargs) -> Tensor:
         """Forward function.

         Args:
@@ -66,7 +67,7 @@ class DynamicSimpleVFE(nn.Module):
     @torch.no_grad()
     @force_fp32(out_fp16=True)
-    def forward(self, features, coors):
+    def forward(self, features, coors, *args, **kwargs):
         """Forward function.

         Args:
@@ -218,13 +219,14 @@ class DynamicVFE(nn.Module):
         center_per_point = voxel_mean[voxel_inds, ...]
         return center_per_point

     @force_fp32(out_fp16=True)
     def forward(self,
                 features,
                 coors,
                 points=None,
                 img_feats=None,
-                img_metas=None):
+                img_metas=None,
+                *args,
+                **kwargs):
         """Forward functions.

         Args:
@@ -390,7 +392,9 @@ class HardVFE(nn.Module):
                 num_points,
                 coors,
                 img_feats=None,
-                img_metas=None):
+                img_metas=None,
+                *args,
+                **kwargs):
         """Forward functions.

         Args:
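The `*args, **kwargs` additions let every VFE forward tolerate the extra arguments the refactored detectors now pass uniformly, without each encoder having to name them. As a reminder of what `HardSimpleVFE.forward` actually computes, a toy sketch of the per-voxel mean (shapes are hypothetical):

    import torch

    # Toy shapes: (num_voxels, max_points_per_voxel, channels).
    features = torch.rand(6, 10, 5)
    num_points = torch.randint(1, 10, (6,))

    # Zero out padding rows, as hard voxelization produces them, so a plain
    # sum divided by the true point count yields the per-voxel mean.
    mask = (torch.arange(10)[None, :] < num_points[:, None]).float()
    features = features * mask[:, :, None]

    num_features = 4  # 4 by default; the nuScenes configs above use 5
    mean = features[:, :, :num_features].sum(dim=1) / num_points.view(-1, 1)
    print(mean.shape)  # torch.Size([6, 4])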
tests/test_core/test_bbox/test_samplers.py

 # Copyright (c) OpenMMLab. All rights reserved.
 import pytest
 import torch
+from mmengine import InstanceData

-from mmdet3d.core.bbox.assigners import MaxIoUAssigner
+from mmdet3d.core.bbox.assigners import Max3DIoUAssigner
 from mmdet3d.core.bbox.samplers import IoUNegPiecewiseSampler


 def test_iou_piecewise_sampler():
     if not torch.cuda.is_available():
         pytest.skip()
-    assigner = MaxIoUAssigner(
+    assigner = Max3DIoUAssigner(
         pos_iou_thr=0.55,
         neg_iou_thr=0.55,
         min_pos_iou=0.55,
@@ -27,7 +28,13 @@ def test_iou_piecewise_sampler():
         [[0, 0, 0, 10, 10, 9, 0.2], [5, 10, 10, 20, 20, 15, 0.6]],
         dtype=torch.float32).cuda()
     gt_labels = torch.tensor([1, 1], dtype=torch.int64).cuda()
-    assign_result = assigner.assign(bboxes, gt_bboxes, gt_labels=gt_labels)
+    gt_instanses = InstanceData()
+    gt_instanses.bboxes_3d = gt_bboxes
+    gt_instanses.labels_3d = gt_labels
+    pred_instaces = InstanceData()
+    pred_instaces.priors = bboxes
+    assign_result = assigner.assign(pred_instaces, gt_instanses)
     sampler = IoUNegPiecewiseSampler(
         num=10,