Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
mmdetection3d
Commits
2c136730
Unverified
Commit
2c136730
authored
Jun 19, 2023
by
Jingwei Zhang
Committed by
GitHub
Jun 19, 2023
Browse files
[Feature] Support new config type (#2608)
* support new configs * support new configs * verify configs
parent
aea26ac7
Changes
52
Show whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
970 additions
and
0 deletions
+970
-0
mmdet3d/configs/centerpoint/__init__.py
mmdet3d/configs/centerpoint/__init__.py
+1
-0
mmdet3d/configs/centerpoint/centerpoint_pillar02_second_secfpn_8xb4_cyclic_20e_nus_3d.py
...terpoint_pillar02_second_secfpn_8xb4_cyclic_20e_nus_3d.py
+179
-0
mmdet3d/configs/centerpoint/centerpoint_voxel01_second_secfpn_8xb4_cyclic_20e_nus_3d.py
...nterpoint_voxel01_second_secfpn_8xb4_cyclic_20e_nus_3d.py
+179
-0
mmdet3d/configs/minkunet/__init__.py
mmdet3d/configs/minkunet/__init__.py
+1
-0
mmdet3d/configs/minkunet/minkunet34_w32_torchsparse_8xb2_laser_polar_mix_3x_semantickitti.py
..._w32_torchsparse_8xb2_laser_polar_mix_3x_semantickitti.py
+97
-0
mmdet3d/configs/mvxnet/__init__.py
mmdet3d/configs/mvxnet/__init__.py
+1
-0
mmdet3d/configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2_80e_kitti_3d_3class.py
...t/mvxnet_fpn_dv_second_secfpn_8xb2_80e_kitti_3d_3class.py
+310
-0
mmdet3d/configs/pgd/__init__.py
mmdet3d/configs/pgd/__init__.py
+1
-0
mmdet3d/configs/pgd/pgd_r101_caffe_fpn_head_gn_4xb3_4x_kitti_mono3d.py
...gs/pgd/pgd_r101_caffe_fpn_head_gn_4xb3_4x_kitti_mono3d.py
+150
-0
mmdet3d/configs/votenet/__init__.py
mmdet3d/configs/votenet/__init__.py
+1
-0
mmdet3d/configs/votenet/votenet_8xb8_scannet_3d.py
mmdet3d/configs/votenet/votenet_8xb8_scannet_3d.py
+47
-0
setup.cfg
setup.cfg
+3
-0
No files found.
mmdet3d/configs/centerpoint/__init__.py
0 → 100644
View file @
2c136730
# Copyright (c) OpenMMLab. All rights reserved.
mmdet3d/configs/centerpoint/centerpoint_pillar02_second_secfpn_8xb4_cyclic_20e_nus_3d.py
0 → 100644
View file @
2c136730
# Copyright (c) OpenMMLab. All rights reserved.
# mmengine "new-style" config: base configs are pulled in via star-imports
# guarded by the always-true `if '_base_':` marker, and existing config
# objects (model, dataloaders, ...) are updated in place with `.merge()`.
if '_base_':
    from .._base_.datasets.nus_3d import *
    from .._base_.models.centerpoint_pillar02_second_secfpn_nus import *
    from .._base_.schedules.cyclic_20e import *
    from .._base_.default_runtime import *

from mmengine.dataset.sampler import DefaultSampler

from mmdet3d.datasets.dataset_wrappers import CBGSDataset
from mmdet3d.datasets.nuscenes_dataset import NuScenesDataset
from mmdet3d.datasets.transforms.formating import Pack3DDetInputs
from mmdet3d.datasets.transforms.loading import (LoadAnnotations3D,
                                                 LoadPointsFromFile,
                                                 LoadPointsFromMultiSweeps)
from mmdet3d.datasets.transforms.test_time_aug import MultiScaleFlipAug3D
from mmdet3d.datasets.transforms.transforms_3d import (  # noqa
    GlobalRotScaleTrans, ObjectNameFilter, ObjectRangeFilter, ObjectSample,
    PointShuffle, PointsRangeFilter, RandomFlip3D)

# If point cloud range is changed, the models should also change their point
# cloud range accordingly
point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]
# Using calibration info convert the Lidar-coordinate point cloud range to the
# ego-coordinate point cloud range could bring a little promotion in nuScenes.
# point_cloud_range = [-51.2, -52, -5.0, 51.2, 50.4, 3.0]
# For nuScenes we usually do 10-class detection
class_names = [
    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
]
data_prefix.merge(
    dict(pts='samples/LIDAR_TOP', img='', sweeps='sweeps/LIDAR_TOP'))
model.merge(
    dict(
        data_preprocessor=dict(
            voxel_layer=dict(point_cloud_range=point_cloud_range)),
        pts_voxel_encoder=dict(point_cloud_range=point_cloud_range),
        pts_bbox_head=dict(bbox_coder=dict(pc_range=point_cloud_range[:2])),
        # model training and testing settings
        train_cfg=dict(pts=dict(point_cloud_range=point_cloud_range)),
        test_cfg=dict(pts=dict(pc_range=point_cloud_range[:2]))))

dataset_type = 'NuScenesDataset'
data_root = 'data/nuscenes/'
backend_args = None

db_sampler = dict(
    data_root=data_root,
    info_path=data_root + 'nuscenes_dbinfos_train.pkl',
    rate=1.0,
    prepare=dict(
        filter_by_difficulty=[-1],
        filter_by_min_points=dict(
            car=5,
            truck=5,
            bus=5,
            trailer=5,
            construction_vehicle=5,
            traffic_cone=5,
            barrier=5,
            motorcycle=5,
            bicycle=5,
            pedestrian=5)),
    classes=class_names,
    sample_groups=dict(
        car=2,
        truck=3,
        construction_vehicle=7,
        bus=4,
        trailer=6,
        barrier=2,
        motorcycle=6,
        bicycle=6,
        pedestrian=2,
        traffic_cone=2),
    points_loader=dict(
        type=LoadPointsFromFile,
        coord_type='LIDAR',
        load_dim=5,
        use_dim=[0, 1, 2, 3, 4],
        backend_args=backend_args),
    backend_args=backend_args)

train_pipeline = [
    dict(
        type=LoadPointsFromFile,
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        backend_args=backend_args),
    dict(
        type=LoadPointsFromMultiSweeps,
        sweeps_num=9,
        use_dim=[0, 1, 2, 3, 4],
        pad_empty_sweeps=True,
        remove_close=True,
        backend_args=backend_args),
    dict(type=LoadAnnotations3D, with_bbox_3d=True, with_label_3d=True),
    dict(type=ObjectSample, db_sampler=db_sampler),
    dict(
        type=GlobalRotScaleTrans,
        rot_range=[-0.3925, 0.3925],
        scale_ratio_range=[0.95, 1.05],
        translation_std=[0, 0, 0]),
    dict(
        type=RandomFlip3D,
        sync_2d=False,
        flip_ratio_bev_horizontal=0.5,
        flip_ratio_bev_vertical=0.5),
    dict(type=PointsRangeFilter, point_cloud_range=point_cloud_range),
    dict(type=ObjectRangeFilter, point_cloud_range=point_cloud_range),
    dict(type=ObjectNameFilter, classes=class_names),
    dict(type=PointShuffle),
    dict(
        type=Pack3DDetInputs,
        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
    dict(
        type=LoadPointsFromFile,
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        backend_args=backend_args),
    dict(
        type=LoadPointsFromMultiSweeps,
        sweeps_num=9,
        use_dim=[0, 1, 2, 3, 4],
        pad_empty_sweeps=True,
        remove_close=True,
        backend_args=backend_args),
    dict(
        type=MultiScaleFlipAug3D,
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type=GlobalRotScaleTrans,
                rot_range=[0, 0],
                scale_ratio_range=[1., 1.],
                translation_std=[0, 0, 0]),
            dict(type=RandomFlip3D)
        ]),
    dict(type=Pack3DDetInputs, keys=['points'])
]

train_dataloader.merge(
    dict(
        _delete_=True,
        batch_size=4,
        num_workers=4,
        persistent_workers=True,
        sampler=dict(type=DefaultSampler, shuffle=True),
        dataset=dict(
            type=CBGSDataset,
            dataset=dict(
                type=NuScenesDataset,
                data_root=data_root,
                ann_file='nuscenes_infos_train.pkl',
                pipeline=train_pipeline,
                metainfo=dict(classes=class_names),
                test_mode=False,
                data_prefix=data_prefix,
                use_valid_flag=True,
                # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
                # and box_type_3d='Depth' in sunrgbd and scannet dataset.
                box_type_3d='LiDAR',
                backend_args=backend_args))))
test_dataloader.merge(
    dict(
        dataset=dict(
            pipeline=test_pipeline, metainfo=dict(classes=class_names))))
val_dataloader.merge(
    dict(
        dataset=dict(
            pipeline=test_pipeline, metainfo=dict(classes=class_names))))

train_cfg.merge(dict(val_interval=20))
mmdet3d/configs/centerpoint/centerpoint_voxel01_second_secfpn_8xb4_cyclic_20e_nus_3d.py
0 → 100644
View file @
2c136730
# Copyright (c) OpenMMLab. All rights reserved.
# mmengine "new-style" config: base configs are pulled in via star-imports
# guarded by the always-true `if '_base_':` marker, and existing config
# objects (model, dataloaders, ...) are updated in place with `.merge()`.
if '_base_':
    from .._base_.datasets.nus_3d import *
    from .._base_.models.centerpoint_voxel01_second_secfpn_nus import *
    from .._base_.schedules.cyclic_20e import *
    from .._base_.default_runtime import *

from mmengine.dataset.sampler import DefaultSampler

from mmdet3d.datasets.dataset_wrappers import CBGSDataset
from mmdet3d.datasets.nuscenes_dataset import NuScenesDataset
from mmdet3d.datasets.transforms.formating import Pack3DDetInputs
from mmdet3d.datasets.transforms.loading import (LoadAnnotations3D,
                                                 LoadPointsFromFile,
                                                 LoadPointsFromMultiSweeps)
from mmdet3d.datasets.transforms.test_time_aug import MultiScaleFlipAug3D
from mmdet3d.datasets.transforms.transforms_3d import (  # noqa
    GlobalRotScaleTrans, ObjectNameFilter, ObjectRangeFilter, ObjectSample,
    PointShuffle, PointsRangeFilter, RandomFlip3D)

# If point cloud range is changed, the models should also change their point
# cloud range accordingly
point_cloud_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]
# Using calibration info convert the Lidar-coordinate point cloud range to the
# ego-coordinate point cloud range could bring a little promotion in nuScenes.
# point_cloud_range = [-51.2, -52, -5.0, 51.2, 50.4, 3.0]
# For nuScenes we usually do 10-class detection
class_names = [
    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
]
data_prefix.merge(
    dict(pts='samples/LIDAR_TOP', img='', sweeps='sweeps/LIDAR_TOP'))
model.merge(
    dict(
        data_preprocessor=dict(
            voxel_layer=dict(point_cloud_range=point_cloud_range)),
        pts_bbox_head=dict(bbox_coder=dict(pc_range=point_cloud_range[:2])),
        # model training and testing settings
        train_cfg=dict(pts=dict(point_cloud_range=point_cloud_range)),
        test_cfg=dict(pts=dict(pc_range=point_cloud_range[:2]))))

dataset_type = 'NuScenesDataset'
data_root = 'data/nuscenes/'
backend_args = None

db_sampler = dict(
    data_root=data_root,
    info_path=data_root + 'nuscenes_dbinfos_train.pkl',
    rate=1.0,
    prepare=dict(
        filter_by_difficulty=[-1],
        filter_by_min_points=dict(
            car=5,
            truck=5,
            bus=5,
            trailer=5,
            construction_vehicle=5,
            traffic_cone=5,
            barrier=5,
            motorcycle=5,
            bicycle=5,
            pedestrian=5)),
    classes=class_names,
    sample_groups=dict(
        car=2,
        truck=3,
        construction_vehicle=7,
        bus=4,
        trailer=6,
        barrier=2,
        motorcycle=6,
        bicycle=6,
        pedestrian=2,
        traffic_cone=2),
    points_loader=dict(
        type=LoadPointsFromFile,
        coord_type='LIDAR',
        load_dim=5,
        use_dim=[0, 1, 2, 3, 4],
        backend_args=backend_args),
    backend_args=backend_args)

train_pipeline = [
    dict(
        type=LoadPointsFromFile,
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        backend_args=backend_args),
    dict(
        type=LoadPointsFromMultiSweeps,
        sweeps_num=9,
        use_dim=[0, 1, 2, 3, 4],
        pad_empty_sweeps=True,
        remove_close=True,
        backend_args=backend_args),
    dict(type=LoadAnnotations3D, with_bbox_3d=True, with_label_3d=True),
    dict(type=ObjectSample, db_sampler=db_sampler),
    dict(
        type=GlobalRotScaleTrans,
        rot_range=[-0.3925, 0.3925],
        scale_ratio_range=[0.95, 1.05],
        translation_std=[0, 0, 0]),
    dict(
        type=RandomFlip3D,
        sync_2d=False,
        flip_ratio_bev_horizontal=0.5,
        flip_ratio_bev_vertical=0.5),
    dict(type=PointsRangeFilter, point_cloud_range=point_cloud_range),
    dict(type=ObjectRangeFilter, point_cloud_range=point_cloud_range),
    dict(type=ObjectNameFilter, classes=class_names),
    dict(type=PointShuffle),
    dict(
        type=Pack3DDetInputs,
        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
    dict(
        type=LoadPointsFromFile,
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        backend_args=backend_args),
    dict(
        type=LoadPointsFromMultiSweeps,
        sweeps_num=9,
        use_dim=[0, 1, 2, 3, 4],
        pad_empty_sweeps=True,
        remove_close=True,
        backend_args=backend_args),
    dict(
        type=MultiScaleFlipAug3D,
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type=GlobalRotScaleTrans,
                rot_range=[0, 0],
                scale_ratio_range=[1., 1.],
                translation_std=[0, 0, 0]),
            dict(type=RandomFlip3D),
            dict(type=PointsRangeFilter, point_cloud_range=point_cloud_range)
        ]),
    dict(type=Pack3DDetInputs, keys=['points'])
]

train_dataloader.merge(
    dict(
        _delete_=True,
        batch_size=4,
        num_workers=4,
        persistent_workers=True,
        sampler=dict(type=DefaultSampler, shuffle=True),
        dataset=dict(
            type=CBGSDataset,
            dataset=dict(
                type=NuScenesDataset,
                data_root=data_root,
                ann_file='nuscenes_infos_train.pkl',
                pipeline=train_pipeline,
                metainfo=dict(classes=class_names),
                test_mode=False,
                data_prefix=data_prefix,
                use_valid_flag=True,
                # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
                # and box_type_3d='Depth' in sunrgbd and scannet dataset.
                box_type_3d='LiDAR',
                backend_args=backend_args))))
test_dataloader.merge(
    dict(
        dataset=dict(
            pipeline=test_pipeline, metainfo=dict(classes=class_names))))
val_dataloader.merge(
    dict(
        dataset=dict(
            pipeline=test_pipeline, metainfo=dict(classes=class_names))))

train_cfg.merge(dict(val_interval=20))
mmdet3d/configs/minkunet/__init__.py
0 → 100644
View file @
2c136730
# Copyright (c) OpenMMLab. All rights reserved.
mmdet3d/configs/minkunet/minkunet34_w32_torchsparse_8xb2_laser_polar_mix_3x_semantickitti.py
0 → 100644
View file @
2c136730
# Copyright (c) OpenMMLab. All rights reserved.
# mmengine "new-style" config: base configs are pulled in via star-imports
# guarded by the always-true `if '_base_':` marker, and existing config
# objects (model, dataloaders, hooks, ...) are updated with `.merge()`.
if '_base_':
    from .._base_.datasets.semantickitti import *
    from .._base_.models.minkunet import *
    from .._base_.schedules.schedule_3x import *
    from .._base_.default_runtime import *

from mmcv.transforms.wrappers import RandomChoice
from mmengine.hooks.checkpoint_hook import CheckpointHook

from mmdet3d.datasets.transforms.formating import Pack3DDetInputs
from mmdet3d.datasets.transforms.loading import (LoadAnnotations3D,
                                                 LoadPointsFromFile,
                                                 PointSegClassMapping)
from mmdet3d.datasets.transforms.transforms_3d import (GlobalRotScaleTrans,
                                                       LaserMix, PolarMix)

# MinkUNet34 variant: dynamic voxelization (no voxel cap) and a deeper
# encoder than the base MinkUNet.
model.merge(
    dict(
        data_preprocessor=dict(max_voxels=None),
        backbone=dict(encoder_blocks=[2, 3, 4, 6])))

train_pipeline = [
    dict(type=LoadPointsFromFile, coord_type='LIDAR', load_dim=4, use_dim=4),
    dict(
        type=LoadAnnotations3D,
        with_bbox_3d=False,
        with_label_3d=False,
        with_seg_3d=True,
        seg_3d_dtype='np.int32',
        # SemanticKITTI packs instance id in the upper 16 bits of each label
        seg_offset=2**16,
        dataset_type='semantickitti'),
    dict(type=PointSegClassMapping),
    # Randomly pick exactly one of LaserMix / PolarMix per sample (50/50).
    dict(
        type=RandomChoice,
        transforms=[
            [
                dict(
                    type=LaserMix,
                    num_areas=[3, 4, 5, 6],
                    pitch_angles=[-25, 3],
                    pre_transform=[
                        dict(
                            type=LoadPointsFromFile,
                            coord_type='LIDAR',
                            load_dim=4,
                            use_dim=4),
                        dict(
                            type=LoadAnnotations3D,
                            with_bbox_3d=False,
                            with_label_3d=False,
                            with_seg_3d=True,
                            seg_3d_dtype='np.int32',
                            seg_offset=2**16,
                            dataset_type='semantickitti'),
                        dict(type=PointSegClassMapping)
                    ],
                    prob=1)
            ],
            [
                dict(
                    type=PolarMix,
                    instance_classes=[0, 1, 2, 3, 4, 5, 6, 7],
                    swap_ratio=0.5,
                    rotate_paste_ratio=1.0,
                    pre_transform=[
                        dict(
                            type=LoadPointsFromFile,
                            coord_type='LIDAR',
                            load_dim=4,
                            use_dim=4),
                        dict(
                            type=LoadAnnotations3D,
                            with_bbox_3d=False,
                            with_label_3d=False,
                            with_seg_3d=True,
                            seg_3d_dtype='np.int32',
                            seg_offset=2**16,
                            dataset_type='semantickitti'),
                        dict(type=PointSegClassMapping)
                    ],
                    prob=1)
            ],
        ],
        prob=[0.5, 0.5]),
    dict(
        type=GlobalRotScaleTrans,
        rot_range=[0., 6.28318531],
        scale_ratio_range=[0.95, 1.05],
        translation_std=[0, 0, 0],
    ),
    dict(type=Pack3DDetInputs, keys=['points', 'pts_semantic_mask'])
]

train_dataloader.merge(dict(dataset=dict(pipeline=train_pipeline)))

default_hooks.merge(dict(checkpoint=dict(type=CheckpointHook, interval=1)))
mmdet3d/configs/mvxnet/__init__.py
0 → 100644
View file @
2c136730
# Copyright (c) OpenMMLab. All rights reserved.
mmdet3d/configs/mvxnet/mvxnet_fpn_dv_second_secfpn_8xb2_80e_kitti_3d_3class.py
0 → 100644
View file @
2c136730
# Copyright (c) OpenMMLab. All rights reserved.
# mmengine "new-style" config for MVX-Net (dynamic voxelization, LiDAR+camera
# fusion) on KITTI 3-class. Base schedule/runtime come in via star-imports
# guarded by the always-true `if '_base_':` marker; the model and dataloaders
# are defined from scratch here.
if '_base_':
    from .._base_.schedules.cosine import *
    from .._base_.default_runtime import *

from mmcv.transforms.loading import LoadImageFromFile
from mmcv.transforms.processing import RandomResize, Resize
from mmengine.dataset.dataset_wrapper import RepeatDataset
from mmengine.dataset.sampler import DefaultSampler
from mmengine.visualization.vis_backend import LocalVisBackend

from mmdet3d.datasets.kitti_dataset import KittiDataset
from mmdet3d.datasets.transforms.formating import Pack3DDetInputs
from mmdet3d.datasets.transforms.loading import (LoadAnnotations3D,
                                                 LoadPointsFromFile)
from mmdet3d.datasets.transforms.test_time_aug import MultiScaleFlipAug3D
from mmdet3d.datasets.transforms.transforms_3d import (GlobalRotScaleTrans,
                                                       ObjectRangeFilter,
                                                       PointShuffle,
                                                       PointsRangeFilter,
                                                       RandomFlip3D)
from mmdet3d.evaluation.metrics.kitti_metric import KittiMetric
from mmdet3d.models.backbones.second import SECOND
from mmdet3d.models.data_preprocessors.data_preprocessor import \
    Det3DDataPreprocessor
from mmdet3d.models.dense_heads.anchor3d_head import Anchor3DHead
from mmdet3d.models.detectors.mvx_faster_rcnn import DynamicMVXFasterRCNN
from mmdet3d.models.layers.fusion_layers.point_fusion import PointFusion
from mmdet3d.models.middle_encoders.sparse_encoder import SparseEncoder
from mmdet3d.models.necks.second_fpn import SECONDFPN
from mmdet3d.models.task_modules.anchor.anchor_3d_generator import \
    Anchor3DRangeGenerator
from mmdet3d.models.task_modules.assigners.max_3d_iou_assigner import \
    Max3DIoUAssigner
from mmdet3d.models.task_modules.coders.delta_xyzwhlr_bbox_coder import \
    DeltaXYZWLHRBBoxCoder
from mmdet3d.models.voxel_encoders.voxel_encoder import DynamicVFE
from mmdet3d.structures.ops.iou3d_calculator import BboxOverlapsNearest3D
from mmdet3d.visualization.local_visualizer import Det3DLocalVisualizer

# model settings
voxel_size = [0.05, 0.05, 0.1]
point_cloud_range = [0, -40, -3, 70.4, 40, 1]

model = dict(
    type=DynamicMVXFasterRCNN,
    data_preprocessor=dict(
        type=Det3DDataPreprocessor,
        voxel=True,
        voxel_type='dynamic',
        # dynamic voxelization: no per-voxel point cap and no voxel cap
        voxel_layer=dict(
            max_num_points=-1,
            point_cloud_range=point_cloud_range,
            voxel_size=voxel_size,
            max_voxels=(-1, -1)),
        mean=[102.9801, 115.9465, 122.7717],
        std=[1.0, 1.0, 1.0],
        bgr_to_rgb=False,
        pad_size_divisor=32),
    img_backbone=dict(
        type='mmdet.ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=False),
        norm_eval=True,
        style='caffe'),
    img_neck=dict(
        type='mmdet.FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        # make the image features more stable numerically to avoid loss nan
        norm_cfg=dict(type='BN', requires_grad=False),
        num_outs=5),
    pts_voxel_encoder=dict(
        type=DynamicVFE,
        in_channels=4,
        feat_channels=[64, 64],
        with_distance=False,
        voxel_size=voxel_size,
        with_cluster_center=True,
        with_voxel_center=True,
        point_cloud_range=point_cloud_range,
        fusion_layer=dict(
            type=PointFusion,
            img_channels=256,
            pts_channels=64,
            mid_channels=128,
            out_channels=128,
            img_levels=[0, 1, 2, 3, 4],
            align_corners=False,
            activate_out=True,
            fuse_out=False)),
    pts_middle_encoder=dict(
        type=SparseEncoder,
        in_channels=128,
        sparse_shape=[41, 1600, 1408],
        order=('conv', 'norm', 'act')),
    pts_backbone=dict(
        type=SECOND,
        in_channels=256,
        layer_nums=[5, 5],
        layer_strides=[1, 2],
        out_channels=[128, 256]),
    pts_neck=dict(
        type=SECONDFPN,
        in_channels=[128, 256],
        upsample_strides=[1, 2],
        out_channels=[256, 256]),
    pts_bbox_head=dict(
        type=Anchor3DHead,
        num_classes=3,
        in_channels=512,
        feat_channels=512,
        use_direction_classifier=True,
        anchor_generator=dict(
            type=Anchor3DRangeGenerator,
            ranges=[
                [0, -40.0, -0.6, 70.4, 40.0, -0.6],
                [0, -40.0, -0.6, 70.4, 40.0, -0.6],
                [0, -40.0, -1.78, 70.4, 40.0, -1.78],
            ],
            sizes=[[0.8, 0.6, 1.73], [1.76, 0.6, 1.73], [3.9, 1.6, 1.56]],
            rotations=[0, 1.57],
            reshape_out=False),
        assigner_per_size=True,
        diff_rad_by_sin=True,
        assign_per_class=True,
        bbox_coder=dict(type=DeltaXYZWLHRBBoxCoder),
        loss_cls=dict(
            type='mmdet.FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(
            type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
        loss_dir=dict(
            type='mmdet.CrossEntropyLoss',
            use_sigmoid=False,
            loss_weight=0.2)),
    # model training and testing settings
    train_cfg=dict(
        pts=dict(
            assigner=[
                dict(  # for Pedestrian
                    type=Max3DIoUAssigner,
                    iou_calculator=dict(type=BboxOverlapsNearest3D),
                    pos_iou_thr=0.35,
                    neg_iou_thr=0.2,
                    min_pos_iou=0.2,
                    ignore_iof_thr=-1),
                dict(  # for Cyclist
                    type=Max3DIoUAssigner,
                    iou_calculator=dict(type=BboxOverlapsNearest3D),
                    pos_iou_thr=0.35,
                    neg_iou_thr=0.2,
                    min_pos_iou=0.2,
                    ignore_iof_thr=-1),
                dict(  # for Car
                    type=Max3DIoUAssigner,
                    iou_calculator=dict(type=BboxOverlapsNearest3D),
                    pos_iou_thr=0.6,
                    neg_iou_thr=0.45,
                    min_pos_iou=0.45,
                    ignore_iof_thr=-1),
            ],
            allowed_border=0,
            pos_weight=-1,
            debug=False)),
    test_cfg=dict(
        pts=dict(
            use_rotate_nms=True,
            nms_across_levels=False,
            nms_thr=0.01,
            score_thr=0.1,
            min_bbox_size=0,
            nms_pre=100,
            max_num=50)))

# dataset settings
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Pedestrian', 'Cyclist', 'Car']
metainfo = dict(classes=class_names)
input_modality = dict(use_lidar=True, use_camera=True)
backend_args = None

train_pipeline = [
    dict(
        type=LoadPointsFromFile,
        coord_type='LIDAR',
        load_dim=4,
        use_dim=4,
        backend_args=backend_args),
    dict(type=LoadImageFromFile, backend_args=backend_args),
    dict(type=LoadAnnotations3D, with_bbox_3d=True, with_label_3d=True),
    dict(
        type=RandomResize,
        scale=[(640, 192), (2560, 768)],
        keep_ratio=True),
    dict(
        type=GlobalRotScaleTrans,
        rot_range=[-0.78539816, 0.78539816],
        scale_ratio_range=[0.95, 1.05],
        translation_std=[0.2, 0.2, 0.2]),
    dict(type=RandomFlip3D, flip_ratio_bev_horizontal=0.5),
    dict(type=PointsRangeFilter, point_cloud_range=point_cloud_range),
    dict(type=ObjectRangeFilter, point_cloud_range=point_cloud_range),
    dict(type=PointShuffle),
    dict(
        type=Pack3DDetInputs,
        keys=[
            'points', 'img', 'gt_bboxes_3d', 'gt_labels_3d', 'gt_bboxes',
            'gt_labels'
        ])
]
test_pipeline = [
    dict(
        type=LoadPointsFromFile,
        coord_type='LIDAR',
        load_dim=4,
        use_dim=4,
        backend_args=backend_args),
    dict(type=LoadImageFromFile, backend_args=backend_args),
    dict(
        type=MultiScaleFlipAug3D,
        img_scale=(1280, 384),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            # Temporary solution, fix this after refactor the augtest
            dict(type=Resize, scale=0, keep_ratio=True),
            dict(
                type=GlobalRotScaleTrans,
                rot_range=[0, 0],
                scale_ratio_range=[1., 1.],
                translation_std=[0, 0, 0]),
            dict(type=RandomFlip3D),
            dict(type=PointsRangeFilter, point_cloud_range=point_cloud_range),
        ]),
    dict(type=Pack3DDetInputs, keys=['points', 'img'])
]

modality = dict(use_lidar=True, use_camera=True)
train_dataloader = dict(
    batch_size=2,
    num_workers=2,
    sampler=dict(type=DefaultSampler, shuffle=True),
    dataset=dict(
        type=RepeatDataset,
        times=2,
        dataset=dict(
            type=KittiDataset,
            data_root=data_root,
            modality=modality,
            ann_file='kitti_infos_train.pkl',
            data_prefix=dict(
                pts='training/velodyne_reduced', img='training/image_2'),
            pipeline=train_pipeline,
            filter_empty_gt=False,
            metainfo=metainfo,
            # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
            # and box_type_3d='Depth' in sunrgbd and scannet dataset.
            box_type_3d='LiDAR',
            backend_args=backend_args)))
val_dataloader = dict(
    batch_size=1,
    num_workers=1,
    sampler=dict(type=DefaultSampler, shuffle=False),
    dataset=dict(
        type=KittiDataset,
        data_root=data_root,
        modality=modality,
        ann_file='kitti_infos_val.pkl',
        data_prefix=dict(
            pts='training/velodyne_reduced', img='training/image_2'),
        pipeline=test_pipeline,
        metainfo=metainfo,
        test_mode=True,
        box_type_3d='LiDAR',
        backend_args=backend_args))
test_dataloader = dict(
    batch_size=1,
    num_workers=1,
    sampler=dict(type=DefaultSampler, shuffle=False),
    dataset=dict(
        type=KittiDataset,
        data_root=data_root,
        ann_file='kitti_infos_val.pkl',
        modality=modality,
        data_prefix=dict(
            pts='training/velodyne_reduced', img='training/image_2'),
        pipeline=test_pipeline,
        metainfo=metainfo,
        test_mode=True,
        box_type_3d='LiDAR',
        backend_args=backend_args))

optim_wrapper.merge(
    dict(
        optimizer=dict(weight_decay=0.01),
        clip_grad=dict(max_norm=35, norm_type=2),
    ))
val_evaluator = dict(
    type=KittiMetric, ann_file='data/kitti/kitti_infos_val.pkl')
test_evaluator = val_evaluator

vis_backends = [dict(type=LocalVisBackend)]
visualizer = dict(
    type=Det3DLocalVisualizer, vis_backends=vis_backends, name='visualizer')

# You may need to download the model first if the network is unstable
load_from = 'https://download.openmmlab.com/mmdetection3d/pretrain_models/mvx_faster_rcnn_detectron2-caffe_20e_coco-pretrain_gt-sample_kitti-3-class_moderate-79.3_20200207-a4a6a3c7.pth'  # noqa
mmdet3d/configs/pgd/__init__.py
0 → 100644
View file @
2c136730
# Copyright (c) OpenMMLab. All rights reserved.
mmdet3d/configs/pgd/pgd_r101_caffe_fpn_head_gn_4xb3_4x_kitti_mono3d.py
0 → 100644
View file @
2c136730
# Copyright (c) OpenMMLab. All rights reserved.
# mmengine "new-style" config for PGD monocular 3D detection on KITTI.
# Base configs are pulled in via star-imports guarded by the always-true
# `if '_base_':` marker; existing objects are updated with `.merge()`.
if '_base_':
    from .._base_.datasets.kitti_mono3d import *
    from .._base_.models.pgd import *
    from .._base_.schedules.mmdet_schedule_1x import *
    from .._base_.default_runtime import *

from mmcv.transforms.processing import Resize
from mmengine.optim.scheduler.lr_scheduler import LinearLR, MultiStepLR

from mmdet3d.datasets.transforms.formating import Pack3DDetInputs
from mmdet3d.datasets.transforms.loading import (LoadAnnotations3D,
                                                 LoadImageFromFileMono3D)
from mmdet3d.datasets.transforms.transforms_3d import RandomFlip3D
from mmdet3d.models.data_preprocessors.data_preprocessor import \
    Det3DDataPreprocessor
from mmdet3d.models.losses.uncertain_smooth_l1_loss import \
    UncertainSmoothL1Loss
from mmdet3d.models.task_modules.coders.pgd_bbox_coder import PGDBBoxCoder

# model settings
model.merge(
    dict(
        data_preprocessor=dict(
            type=Det3DDataPreprocessor,
            mean=[103.530, 116.280, 123.675],
            std=[1.0, 1.0, 1.0],
            bgr_to_rgb=False,
            pad_size_divisor=32),
        backbone=dict(frozen_stages=0),
        neck=dict(start_level=0, num_outs=4),
        bbox_head=dict(
            num_classes=3,
            bbox_code_size=7,
            pred_attrs=False,
            pred_velo=False,
            pred_bbox2d=True,
            use_onlyreg_proj=True,
            strides=(4, 8, 16, 32),
            regress_ranges=((-1, 64), (64, 128), (128, 256), (256, 1e8)),
            # offset, depth, size, rot, kpts, bbox2d
            group_reg_dims=(2, 1, 3, 1, 16, 4),
            reg_branch=(
                (256, ),  # offset
                (256, ),  # depth
                (256, ),  # size
                (256, ),  # rot
                (256, ),  # kpts
                (256, )  # bbox2d
            ),
            centerness_branch=(256, ),
            loss_cls=dict(
                type='mmdet.FocalLoss',
                use_sigmoid=True,
                gamma=2.0,
                alpha=0.25,
                loss_weight=1.0),
            loss_bbox=dict(
                type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
            loss_dir=dict(
                type='mmdet.CrossEntropyLoss',
                use_sigmoid=False,
                loss_weight=1.0),
            loss_centerness=dict(
                type='mmdet.CrossEntropyLoss',
                use_sigmoid=True,
                loss_weight=1.0),
            use_depth_classifier=True,
            depth_branch=(256, ),
            depth_range=(0, 70),
            depth_unit=10,
            division='uniform',
            depth_bins=8,
            pred_keypoints=True,
            weight_dim=1,
            loss_depth=dict(
                type=UncertainSmoothL1Loss,
                alpha=1.0,
                beta=3.0,
                loss_weight=1.0),
            bbox_coder=dict(
                type=PGDBBoxCoder,
                base_depths=((28.01, 16.32), ),
                base_dims=((0.8, 1.73, 0.6), (1.76, 1.73, 0.6),
                           (3.9, 1.56, 1.6)),
                code_size=7)),
        # set weight 1.0 for base 7 dims (offset, depth, size, rot)
        # 0.2 for 16-dim keypoint offsets and 1.0 for 4-dim 2D distance targets
        train_cfg=dict(code_weight=[
            1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2,
            0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 0.2, 1.0, 1.0, 1.0,
            1.0
        ]),
        test_cfg=dict(
            nms_pre=100, nms_thr=0.05, score_thr=0.001, max_per_img=20)))

backend_args = None

train_pipeline = [
    dict(type=LoadImageFromFileMono3D, backend_args=backend_args),
    dict(
        type=LoadAnnotations3D,
        with_bbox=True,
        with_label=True,
        with_attr_label=False,
        with_bbox_3d=True,
        with_label_3d=True,
        with_bbox_depth=True),
    dict(type=Resize, scale=(1242, 375), keep_ratio=True),
    dict(type=RandomFlip3D, flip_ratio_bev_horizontal=0.5),
    dict(
        type=Pack3DDetInputs,
        keys=[
            'img', 'gt_bboxes', 'gt_bboxes_labels', 'gt_bboxes_3d',
            'gt_labels_3d', 'centers_2d', 'depths'
        ]),
]
test_pipeline = [
    dict(type=LoadImageFromFileMono3D, backend_args=backend_args),
    dict(type=Resize, scale_factor=1.0),
    dict(type=Pack3DDetInputs, keys=['img'])
]

train_dataloader.merge(
    dict(batch_size=3, num_workers=3, dataset=dict(pipeline=train_pipeline)))
test_dataloader.merge(dict(dataset=dict(pipeline=test_pipeline)))
val_dataloader.merge(dict(dataset=dict(pipeline=test_pipeline)))

# optimizer
optim_wrapper.merge(
    dict(
        optimizer=dict(lr=0.001),
        paramwise_cfg=dict(bias_lr_mult=2., bias_decay_mult=0.),
        clip_grad=dict(max_norm=35, norm_type=2)))

# learning rate
param_scheduler = [
    dict(
        type=LinearLR,
        start_factor=1.0 / 3,
        by_epoch=False,
        begin=0,
        end=500),
    dict(
        type=MultiStepLR,
        begin=0,
        end=48,
        by_epoch=True,
        milestones=[32, 44],
        gamma=0.1)
]

train_cfg.merge(dict(max_epochs=48, val_interval=2))
auto_scale_lr.merge(dict(base_batch_size=12))
mmdet3d/configs/votenet/__init__.py
0 → 100644
View file @
2c136730
# Copyright (c) OpenMMLab. All rights reserved.
mmdet3d/configs/votenet/votenet_8xb8_scannet_3d.py
0 → 100644
View file @
2c136730
# Copyright (c) OpenMMLab. All rights reserved.
# mmengine "new-style" config for VoteNet on ScanNet (18 classes).
# Base configs are pulled in via star-imports guarded by the always-true
# `if '_base_':` marker; existing objects are updated with `.merge()`.
if '_base_':
    from .._base_.datasets.scannet_3d import *
    from .._base_.models.votenet import *
    from .._base_.schedules.schedule_3x import *
    from .._base_.default_runtime import *

from mmengine.hooks.logger_hook import LoggerHook

from mmdet3d.models.task_modules.coders.partial_bin_based_bbox_coder import \
    PartialBinBasedBBoxCoder

# model settings
model.merge(
    dict(
        bbox_head=dict(
            num_classes=18,
            bbox_coder=dict(
                type=PartialBinBasedBBoxCoder,
                num_sizes=18,
                num_dir_bins=1,
                with_rot=False,
                # per-class mean box sizes (w, l, h) for the 18 ScanNet classes
                mean_sizes=[[0.76966727, 0.8116021, 0.92573744],
                            [1.876858, 1.8425595, 1.1931566],
                            [0.61328, 0.6148609, 0.7182701],
                            [1.3955007, 1.5121545, 0.83443564],
                            [0.97949594, 1.0675149, 0.6329687],
                            [0.531663, 0.5955577, 1.7500148],
                            [0.9624706, 0.72462326, 1.1481868],
                            [0.83221924, 1.0490936, 1.6875663],
                            [0.21132214, 0.4206159, 0.5372846],
                            [1.4440073, 1.8970833, 0.26985747],
                            [1.0294262, 1.4040797, 0.87554324],
                            [1.3766412, 0.65521795, 1.6813129],
                            [0.6650819, 0.71111923, 1.298853],
                            [0.41999173, 0.37906948, 1.7513971],
                            [0.59359556, 0.5912492, 0.73919016],
                            [0.50867593, 0.50656086, 0.30136237],
                            [1.1511526, 1.0546296, 0.49706793],
                            [0.47535285, 0.49249494, 0.5802117]]))))

default_hooks.merge(dict(logger=dict(type=LoggerHook, interval=30)))

# Default setting for scaling LR automatically
# - `enable` means enable scaling LR automatically
#   or not by default.
# - `base_batch_size` = (8 GPUs) x (8 samples per GPU).
auto_scale_lr.merge(dict(enable=False, base_batch_size=64))
setup.cfg
View file @
2c136730
...
...
@@ -14,3 +14,6 @@ default_section = THIRDPARTY
[codespell]
ignore-words-list = ans,refridgerator,crate,hist,formating,dout,wan,nd,fo,avod,AVOD,warmup
[flake8]
per-file-ignores = mmdet3d/configs/*:F401,F403,F405
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment