Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
raojy
mmdetection3d_rjy
Commits
eb1107e4
Commit
eb1107e4
authored
Apr 01, 2026
by
raojy
Browse files
fix_mmdetection
parent
7aa442d5
Pipeline
#3461
canceled with stages
Changes
569
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
2658 additions
and
0 deletions
+2658
-0
mmde/configs/_base_/datasets/semantickitti.py
mmde/configs/_base_/datasets/semantickitti.py
+224
-0
mmde/configs/_base_/datasets/sunrgbd-3d.py
mmde/configs/_base_/datasets/sunrgbd-3d.py
+126
-0
mmde/configs/_base_/datasets/waymoD3-fov-mono3d-3class.py
mmde/configs/_base_/datasets/waymoD3-fov-mono3d-3class.py
+184
-0
mmde/configs/_base_/datasets/waymoD3-mv-mono3d-3class.py
mmde/configs/_base_/datasets/waymoD3-mv-mono3d-3class.py
+191
-0
mmde/configs/_base_/datasets/waymoD5-3d-3class.py
mmde/configs/_base_/datasets/waymoD5-3d-3class.py
+178
-0
mmde/configs/_base_/datasets/waymoD5-3d-car.py
mmde/configs/_base_/datasets/waymoD5-3d-car.py
+173
-0
mmde/configs/_base_/datasets/waymoD5-fov-mono3d-3class.py
mmde/configs/_base_/datasets/waymoD5-fov-mono3d-3class.py
+163
-0
mmde/configs/_base_/datasets/waymoD5-mv-mono3d-3class.py
mmde/configs/_base_/datasets/waymoD5-mv-mono3d-3class.py
+163
-0
mmde/configs/_base_/datasets/waymoD5-mv3d-3class.py
mmde/configs/_base_/datasets/waymoD5-mv3d-3class.py
+178
-0
mmde/configs/_base_/default_runtime.py
mmde/configs/_base_/default_runtime.py
+23
-0
mmde/configs/_base_/models/3dssd.py
mmde/configs/_base_/models/3dssd.py
+76
-0
mmde/configs/_base_/models/cascade-mask-rcnn_r50_fpn.py
mmde/configs/_base_/models/cascade-mask-rcnn_r50_fpn.py
+199
-0
mmde/configs/_base_/models/centerpoint_pillar02_second_secfpn_nus.py
...s/_base_/models/centerpoint_pillar02_second_secfpn_nus.py
+89
-0
mmde/configs/_base_/models/centerpoint_voxel01_second_secfpn_nus.py
...gs/_base_/models/centerpoint_voxel01_second_secfpn_nus.py
+89
-0
mmde/configs/_base_/models/cylinder3d.py
mmde/configs/_base_/models/cylinder3d.py
+41
-0
mmde/configs/_base_/models/dgcnn.py
mmde/configs/_base_/models/dgcnn.py
+29
-0
mmde/configs/_base_/models/fcaf3d.py
mmde/configs/_base_/models/fcaf3d.py
+20
-0
mmde/configs/_base_/models/fcos3d.py
mmde/configs/_base_/models/fcos3d.py
+86
-0
mmde/configs/_base_/models/groupfree3d.py
mmde/configs/_base_/models/groupfree3d.py
+75
-0
mmde/configs/_base_/models/h3dnet.py
mmde/configs/_base_/models/h3dnet.py
+351
-0
No files found.
Too many changes to show.
To preserve performance only
569 of 569+
files are displayed.
Plain diff
Email patch
mmde/configs/_base_/datasets/semantickitti.py
0 → 100644
View file @
eb1107e4
# For SemanticKitti we usually do 19-class segmentation.
# For labels_map we follow the uniform format of MMDetection & MMSegmentation
# i.e. we consider the unlabeled class as the last one, which is different
# from the original implementation of some methods e.g. Cylinder3D.
dataset_type = 'SemanticKittiDataset'
data_root = 'data/semantickitti/'
# NOTE(review): 'trunck' and 'terrian' look like typos for 'trunk'/'terrain',
# but they are runtime label strings — confirm against the dataset metainfo
# before renaming; the seg mapping below depends on order, not spelling.
class_names = [
    'car', 'bicycle', 'motorcycle', 'truck', 'bus', 'person', 'bicyclist',
    'motorcyclist', 'road', 'parking', 'sidewalk', 'other-ground', 'building',
    'fence', 'vegetation', 'trunck', 'terrian', 'pole', 'traffic-sign'
]
# Raw SemanticKITTI label id -> train id (19 = unlabeled/ignored).
labels_map = {
    0: 19,  # "unlabeled"
    1: 19,  # "outlier" mapped to "unlabeled" --------------mapped
    10: 0,  # "car"
    11: 1,  # "bicycle"
    13: 4,  # "bus" mapped to "other-vehicle" --------------mapped
    15: 2,  # "motorcycle"
    16: 4,  # "on-rails" mapped to "other-vehicle" ---------mapped
    18: 3,  # "truck"
    20: 4,  # "other-vehicle"
    30: 5,  # "person"
    31: 6,  # "bicyclist"
    32: 7,  # "motorcyclist"
    40: 8,  # "road"
    44: 9,  # "parking"
    48: 10,  # "sidewalk"
    49: 11,  # "other-ground"
    50: 12,  # "building"
    51: 13,  # "fence"
    52: 19,  # "other-structure" mapped to "unlabeled" ------mapped
    60: 8,  # "lane-marking" to "road" ---------------------mapped
    70: 14,  # "vegetation"
    71: 15,  # "trunk"
    72: 16,  # "terrain"
    80: 17,  # "pole"
    81: 18,  # "traffic-sign"
    99: 19,  # "other-object" to "unlabeled" ----------------mapped
    252: 0,  # "moving-car" to "car" ------------------------mapped
    253: 6,  # "moving-bicyclist" to "bicyclist" ------------mapped
    254: 5,  # "moving-person" to "person" ------------------mapped
    255: 7,  # "moving-motorcyclist" to "motorcyclist" ------mapped
    256: 4,  # "moving-on-rails" mapped to "other-vehicle" --mapped
    257: 4,  # "moving-bus" mapped to "other-vehicle" -------mapped
    258: 3,  # "moving-truck" to "truck" --------------------mapped
    259: 4  # "moving-other-vehicle" to "other-vehicle" -----mapped
}
metainfo = dict(
    classes=class_names, seg_label_mapping=labels_map, max_label=259)
input_modality = dict(use_lidar=True, use_camera=False)

# Example to use different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer from prefix (not support LMDB and Memcache yet)

# data_root = 's3://openmmlab/datasets/detection3d/semantickitti/'

# Method 2: Use backend_args, file_client_args in versions before 1.1.0
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection3d/',
#         'data/': 's3://openmmlab/datasets/detection3d/'
#     }))
backend_args = None

train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=4,
        use_dim=4,
        backend_args=backend_args),
    dict(
        type='LoadAnnotations3D',
        with_bbox_3d=False,
        with_label_3d=False,
        with_seg_3d=True,
        seg_3d_dtype='np.int32',
        # semantic labels live in the lower 16 bits of the raw label file
        seg_offset=2**16,
        dataset_type='semantickitti',
        backend_args=backend_args),
    dict(type='PointSegClassMapping'),
    dict(
        type='RandomFlip3D',
        sync_2d=False,
        flip_ratio_bev_horizontal=0.5,
        flip_ratio_bev_vertical=0.5),
    dict(
        type='GlobalRotScaleTrans',
        rot_range=[-0.78539816, 0.78539816],
        scale_ratio_range=[0.95, 1.05],
        translation_std=[0.1, 0.1, 0.1],
    ),
    dict(type='Pack3DDetInputs', keys=['points', 'pts_semantic_mask'])
]
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=4,
        use_dim=4,
        backend_args=backend_args),
    dict(
        type='LoadAnnotations3D',
        with_bbox_3d=False,
        with_label_3d=False,
        with_seg_3d=True,
        seg_3d_dtype='np.int32',
        seg_offset=2**16,
        dataset_type='semantickitti',
        backend_args=backend_args),
    dict(type='PointSegClassMapping'),
    dict(type='Pack3DDetInputs', keys=['points', 'pts_semantic_mask'])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=4,
        use_dim=4,
        backend_args=backend_args),
    dict(type='Pack3DDetInputs', keys=['points'])
]
tta_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=4,
        use_dim=4,
        backend_args=backend_args),
    dict(
        type='LoadAnnotations3D',
        with_bbox_3d=False,
        with_label_3d=False,
        with_seg_3d=True,
        seg_3d_dtype='np.int32',
        seg_offset=2**16,
        dataset_type='semantickitti',
        backend_args=backend_args),
    dict(type='PointSegClassMapping'),
    dict(
        type='TestTimeAug',
        transforms=[
            # 4 flip combinations x 9 rot/scale combinations x packing
            [
                dict(
                    type='RandomFlip3D',
                    sync_2d=False,
                    flip_ratio_bev_horizontal=0.,
                    flip_ratio_bev_vertical=0.),
                dict(
                    type='RandomFlip3D',
                    sync_2d=False,
                    flip_ratio_bev_horizontal=0.,
                    flip_ratio_bev_vertical=1.),
                dict(
                    type='RandomFlip3D',
                    sync_2d=False,
                    flip_ratio_bev_horizontal=1.,
                    flip_ratio_bev_vertical=0.),
                dict(
                    type='RandomFlip3D',
                    sync_2d=False,
                    flip_ratio_bev_horizontal=1.,
                    flip_ratio_bev_vertical=1.)
            ],
            [
                dict(
                    type='GlobalRotScaleTrans',
                    rot_range=[pcd_rotate_range, pcd_rotate_range],
                    scale_ratio_range=[pcd_scale_factor, pcd_scale_factor],
                    translation_std=[0, 0, 0])
                for pcd_rotate_range in [-0.78539816, 0.0, 0.78539816]
                for pcd_scale_factor in [0.95, 1.0, 1.05]
            ],
            [dict(type='Pack3DDetInputs', keys=['points'])]
        ])
]

train_dataloader = dict(
    batch_size=2,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='semantickitti_infos_train.pkl',
        pipeline=train_pipeline,
        metainfo=metainfo,
        modality=input_modality,
        ignore_index=19,
        backend_args=backend_args))
test_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='semantickitti_infos_val.pkl',
        pipeline=test_pipeline,
        metainfo=metainfo,
        modality=input_modality,
        ignore_index=19,
        test_mode=True,
        backend_args=backend_args))
# validation reuses the test split/settings
val_dataloader = test_dataloader

val_evaluator = dict(type='SegMetric')
test_evaluator = val_evaluator

vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
tta_model = dict(type='Seg3DTTAModel')
mmde/configs/_base_/datasets/sunrgbd-3d.py
0 → 100644
View file @
eb1107e4
# Dataset settings for SUN RGB-D 10-class indoor 3D detection.
dataset_type = 'SUNRGBDDataset'
data_root = 'data/sunrgbd/'
class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser',
               'night_stand', 'bookshelf', 'bathtub')
metainfo = dict(classes=class_names)

# Example to use different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer from prefix (not support LMDB and Memcache yet)

# data_root = 's3://openmmlab/datasets/detection3d/sunrgbd/'

# Method 2: Use backend_args, file_client_args in versions before 1.1.0
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection3d/',
#         'data/': 's3://openmmlab/datasets/detection3d/'
#     }))
backend_args = None

train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='DEPTH',
        shift_height=True,
        load_dim=6,
        use_dim=[0, 1, 2],
        backend_args=backend_args),
    dict(type='LoadAnnotations3D'),
    dict(
        type='RandomFlip3D',
        sync_2d=False,
        flip_ratio_bev_horizontal=0.5,
    ),
    dict(
        type='GlobalRotScaleTrans',
        rot_range=[-0.523599, 0.523599],
        scale_ratio_range=[0.85, 1.15],
        shift_height=True),
    dict(type='PointSample', num_points=20000),
    dict(
        type='Pack3DDetInputs',
        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='DEPTH',
        shift_height=True,
        load_dim=6,
        use_dim=[0, 1, 2],
        backend_args=backend_args),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='GlobalRotScaleTrans',
                rot_range=[0, 0],
                scale_ratio_range=[1., 1.],
                translation_std=[0, 0, 0]),
            dict(
                type='RandomFlip3D',
                sync_2d=False,
                flip_ratio_bev_horizontal=0.5,
            ),
            dict(type='PointSample', num_points=20000)
        ]),
    dict(type='Pack3DDetInputs', keys=['points'])
]

train_dataloader = dict(
    batch_size=16,
    num_workers=4,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type='RepeatDataset',
        times=5,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file='sunrgbd_infos_train.pkl',
            pipeline=train_pipeline,
            filter_empty_gt=False,
            metainfo=metainfo,
            # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
            # and box_type_3d='Depth' in sunrgbd and scannet dataset.
            box_type_3d='Depth',
            backend_args=backend_args)))
val_dataloader = dict(
    batch_size=1,
    num_workers=1,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='sunrgbd_infos_val.pkl',
        pipeline=test_pipeline,
        metainfo=metainfo,
        test_mode=True,
        box_type_3d='Depth',
        backend_args=backend_args))
test_dataloader = dict(
    batch_size=1,
    num_workers=1,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='sunrgbd_infos_val.pkl',
        pipeline=test_pipeline,
        metainfo=metainfo,
        test_mode=True,
        box_type_3d='Depth',
        backend_args=backend_args))

val_evaluator = dict(type='IndoorMetric')
test_evaluator = val_evaluator

vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
mmde/configs/_base_/datasets/waymoD3-fov-mono3d-3class.py
0 → 100644
View file @
eb1107e4
# dataset settings
# D3 in the config name means the whole dataset is divided into 3 folds
# We only use one fold for efficient experiments
dataset_type = 'WaymoDataset'
data_root = 'data/waymo/kitti_format/'
class_names = ['Pedestrian', 'Cyclist', 'Car']
metainfo = dict(classes=class_names)
input_modality = dict(use_lidar=False, use_camera=True)

# Example to use different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer from prefix (not support LMDB and Memcache yet)

# data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/'

# Method 2: Use backend_args, file_client_args in versions before 1.1.0
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection3d/',
#         'data/': 's3://openmmlab/datasets/detection3d/'
#     }))
backend_args = None

train_pipeline = [
    dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
    dict(
        type='LoadAnnotations3D',
        with_bbox=True,
        with_label=True,
        with_attr_label=False,
        with_bbox_3d=True,
        with_label_3d=True,
        with_bbox_depth=True),
    # base shape (1248, 832), scale (0.95, 1.05)
    dict(
        type='RandomResize3D',
        scale=(1248, 832),
        ratio_range=(0.95, 1.05),
        # ratio_range=(1., 1.),
        interpolation='nearest',
        keep_ratio=True,
    ),
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
    dict(
        type='Pack3DDetInputs',
        keys=[
            'img', 'gt_bboxes', 'gt_bboxes_labels', 'gt_bboxes_3d',
            'gt_labels_3d', 'centers_2d', 'depths'
        ]),
]
test_pipeline = [
    dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
    dict(
        type='RandomResize3D',
        scale=(1248, 832),
        ratio_range=(1., 1.),
        interpolation='nearest',
        keep_ratio=True),
    dict(
        type='Pack3DDetInputs',
        keys=['img'],
        meta_keys=[
            'box_type_3d', 'img_shape', 'cam2img', 'scale_factor',
            'sample_idx', 'context_name', 'timestamp', 'lidar2cam'
        ]),
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
    dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
    dict(
        type='RandomResize3D',
        scale=(1248, 832),
        ratio_range=(1., 1.),
        interpolation='nearest',
        keep_ratio=True),
    dict(
        type='Pack3DDetInputs',
        keys=['img'],
        meta_keys=[
            'box_type_3d', 'img_shape', 'cam2img', 'scale_factor',
            'sample_idx', 'context_name', 'timestamp', 'lidar2cam'
        ]),
]

train_dataloader = dict(
    batch_size=3,
    num_workers=3,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='waymo_infos_train.pkl',
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_LEFT='training/image_1',
            CAM_FRONT_RIGHT='training/image_2',
            CAM_SIDE_LEFT='training/image_3',
            CAM_SIDE_RIGHT='training/image_4'),
        pipeline=train_pipeline,
        modality=input_modality,
        test_mode=False,
        metainfo=metainfo,
        cam_sync_instances=True,
        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
        # and box_type_3d='Depth' in sunrgbd and scannet dataset.
        box_type_3d='Camera',
        load_type='fov_image_based',
        # load one frame every three frames
        load_interval=3,
        backend_args=backend_args))
val_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_LEFT='training/image_1',
            CAM_FRONT_RIGHT='training/image_2',
            CAM_SIDE_LEFT='training/image_3',
            CAM_SIDE_RIGHT='training/image_4'),
        ann_file='waymo_infos_val.pkl',
        pipeline=eval_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        cam_sync_instances=True,
        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
        # and box_type_3d='Depth' in sunrgbd and scannet dataset.
        box_type_3d='Camera',
        load_type='fov_image_based',
        load_eval_anns=False,
        backend_args=backend_args))
test_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_LEFT='training/image_1',
            CAM_FRONT_RIGHT='training/image_2',
            CAM_SIDE_LEFT='training/image_3',
            CAM_SIDE_RIGHT='training/image_4'),
        ann_file='waymo_infos_val.pkl',
        pipeline=eval_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        cam_sync_instances=True,
        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
        # and box_type_3d='Depth' in sunrgbd and scannet dataset.
        box_type_3d='Camera',
        load_type='fov_image_based',
        backend_args=backend_args))

val_evaluator = dict(
    type='WaymoMetric',
    waymo_bin_file='./data/waymo/waymo_format/fov_gt.bin',
    metric='LET_mAP',
    load_type='fov_image_based',
    result_prefix='./pgd_fov_pred')
test_evaluator = val_evaluator

vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
mmde/configs/_base_/datasets/waymoD3-mv-mono3d-3class.py
0 → 100644
View file @
eb1107e4
# dataset settings
# D3 in the config name means the whole dataset is divided into 3 folds
# We only use one fold for efficient experiments
dataset_type = 'WaymoDataset'
data_root = 'data/waymo/kitti_format/'
class_names = ['Pedestrian', 'Cyclist', 'Car']
metainfo = dict(classes=class_names)
input_modality = dict(use_lidar=False, use_camera=True)

# Example to use different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer from prefix (not support LMDB and Memcache yet)

# data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/'

# Method 2: Use backend_args, file_client_args in versions before 1.1.0
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection3d/',
#         'data/': 's3://openmmlab/datasets/detection3d/'
#     }))
backend_args = None

train_pipeline = [
    dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
    dict(
        type='LoadAnnotations3D',
        with_bbox=True,
        with_label=True,
        with_attr_label=False,
        with_bbox_3d=True,
        with_label_3d=True,
        with_bbox_depth=True),
    # base shape (1248, 832), scale (0.95, 1.05)
    dict(
        type='RandomResize3D',
        scale=(1248, 832),
        # ratio_range=(1., 1.),
        ratio_range=(0.95, 1.05),
        interpolation='nearest',
        keep_ratio=True,
    ),
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
    dict(
        type='Pack3DDetInputs',
        keys=[
            'img', 'gt_bboxes', 'gt_bboxes_labels', 'gt_bboxes_3d',
            'gt_labels_3d', 'centers_2d', 'depths'
        ]),
]
test_pipeline = [
    dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
    dict(
        type='Resize3D',
        scale_factor=0.65,
        interpolation='nearest',
        keep_ratio=True),
    dict(
        type='Pack3DDetInputs',
        keys=['img'],
        meta_keys=[
            'box_type_3d', 'img_shape', 'cam2img', 'scale_factor',
            'sample_idx', 'context_name', 'timestamp', 'lidar2cam'
        ]),
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
    dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
    dict(
        type='Resize3D',
        scale_factor=0.65,
        interpolation='nearest',
        keep_ratio=True),
    dict(
        type='Pack3DDetInputs',
        keys=['img'],
        meta_keys=[
            'box_type_3d', 'img_shape', 'cam2img', 'scale_factor',
            'sample_idx', 'context_name', 'timestamp', 'lidar2cam'
        ]),
]

train_dataloader = dict(
    batch_size=3,
    num_workers=3,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='waymo_infos_train.pkl',
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_LEFT='training/image_1',
            CAM_FRONT_RIGHT='training/image_2',
            CAM_SIDE_LEFT='training/image_3',
            CAM_SIDE_RIGHT='training/image_4'),
        pipeline=train_pipeline,
        modality=input_modality,
        test_mode=False,
        metainfo=metainfo,
        cam_sync_instances=True,
        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
        # and box_type_3d='Depth' in sunrgbd and scannet dataset.
        box_type_3d='Camera',
        load_type='mv_image_based',
        # load one frame every three frames
        load_interval=3,
        backend_args=backend_args))
val_dataloader = dict(
    batch_size=1,
    num_workers=0,
    persistent_workers=False,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_LEFT='training/image_1',
            CAM_FRONT_RIGHT='training/image_2',
            CAM_SIDE_LEFT='training/image_3',
            CAM_SIDE_RIGHT='training/image_4'),
        ann_file='waymo_infos_val.pkl',
        pipeline=eval_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        cam_sync_instances=True,
        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
        # and box_type_3d='Depth' in sunrgbd and scannet dataset.
        box_type_3d='Camera',
        load_type='mv_image_based',
        # load_eval_anns=False,
        backend_args=backend_args))
test_dataloader = dict(
    batch_size=1,
    num_workers=0,
    persistent_workers=False,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_LEFT='training/image_1',
            CAM_FRONT_RIGHT='training/image_2',
            CAM_SIDE_LEFT='training/image_3',
            CAM_SIDE_RIGHT='training/image_4'),
        ann_file='waymo_infos_val.pkl',
        pipeline=eval_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        cam_sync_instances=True,
        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
        # and box_type_3d='Depth' in sunrgbd and scannet dataset.
        box_type_3d='Camera',
        load_type='mv_image_based',
        load_eval_anns=False,
        backend_args=backend_args))

val_evaluator = dict(
    type='WaymoMetric',
    waymo_bin_file='./data/waymo/waymo_format/cam_gt.bin',
    metric='LET_mAP',
    load_type='mv_image_based',
    result_prefix='./pgd_mv_pred',
    nms_cfg=dict(
        use_rotate_nms=True,
        nms_across_levels=False,
        nms_pre=500,
        nms_thr=0.05,
        score_thr=0.001,
        min_bbox_size=0,
        max_per_frame=100))
test_evaluator = val_evaluator

vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
mmde/configs/_base_/datasets/waymoD5-3d-3class.py
0 → 100644
View file @
eb1107e4
# dataset settings
# D5 in the config name means the whole dataset is divided into 5 folds
# We only use one fold for efficient experiments
dataset_type = 'WaymoDataset'
# data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/'
data_root = 'data/waymo/kitti_format/'

# Example to use different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer from prefix (not support LMDB and Memcache yet)

# data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/'

# Method 2: Use backend_args, file_client_args in versions before 1.1.0
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection3d/',
#         'data/': 's3://openmmlab/datasets/detection3d/'
#     }))
backend_args = None

class_names = ['Car', 'Pedestrian', 'Cyclist']
metainfo = dict(classes=class_names)
point_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4]
input_modality = dict(use_lidar=True, use_camera=False)

# GT-database sampler used for copy-paste augmentation
# (ObjectSample is currently commented out in train_pipeline below).
db_sampler = dict(
    data_root=data_root,
    info_path=data_root + 'waymo_dbinfos_train.pkl',
    rate=1.0,
    prepare=dict(
        filter_by_difficulty=[-1],
        filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)),
    classes=class_names,
    sample_groups=dict(Car=15, Pedestrian=10, Cyclist=10),
    points_loader=dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=6,
        use_dim=[0, 1, 2, 3, 4],
        backend_args=backend_args),
    backend_args=backend_args)

train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=6,
        use_dim=5,
        backend_args=backend_args),
    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
    # dict(type='ObjectSample', db_sampler=db_sampler),
    dict(
        type='RandomFlip3D',
        sync_2d=False,
        flip_ratio_bev_horizontal=0.5,
        flip_ratio_bev_vertical=0.5),
    dict(
        type='GlobalRotScaleTrans',
        rot_range=[-0.78539816, 0.78539816],
        scale_ratio_range=[0.95, 1.05]),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='PointShuffle'),
    dict(
        type='Pack3DDetInputs',
        keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=6,
        use_dim=5,
        backend_args=backend_args),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='GlobalRotScaleTrans',
                rot_range=[0, 0],
                scale_ratio_range=[1., 1.],
                translation_std=[0, 0, 0]),
            dict(type='RandomFlip3D'),
            dict(
                type='PointsRangeFilter', point_cloud_range=point_cloud_range)
        ]),
    dict(
        type='Pack3DDetInputs',
        keys=['points'],
        meta_keys=['box_type_3d', 'sample_idx', 'context_name', 'timestamp'])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=6,
        use_dim=5,
        backend_args=backend_args),
    dict(
        type='Pack3DDetInputs',
        keys=['points'],
        meta_keys=['box_type_3d', 'sample_idx', 'context_name', 'timestamp'])
]

train_dataloader = dict(
    batch_size=2,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type='RepeatDataset',
        times=2,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file='waymo_infos_train.pkl',
            data_prefix=dict(
                pts='training/velodyne', sweeps='training/velodyne'),
            pipeline=train_pipeline,
            modality=input_modality,
            test_mode=False,
            metainfo=metainfo,
            # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
            # and box_type_3d='Depth' in sunrgbd and scannet dataset.
            box_type_3d='LiDAR',
            # load one frame every five frames
            load_interval=5,
            backend_args=backend_args)))
val_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(pts='training/velodyne', sweeps='training/velodyne'),
        ann_file='waymo_infos_val.pkl',
        pipeline=eval_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        box_type_3d='LiDAR',
        backend_args=backend_args))
test_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(pts='training/velodyne', sweeps='training/velodyne'),
        ann_file='waymo_infos_val.pkl',
        pipeline=eval_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        box_type_3d='LiDAR',
        backend_args=backend_args))

val_evaluator = dict(
    type='WaymoMetric', waymo_bin_file='./data/waymo/waymo_format/gt.bin')
test_evaluator = val_evaluator

vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
mmde/configs/_base_/datasets/waymoD5-3d-car.py
0 → 100644
View file @
eb1107e4
# dataset settings
# D5 in the config name means the whole dataset is divided into 5 folds
# We only use one fold for efficient experiments
dataset_type
=
'WaymoDataset'
data_root
=
'data/waymo/kitti_format/'
# Example to use different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer from prefix (not support LMDB and Memcache yet)
# data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/'
# Method 2: Use backend_args, file_client_args in versions before 1.1.0
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection3d/',
#         'data/': 's3://openmmlab/datasets/detection3d/'
#     }))
backend_args = None

# Single-class (Car) Waymo LiDAR detection setting.
class_names = ['Car']
metainfo = dict(classes=class_names)
point_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4]
input_modality = dict(use_lidar=True, use_camera=False)

# GT-database sampler used by the ObjectSample augmentation below.
db_sampler = dict(
    data_root=data_root,
    info_path=data_root + 'waymo_dbinfos_train.pkl',
    rate=1.0,
    prepare=dict(
        filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)),
    classes=class_names,
    sample_groups=dict(Car=15),
    points_loader=dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=6,
        use_dim=[0, 1, 2, 3, 4],
        backend_args=backend_args),
    backend_args=backend_args)

train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=6,
        use_dim=5,
        backend_args=backend_args),
    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
    dict(type='ObjectSample', db_sampler=db_sampler),
    dict(
        type='RandomFlip3D',
        sync_2d=False,
        flip_ratio_bev_horizontal=0.5,
        flip_ratio_bev_vertical=0.5),
    dict(
        type='GlobalRotScaleTrans',
        rot_range=[-0.78539816, 0.78539816],
        scale_ratio_range=[0.95, 1.05]),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='PointShuffle'),
    dict(
        type='Pack3DDetInputs',
        keys=['points'],
        meta_keys=['box_type_3d', 'sample_idx', 'context_name', 'timestamp'])
]
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=6,
        use_dim=5,
        backend_args=backend_args),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='GlobalRotScaleTrans',
                rot_range=[0, 0],
                scale_ratio_range=[1., 1.],
                translation_std=[0, 0, 0]),
            dict(type='RandomFlip3D'),
            dict(
                type='PointsRangeFilter',
                point_cloud_range=point_cloud_range)
        ]),
    dict(
        type='Pack3DDetInputs',
        keys=['points'],
        meta_keys=['box_type_3d', 'sample_idx', 'context_name', 'timestamp'])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=6,
        use_dim=5,
        backend_args=backend_args),
    dict(type='Pack3DDetInputs', keys=['points']),
]
train_dataloader = dict(
    batch_size=2,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type='RepeatDataset',
        times=2,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file='waymo_infos_train.pkl',
            data_prefix=dict(
                pts='training/velodyne', sweeps='training/velodyne'),
            pipeline=train_pipeline,
            modality=input_modality,
            test_mode=False,
            metainfo=metainfo,
            # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
            # and box_type_3d='Depth' in sunrgbd and scannet dataset.
            box_type_3d='LiDAR',
            # load one frame every five frames
            load_interval=5,
            backend_args=backend_args)))
val_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            pts='training/velodyne', sweeps='training/velodyne'),
        ann_file='waymo_infos_val.pkl',
        pipeline=eval_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        box_type_3d='LiDAR',
        backend_args=backend_args))
test_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            pts='training/velodyne', sweeps='training/velodyne'),
        ann_file='waymo_infos_val.pkl',
        pipeline=eval_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        box_type_3d='LiDAR',
        backend_args=backend_args))
val_evaluator = dict(
    type='WaymoMetric', waymo_bin_file='./data/waymo/waymo_format/gt.bin')
test_evaluator = val_evaluator

vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
    type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
mmde/configs/_base_/datasets/waymoD5-fov-mono3d-3class.py
0 → 100644
View file @
eb1107e4
# dataset settings
# D3 in the config name means the whole dataset is divided into 3 folds
# We only use one fold for efficient experiments
dataset_type = 'WaymoDataset'
data_root = 'data/waymo/kitti_format/'
class_names = ['Car', 'Pedestrian', 'Cyclist']
input_modality = dict(use_lidar=False, use_camera=True)
# Example to use different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer from prefix (not support LMDB and Memcache yet)
# data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/'
# Method 2: Use backend_args, file_client_args in versions before 1.1.0
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection3d/',
#         'data/': 's3://openmmlab/datasets/detection3d/'
#     }))
backend_args = None

train_pipeline = [
    dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
    dict(
        type='LoadAnnotations3D',
        with_bbox=True,
        with_label=True,
        with_attr_label=False,
        with_bbox_3d=True,
        with_label_3d=True,
        with_bbox_depth=True),
    # base shape (1248, 832), scale (0.95, 1.05)
    # NOTE(review): the base width 1284 below disagrees with the 1248 in the
    # comment above and with test_pipeline — looks like a typo carried from
    # upstream; confirm against trained checkpoints before changing it.
    dict(
        type='RandomResize3D',
        scale=(1284, 832),
        ratio_range=(0.95, 1.05),
        keep_ratio=True,
    ),
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
    dict(
        type='Pack3DDetInputs',
        keys=[
            'img', 'gt_bboxes', 'gt_bboxes_labels', 'gt_bboxes_3d',
            'gt_labels_3d', 'centers_2d', 'depths'
        ]),
]
test_pipeline = [
    dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
    dict(
        type='RandomResize3D',
        scale=(1248, 832),
        ratio_range=(1., 1.),
        keep_ratio=True),
    dict(type='Pack3DDetInputs', keys=['img']),
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
    dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
    dict(
        type='RandomResize3D',
        scale=(1248, 832),
        ratio_range=(1., 1.),
        keep_ratio=True),
    dict(type='Pack3DDetInputs', keys=['img']),
]
# Use the lower-case 'classes' key expected by mmengine's BaseDataset,
# matching the sibling waymo mono3d configs (was legacy 'CLASSES').
metainfo = dict(classes=class_names)

train_dataloader = dict(
    batch_size=3,
    num_workers=3,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='waymo_infos_train.pkl',
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_LEFT='training/image_1',
            CAM_FRONT_RIGHT='training/image_2',
            CAM_SIDE_LEFT='training/image_3',
            CAM_SIDE_RIGHT='training/image_4'),
        pipeline=train_pipeline,
        modality=input_modality,
        test_mode=False,
        metainfo=metainfo,
        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
        # and box_type_3d='Depth' in sunrgbd and scannet dataset.
        box_type_3d='Camera',
        load_type='fov_image_based',
        # load one frame every five frames
        load_interval=5,
        backend_args=backend_args))
val_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_LEFT='training/image_1',
            CAM_FRONT_RIGHT='training/image_2',
            CAM_SIDE_LEFT='training/image_3',
            CAM_SIDE_RIGHT='training/image_4'),
        ann_file='waymo_infos_val.pkl',
        pipeline=eval_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
        # and box_type_3d='Depth' in sunrgbd and scannet dataset.
        box_type_3d='Camera',
        load_type='fov_image_based',
        backend_args=backend_args))
test_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_LEFT='training/image_1',
            CAM_FRONT_RIGHT='training/image_2',
            CAM_SIDE_LEFT='training/image_3',
            CAM_SIDE_RIGHT='training/image_4'),
        ann_file='waymo_infos_val.pkl',
        pipeline=eval_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
        # and box_type_3d='Depth' in sunrgbd and scannet dataset.
        box_type_3d='Camera',
        load_type='fov_image_based',
        backend_args=backend_args))
val_evaluator = dict(
    type='WaymoMetric',
    ann_file='./data/waymo/kitti_format/waymo_infos_val.pkl',
    waymo_bin_file='./data/waymo/waymo_format/fov_gt.bin',
    data_root='./data/waymo/waymo_format',
    metric='LET_mAP',
    load_type='fov_image_based',
    backend_args=backend_args)
test_evaluator = val_evaluator
mmde/configs/_base_/datasets/waymoD5-mv-mono3d-3class.py
0 → 100644
View file @
eb1107e4
# dataset settings
# D3 in the config name means the whole dataset is divided into 3 folds
# We only use one fold for efficient experiments
dataset_type = 'WaymoDataset'
data_root = 'data/waymo/kitti_format/'
class_names = ['Car', 'Pedestrian', 'Cyclist']
input_modality = dict(use_lidar=False, use_camera=True)
# Example to use different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer from prefix (not support LMDB and Memcache yet)
# data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/'
# Method 2: Use backend_args, file_client_args in versions before 1.1.0
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection3d/',
#         'data/': 's3://openmmlab/datasets/detection3d/'
#     }))
backend_args = None

train_pipeline = [
    dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
    dict(
        type='LoadAnnotations3D',
        with_bbox=True,
        with_label=True,
        with_attr_label=False,
        with_bbox_3d=True,
        with_label_3d=True,
        with_bbox_depth=True),
    # base shape (1248, 832), scale (0.95, 1.05)
    dict(
        type='RandomResize3D',
        scale=(1284, 832),
        ratio_range=(0.95, 1.05),
        keep_ratio=True,
    ),
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
    dict(
        type='Pack3DDetInputs',
        keys=[
            'img', 'gt_bboxes', 'gt_bboxes_labels', 'gt_bboxes_3d',
            'gt_labels_3d', 'centers_2d', 'depths'
        ]),
]
test_pipeline = [
    dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
    dict(
        type='RandomResize3D',
        scale=(1248, 832),
        ratio_range=(1., 1.),
        keep_ratio=True),
    dict(type='Pack3DDetInputs', keys=['img']),
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
    dict(type='LoadImageFromFileMono3D', backend_args=backend_args),
    dict(
        type='RandomResize3D',
        scale=(1248, 832),
        ratio_range=(1., 1.),
        keep_ratio=True),
    dict(type='Pack3DDetInputs', keys=['img']),
]
metainfo = dict(classes=class_names)

# Per-camera prefixes shared by all three dataloaders below.
train_dataloader = dict(
    batch_size=3,
    num_workers=3,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='waymo_infos_train.pkl',
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_LEFT='training/image_1',
            CAM_FRONT_RIGHT='training/image_2',
            CAM_SIDE_LEFT='training/image_3',
            CAM_SIDE_RIGHT='training/image_4'),
        pipeline=train_pipeline,
        modality=input_modality,
        test_mode=False,
        metainfo=metainfo,
        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
        # and box_type_3d='Depth' in sunrgbd and scannet dataset.
        box_type_3d='Camera',
        load_type='mv_image_based',
        # load one frame every five frames
        load_interval=5,
        backend_args=backend_args))
val_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_LEFT='training/image_1',
            CAM_FRONT_RIGHT='training/image_2',
            CAM_SIDE_LEFT='training/image_3',
            CAM_SIDE_RIGHT='training/image_4'),
        ann_file='waymo_infos_val.pkl',
        pipeline=eval_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
        # and box_type_3d='Depth' in sunrgbd and scannet dataset.
        box_type_3d='Camera',
        load_type='mv_image_based',
        backend_args=backend_args))
test_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_LEFT='training/image_1',
            CAM_FRONT_RIGHT='training/image_2',
            CAM_SIDE_LEFT='training/image_3',
            CAM_SIDE_RIGHT='training/image_4'),
        ann_file='waymo_infos_val.pkl',
        pipeline=eval_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
        # and box_type_3d='Depth' in sunrgbd and scannet dataset.
        box_type_3d='Camera',
        load_type='mv_image_based',
        backend_args=backend_args))
val_evaluator = dict(
    type='WaymoMetric',
    ann_file='./data/waymo/kitti_format/waymo_infos_val.pkl',
    waymo_bin_file='./data/waymo/waymo_format/cam_gt.bin',
    data_root='./data/waymo/waymo_format',
    metric='LET_mAP',
    load_type='mv_image_based',
    backend_args=backend_args)
test_evaluator = val_evaluator
mmde/configs/_base_/datasets/waymoD5-mv3d-3class.py
0 → 100644
View file @
eb1107e4
# dataset settings
# D5 in the config name means the whole dataset is divided into 5 folds
# We only use one fold for efficient experiments
dataset_type = 'WaymoDataset'
data_root = 'data/waymo/kitti_format/'
# Example to use different file client
# Method 1: simply set the data root and let the file I/O module
# automatically infer from prefix (not support LMDB and Memcache yet)
# data_root = 's3://openmmlab/datasets/detection3d/waymo/kitti_format/'
# Method 2: Use backend_args, file_client_args in versions before 1.1.0
# backend_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/': 's3://openmmlab/datasets/detection3d/',
#         'data/': 's3://openmmlab/datasets/detection3d/'
#     }))
backend_args = None

class_names = ['Pedestrian', 'Cyclist', 'Car']
input_modality = dict(use_lidar=False, use_camera=True)
point_cloud_range = [-35.0, -75.0, -2, 75.0, 75.0, 4]

# Per-view transforms applied inside MultiViewWrapper during training.
train_transforms = [
    dict(type='PhotoMetricDistortion3D'),
    dict(
        type='RandomResize3D',
        scale=(1248, 832),
        ratio_range=(0.95, 1.05),
        keep_ratio=True),
    dict(type='RandomCrop3D', crop_size=(1080, 720)),
    dict(
        type='RandomFlip3D',
        flip_ratio_bev_horizontal=0.5,
        flip_box3d=False),
]

train_pipeline = [
    dict(
        type='LoadMultiViewImageFromFiles',
        to_float32=True,
        backend_args=backend_args),
    dict(
        type='LoadAnnotations3D',
        with_bbox=True,
        with_label=True,
        with_attr_label=False,
        with_bbox_3d=True,
        with_label_3d=True,
        with_bbox_depth=True),
    dict(type='MultiViewWrapper', transforms=train_transforms),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectNameFilter', classes=class_names),
    dict(
        type='Pack3DDetInputs',
        keys=[
            'img',
            'gt_bboxes_3d',
            'gt_labels_3d',
        ]),
]
test_transforms = [
    dict(
        type='RandomResize3D',
        scale=(1248, 832),
        ratio_range=(1., 1.),
        keep_ratio=True)
]
test_pipeline = [
    dict(
        type='LoadMultiViewImageFromFiles',
        to_float32=True,
        backend_args=backend_args),
    dict(type='MultiViewWrapper', transforms=test_transforms),
    dict(
        type='Pack3DDetInputs',
        keys=['img'],
        meta_keys=[
            'box_type_3d', 'img_shape', 'ori_cam2img', 'scale_factor',
            'sample_idx', 'context_name', 'timestamp', 'lidar2cam',
            'num_ref_frames', 'num_views'
        ])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
    dict(
        type='LoadMultiViewImageFromFiles',
        to_float32=True,
        backend_args=backend_args),
    dict(type='MultiViewWrapper', transforms=test_transforms),
    dict(
        type='Pack3DDetInputs',
        keys=['img'],
        meta_keys=[
            'box_type_3d', 'img_shape', 'ori_cam2img', 'scale_factor',
            'sample_idx', 'context_name', 'timestamp', 'lidar2cam',
            'num_ref_frames', 'num_views'
        ])
]
metainfo = dict(classes=class_names)

train_dataloader = dict(
    batch_size=2,
    num_workers=2,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='waymo_infos_train.pkl',
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_LEFT='training/image_1',
            CAM_FRONT_RIGHT='training/image_2',
            CAM_SIDE_LEFT='training/image_3',
            CAM_SIDE_RIGHT='training/image_4'),
        pipeline=train_pipeline,
        modality=input_modality,
        test_mode=False,
        cam_sync_instances=True,
        metainfo=metainfo,
        # 'LiDAR' casing normalized to match the other waymo configs
        # (was 'Lidar'; get_box_type lower-cases it, so behavior is unchanged).
        box_type_3d='LiDAR',
        load_interval=5,
        backend_args=backend_args))
val_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='waymo_infos_val.pkl',
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_LEFT='training/image_1',
            CAM_FRONT_RIGHT='training/image_2',
            CAM_SIDE_LEFT='training/image_3',
            CAM_SIDE_RIGHT='training/image_4'),
        pipeline=eval_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        box_type_3d='LiDAR',
        backend_args=backend_args))
test_dataloader = dict(
    batch_size=1,
    num_workers=1,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file='waymo_infos_val.pkl',
        data_prefix=dict(
            pts='training/velodyne',
            CAM_FRONT='training/image_0',
            CAM_FRONT_LEFT='training/image_1',
            CAM_FRONT_RIGHT='training/image_2',
            CAM_SIDE_LEFT='training/image_3',
            CAM_SIDE_RIGHT='training/image_4'),
        pipeline=test_pipeline,
        modality=input_modality,
        test_mode=True,
        metainfo=metainfo,
        box_type_3d='LiDAR',
        backend_args=backend_args))
val_evaluator = dict(
    type='WaymoMetric',
    waymo_bin_file='./data/waymo/waymo_format/cam_gt.bin',
    metric='LET_mAP')
test_evaluator = val_evaluator
mmde/configs/_base_/default_runtime.py
0 → 100644
View file @
eb1107e4
# Default runtime settings shared by all mmdet3d configs.
default_scope = 'mmdet3d'

default_hooks = dict(
    timer=dict(type='IterTimerHook'),
    logger=dict(type='LoggerHook', interval=50),
    param_scheduler=dict(type='ParamSchedulerHook'),
    # interval=-1 disables periodic checkpointing here; schedules override it.
    checkpoint=dict(type='CheckpointHook', interval=-1),
    sampler_seed=dict(type='DistSamplerSeedHook'),
    visualization=dict(type='Det3DVisualizationHook'))

env_cfg = dict(
    cudnn_benchmark=False,
    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
    dist_cfg=dict(backend='nccl'),
)

log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True)

log_level = 'INFO'
load_from = None
resume = False

# TODO: support auto scaling lr
mmde/configs/_base_/models/3dssd.py
0 → 100644
View file @
eb1107e4
# 3DSSD detector: PointNet++ multi-scale-grouping backbone + SSD3D head.
model = dict(
    type='SSD3DNet',
    data_preprocessor=dict(type='Det3DDataPreprocessor'),
    backbone=dict(
        type='PointNet2SAMSG',
        in_channels=4,
        num_points=(4096, 512, (256, 256)),
        radii=((0.2, 0.4, 0.8), (0.4, 0.8, 1.6), (1.6, 3.2, 4.8)),
        num_samples=((32, 32, 64), (32, 32, 64), (32, 32, 32)),
        sa_channels=(((16, 16, 32), (16, 16, 32), (32, 32, 64)),
                     ((64, 64, 128), (64, 64, 128), (64, 96, 128)),
                     ((128, 128, 256), (128, 192, 256), (128, 256, 256))),
        aggregation_channels=(64, 128, 256),
        fps_mods=(('D-FPS'), ('FS'), ('F-FPS', 'D-FPS')),
        fps_sample_range_lists=((-1), (-1), (512, -1)),
        norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1),
        sa_cfg=dict(
            type='PointSAModuleMSG',
            pool_mod='max',
            use_xyz=True,
            normalize_xyz=False)),
    bbox_head=dict(
        type='SSD3DHead',
        vote_module_cfg=dict(
            in_channels=256,
            num_points=256,
            gt_per_seed=1,
            conv_channels=(128, ),
            conv_cfg=dict(type='Conv1d'),
            norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1),
            with_res_feat=False,
            vote_xyz_range=(3.0, 3.0, 2.0)),
        vote_aggregation_cfg=dict(
            type='PointSAModuleMSG',
            num_point=256,
            radii=(4.8, 6.4),
            sample_nums=(16, 32),
            mlp_channels=((256, 256, 256, 512), (256, 256, 512, 1024)),
            norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1),
            use_xyz=True,
            normalize_xyz=False,
            bias=True),
        pred_layer_cfg=dict(
            in_channels=1536,
            shared_conv_channels=(512, 128),
            cls_conv_channels=(128, ),
            reg_conv_channels=(128, ),
            conv_cfg=dict(type='Conv1d'),
            norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1),
            bias=True),
        objectness_loss=dict(
            type='mmdet.CrossEntropyLoss',
            use_sigmoid=True,
            reduction='sum',
            loss_weight=1.0),
        center_loss=dict(
            type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=1.0),
        dir_class_loss=dict(
            type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
        dir_res_loss=dict(
            type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=1.0),
        size_res_loss=dict(
            type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=1.0),
        corner_loss=dict(
            type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=1.0),
        vote_loss=dict(
            type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=1.0)),
    # model training and testing settings
    train_cfg=dict(
        sample_mode='spec', pos_distance_thr=10.0, expand_dims_length=0.05),
    test_cfg=dict(
        nms_cfg=dict(type='nms', iou_thr=0.1),
        sample_mode='spec',
        score_thr=0.0,
        per_class_proposal=True,
        max_output_num=100))
mmde/configs/_base_/models/cascade-mask-rcnn_r50_fpn.py
0 → 100644
View file @
eb1107e4
# model settings
# Cascade Mask R-CNN with a ResNet-50 + FPN backbone, scoped to mmdet.
model = dict(
    type='CascadeRCNN',
    pretrained='torchvision://resnet50',
    _scope_='mmdet',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[8],
            ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
    roi_head=dict(
        type='CascadeRoIHead',
        num_stages=3,
        stage_loss_weights=[1, 0.5, 0.25],
        bbox_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        # Three cascade stages: identical heads, progressively tighter
        # regression target stds.
        bbox_head=[
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=80,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(
                    type='SmoothL1Loss', beta=1.0, loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=80,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(
                    type='SmoothL1Loss', beta=1.0, loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=80,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(
                    type='SmoothL1Loss', beta=1.0, loss_weight=1.0))
        ],
        mask_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        mask_head=dict(
            type='FCNMaskHead',
            num_convs=4,
            in_channels=256,
            conv_out_channels=256,
            num_classes=80,
            loss_mask=dict(
                type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
    # model training and testing settings
    train_cfg=dict(
        rpn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                match_low_quality=True,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False),
            allowed_border=0,
            pos_weight=-1,
            debug=False),
        rpn_proposal=dict(
            nms_pre=2000,
            nms_post=2000,
            max_per_img=2000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        # Per-stage RCNN settings: IoU thresholds grow 0.5 -> 0.6 -> 0.7.
        rcnn=[
            dict(
                assigner=dict(
                    type='MaxIoUAssigner',
                    pos_iou_thr=0.5,
                    neg_iou_thr=0.5,
                    min_pos_iou=0.5,
                    match_low_quality=False,
                    ignore_iof_thr=-1),
                sampler=dict(
                    type='RandomSampler',
                    num=512,
                    pos_fraction=0.25,
                    neg_pos_ub=-1,
                    add_gt_as_proposals=True),
                mask_size=28,
                pos_weight=-1,
                debug=False),
            dict(
                assigner=dict(
                    type='MaxIoUAssigner',
                    pos_iou_thr=0.6,
                    neg_iou_thr=0.6,
                    min_pos_iou=0.6,
                    match_low_quality=False,
                    ignore_iof_thr=-1),
                sampler=dict(
                    type='RandomSampler',
                    num=512,
                    pos_fraction=0.25,
                    neg_pos_ub=-1,
                    add_gt_as_proposals=True),
                mask_size=28,
                pos_weight=-1,
                debug=False),
            dict(
                assigner=dict(
                    type='MaxIoUAssigner',
                    pos_iou_thr=0.7,
                    neg_iou_thr=0.7,
                    min_pos_iou=0.7,
                    match_low_quality=False,
                    ignore_iof_thr=-1),
                sampler=dict(
                    type='RandomSampler',
                    num=512,
                    pos_fraction=0.25,
                    neg_pos_ub=-1,
                    add_gt_as_proposals=True),
                mask_size=28,
                pos_weight=-1,
                debug=False)
        ]),
    test_cfg=dict(
        rpn=dict(
            nms_pre=1000,
            nms_post=1000,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        rcnn=dict(
            score_thr=0.05,
            nms=dict(type='nms', iou_threshold=0.5),
            max_per_img=100,
            mask_thr_binary=0.5)))
mmde/configs/_base_/models/centerpoint_pillar02_second_secfpn_nus.py
0 → 100644
View file @
eb1107e4
# CenterPoint with a PointPillars (0.2 m pillar) encoder on nuScenes.
voxel_size = [0.2, 0.2, 8]

model = dict(
    type='CenterPoint',
    data_preprocessor=dict(
        type='Det3DDataPreprocessor',
        voxel=True,
        voxel_layer=dict(
            max_num_points=20,
            voxel_size=voxel_size,
            max_voxels=(30000, 40000))),
    pts_voxel_encoder=dict(
        type='PillarFeatureNet',
        in_channels=5,
        feat_channels=[64],
        with_distance=False,
        voxel_size=(0.2, 0.2, 8),
        norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
        legacy=False),
    pts_middle_encoder=dict(
        type='PointPillarsScatter', in_channels=64, output_shape=(512, 512)),
    pts_backbone=dict(
        type='SECOND',
        in_channels=64,
        out_channels=[64, 128, 256],
        layer_nums=[3, 5, 5],
        layer_strides=[2, 2, 2],
        norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
        conv_cfg=dict(type='Conv2d', bias=False)),
    pts_neck=dict(
        type='SECONDFPN',
        in_channels=[64, 128, 256],
        out_channels=[128, 128, 128],
        upsample_strides=[0.5, 1, 2],
        norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
        upsample_cfg=dict(type='deconv', bias=False),
        use_conv_for_no_stride=True),
    pts_bbox_head=dict(
        type='CenterHead',
        in_channels=sum([128, 128, 128]),
        tasks=[
            dict(num_class=1, class_names=['car']),
            dict(num_class=2, class_names=['truck', 'construction_vehicle']),
            dict(num_class=2, class_names=['bus', 'trailer']),
            dict(num_class=1, class_names=['barrier']),
            dict(num_class=2, class_names=['motorcycle', 'bicycle']),
            dict(num_class=2, class_names=['pedestrian', 'traffic_cone']),
        ],
        common_heads=dict(
            reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)),
        share_conv_channel=64,
        bbox_coder=dict(
            type='CenterPointBBoxCoder',
            post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
            max_num=500,
            score_threshold=0.1,
            out_size_factor=4,
            voxel_size=voxel_size[:2],
            code_size=9),
        separate_head=dict(
            type='SeparateHead', init_bias=-2.19, final_kernel=3),
        loss_cls=dict(type='mmdet.GaussianFocalLoss', reduction='mean'),
        loss_bbox=dict(
            type='mmdet.L1Loss', reduction='mean', loss_weight=0.25),
        norm_bbox=True),
    # model training and testing settings
    train_cfg=dict(
        pts=dict(
            grid_size=[512, 512, 1],
            voxel_size=voxel_size,
            out_size_factor=4,
            dense_reg=1,
            gaussian_overlap=0.1,
            max_objs=500,
            min_radius=2,
            code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2,
                          0.2])),
    test_cfg=dict(
        pts=dict(
            post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
            max_per_img=500,
            max_pool_nms=False,
            min_radius=[4, 12, 10, 1, 0.85, 0.175],
            score_threshold=0.1,
            pc_range=[-51.2, -51.2],
            out_size_factor=4,
            voxel_size=voxel_size[:2],
            nms_type='rotate',
            pre_max_size=1000,
            post_max_size=83,
            nms_thr=0.2)))
mmde/configs/_base_/models/centerpoint_voxel01_second_secfpn_nus.py
0 → 100644
View file @
eb1107e4
# CenterPoint with a 0.1 m sparse-voxel (SECOND) encoder on nuScenes.
voxel_size = [0.1, 0.1, 0.2]

model = dict(
    type='CenterPoint',
    data_preprocessor=dict(
        type='Det3DDataPreprocessor',
        voxel=True,
        voxel_layer=dict(
            max_num_points=10,
            voxel_size=voxel_size,
            max_voxels=(90000, 120000))),
    pts_voxel_encoder=dict(type='HardSimpleVFE', num_features=5),
    pts_middle_encoder=dict(
        type='SparseEncoder',
        in_channels=5,
        sparse_shape=[41, 1024, 1024],
        output_channels=128,
        order=('conv', 'norm', 'act'),
        encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128), (128,
                                                                      128)),
        encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, [0, 1, 1]), (0, 0)),
        block_type='basicblock'),
    pts_backbone=dict(
        type='SECOND',
        in_channels=256,
        out_channels=[128, 256],
        layer_nums=[5, 5],
        layer_strides=[1, 2],
        norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
        conv_cfg=dict(type='Conv2d', bias=False)),
    pts_neck=dict(
        type='SECONDFPN',
        in_channels=[128, 256],
        out_channels=[256, 256],
        upsample_strides=[1, 2],
        norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
        upsample_cfg=dict(type='deconv', bias=False),
        use_conv_for_no_stride=True),
    pts_bbox_head=dict(
        type='CenterHead',
        in_channels=sum([256, 256]),
        tasks=[
            dict(num_class=1, class_names=['car']),
            dict(num_class=2, class_names=['truck', 'construction_vehicle']),
            dict(num_class=2, class_names=['bus', 'trailer']),
            dict(num_class=1, class_names=['barrier']),
            dict(num_class=2, class_names=['motorcycle', 'bicycle']),
            dict(num_class=2, class_names=['pedestrian', 'traffic_cone']),
        ],
        common_heads=dict(
            reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)),
        share_conv_channel=64,
        bbox_coder=dict(
            type='CenterPointBBoxCoder',
            post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
            max_num=500,
            score_threshold=0.1,
            out_size_factor=8,
            voxel_size=voxel_size[:2],
            code_size=9),
        separate_head=dict(
            type='SeparateHead', init_bias=-2.19, final_kernel=3),
        loss_cls=dict(type='mmdet.GaussianFocalLoss', reduction='mean'),
        loss_bbox=dict(
            type='mmdet.L1Loss', reduction='mean', loss_weight=0.25),
        norm_bbox=True),
    # model training and testing settings
    train_cfg=dict(
        pts=dict(
            grid_size=[1024, 1024, 40],
            voxel_size=voxel_size,
            out_size_factor=8,
            dense_reg=1,
            gaussian_overlap=0.1,
            max_objs=500,
            min_radius=2,
            code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2,
                          0.2])),
    test_cfg=dict(
        pts=dict(
            post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
            max_per_img=500,
            max_pool_nms=False,
            min_radius=[4, 12, 10, 1, 0.85, 0.175],
            score_threshold=0.1,
            out_size_factor=8,
            voxel_size=voxel_size[:2],
            nms_type='rotate',
            pre_max_size=1000,
            post_max_size=83,
            nms_thr=0.2)))
mmde/configs/_base_/models/cylinder3d.py
0 → 100644
View file @
eb1107e4
# Cylinder3D semantic-segmentation model settings.
# The cylindrical voxel grid is shared between the data preprocessor and the
# sparse-conv backbone, so it is declared once here.
grid_shape = [480, 360, 32]  # (radius, azimuth, height) bins

model = dict(
    type='Cylinder3D',
    data_preprocessor=dict(
        type='Det3DDataPreprocessor',
        voxel=True,
        voxel_type='cylindrical',
        voxel_layer=dict(
            grid_shape=grid_shape,
            # [r_min, theta_min, z_min, r_max, theta_max, z_max];
            # theta spans [-pi, pi] (hard-coded pi approximation).
            point_cloud_range=[
                0, -3.14159265359, -4, 50, 3.14159265359, 2
            ],
            # -1 disables the caps: keep every point and every voxel.
            max_num_points=-1,
            max_voxels=-1,
        ),
    ),
    voxel_encoder=dict(
        type='SegVFE',
        feat_channels=[64, 128, 256, 256],
        in_channels=6,
        with_voxel_center=True,
        feat_compression=16,
        return_point_feats=False),
    backbone=dict(
        type='Asymm3DSpconv',
        grid_size=grid_shape,
        input_channels=16,  # matches voxel_encoder feat_compression
        base_channels=32,
        norm_cfg=dict(type='BN1d', eps=1e-5, momentum=0.1)),
    decode_head=dict(
        type='Cylinder3DHead',
        channels=128,
        num_classes=20,
        loss_ce=dict(
            type='mmdet.CrossEntropyLoss',
            use_sigmoid=False,
            class_weight=None,
            loss_weight=1.0),
        loss_lovasz=dict(
            type='LovaszLoss', loss_weight=1.0, reduction='none'),
    ),
    train_cfg=None,
    test_cfg=dict(mode='whole'),
)
mmde/configs/_base_/models/dgcnn.py
0 → 100644
View file @
eb1107e4
# model settings: DGCNN encoder-decoder for point-cloud semantic segmentation.
model = dict(
    type='EncoderDecoder3D',
    data_preprocessor=dict(type='Det3DDataPreprocessor'),
    backbone=dict(
        type='DGCNNBackbone',
        in_channels=9,  # [xyz, rgb, normal_xyz], modified with dataset
        # Three edge-conv stages: k neighbors, neighbor-search mode, radius.
        num_samples=(20, 20, 20),
        knn_modes=('D-KNN', 'F-KNN', 'F-KNN'),
        radius=(None, None, None),
        gf_channels=((64, 64), (64, 64), (64, )),
        fa_channels=(1024, ),
        act_cfg=dict(type='LeakyReLU', negative_slope=0.2)),
    decode_head=dict(
        type='DGCNNHead',
        fp_channels=(1216, 512),
        channels=256,
        dropout_ratio=0.5,
        conv_cfg=dict(type='Conv1d'),
        norm_cfg=dict(type='BN1d'),
        act_cfg=dict(type='LeakyReLU', negative_slope=0.2),
        loss_decode=dict(
            type='mmdet.CrossEntropyLoss',
            use_sigmoid=False,
            class_weight=None,  # modified with dataset
            loss_weight=1.0)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='slide'))
mmde/configs/_base_/models/fcaf3d.py
0 → 100644
View file @
eb1107e4
# FCAF3D: fully-convolutional anchor-free indoor 3D detector built on a
# sparse (Minkowski) ResNet backbone.
model = dict(
    type='MinkSingleStage3DDetector',
    data_preprocessor=dict(type='Det3DDataPreprocessor'),
    backbone=dict(type='MinkResNet', in_channels=3, depth=34),
    bbox_head=dict(
        type='FCAF3DHead',
        in_channels=(64, 128, 256, 512),  # one entry per backbone stage
        out_channels=128,
        voxel_size=0.01,
        pts_prune_threshold=100000,
        pts_assign_threshold=27,
        pts_center_threshold=18,
        num_classes=18,
        num_reg_outs=6,
        center_loss=dict(type='mmdet.CrossEntropyLoss', use_sigmoid=True),
        bbox_loss=dict(type='AxisAlignedIoULoss'),
        cls_loss=dict(type='mmdet.FocalLoss'),
    ),
    train_cfg=dict(),
    test_cfg=dict(nms_pre=1000, iou_thr=0.5, score_thr=0.01))
mmde/configs/_base_/models/fcos3d.py
0 → 100644
View file @
eb1107e4
# model settings: FCOS3D monocular 3D detector (ResNet-101 + FPN).
model = dict(
    type='FCOSMono3D',
    data_preprocessor=dict(
        type='Det3DDataPreprocessor',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        bgr_to_rgb=True,
        pad_size_divisor=32),
    backbone=dict(
        type='mmdet.ResNet',
        depth=101,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=False),
        norm_eval=True,
        style='caffe',
        init_cfg=dict(
            type='Pretrained',
            checkpoint='open-mmlab://detectron2/resnet101_caffe')),
    neck=dict(
        type='mmdet.FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_output',
        num_outs=5,
        relu_before_extra_convs=True),
    bbox_head=dict(
        type='FCOSMono3DHead',
        num_classes=10,
        in_channels=256,
        stacked_convs=2,
        feat_channels=256,
        use_direction_classifier=True,
        diff_rad_by_sin=True,
        pred_attrs=True,
        pred_velo=True,
        dir_offset=0.7854,  # pi/4
        dir_limit_offset=0,
        strides=[8, 16, 32, 64, 128],
        group_reg_dims=(2, 1, 3, 1, 2),  # offset, depth, size, rot, velo
        cls_branch=(256, ),
        # One branch spec per regression group; the empty tuple means the
        # velocity group has no extra conv layers.
        reg_branch=(
            (256, ),  # offset
            (256, ),  # depth
            (256, ),  # size
            (256, ),  # rot
            ()  # velo
        ),
        dir_branch=(256, ),
        attr_branch=(256, ),
        loss_cls=dict(
            type='mmdet.FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(
            type='mmdet.SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
        loss_dir=dict(
            type='mmdet.CrossEntropyLoss',
            use_sigmoid=False,
            loss_weight=1.0),
        loss_attr=dict(
            type='mmdet.CrossEntropyLoss',
            use_sigmoid=False,
            loss_weight=1.0),
        loss_centerness=dict(
            type='mmdet.CrossEntropyLoss',
            use_sigmoid=True,
            loss_weight=1.0),
        bbox_coder=dict(type='FCOS3DBBoxCoder', code_size=9),
        norm_on_bbox=True,
        centerness_on_reg=True,
        center_sampling=True,
        conv_bias=True,
        dcn_on_last_conv=True),
    train_cfg=dict(
        allowed_border=0,
        # Per-dimension weighting of the 9-dim box code.
        code_weight=[1.0, 1.0, 0.2, 1.0, 1.0, 1.0, 1.0, 0.05, 0.05],
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        use_rotate_nms=True,
        nms_across_levels=False,
        nms_pre=1000,
        nms_thr=0.8,
        score_thr=0.05,
        min_bbox_size=0,
        max_per_img=200))
mmde/configs/_base_/models/groupfree3d.py
0 → 100644
View file @
eb1107e4
# Group-Free 3D: transformer-decoder detector on a PointNet++ backbone.
model = dict(
    type='GroupFree3DNet',
    data_preprocessor=dict(type='Det3DDataPreprocessor'),
    backbone=dict(
        type='PointNet2SASSG',
        in_channels=3,
        # Four set-abstraction stages; fp_channels upsamples back to 288-d
        # seed features consumed by the head.
        num_points=(2048, 1024, 512, 256),
        radius=(0.2, 0.4, 0.8, 1.2),
        num_samples=(64, 32, 16, 16),
        sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),
                     (128, 128, 256)),
        fp_channels=((256, 256), (256, 288)),
        norm_cfg=dict(type='BN2d'),
        sa_cfg=dict(
            type='PointSAModule',
            pool_mod='max',
            use_xyz=True,
            normalize_xyz=True)),
    bbox_head=dict(
        type='GroupFree3DHead',
        in_channels=288,
        num_decoder_layers=6,
        num_proposal=256,
        transformerlayers=dict(
            type='BaseTransformerLayer',
            attn_cfgs=dict(
                type='GroupFree3DMHA',
                embed_dims=288,
                num_heads=8,
                attn_drop=0.1,
                dropout_layer=dict(type='Dropout', drop_prob=0.1)),
            ffn_cfgs=dict(
                embed_dims=288,
                feedforward_channels=2048,
                ffn_drop=0.1,
                act_cfg=dict(type='ReLU', inplace=True)),
            operation_order=('self_attn', 'norm', 'cross_attn', 'norm',
                             'ffn', 'norm')),
        pred_layer_cfg=dict(
            in_channels=288, shared_conv_channels=(288, 288), bias=True),
        # Initial point sampling gets a higher weight than the per-layer
        # objectness term.
        sampling_objectness_loss=dict(
            type='mmdet.FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=8.0),
        objectness_loss=dict(
            type='mmdet.FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        center_loss=dict(
            type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=10.0),
        dir_class_loss=dict(
            type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
        dir_res_loss=dict(
            type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=10.0),
        size_class_loss=dict(
            type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
        size_res_loss=dict(
            type='mmdet.SmoothL1Loss',
            beta=1.0,
            reduction='sum',
            loss_weight=10.0),
        semantic_loss=dict(
            type='mmdet.CrossEntropyLoss', reduction='sum',
            loss_weight=1.0)),
    # model training and testing settings
    train_cfg=dict(sample_mode='kps'),
    test_cfg=dict(
        sample_mode='kps',
        nms_thr=0.25,
        score_thr=0.0,
        per_class_proposal=True,
        prediction_stages='last'))
mmde/configs/_base_/models/h3dnet.py
0 → 100644
View file @
eb1107e4
# H3DNet primitive head for horizontal ('z') surface cues.  The three
# primitive_*_cfg dicts in this file differ only in num_dims,
# primitive_mode and a few loss weights.
primitive_z_cfg = dict(
    type='PrimitiveHead',
    num_dims=2,
    num_classes=18,
    primitive_mode='z',
    upper_thresh=100.0,
    surface_thresh=0.5,
    vote_module_cfg=dict(
        in_channels=256,
        vote_per_seed=1,
        gt_per_seed=1,
        conv_channels=(256, 256),
        conv_cfg=dict(type='Conv1d'),
        norm_cfg=dict(type='BN1d'),
        norm_feats=True,
        vote_loss=dict(
            type='ChamferDistance',
            mode='l1',
            reduction='none',
            loss_dst_weight=10.0)),
    vote_aggregation_cfg=dict(
        type='PointSAModule',
        num_point=1024,
        radius=0.3,
        num_sample=16,
        mlp_channels=[256, 128, 128, 128],
        use_xyz=True,
        normalize_xyz=True),
    feat_channels=(128, 128),
    conv_cfg=dict(type='Conv1d'),
    norm_cfg=dict(type='BN1d'),
    objectness_loss=dict(
        type='mmdet.CrossEntropyLoss',
        class_weight=[0.4, 0.6],
        reduction='mean',
        loss_weight=30.0),
    center_loss=dict(
        type='ChamferDistance',
        mode='l1',
        reduction='sum',
        loss_src_weight=0.5,
        loss_dst_weight=0.5),
    semantic_reg_loss=dict(
        type='ChamferDistance',
        mode='l1',
        reduction='sum',
        loss_src_weight=0.5,
        loss_dst_weight=0.5),
    semantic_cls_loss=dict(
        type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
    train_cfg=dict(
        sample_mode='vote',
        dist_thresh=0.2,
        var_thresh=1e-2,
        lower_thresh=1e-6,
        num_point=100,
        num_point_line=10,
        line_thresh=0.2),
    test_cfg=dict(sample_mode='seed'))
# H3DNet primitive head for vertical ('xy') surface cues; mirrors
# primitive_z_cfg except for num_dims and primitive_mode.
primitive_xy_cfg = dict(
    type='PrimitiveHead',
    num_dims=1,
    num_classes=18,
    primitive_mode='xy',
    upper_thresh=100.0,
    surface_thresh=0.5,
    vote_module_cfg=dict(
        in_channels=256,
        vote_per_seed=1,
        gt_per_seed=1,
        conv_channels=(256, 256),
        conv_cfg=dict(type='Conv1d'),
        norm_cfg=dict(type='BN1d'),
        norm_feats=True,
        vote_loss=dict(
            type='ChamferDistance',
            mode='l1',
            reduction='none',
            loss_dst_weight=10.0)),
    vote_aggregation_cfg=dict(
        type='PointSAModule',
        num_point=1024,
        radius=0.3,
        num_sample=16,
        mlp_channels=[256, 128, 128, 128],
        use_xyz=True,
        normalize_xyz=True),
    feat_channels=(128, 128),
    conv_cfg=dict(type='Conv1d'),
    norm_cfg=dict(type='BN1d'),
    objectness_loss=dict(
        type='mmdet.CrossEntropyLoss',
        class_weight=[0.4, 0.6],
        reduction='mean',
        loss_weight=30.0),
    center_loss=dict(
        type='ChamferDistance',
        mode='l1',
        reduction='sum',
        loss_src_weight=0.5,
        loss_dst_weight=0.5),
    semantic_reg_loss=dict(
        type='ChamferDistance',
        mode='l1',
        reduction='sum',
        loss_src_weight=0.5,
        loss_dst_weight=0.5),
    semantic_cls_loss=dict(
        type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
    train_cfg=dict(
        sample_mode='vote',
        dist_thresh=0.2,
        var_thresh=1e-2,
        lower_thresh=1e-6,
        num_point=100,
        num_point_line=10,
        line_thresh=0.2),
    test_cfg=dict(sample_mode='seed'))
# H3DNet primitive head for line cues; same skeleton as primitive_z_cfg /
# primitive_xy_cfg but with heavier center / semantic loss weights.
primitive_line_cfg = dict(
    type='PrimitiveHead',
    num_dims=0,
    num_classes=18,
    primitive_mode='line',
    upper_thresh=100.0,
    surface_thresh=0.5,
    vote_module_cfg=dict(
        in_channels=256,
        vote_per_seed=1,
        gt_per_seed=1,
        conv_channels=(256, 256),
        conv_cfg=dict(type='Conv1d'),
        norm_cfg=dict(type='BN1d'),
        norm_feats=True,
        vote_loss=dict(
            type='ChamferDistance',
            mode='l1',
            reduction='none',
            loss_dst_weight=10.0)),
    vote_aggregation_cfg=dict(
        type='PointSAModule',
        num_point=1024,
        radius=0.3,
        num_sample=16,
        mlp_channels=[256, 128, 128, 128],
        use_xyz=True,
        normalize_xyz=True),
    feat_channels=(128, 128),
    conv_cfg=dict(type='Conv1d'),
    norm_cfg=dict(type='BN1d'),
    objectness_loss=dict(
        type='mmdet.CrossEntropyLoss',
        class_weight=[0.4, 0.6],
        reduction='mean',
        loss_weight=30.0),
    center_loss=dict(
        type='ChamferDistance',
        mode='l1',
        reduction='sum',
        loss_src_weight=1.0,
        loss_dst_weight=1.0),
    semantic_reg_loss=dict(
        type='ChamferDistance',
        mode='l1',
        reduction='sum',
        loss_src_weight=1.0,
        loss_dst_weight=1.0),
    semantic_cls_loss=dict(
        type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=2.0),
    train_cfg=dict(
        sample_mode='vote',
        dist_thresh=0.2,
        var_thresh=1e-2,
        lower_thresh=1e-6,
        num_point=100,
        num_point_line=10,
        line_thresh=0.2),
    test_cfg=dict(sample_mode='seed'))
# H3DNet: multi-stream PointNet++ backbone, a VoteNet-style RPN, and an RoI
# head that refines proposals with the geometric primitive cues declared
# above (primitive_z_cfg / primitive_xy_cfg / primitive_line_cfg).
model = dict(
    type='H3DNet',
    data_preprocessor=dict(type='Det3DDataPreprocessor'),
    backbone=dict(
        type='MultiBackbone',
        num_streams=4,
        suffixes=['net0', 'net1', 'net2', 'net3'],
        conv_cfg=dict(type='Conv1d'),
        norm_cfg=dict(type='BN1d', eps=1e-5, momentum=0.01),
        act_cfg=dict(type='ReLU'),
        # A single cfg here is replicated across the four streams.
        backbones=dict(
            type='PointNet2SASSG',
            in_channels=4,
            num_points=(2048, 1024, 512, 256),
            radius=(0.2, 0.4, 0.8, 1.2),
            num_samples=(64, 32, 16, 16),
            sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),
                         (128, 128, 256)),
            fp_channels=((256, 256), (256, 256)),
            norm_cfg=dict(type='BN2d'),
            sa_cfg=dict(
                type='PointSAModule',
                pool_mod='max',
                use_xyz=True,
                normalize_xyz=True))),
    rpn_head=dict(
        type='VoteHead',
        vote_module_cfg=dict(
            in_channels=256,
            vote_per_seed=1,
            gt_per_seed=3,
            conv_channels=(256, 256),
            conv_cfg=dict(type='Conv1d'),
            norm_cfg=dict(type='BN1d'),
            norm_feats=True,
            vote_loss=dict(
                type='ChamferDistance',
                mode='l1',
                reduction='none',
                loss_dst_weight=10.0)),
        vote_aggregation_cfg=dict(
            type='PointSAModule',
            num_point=256,
            radius=0.3,
            num_sample=16,
            mlp_channels=[256, 128, 128, 128],
            use_xyz=True,
            normalize_xyz=True),
        pred_layer_cfg=dict(
            in_channels=128, shared_conv_channels=(128, 128), bias=True),
        objectness_loss=dict(
            type='mmdet.CrossEntropyLoss',
            class_weight=[0.2, 0.8],
            reduction='sum',
            loss_weight=5.0),
        center_loss=dict(
            type='ChamferDistance',
            mode='l2',
            reduction='sum',
            loss_src_weight=10.0,
            loss_dst_weight=10.0),
        dir_class_loss=dict(
            type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
        dir_res_loss=dict(
            type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=10.0),
        size_class_loss=dict(
            type='mmdet.CrossEntropyLoss', reduction='sum', loss_weight=1.0),
        size_res_loss=dict(
            type='mmdet.SmoothL1Loss', reduction='sum', loss_weight=10.0),
        semantic_loss=dict(
            type='mmdet.CrossEntropyLoss', reduction='sum',
            loss_weight=1.0)),
    roi_head=dict(
        type='H3DRoIHead',
        primitive_list=[primitive_z_cfg, primitive_xy_cfg,
                        primitive_line_cfg],
        bbox_head=dict(
            type='H3DBboxHead',
            gt_per_seed=3,
            num_proposal=256,
            # NOTE(review): 'suface_matching_cfg' looks like a typo for
            # 'surface_matching_cfg', but it is the key the H3DBboxHead
            # implementation reads, so it must stay spelled this way.
            suface_matching_cfg=dict(
                type='PointSAModule',
                num_point=256 * 6,  # 6 surfaces per proposal
                radius=0.5,
                num_sample=32,
                mlp_channels=[128 + 6, 128, 64, 32],
                use_xyz=True,
                normalize_xyz=True),
            line_matching_cfg=dict(
                type='PointSAModule',
                num_point=256 * 12,  # 12 edges per proposal
                radius=0.5,
                num_sample=32,
                mlp_channels=[128 + 12, 128, 64, 32],
                use_xyz=True,
                normalize_xyz=True),
            primitive_refine_channels=[128, 128, 128],
            upper_thresh=100.0,
            surface_thresh=0.5,
            line_thresh=0.5,
            conv_cfg=dict(type='Conv1d'),
            norm_cfg=dict(type='BN1d'),
            objectness_loss=dict(
                type='mmdet.CrossEntropyLoss',
                class_weight=[0.2, 0.8],
                reduction='sum',
                loss_weight=5.0),
            center_loss=dict(
                type='ChamferDistance',
                mode='l2',
                reduction='sum',
                loss_src_weight=10.0,
                loss_dst_weight=10.0),
            dir_class_loss=dict(
                type='mmdet.CrossEntropyLoss',
                reduction='sum',
                loss_weight=0.1),
            dir_res_loss=dict(
                type='mmdet.SmoothL1Loss', reduction='sum',
                loss_weight=10.0),
            size_class_loss=dict(
                type='mmdet.CrossEntropyLoss',
                reduction='sum',
                loss_weight=0.1),
            size_res_loss=dict(
                type='mmdet.SmoothL1Loss', reduction='sum',
                loss_weight=10.0),
            semantic_loss=dict(
                type='mmdet.CrossEntropyLoss',
                reduction='sum',
                loss_weight=0.1),
            cues_objectness_loss=dict(
                type='mmdet.CrossEntropyLoss',
                class_weight=[0.3, 0.7],
                reduction='mean',
                loss_weight=5.0),
            cues_semantic_loss=dict(
                type='mmdet.CrossEntropyLoss',
                class_weight=[0.3, 0.7],
                reduction='mean',
                loss_weight=5.0),
            proposal_objectness_loss=dict(
                type='mmdet.CrossEntropyLoss',
                class_weight=[0.2, 0.8],
                reduction='none',
                loss_weight=5.0),
            primitive_center_loss=dict(
                type='mmdet.MSELoss', reduction='none', loss_weight=1.0))),
    # model training and testing settings
    train_cfg=dict(
        rpn=dict(
            pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mode='vote'),
        rpn_proposal=dict(use_nms=False),
        rcnn=dict(
            pos_distance_thr=0.3,
            neg_distance_thr=0.6,
            sample_mode='vote',
            far_threshold=0.6,
            near_threshold=0.3,
            mask_surface_threshold=0.3,
            label_surface_threshold=0.3,
            mask_line_threshold=0.3,
            label_line_threshold=0.3)),
    test_cfg=dict(
        rpn=dict(
            sample_mode='seed',
            nms_thr=0.25,
            score_thr=0.05,
            per_class_proposal=True,
            use_nms=False),
        rcnn=dict(
            sample_mode='seed',
            nms_thr=0.25,
            score_thr=0.05,
            per_class_proposal=True)))
Prev
1
2
3
4
5
6
7
8
…
29
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment