lishj6 / BEVFomer · Commits

Commit 4cd43886, authored Sep 01, 2025 by lishj6
Commit message: init
Parent: a9a1fe81

Changes: 207 files in this commit; the page shows 20 changed files with 2171 additions and 0 deletions (+2171, -0).
projects/configs/_base_/datasets/s3dis_seg-3d-13class.py                  +139  -0
projects/configs/_base_/datasets/scannet-3d-18class.py                    +128  -0
projects/configs/_base_/datasets/scannet_seg-3d-20class.py                +132  -0
projects/configs/_base_/datasets/sunrgbd-3d-10class.py                    +107  -0
projects/configs/_base_/datasets/waymoD5-3d-3class.py                     +145  -0
projects/configs/_base_/datasets/waymoD5-3d-car.py                        +143  -0
projects/configs/_base_/default_runtime.py                                +18   -0
projects/configs/_base_/models/3dssd.py                                   +77   -0
projects/configs/_base_/models/cascade_mask_rcnn_r50_fpn.py               +200  -0
projects/configs/_base_/models/centerpoint_01voxel_second_secfpn_nus.py   +83   -0
projects/configs/_base_/models/centerpoint_02pillar_second_secfpn_nus.py  +83   -0
projects/configs/_base_/models/fcos3d.py                                  +74   -0
projects/configs/_base_/models/groupfree3d.py                             +71   -0
projects/configs/_base_/models/h3dnet.py                                  +341  -0
projects/configs/_base_/models/hv_pointpillars_fpn_lyft.py                +22   -0
projects/configs/_base_/models/hv_pointpillars_fpn_nus.py                 +96   -0
projects/configs/_base_/models/hv_pointpillars_fpn_range100_lyft.py       +22   -0
projects/configs/_base_/models/hv_pointpillars_secfpn_kitti.py            +93   -0
projects/configs/_base_/models/hv_pointpillars_secfpn_waymo.py            +108  -0
projects/configs/_base_/models/hv_second_secfpn_kitti.py                  +89   -0
projects/configs/_base_/datasets/s3dis_seg-3d-13class.py  (new file, mode 100644)

# dataset settings
dataset_type = 'S3DISSegDataset'
data_root = './data/s3dis/'
class_names = ('ceiling', 'floor', 'wall', 'beam', 'column', 'window', 'door',
               'table', 'chair', 'sofa', 'bookcase', 'board', 'clutter')
num_points = 4096
train_area = [1, 2, 3, 4, 6]
test_area = 5
train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='DEPTH',
        shift_height=False,
        use_color=True,
        load_dim=6,
        use_dim=[0, 1, 2, 3, 4, 5]),
    dict(
        type='LoadAnnotations3D',
        with_bbox_3d=False,
        with_label_3d=False,
        with_mask_3d=False,
        with_seg_3d=True),
    dict(
        type='PointSegClassMapping',
        valid_cat_ids=tuple(range(len(class_names))),
        max_cat_id=13),
    dict(
        type='IndoorPatchPointSample',
        num_points=num_points,
        block_size=1.0,
        ignore_index=len(class_names),
        use_normalized_coord=True,
        enlarge_size=0.2,
        min_unique_num=None),
    dict(type='NormalizePointsColor', color_mean=None),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
    dict(type='Collect3D', keys=['points', 'pts_semantic_mask'])
]
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='DEPTH',
        shift_height=False,
        use_color=True,
        load_dim=6,
        use_dim=[0, 1, 2, 3, 4, 5]),
    dict(type='NormalizePointsColor', color_mean=None),
    dict(
        # a wrapper in order to successfully call test function
        # actually we don't perform test-time-aug
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='GlobalRotScaleTrans',
                rot_range=[0, 0],
                scale_ratio_range=[1., 1.],
                translation_std=[0, 0, 0]),
            dict(
                type='RandomFlip3D',
                sync_2d=False,
                flip_ratio_bev_horizontal=0.0,
                flip_ratio_bev_vertical=0.0),
            dict(
                type='DefaultFormatBundle3D',
                class_names=class_names,
                with_label=False),
            dict(type='Collect3D', keys=['points'])
        ])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
# we need to load gt seg_mask!
eval_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='DEPTH',
        shift_height=False,
        use_color=True,
        load_dim=6,
        use_dim=[0, 1, 2, 3, 4, 5]),
    dict(
        type='LoadAnnotations3D',
        with_bbox_3d=False,
        with_label_3d=False,
        with_mask_3d=False,
        with_seg_3d=True),
    dict(
        type='PointSegClassMapping',
        valid_cat_ids=tuple(range(len(class_names))),
        max_cat_id=13),
    dict(
        type='DefaultFormatBundle3D',
        with_label=False,
        class_names=class_names),
    dict(type='Collect3D', keys=['points', 'pts_semantic_mask'])
]
data = dict(
    samples_per_gpu=8,
    workers_per_gpu=4,
    # train on area 1, 2, 3, 4, 6
    # test on area 5
    train=dict(
        type=dataset_type,
        data_root=data_root,
        ann_files=[
            data_root + f's3dis_infos_Area_{i}.pkl' for i in train_area
        ],
        pipeline=train_pipeline,
        classes=class_names,
        test_mode=False,
        ignore_index=len(class_names),
        scene_idxs=[
            data_root + f'seg_info/Area_{i}_resampled_scene_idxs.npy'
            for i in train_area
        ]),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        ann_files=data_root + f's3dis_infos_Area_{test_area}.pkl',
        pipeline=test_pipeline,
        classes=class_names,
        test_mode=True,
        ignore_index=len(class_names),
        scene_idxs=data_root +
        f'seg_info/Area_{test_area}_resampled_scene_idxs.npy'),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        ann_files=data_root + f's3dis_infos_Area_{test_area}.pkl',
        pipeline=test_pipeline,
        classes=class_names,
        test_mode=True,
        ignore_index=len(class_names)))
evaluation = dict(pipeline=eval_pipeline)
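These _base_ fragments are consumed by full experiment configs rather than run directly. A minimal sketch of loading and inspecting this one, assuming an environment with the pre-2.0 mmcv Config API that configs of this style target:

from mmcv import Config

# Path is the file from this commit; the list comprehensions above are
# evaluated at load time, so ann_files becomes five concrete .pkl paths.
cfg = Config.fromfile(
    'projects/configs/_base_/datasets/s3dis_seg-3d-13class.py')
print(cfg.data.train.ann_files)   # per-area info files for areas 1,2,3,4,6
print(len(cfg.train_pipeline))    # 7 transforms, as listed above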
projects/configs/_base_/datasets/scannet-3d-18class.py  (new file, mode 100644)

# dataset settings
dataset_type = 'ScanNetDataset'
data_root = './data/scannet/'
class_names = ('cabinet', 'bed', 'chair', 'sofa', 'table', 'door', 'window',
               'bookshelf', 'picture', 'counter', 'desk', 'curtain',
               'refrigerator', 'showercurtrain', 'toilet', 'sink', 'bathtub',
               'garbagebin')
train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='DEPTH',
        shift_height=True,
        load_dim=6,
        use_dim=[0, 1, 2]),
    dict(
        type='LoadAnnotations3D',
        with_bbox_3d=True,
        with_label_3d=True,
        with_mask_3d=True,
        with_seg_3d=True),
    dict(type='GlobalAlignment', rotation_axis=2),
    dict(
        type='PointSegClassMapping',
        valid_cat_ids=(3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33,
                       34, 36, 39),
        max_cat_id=40),
    dict(type='PointSample', num_points=40000),
    dict(
        type='RandomFlip3D',
        sync_2d=False,
        flip_ratio_bev_horizontal=0.5,
        flip_ratio_bev_vertical=0.5),
    dict(
        type='GlobalRotScaleTrans',
        rot_range=[-0.087266, 0.087266],
        scale_ratio_range=[1.0, 1.0],
        shift_height=True),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
    dict(
        type='Collect3D',
        keys=[
            'points', 'gt_bboxes_3d', 'gt_labels_3d', 'pts_semantic_mask',
            'pts_instance_mask'
        ])
]
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='DEPTH',
        shift_height=True,
        load_dim=6,
        use_dim=[0, 1, 2]),
    dict(type='GlobalAlignment', rotation_axis=2),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='GlobalRotScaleTrans',
                rot_range=[0, 0],
                scale_ratio_range=[1., 1.],
                translation_std=[0, 0, 0]),
            dict(
                type='RandomFlip3D',
                sync_2d=False,
                flip_ratio_bev_horizontal=0.5,
                flip_ratio_bev_vertical=0.5),
            dict(type='PointSample', num_points=40000),
            dict(
                type='DefaultFormatBundle3D',
                class_names=class_names,
                with_label=False),
            dict(type='Collect3D', keys=['points'])
        ])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='DEPTH',
        shift_height=False,
        load_dim=6,
        use_dim=[0, 1, 2]),
    dict(type='GlobalAlignment', rotation_axis=2),
    dict(
        type='DefaultFormatBundle3D',
        class_names=class_names,
        with_label=False),
    dict(type='Collect3D', keys=['points'])
]
data = dict(
    samples_per_gpu=8,
    workers_per_gpu=4,
    train=dict(
        type='RepeatDataset',
        times=5,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file=data_root + 'scannet_infos_train.pkl',
            pipeline=train_pipeline,
            filter_empty_gt=False,
            classes=class_names,
            # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
            # and box_type_3d='Depth' in sunrgbd and scannet dataset.
            box_type_3d='Depth')),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'scannet_infos_val.pkl',
        pipeline=test_pipeline,
        classes=class_names,
        test_mode=True,
        box_type_3d='Depth'),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'scannet_infos_val.pkl',
        pipeline=test_pipeline,
        classes=class_names,
        test_mode=True,
        box_type_3d='Depth'))
evaluation = dict(pipeline=eval_pipeline)
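PointSegClassMapping compresses the sparse raw ScanNet label space into contiguous train ids. A small sketch of what the mapping above produces, assuming the usual mmdet3d convention that the valid ids become 0..17 in order and every other raw label up to max_cat_id maps to the ignore label len(valid_cat_ids):

import numpy as np

valid_cat_ids = (3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28, 33, 34,
                 36, 39)
max_cat_id = 40
neg_cls = len(valid_cat_ids)                      # 18 = ignore label
cat_id2class = np.full(max_cat_id + 1, neg_cls, dtype=np.int64)
for cls_idx, cat_id in enumerate(valid_cat_ids):
    cat_id2class[cat_id] = cls_idx

raw_seg = np.array([3, 39, 13, 40])               # per-point raw labels
print(cat_id2class[raw_seg])                      # -> [ 0 17 18 18]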
projects/configs/_base_/datasets/scannet_seg-3d-20class.py  (new file, mode 100644)

# dataset settings
dataset_type = 'ScanNetSegDataset'
data_root = './data/scannet/'
class_names = ('wall', 'floor', 'cabinet', 'bed', 'chair', 'sofa', 'table',
               'door', 'window', 'bookshelf', 'picture', 'counter', 'desk',
               'curtain', 'refrigerator', 'showercurtrain', 'toilet', 'sink',
               'bathtub', 'otherfurniture')
num_points = 8192
train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='DEPTH',
        shift_height=False,
        use_color=True,
        load_dim=6,
        use_dim=[0, 1, 2, 3, 4, 5]),
    dict(
        type='LoadAnnotations3D',
        with_bbox_3d=False,
        with_label_3d=False,
        with_mask_3d=False,
        with_seg_3d=True),
    dict(
        type='PointSegClassMapping',
        valid_cat_ids=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28,
                       33, 34, 36, 39),
        max_cat_id=40),
    dict(
        type='IndoorPatchPointSample',
        num_points=num_points,
        block_size=1.5,
        ignore_index=len(class_names),
        use_normalized_coord=False,
        enlarge_size=0.2,
        min_unique_num=None),
    dict(type='NormalizePointsColor', color_mean=None),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
    dict(type='Collect3D', keys=['points', 'pts_semantic_mask'])
]
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='DEPTH',
        shift_height=False,
        use_color=True,
        load_dim=6,
        use_dim=[0, 1, 2, 3, 4, 5]),
    dict(type='NormalizePointsColor', color_mean=None),
    dict(
        # a wrapper in order to successfully call test function
        # actually we don't perform test-time-aug
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='GlobalRotScaleTrans',
                rot_range=[0, 0],
                scale_ratio_range=[1., 1.],
                translation_std=[0, 0, 0]),
            dict(
                type='RandomFlip3D',
                sync_2d=False,
                flip_ratio_bev_horizontal=0.0,
                flip_ratio_bev_vertical=0.0),
            dict(
                type='DefaultFormatBundle3D',
                class_names=class_names,
                with_label=False),
            dict(type='Collect3D', keys=['points'])
        ])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
# we need to load gt seg_mask!
eval_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='DEPTH',
        shift_height=False,
        use_color=True,
        load_dim=6,
        use_dim=[0, 1, 2, 3, 4, 5]),
    dict(
        type='LoadAnnotations3D',
        with_bbox_3d=False,
        with_label_3d=False,
        with_mask_3d=False,
        with_seg_3d=True),
    dict(
        type='PointSegClassMapping',
        valid_cat_ids=(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16, 24, 28,
                       33, 34, 36, 39),
        max_cat_id=40),
    dict(
        type='DefaultFormatBundle3D',
        with_label=False,
        class_names=class_names),
    dict(type='Collect3D', keys=['points', 'pts_semantic_mask'])
]
data = dict(
    samples_per_gpu=8,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'scannet_infos_train.pkl',
        pipeline=train_pipeline,
        classes=class_names,
        test_mode=False,
        ignore_index=len(class_names),
        scene_idxs=data_root + 'seg_info/train_resampled_scene_idxs.npy'),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'scannet_infos_val.pkl',
        pipeline=test_pipeline,
        classes=class_names,
        test_mode=True,
        ignore_index=len(class_names)),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'scannet_infos_val.pkl',
        pipeline=test_pipeline,
        classes=class_names,
        test_mode=True,
        ignore_index=len(class_names)))
evaluation = dict(pipeline=eval_pipeline)
projects/configs/_base_/datasets/sunrgbd-3d-10class.py  (new file, mode 100644)

dataset_type = 'SUNRGBDDataset'
data_root = 'data/sunrgbd/'
class_names = ('bed', 'table', 'sofa', 'chair', 'toilet', 'desk', 'dresser',
               'night_stand', 'bookshelf', 'bathtub')
train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='DEPTH',
        shift_height=True,
        load_dim=6,
        use_dim=[0, 1, 2]),
    dict(type='LoadAnnotations3D'),
    dict(
        type='RandomFlip3D',
        sync_2d=False,
        flip_ratio_bev_horizontal=0.5,
    ),
    dict(
        type='GlobalRotScaleTrans',
        rot_range=[-0.523599, 0.523599],
        scale_ratio_range=[0.85, 1.15],
        shift_height=True),
    dict(type='PointSample', num_points=20000),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='DEPTH',
        shift_height=True,
        load_dim=6,
        use_dim=[0, 1, 2]),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='GlobalRotScaleTrans',
                rot_range=[0, 0],
                scale_ratio_range=[1., 1.],
                translation_std=[0, 0, 0]),
            dict(
                type='RandomFlip3D',
                sync_2d=False,
                flip_ratio_bev_horizontal=0.5,
            ),
            dict(type='PointSample', num_points=20000),
            dict(
                type='DefaultFormatBundle3D',
                class_names=class_names,
                with_label=False),
            dict(type='Collect3D', keys=['points'])
        ])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='DEPTH',
        shift_height=False,
        load_dim=6,
        use_dim=[0, 1, 2]),
    dict(
        type='DefaultFormatBundle3D',
        class_names=class_names,
        with_label=False),
    dict(type='Collect3D', keys=['points'])
]
data = dict(
    samples_per_gpu=16,
    workers_per_gpu=4,
    train=dict(
        type='RepeatDataset',
        times=5,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file=data_root + 'sunrgbd_infos_train.pkl',
            pipeline=train_pipeline,
            classes=class_names,
            filter_empty_gt=False,
            # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
            # and box_type_3d='Depth' in sunrgbd and scannet dataset.
            box_type_3d='Depth')),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'sunrgbd_infos_val.pkl',
        pipeline=test_pipeline,
        classes=class_names,
        test_mode=True,
        box_type_3d='Depth'),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'sunrgbd_infos_val.pkl',
        pipeline=test_pipeline,
        classes=class_names,
        test_mode=True,
        box_type_3d='Depth'))
evaluation = dict(pipeline=eval_pipeline)
projects/configs/_base_/datasets/waymoD5-3d-3class.py  (new file, mode 100644)

# dataset settings
# D5 in the config name means the whole dataset is divided into 5 folds
# We only use one fold for efficient experiments
dataset_type = 'LidarWaymoDataset'
data_root = 'data/waymo-full/kitti_format/'
file_client_args = dict(backend='disk')
# Uncomment the following if use ceph or other file clients.
# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
# for more details.
# file_client_args = dict(
#     backend='petrel', path_mapping=dict(data='s3://waymo_data/'))
class_names = ['Car', 'Pedestrian', 'Cyclist']
point_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4]
input_modality = dict(use_lidar=True, use_camera=False)
db_sampler = dict(
    data_root=data_root,
    info_path=data_root + 'waymo_dbinfos_train.pkl',
    rate=1.0,
    prepare=dict(
        filter_by_difficulty=[-1],
        filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)),
    classes=class_names,
    sample_groups=dict(Car=15, Pedestrian=10, Cyclist=10),
    points_loader=dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=[0, 1, 2, 3, 4],
        file_client_args=file_client_args))
train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=6,
        use_dim=5,
        file_client_args=file_client_args),
    dict(
        type='LoadAnnotations3D',
        with_bbox_3d=True,
        with_label_3d=True,
        file_client_args=file_client_args),
    dict(type='ObjectSample', db_sampler=db_sampler),
    dict(
        type='RandomFlip3D',
        sync_2d=False,
        flip_ratio_bev_horizontal=0.5,
        flip_ratio_bev_vertical=0.5),
    dict(
        type='GlobalRotScaleTrans',
        rot_range=[-0.78539816, 0.78539816],
        scale_ratio_range=[0.95, 1.05]),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='PointShuffle'),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=6,
        use_dim=5,
        file_client_args=file_client_args),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='GlobalRotScaleTrans',
                rot_range=[0, 0],
                scale_ratio_range=[1., 1.],
                translation_std=[0, 0, 0]),
            dict(type='RandomFlip3D'),
            dict(
                type='PointsRangeFilter',
                point_cloud_range=point_cloud_range),
            dict(
                type='DefaultFormatBundle3D',
                class_names=class_names,
                with_label=False),
            dict(type='Collect3D', keys=['points'])
        ])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=6,
        use_dim=5,
        file_client_args=file_client_args),
    dict(
        type='DefaultFormatBundle3D',
        class_names=class_names,
        with_label=False),
    dict(type='Collect3D', keys=['points'])
]
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=4,
    train=dict(
        type='RepeatDataset',
        times=2,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file=data_root + 'waymo_infos_train.pkl',
            split='training',
            pipeline=train_pipeline,
            modality=input_modality,
            classes=class_names,
            test_mode=False,
            # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
            # and box_type_3d='Depth' in sunrgbd and scannet dataset.
            box_type_3d='LiDAR',
            # load one frame every five frames
            load_interval=5)),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'waymo_infos_val.pkl',
        split='training',
        pipeline=test_pipeline,
        modality=input_modality,
        classes=class_names,
        test_mode=True,
        box_type_3d='LiDAR'),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'waymo_infos_val.pkl',
        split='training',
        pipeline=test_pipeline,
        modality=input_modality,
        classes=class_names,
        test_mode=True,
        box_type_3d='LiDAR'))
evaluation = dict(interval=24, pipeline=eval_pipeline)
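Back-of-envelope for the sampling scheme above: load_interval=5 keeps one frame in five (the "D5" fold), and RepeatDataset(times=2) then iterates that subset twice per epoch. The frame count below is an approximate, illustrative figure for the full Waymo training split, not a number from this repo:

total_train_frames = 158_081      # approximate full Waymo training split
kept = total_train_frames // 5    # load_interval=5 -> ~31_616 frames
per_epoch = kept * 2              # RepeatDataset times=2
print(kept, per_epoch)            # 31616 63232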
projects/configs/_base_/datasets/waymoD5-3d-car.py  (new file, mode 100644)

# dataset settings
# D5 in the config name means the whole dataset is divided into 5 folds
# We only use one fold for efficient experiments
dataset_type = 'WaymoDataset'
data_root = 'data/waymo/kitti_format/'
file_client_args = dict(backend='disk')
# Uncomment the following if use ceph or other file clients.
# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
# for more details.
# file_client_args = dict(
#     backend='petrel', path_mapping=dict(data='s3://waymo_data/'))
class_names = ['Car']
point_cloud_range = [-74.88, -74.88, -2, 74.88, 74.88, 4]
input_modality = dict(use_lidar=True, use_camera=False)
db_sampler = dict(
    data_root=data_root,
    info_path=data_root + 'waymo_dbinfos_train.pkl',
    rate=1.0,
    prepare=dict(
        filter_by_difficulty=[-1], filter_by_min_points=dict(Car=5)),
    classes=class_names,
    sample_groups=dict(Car=15),
    points_loader=dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=[0, 1, 2, 3, 4],
        file_client_args=file_client_args))
train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=6,
        use_dim=5,
        file_client_args=file_client_args),
    dict(
        type='LoadAnnotations3D',
        with_bbox_3d=True,
        with_label_3d=True,
        file_client_args=file_client_args),
    dict(type='ObjectSample', db_sampler=db_sampler),
    dict(
        type='RandomFlip3D',
        sync_2d=False,
        flip_ratio_bev_horizontal=0.5,
        flip_ratio_bev_vertical=0.5),
    dict(
        type='GlobalRotScaleTrans',
        rot_range=[-0.78539816, 0.78539816],
        scale_ratio_range=[0.95, 1.05]),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='PointShuffle'),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=6,
        use_dim=5,
        file_client_args=file_client_args),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='GlobalRotScaleTrans',
                rot_range=[0, 0],
                scale_ratio_range=[1., 1.],
                translation_std=[0, 0, 0]),
            dict(type='RandomFlip3D'),
            dict(
                type='PointsRangeFilter',
                point_cloud_range=point_cloud_range),
            dict(
                type='DefaultFormatBundle3D',
                class_names=class_names,
                with_label=False),
            dict(type='Collect3D', keys=['points'])
        ])
]
# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=6,
        use_dim=5,
        file_client_args=file_client_args),
    dict(
        type='DefaultFormatBundle3D',
        class_names=class_names,
        with_label=False),
    dict(type='Collect3D', keys=['points'])
]
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=4,
    train=dict(
        type='RepeatDataset',
        times=2,
        dataset=dict(
            type=dataset_type,
            data_root=data_root,
            ann_file=data_root + 'waymo_infos_train.pkl',
            split='training',
            pipeline=train_pipeline,
            modality=input_modality,
            classes=class_names,
            test_mode=False,
            # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
            # and box_type_3d='Depth' in sunrgbd and scannet dataset.
            box_type_3d='LiDAR',
            # load one frame every five frames
            load_interval=5)),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'waymo_infos_val.pkl',
        split='training',
        pipeline=test_pipeline,
        modality=input_modality,
        classes=class_names,
        test_mode=True,
        box_type_3d='LiDAR'),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'waymo_infos_val.pkl',
        split='training',
        pipeline=test_pipeline,
        modality=input_modality,
        classes=class_names,
        test_mode=True,
        box_type_3d='LiDAR'))
evaluation = dict(interval=24, pipeline=eval_pipeline)
projects/configs/_base_/default_runtime.py  (new file, mode 100644)

checkpoint_config = dict(interval=1)
# yapf:disable push
# By default we use textlogger hook and tensorboard
# For more loggers see
# https://mmcv.readthedocs.io/en/latest/api.html#mmcv.runner.LoggerHook
log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = None
load_from = None
resume_from = None
workflow = [('train', 1)]
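Downstream experiment configs pull these defaults in through the _base_ mechanism and override keys as needed. A minimal sketch of such a child config; the file name and paths here are hypothetical:

# hypothetical child config, e.g. projects/configs/example_exp.py
_base_ = [
    '../_base_/default_runtime.py',
    '../_base_/datasets/scannet-3d-18class.py',
]
# Any key set here shadows the inherited value:
resume_from = 'work_dirs/example_exp/latest.pth'  # path is illustrative
log_config = dict(interval=10)                    # log more frequently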
projects/configs/_base_/models/3dssd.py  (new file, mode 100644)

model = dict(
    type='SSD3DNet',
    backbone=dict(
        type='PointNet2SAMSG',
        in_channels=4,
        num_points=(4096, 512, (256, 256)),
        radii=((0.2, 0.4, 0.8), (0.4, 0.8, 1.6), (1.6, 3.2, 4.8)),
        num_samples=((32, 32, 64), (32, 32, 64), (32, 32, 32)),
        sa_channels=(((16, 16, 32), (16, 16, 32), (32, 32, 64)),
                     ((64, 64, 128), (64, 64, 128), (64, 96, 128)),
                     ((128, 128, 256), (128, 192, 256), (128, 256, 256))),
        aggregation_channels=(64, 128, 256),
        fps_mods=(('D-FPS'), ('FS'), ('F-FPS', 'D-FPS')),
        fps_sample_range_lists=((-1), (-1), (512, -1)),
        norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1),
        sa_cfg=dict(
            type='PointSAModuleMSG',
            pool_mod='max',
            use_xyz=True,
            normalize_xyz=False)),
    bbox_head=dict(
        type='SSD3DHead',
        in_channels=256,
        vote_module_cfg=dict(
            in_channels=256,
            num_points=256,
            gt_per_seed=1,
            conv_channels=(128, ),
            conv_cfg=dict(type='Conv1d'),
            norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1),
            with_res_feat=False,
            vote_xyz_range=(3.0, 3.0, 2.0)),
        vote_aggregation_cfg=dict(
            type='PointSAModuleMSG',
            num_point=256,
            radii=(4.8, 6.4),
            sample_nums=(16, 32),
            mlp_channels=((256, 256, 256, 512), (256, 256, 512, 1024)),
            norm_cfg=dict(type='BN2d', eps=1e-3, momentum=0.1),
            use_xyz=True,
            normalize_xyz=False,
            bias=True),
        pred_layer_cfg=dict(
            in_channels=1536,
            shared_conv_channels=(512, 128),
            cls_conv_channels=(128, ),
            reg_conv_channels=(128, ),
            conv_cfg=dict(type='Conv1d'),
            norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1),
            bias=True),
        conv_cfg=dict(type='Conv1d'),
        norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.1),
        objectness_loss=dict(
            type='CrossEntropyLoss',
            use_sigmoid=True,
            reduction='sum',
            loss_weight=1.0),
        center_loss=dict(
            type='SmoothL1Loss', reduction='sum', loss_weight=1.0),
        dir_class_loss=dict(
            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
        dir_res_loss=dict(
            type='SmoothL1Loss', reduction='sum', loss_weight=1.0),
        size_res_loss=dict(
            type='SmoothL1Loss', reduction='sum', loss_weight=1.0),
        corner_loss=dict(
            type='SmoothL1Loss', reduction='sum', loss_weight=1.0),
        vote_loss=dict(
            type='SmoothL1Loss', reduction='sum', loss_weight=1.0)),
    # model training and testing settings
    train_cfg=dict(
        sample_mod='spec', pos_distance_thr=10.0, expand_dims_length=0.05),
    test_cfg=dict(
        nms_cfg=dict(type='nms', iou_thr=0.1),
        sample_mod='spec',
        score_thr=0.0,
        per_class_proposal=True,
        max_output_num=100))
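The fps_mods above mix three point-sampling strategies: 'D-FPS' (farthest point sampling on Euclidean distance), 'F-FPS' (the same greedy procedure measured in feature space), and 'FS' (a fusion of the two). A minimal NumPy sketch of the D-FPS variant, purely illustrative; the actual op in mmdet3d is a CUDA kernel:

import numpy as np

def d_fps(points, num_samples):
    """Greedily pick points that maximize distance to the chosen set."""
    n = points.shape[0]
    selected = np.zeros(num_samples, dtype=np.int64)  # start from point 0
    dist = np.full(n, np.inf)
    for i in range(1, num_samples):
        diff = points - points[selected[i - 1]]
        dist = np.minimum(dist, (diff ** 2).sum(axis=1))
        selected[i] = dist.argmax()
    return selected

pts = np.random.rand(4096, 3)
idx = d_fps(pts, 512)   # mirrors num_points=(4096, 512, ...) above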
projects/configs/_base_/models/cascade_mask_rcnn_r50_fpn.py  (new file, mode 100644)

# model settings
model = dict(
    type='CascadeRCNN',
    pretrained='torchvision://resnet50',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[8],
            ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0)),
    roi_head=dict(
        type='CascadeRoIHead',
        num_stages=3,
        stage_loss_weights=[1, 0.5, 0.25],
        bbox_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        bbox_head=[
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=80,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.1, 0.1, 0.2, 0.2]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=80,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.05, 0.05, 0.1, 0.1]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0)),
            dict(
                type='Shared2FCBBoxHead',
                in_channels=256,
                fc_out_channels=1024,
                roi_feat_size=7,
                num_classes=80,
                bbox_coder=dict(
                    type='DeltaXYWHBBoxCoder',
                    target_means=[0., 0., 0., 0.],
                    target_stds=[0.033, 0.033, 0.067, 0.067]),
                reg_class_agnostic=True,
                loss_cls=dict(
                    type='CrossEntropyLoss',
                    use_sigmoid=False,
                    loss_weight=1.0),
                loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                               loss_weight=1.0))
        ],
        mask_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        mask_head=dict(
            type='FCNMaskHead',
            num_convs=4,
            in_channels=256,
            conv_out_channels=256,
            num_classes=80,
            loss_mask=dict(
                type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
    # model training and testing settings
    train_cfg=dict(
        rpn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                match_low_quality=True,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False),
            allowed_border=0,
            pos_weight=-1,
            debug=False),
        rpn_proposal=dict(
            nms_across_levels=False,
            nms_pre=2000,
            nms_post=2000,
            max_num=2000,
            nms_thr=0.7,
            min_bbox_size=0),
        rcnn=[
            dict(
                assigner=dict(
                    type='MaxIoUAssigner',
                    pos_iou_thr=0.5,
                    neg_iou_thr=0.5,
                    min_pos_iou=0.5,
                    match_low_quality=False,
                    ignore_iof_thr=-1),
                sampler=dict(
                    type='RandomSampler',
                    num=512,
                    pos_fraction=0.25,
                    neg_pos_ub=-1,
                    add_gt_as_proposals=True),
                mask_size=28,
                pos_weight=-1,
                debug=False),
            dict(
                assigner=dict(
                    type='MaxIoUAssigner',
                    pos_iou_thr=0.6,
                    neg_iou_thr=0.6,
                    min_pos_iou=0.6,
                    match_low_quality=False,
                    ignore_iof_thr=-1),
                sampler=dict(
                    type='RandomSampler',
                    num=512,
                    pos_fraction=0.25,
                    neg_pos_ub=-1,
                    add_gt_as_proposals=True),
                mask_size=28,
                pos_weight=-1,
                debug=False),
            dict(
                assigner=dict(
                    type='MaxIoUAssigner',
                    pos_iou_thr=0.7,
                    neg_iou_thr=0.7,
                    min_pos_iou=0.7,
                    match_low_quality=False,
                    ignore_iof_thr=-1),
                sampler=dict(
                    type='RandomSampler',
                    num=512,
                    pos_fraction=0.25,
                    neg_pos_ub=-1,
                    add_gt_as_proposals=True),
                mask_size=28,
                pos_weight=-1,
                debug=False)
        ]),
    test_cfg=dict(
        rpn=dict(
            nms_across_levels=False,
            nms_pre=1000,
            nms_post=1000,
            max_num=1000,
            nms_thr=0.7,
            min_bbox_size=0),
        rcnn=dict(
            score_thr=0.05,
            nms=dict(type='nms', iou_threshold=0.5),
            max_per_img=100,
            mask_thr_binary=0.5)))
projects/configs/_base_/models/centerpoint_01voxel_second_secfpn_nus.py  (new file, mode 100644)

voxel_size = [0.1, 0.1, 0.2]
model = dict(
    type='CenterPoint',
    pts_voxel_layer=dict(
        max_num_points=10, voxel_size=voxel_size, max_voxels=(90000, 120000)),
    pts_voxel_encoder=dict(type='HardSimpleVFE', num_features=5),
    pts_middle_encoder=dict(
        type='SparseEncoder',
        in_channels=5,
        sparse_shape=[41, 1024, 1024],
        output_channels=128,
        order=('conv', 'norm', 'act'),
        encoder_channels=((16, 16, 32), (32, 32, 64), (64, 64, 128),
                          (128, 128)),
        encoder_paddings=((0, 0, 1), (0, 0, 1), (0, 0, [0, 1, 1]), (0, 0)),
        block_type='basicblock'),
    pts_backbone=dict(
        type='SECOND',
        in_channels=256,
        out_channels=[128, 256],
        layer_nums=[5, 5],
        layer_strides=[1, 2],
        norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
        conv_cfg=dict(type='Conv2d', bias=False)),
    pts_neck=dict(
        type='SECONDFPN',
        in_channels=[128, 256],
        out_channels=[256, 256],
        upsample_strides=[1, 2],
        norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
        upsample_cfg=dict(type='deconv', bias=False),
        use_conv_for_no_stride=True),
    pts_bbox_head=dict(
        type='CenterHead',
        in_channels=sum([256, 256]),
        tasks=[
            dict(num_class=1, class_names=['car']),
            dict(num_class=2, class_names=['truck', 'construction_vehicle']),
            dict(num_class=2, class_names=['bus', 'trailer']),
            dict(num_class=1, class_names=['barrier']),
            dict(num_class=2, class_names=['motorcycle', 'bicycle']),
            dict(num_class=2, class_names=['pedestrian', 'traffic_cone']),
        ],
        common_heads=dict(
            reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)),
        share_conv_channel=64,
        bbox_coder=dict(
            type='CenterPointBBoxCoder',
            post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
            max_num=500,
            score_threshold=0.1,
            out_size_factor=8,
            voxel_size=voxel_size[:2],
            code_size=9),
        separate_head=dict(
            type='SeparateHead', init_bias=-2.19, final_kernel=3),
        loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),
        loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25),
        norm_bbox=True),
    # model training and testing settings
    train_cfg=dict(
        pts=dict(
            grid_size=[1024, 1024, 40],
            voxel_size=voxel_size,
            out_size_factor=8,
            dense_reg=1,
            gaussian_overlap=0.1,
            max_objs=500,
            min_radius=2,
            code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2,
                          0.2])),
    test_cfg=dict(
        pts=dict(
            post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
            max_per_img=500,
            max_pool_nms=False,
            min_radius=[4, 12, 10, 1, 0.85, 0.175],
            score_threshold=0.1,
            out_size_factor=8,
            voxel_size=voxel_size[:2],
            nms_type='rotate',
            pre_max_size=1000,
            post_max_size=83,
            nms_thr=0.2)))
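A quick consistency check on the shapes above, assuming the nuScenes point cloud range [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0] that these CenterPoint bases are usually paired with (the range itself lives in the dataset/experiment config, not in this file):

voxel_size = [0.1, 0.1, 0.2]
pc_range = [-51.2, -51.2, -5.0, 51.2, 51.2, 3.0]   # assumed, see note above
grid = [round((pc_range[i + 3] - pc_range[i]) / voxel_size[i])
        for i in range(3)]
print(grid)          # [1024, 1024, 40] -> matches grid_size; sparse_shape
                     # pads z by one voxel, hence [41, 1024, 1024]
print(grid[0] // 8)  # 128: BEV feature-map width after out_size_factor=8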
projects/configs/_base_/models/centerpoint_02pillar_second_secfpn_nus.py  (new file, mode 100644)

voxel_size = [0.2, 0.2, 8]
model = dict(
    type='CenterPoint',
    pts_voxel_layer=dict(
        max_num_points=20, voxel_size=voxel_size, max_voxels=(30000, 40000)),
    pts_voxel_encoder=dict(
        type='PillarFeatureNet',
        in_channels=5,
        feat_channels=[64],
        with_distance=False,
        voxel_size=(0.2, 0.2, 8),
        norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
        legacy=False),
    pts_middle_encoder=dict(
        type='PointPillarsScatter', in_channels=64, output_shape=(512, 512)),
    pts_backbone=dict(
        type='SECOND',
        in_channels=64,
        out_channels=[64, 128, 256],
        layer_nums=[3, 5, 5],
        layer_strides=[2, 2, 2],
        norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
        conv_cfg=dict(type='Conv2d', bias=False)),
    pts_neck=dict(
        type='SECONDFPN',
        in_channels=[64, 128, 256],
        out_channels=[128, 128, 128],
        upsample_strides=[0.5, 1, 2],
        norm_cfg=dict(type='BN', eps=1e-3, momentum=0.01),
        upsample_cfg=dict(type='deconv', bias=False),
        use_conv_for_no_stride=True),
    pts_bbox_head=dict(
        type='CenterHead',
        in_channels=sum([128, 128, 128]),
        tasks=[
            dict(num_class=1, class_names=['car']),
            dict(num_class=2, class_names=['truck', 'construction_vehicle']),
            dict(num_class=2, class_names=['bus', 'trailer']),
            dict(num_class=1, class_names=['barrier']),
            dict(num_class=2, class_names=['motorcycle', 'bicycle']),
            dict(num_class=2, class_names=['pedestrian', 'traffic_cone']),
        ],
        common_heads=dict(
            reg=(2, 2), height=(1, 2), dim=(3, 2), rot=(2, 2), vel=(2, 2)),
        share_conv_channel=64,
        bbox_coder=dict(
            type='CenterPointBBoxCoder',
            post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
            max_num=500,
            score_threshold=0.1,
            out_size_factor=4,
            voxel_size=voxel_size[:2],
            code_size=9),
        separate_head=dict(
            type='SeparateHead', init_bias=-2.19, final_kernel=3),
        loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),
        loss_bbox=dict(type='L1Loss', reduction='mean', loss_weight=0.25),
        norm_bbox=True),
    # model training and testing settings
    train_cfg=dict(
        pts=dict(
            grid_size=[512, 512, 1],
            voxel_size=voxel_size,
            out_size_factor=4,
            dense_reg=1,
            gaussian_overlap=0.1,
            max_objs=500,
            min_radius=2,
            code_weights=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2,
                          0.2])),
    test_cfg=dict(
        pts=dict(
            post_center_limit_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
            max_per_img=500,
            max_pool_nms=False,
            min_radius=[4, 12, 10, 1, 0.85, 0.175],
            score_threshold=0.1,
            pc_range=[-51.2, -51.2],
            out_size_factor=4,
            voxel_size=voxel_size[:2],
            nms_type='rotate',
            pre_max_size=1000,
            post_max_size=83,
            nms_thr=0.2)))
projects/configs/_base_/models/fcos3d.py  (new file, mode 100644)

model = dict(
    type='FCOSMono3D',
    pretrained='open-mmlab://detectron2/resnet101_caffe',
    backbone=dict(
        type='ResNet',
        depth=101,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=False),
        norm_eval=True,
        style='caffe'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        start_level=1,
        add_extra_convs='on_output',
        num_outs=5,
        relu_before_extra_convs=True),
    bbox_head=dict(
        type='FCOSMono3DHead',
        num_classes=10,
        in_channels=256,
        stacked_convs=2,
        feat_channels=256,
        use_direction_classifier=True,
        diff_rad_by_sin=True,
        pred_attrs=True,
        pred_velo=True,
        dir_offset=0.7854,  # pi/4
        strides=[8, 16, 32, 64, 128],
        group_reg_dims=(2, 1, 3, 1, 2),  # offset, depth, size, rot, velo
        cls_branch=(256, ),
        reg_branch=(
            (256, ),  # offset
            (256, ),  # depth
            (256, ),  # size
            (256, ),  # rot
            ()  # velo
        ),
        dir_branch=(256, ),
        attr_branch=(256, ),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
        loss_dir=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
        loss_attr=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
        loss_centerness=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        norm_on_bbox=True,
        centerness_on_reg=True,
        center_sampling=True,
        conv_bias=True,
        dcn_on_last_conv=True),
    train_cfg=dict(
        allowed_border=0,
        code_weight=[1.0, 1.0, 0.2, 1.0, 1.0, 1.0, 1.0, 0.05, 0.05],
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        use_rotate_nms=True,
        nms_across_levels=False,
        nms_pre=1000,
        nms_thr=0.8,
        score_thr=0.05,
        min_bbox_size=0,
        max_per_img=200))
projects/configs/_base_/models/groupfree3d.py  (new file, mode 100644)

model = dict(
    type='GroupFree3DNet',
    backbone=dict(
        type='PointNet2SASSG',
        in_channels=3,
        num_points=(2048, 1024, 512, 256),
        radius=(0.2, 0.4, 0.8, 1.2),
        num_samples=(64, 32, 16, 16),
        sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),
                     (128, 128, 256)),
        fp_channels=((256, 256), (256, 288)),
        norm_cfg=dict(type='BN2d'),
        sa_cfg=dict(
            type='PointSAModule',
            pool_mod='max',
            use_xyz=True,
            normalize_xyz=True)),
    bbox_head=dict(
        type='GroupFree3DHead',
        in_channels=288,
        num_decoder_layers=6,
        num_proposal=256,
        transformerlayers=dict(
            type='BaseTransformerLayer',
            attn_cfgs=dict(
                type='GroupFree3DMHA',
                embed_dims=288,
                num_heads=8,
                attn_drop=0.1,
                dropout_layer=dict(type='Dropout', drop_prob=0.1)),
            ffn_cfgs=dict(
                embed_dims=288,
                feedforward_channels=2048,
                ffn_drop=0.1,
                act_cfg=dict(type='ReLU', inplace=True)),
            operation_order=('self_attn', 'norm', 'cross_attn', 'norm',
                             'ffn', 'norm')),
        pred_layer_cfg=dict(
            in_channels=288, shared_conv_channels=(288, 288), bias=True),
        sampling_objectness_loss=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=8.0),
        objectness_loss=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        center_loss=dict(
            type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
        dir_class_loss=dict(
            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
        dir_res_loss=dict(
            type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
        size_class_loss=dict(
            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
        size_res_loss=dict(
            type='SmoothL1Loss', beta=1.0, reduction='sum',
            loss_weight=10.0),
        semantic_loss=dict(
            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
    # model training and testing settings
    train_cfg=dict(sample_mod='kps'),
    test_cfg=dict(
        sample_mod='kps',
        nms_thr=0.25,
        score_thr=0.0,
        per_class_proposal=True,
        prediction_stages='last'))
projects/configs/_base_/models/h3dnet.py  (new file, mode 100644)

primitive_z_cfg = dict(
    type='PrimitiveHead',
    num_dims=2,
    num_classes=18,
    primitive_mode='z',
    upper_thresh=100.0,
    surface_thresh=0.5,
    vote_module_cfg=dict(
        in_channels=256,
        vote_per_seed=1,
        gt_per_seed=1,
        conv_channels=(256, 256),
        conv_cfg=dict(type='Conv1d'),
        norm_cfg=dict(type='BN1d'),
        norm_feats=True,
        vote_loss=dict(
            type='ChamferDistance',
            mode='l1',
            reduction='none',
            loss_dst_weight=10.0)),
    vote_aggregation_cfg=dict(
        type='PointSAModule',
        num_point=1024,
        radius=0.3,
        num_sample=16,
        mlp_channels=[256, 128, 128, 128],
        use_xyz=True,
        normalize_xyz=True),
    feat_channels=(128, 128),
    conv_cfg=dict(type='Conv1d'),
    norm_cfg=dict(type='BN1d'),
    objectness_loss=dict(
        type='CrossEntropyLoss',
        class_weight=[0.4, 0.6],
        reduction='mean',
        loss_weight=30.0),
    center_loss=dict(
        type='ChamferDistance',
        mode='l1',
        reduction='sum',
        loss_src_weight=0.5,
        loss_dst_weight=0.5),
    semantic_reg_loss=dict(
        type='ChamferDistance',
        mode='l1',
        reduction='sum',
        loss_src_weight=0.5,
        loss_dst_weight=0.5),
    semantic_cls_loss=dict(
        type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
    train_cfg=dict(
        dist_thresh=0.2,
        var_thresh=1e-2,
        lower_thresh=1e-6,
        num_point=100,
        num_point_line=10,
        line_thresh=0.2))
primitive_xy_cfg = dict(
    type='PrimitiveHead',
    num_dims=1,
    num_classes=18,
    primitive_mode='xy',
    upper_thresh=100.0,
    surface_thresh=0.5,
    vote_module_cfg=dict(
        in_channels=256,
        vote_per_seed=1,
        gt_per_seed=1,
        conv_channels=(256, 256),
        conv_cfg=dict(type='Conv1d'),
        norm_cfg=dict(type='BN1d'),
        norm_feats=True,
        vote_loss=dict(
            type='ChamferDistance',
            mode='l1',
            reduction='none',
            loss_dst_weight=10.0)),
    vote_aggregation_cfg=dict(
        type='PointSAModule',
        num_point=1024,
        radius=0.3,
        num_sample=16,
        mlp_channels=[256, 128, 128, 128],
        use_xyz=True,
        normalize_xyz=True),
    feat_channels=(128, 128),
    conv_cfg=dict(type='Conv1d'),
    norm_cfg=dict(type='BN1d'),
    objectness_loss=dict(
        type='CrossEntropyLoss',
        class_weight=[0.4, 0.6],
        reduction='mean',
        loss_weight=30.0),
    center_loss=dict(
        type='ChamferDistance',
        mode='l1',
        reduction='sum',
        loss_src_weight=0.5,
        loss_dst_weight=0.5),
    semantic_reg_loss=dict(
        type='ChamferDistance',
        mode='l1',
        reduction='sum',
        loss_src_weight=0.5,
        loss_dst_weight=0.5),
    semantic_cls_loss=dict(
        type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
    train_cfg=dict(
        dist_thresh=0.2,
        var_thresh=1e-2,
        lower_thresh=1e-6,
        num_point=100,
        num_point_line=10,
        line_thresh=0.2))
primitive_line_cfg = dict(
    type='PrimitiveHead',
    num_dims=0,
    num_classes=18,
    primitive_mode='line',
    upper_thresh=100.0,
    surface_thresh=0.5,
    vote_module_cfg=dict(
        in_channels=256,
        vote_per_seed=1,
        gt_per_seed=1,
        conv_channels=(256, 256),
        conv_cfg=dict(type='Conv1d'),
        norm_cfg=dict(type='BN1d'),
        norm_feats=True,
        vote_loss=dict(
            type='ChamferDistance',
            mode='l1',
            reduction='none',
            loss_dst_weight=10.0)),
    vote_aggregation_cfg=dict(
        type='PointSAModule',
        num_point=1024,
        radius=0.3,
        num_sample=16,
        mlp_channels=[256, 128, 128, 128],
        use_xyz=True,
        normalize_xyz=True),
    feat_channels=(128, 128),
    conv_cfg=dict(type='Conv1d'),
    norm_cfg=dict(type='BN1d'),
    objectness_loss=dict(
        type='CrossEntropyLoss',
        class_weight=[0.4, 0.6],
        reduction='mean',
        loss_weight=30.0),
    center_loss=dict(
        type='ChamferDistance',
        mode='l1',
        reduction='sum',
        loss_src_weight=1.0,
        loss_dst_weight=1.0),
    semantic_reg_loss=dict(
        type='ChamferDistance',
        mode='l1',
        reduction='sum',
        loss_src_weight=1.0,
        loss_dst_weight=1.0),
    semantic_cls_loss=dict(
        type='CrossEntropyLoss', reduction='sum', loss_weight=2.0),
    train_cfg=dict(
        dist_thresh=0.2,
        var_thresh=1e-2,
        lower_thresh=1e-6,
        num_point=100,
        num_point_line=10,
        line_thresh=0.2))
model = dict(
    type='H3DNet',
    backbone=dict(
        type='MultiBackbone',
        num_streams=4,
        suffixes=['net0', 'net1', 'net2', 'net3'],
        conv_cfg=dict(type='Conv1d'),
        norm_cfg=dict(type='BN1d', eps=1e-5, momentum=0.01),
        act_cfg=dict(type='ReLU'),
        backbones=dict(
            type='PointNet2SASSG',
            in_channels=4,
            num_points=(2048, 1024, 512, 256),
            radius=(0.2, 0.4, 0.8, 1.2),
            num_samples=(64, 32, 16, 16),
            sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),
                         (128, 128, 256)),
            fp_channels=((256, 256), (256, 256)),
            norm_cfg=dict(type='BN2d'),
            sa_cfg=dict(
                type='PointSAModule',
                pool_mod='max',
                use_xyz=True,
                normalize_xyz=True))),
    rpn_head=dict(
        type='VoteHead',
        vote_module_cfg=dict(
            in_channels=256,
            vote_per_seed=1,
            gt_per_seed=3,
            conv_channels=(256, 256),
            conv_cfg=dict(type='Conv1d'),
            norm_cfg=dict(type='BN1d'),
            norm_feats=True,
            vote_loss=dict(
                type='ChamferDistance',
                mode='l1',
                reduction='none',
                loss_dst_weight=10.0)),
        vote_aggregation_cfg=dict(
            type='PointSAModule',
            num_point=256,
            radius=0.3,
            num_sample=16,
            mlp_channels=[256, 128, 128, 128],
            use_xyz=True,
            normalize_xyz=True),
        pred_layer_cfg=dict(
            in_channels=128, shared_conv_channels=(128, 128), bias=True),
        conv_cfg=dict(type='Conv1d'),
        norm_cfg=dict(type='BN1d'),
        objectness_loss=dict(
            type='CrossEntropyLoss',
            class_weight=[0.2, 0.8],
            reduction='sum',
            loss_weight=5.0),
        center_loss=dict(
            type='ChamferDistance',
            mode='l2',
            reduction='sum',
            loss_src_weight=10.0,
            loss_dst_weight=10.0),
        dir_class_loss=dict(
            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
        dir_res_loss=dict(
            type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
        size_class_loss=dict(
            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
        size_res_loss=dict(
            type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
        semantic_loss=dict(
            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
    roi_head=dict(
        type='H3DRoIHead',
        primitive_list=[primitive_z_cfg, primitive_xy_cfg,
                        primitive_line_cfg],
        bbox_head=dict(
            type='H3DBboxHead',
            gt_per_seed=3,
            num_proposal=256,
            suface_matching_cfg=dict(
                type='PointSAModule',
                num_point=256 * 6,
                radius=0.5,
                num_sample=32,
                mlp_channels=[128 + 6, 128, 64, 32],
                use_xyz=True,
                normalize_xyz=True),
            line_matching_cfg=dict(
                type='PointSAModule',
                num_point=256 * 12,
                radius=0.5,
                num_sample=32,
                mlp_channels=[128 + 12, 128, 64, 32],
                use_xyz=True,
                normalize_xyz=True),
            feat_channels=(128, 128),
            primitive_refine_channels=[128, 128, 128],
            upper_thresh=100.0,
            surface_thresh=0.5,
            line_thresh=0.5,
            conv_cfg=dict(type='Conv1d'),
            norm_cfg=dict(type='BN1d'),
            objectness_loss=dict(
                type='CrossEntropyLoss',
                class_weight=[0.2, 0.8],
                reduction='sum',
                loss_weight=5.0),
            center_loss=dict(
                type='ChamferDistance',
                mode='l2',
                reduction='sum',
                loss_src_weight=10.0,
                loss_dst_weight=10.0),
            dir_class_loss=dict(
                type='CrossEntropyLoss', reduction='sum', loss_weight=0.1),
            dir_res_loss=dict(
                type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
            size_class_loss=dict(
                type='CrossEntropyLoss', reduction='sum', loss_weight=0.1),
            size_res_loss=dict(
                type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
            semantic_loss=dict(
                type='CrossEntropyLoss', reduction='sum', loss_weight=0.1),
            cues_objectness_loss=dict(
                type='CrossEntropyLoss',
                class_weight=[0.3, 0.7],
                reduction='mean',
                loss_weight=5.0),
            cues_semantic_loss=dict(
                type='CrossEntropyLoss',
                class_weight=[0.3, 0.7],
                reduction='mean',
                loss_weight=5.0),
            proposal_objectness_loss=dict(
                type='CrossEntropyLoss',
                class_weight=[0.2, 0.8],
                reduction='none',
                loss_weight=5.0),
            primitive_center_loss=dict(
                type='MSELoss', reduction='none', loss_weight=1.0))),
    # model training and testing settings
    train_cfg=dict(
        rpn=dict(
            pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote'),
        rpn_proposal=dict(use_nms=False),
        rcnn=dict(
            pos_distance_thr=0.3,
            neg_distance_thr=0.6,
            sample_mod='vote',
            far_threshold=0.6,
            near_threshold=0.3,
            mask_surface_threshold=0.3,
            label_surface_threshold=0.3,
            mask_line_threshold=0.3,
            label_line_threshold=0.3)),
    test_cfg=dict(
        rpn=dict(
            sample_mod='seed',
            nms_thr=0.25,
            score_thr=0.05,
            per_class_proposal=True,
            use_nms=False),
        rcnn=dict(
            sample_mod='seed',
            nms_thr=0.25,
            score_thr=0.05,
            per_class_proposal=True)))
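Why num_point is 256 * 6 and 256 * 12 in the matching configs above: H3DNet refines each of the 256 box proposals by matching against geometric primitives of its box, and a cuboid has 6 faces and 12 edges, so the matching modules sample one point per primitive:

num_proposal = 256
faces_per_box, edges_per_box = 6, 12
print(num_proposal * faces_per_box)   # 1536 points for suface_matching_cfg
print(num_proposal * edges_per_box)   # 3072 points for line_matching_cfg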
projects/configs/_base_/models/hv_pointpillars_fpn_lyft.py  (new file, mode 100644)

_base_ = './hv_pointpillars_fpn_nus.py'
# model settings (based on nuScenes model settings)
# Voxel size for voxel encoder
# Usually voxel size is changed consistently with the point cloud range
# If point cloud range is modified, do remember to change all related
# keys in the config.
model = dict(
    pts_voxel_layer=dict(
        max_num_points=20,
        point_cloud_range=[-80, -80, -5, 80, 80, 3],
        max_voxels=(60000, 60000)),
    pts_voxel_encoder=dict(
        feat_channels=[64], point_cloud_range=[-80, -80, -5, 80, 80, 3]),
    pts_middle_encoder=dict(output_shape=[640, 640]),
    pts_bbox_head=dict(
        num_classes=9,
        anchor_generator=dict(
            ranges=[[-80, -80, -1.8, 80, 80, -1.8]], custom_values=[]),
        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)),
    # model training settings (based on nuScenes model settings)
    train_cfg=dict(
        pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])))
projects/configs/_base_/models/hv_pointpillars_fpn_nus.py
0 → 100644
View file @
4cd43886
# model settings
# Voxel size for voxel encoder
# Usually voxel size is changed consistently with the point cloud range
# If point cloud range is modified, do remember to change all related
# keys in the config.
voxel_size = [0.25, 0.25, 8]
model = dict(
    type='MVXFasterRCNN',
    pts_voxel_layer=dict(
        max_num_points=64,
        point_cloud_range=[-50, -50, -5, 50, 50, 3],
        voxel_size=voxel_size,
        max_voxels=(30000, 40000)),
    pts_voxel_encoder=dict(
        type='HardVFE',
        in_channels=4,
        feat_channels=[64, 64],
        with_distance=False,
        voxel_size=voxel_size,
        with_cluster_center=True,
        with_voxel_center=True,
        point_cloud_range=[-50, -50, -5, 50, 50, 3],
        norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)),
    pts_middle_encoder=dict(
        type='PointPillarsScatter', in_channels=64, output_shape=[400, 400]),
    pts_backbone=dict(
        type='SECOND',
        in_channels=64,
        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
        layer_nums=[3, 5, 5],
        layer_strides=[2, 2, 2],
        out_channels=[64, 128, 256]),
    pts_neck=dict(
        type='FPN',
        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
        act_cfg=dict(type='ReLU'),
        in_channels=[64, 128, 256],
        out_channels=256,
        start_level=0,
        num_outs=3),
    pts_bbox_head=dict(
        type='Anchor3DHead',
        num_classes=10,
        in_channels=256,
        feat_channels=256,
        use_direction_classifier=True,
        anchor_generator=dict(
            type='AlignedAnchor3DRangeGenerator',
            ranges=[[-50, -50, -1.8, 50, 50, -1.8]],
            scales=[1, 2, 4],
            sizes=[
                [0.8660, 2.5981, 1.],  # 1.5/sqrt(3)
                [0.5774, 1.7321, 1.],  # 1/sqrt(3)
                [1., 1., 1.],
                [0.4, 0.4, 1],
            ],
            custom_values=[0, 0],
            rotations=[0, 1.57],
            reshape_out=True),
        assigner_per_size=False,
        diff_rad_by_sin=True,
        dir_offset=0.7854,  # pi/4
        dir_limit_offset=0,
        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
        loss_dir=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
    # model training and testing settings
    train_cfg=dict(
        pts=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                iou_calculator=dict(type='BboxOverlapsNearest3D'),
                pos_iou_thr=0.6,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                ignore_iof_thr=-1),
            allowed_border=0,
            code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],
            pos_weight=-1,
            debug=False)),
    test_cfg=dict(
        pts=dict(
            use_rotate_nms=True,
            nms_across_levels=False,
            nms_pre=1000,
            nms_thr=0.2,
            score_thr=0.05,
            min_bbox_size=0,
            max_num=500)))
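Note (illustrative sketch, not part of the commit): code_size=9 covers the 7 standard box parameters (x, y, z, w, l, h, yaw) plus the two custom_values reserved for nuScenes box velocities, which is why code_weight carries 9 entries with the velocity terms down-weighted to 0.2. A minimal sketch of how downstream configs consume this file, assuming mmcv is installed and the repo root is the working directory:

from mmcv import Config

# _base_ inheritance resolves this file into a plain nested config object.
cfg = Config.fromfile(
    'projects/configs/_base_/models/hv_pointpillars_fpn_nus.py')
assert cfg.model.pts_bbox_head.bbox_coder.code_size == 9
assert len(cfg.model.train_cfg.pts.code_weight) == 9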
projects/configs/_base_/models/hv_pointpillars_fpn_range100_lyft.py
0 → 100644
View file @ 4cd43886
_base_ = './hv_pointpillars_fpn_nus.py'
# model settings (based on nuScenes model settings)
# Voxel size for voxel encoder
# Usually voxel size is changed consistently with the point cloud range
# If point cloud range is modified, do remember to change all related
# keys in the config.
model = dict(
    pts_voxel_layer=dict(
        max_num_points=20,
        point_cloud_range=[-100, -100, -5, 100, 100, 3],
        max_voxels=(60000, 60000)),
    pts_voxel_encoder=dict(
        feat_channels=[64], point_cloud_range=[-100, -100, -5, 100, 100, 3]),
    pts_middle_encoder=dict(output_shape=[800, 800]),
    pts_bbox_head=dict(
        num_classes=9,
        anchor_generator=dict(
            ranges=[[-100, -100, -1.8, 100, 100, -1.8]], custom_values=[]),
        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7)),
    # model training settings (based on nuScenes model settings)
    train_cfg=dict(pts=dict(code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0])))
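Note (illustrative sketch, not part of the commit): with the 0.25 m pillar size inherited from the nuScenes base, widening the half-range from 50 m to 100 m doubles the BEV grid, hence output_shape=[800, 800]:

# Grid cells per side = full range extent / pillar size.
for half_range, expected in [(50, 400), (100, 800)]:
    assert round(2 * half_range / 0.25) == expected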
projects/configs/_base_/models/hv_pointpillars_secfpn_kitti.py
0 → 100644
View file @ 4cd43886
voxel_size = [0.16, 0.16, 4]

model = dict(
    type='VoxelNet',
    voxel_layer=dict(
        max_num_points=32,  # max_points_per_voxel
        point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1],
        voxel_size=voxel_size,
        max_voxels=(16000, 40000)  # (training, testing) max_voxels
    ),
    voxel_encoder=dict(
        type='PillarFeatureNet',
        in_channels=4,
        feat_channels=[64],
        with_distance=False,
        voxel_size=voxel_size,
        point_cloud_range=[0, -39.68, -3, 69.12, 39.68, 1]),
    middle_encoder=dict(
        type='PointPillarsScatter', in_channels=64, output_shape=[496, 432]),
    backbone=dict(
        type='SECOND',
        in_channels=64,
        layer_nums=[3, 5, 5],
        layer_strides=[2, 2, 2],
        out_channels=[64, 128, 256]),
    neck=dict(
        type='SECONDFPN',
        in_channels=[64, 128, 256],
        upsample_strides=[1, 2, 4],
        out_channels=[128, 128, 128]),
    bbox_head=dict(
        type='Anchor3DHead',
        num_classes=3,
        in_channels=384,
        feat_channels=384,
        use_direction_classifier=True,
        anchor_generator=dict(
            type='Anchor3DRangeGenerator',
            ranges=[
                [0, -39.68, -0.6, 70.4, 39.68, -0.6],
                [0, -39.68, -0.6, 70.4, 39.68, -0.6],
                [0, -39.68, -1.78, 70.4, 39.68, -1.78],
            ],
            sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
            rotations=[0, 1.57],
            reshape_out=False),
        diff_rad_by_sin=True,
        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
        loss_dir=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
    # model training and testing settings
    train_cfg=dict(
        assigner=[
            dict(  # for Pedestrian
                type='MaxIoUAssigner',
                iou_calculator=dict(type='BboxOverlapsNearest3D'),
                pos_iou_thr=0.5,
                neg_iou_thr=0.35,
                min_pos_iou=0.35,
                ignore_iof_thr=-1),
            dict(  # for Cyclist
                type='MaxIoUAssigner',
                iou_calculator=dict(type='BboxOverlapsNearest3D'),
                pos_iou_thr=0.5,
                neg_iou_thr=0.35,
                min_pos_iou=0.35,
                ignore_iof_thr=-1),
            dict(  # for Car
                type='MaxIoUAssigner',
                iou_calculator=dict(type='BboxOverlapsNearest3D'),
                pos_iou_thr=0.6,
                neg_iou_thr=0.45,
                min_pos_iou=0.45,
                ignore_iof_thr=-1),
        ],
        allowed_border=0,
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        use_rotate_nms=True,
        nms_across_levels=False,
        nms_thr=0.01,
        score_thr=0.1,
        min_bbox_size=0,
        nms_pre=100,
        max_num=50))
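Note (illustrative sketch, not part of the commit): PointPillarsScatter takes output_shape as [H, W], i.e. [y, x] cells, and the head's in_channels is the concatenation of the three SECONDFPN outputs. A minimal sanity check:

pc_range = [0, -39.68, -3, 69.12, 39.68, 1]
ny = round((pc_range[4] - pc_range[1]) / 0.16)  # (39.68 + 39.68) / 0.16 = 496
nx = round((pc_range[3] - pc_range[0]) / 0.16)  # 69.12 / 0.16 = 432
assert [ny, nx] == [496, 432]
assert sum([128, 128, 128]) == 384  # neck outputs concatenated for the head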
projects/configs/_base_/models/hv_pointpillars_secfpn_waymo.py
0 → 100644
View file @ 4cd43886
# model settings
# Voxel size for voxel encoder
# Usually voxel size is changed consistently with the point cloud range
# If point cloud range is modified, do remember to change all related
# keys in the config.
voxel_size = [0.32, 0.32, 6]
model = dict(
    type='MVXFasterRCNN',
    pts_voxel_layer=dict(
        max_num_points=20,
        point_cloud_range=[-74.88, -74.88, -2, 74.88, 74.88, 4],
        voxel_size=voxel_size,
        max_voxels=(32000, 32000)),
    pts_voxel_encoder=dict(
        type='HardVFE',
        in_channels=5,
        feat_channels=[64],
        with_distance=False,
        voxel_size=voxel_size,
        with_cluster_center=True,
        with_voxel_center=True,
        point_cloud_range=[-74.88, -74.88, -2, 74.88, 74.88, 4],
        norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)),
    pts_middle_encoder=dict(
        type='PointPillarsScatter', in_channels=64, output_shape=[468, 468]),
    pts_backbone=dict(
        type='SECOND',
        in_channels=64,
        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
        layer_nums=[3, 5, 5],
        layer_strides=[1, 2, 2],
        out_channels=[64, 128, 256]),
    pts_neck=dict(
        type='SECONDFPN',
        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
        in_channels=[64, 128, 256],
        upsample_strides=[1, 2, 4],
        out_channels=[128, 128, 128]),
    pts_bbox_head=dict(
        type='Anchor3DHead',
        num_classes=3,
        in_channels=384,
        feat_channels=384,
        use_direction_classifier=True,
        anchor_generator=dict(
            type='AlignedAnchor3DRangeGenerator',
            ranges=[[-74.88, -74.88, -0.0345, 74.88, 74.88, -0.0345],
                    [-74.88, -74.88, -0.1188, 74.88, 74.88, -0.1188],
                    [-74.88, -74.88, 0, 74.88, 74.88, 0]],
            sizes=[
                [2.08, 4.73, 1.77],  # car
                [0.84, 1.81, 1.77],  # cyclist
                [0.84, 0.91, 1.74]  # pedestrian
            ],
            rotations=[0, 1.57],
            reshape_out=False),
        diff_rad_by_sin=True,
        dir_offset=0.7854,  # pi/4
        dir_limit_offset=0,
        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
        loss_dir=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
    # model training and testing settings
    train_cfg=dict(
        pts=dict(
            assigner=[
                dict(  # car
                    type='MaxIoUAssigner',
                    iou_calculator=dict(type='BboxOverlapsNearest3D'),
                    pos_iou_thr=0.55,
                    neg_iou_thr=0.4,
                    min_pos_iou=0.4,
                    ignore_iof_thr=-1),
                dict(  # cyclist
                    type='MaxIoUAssigner',
                    iou_calculator=dict(type='BboxOverlapsNearest3D'),
                    pos_iou_thr=0.5,
                    neg_iou_thr=0.3,
                    min_pos_iou=0.3,
                    ignore_iof_thr=-1),
                dict(  # pedestrian
                    type='MaxIoUAssigner',
                    iou_calculator=dict(type='BboxOverlapsNearest3D'),
                    pos_iou_thr=0.5,
                    neg_iou_thr=0.3,
                    min_pos_iou=0.3,
                    ignore_iof_thr=-1),
            ],
            allowed_border=0,
            code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
            pos_weight=-1,
            debug=False)),
    test_cfg=dict(
        pts=dict(
            use_rotate_nms=True,
            nms_across_levels=False,
            nms_pre=4096,
            nms_thr=0.25,
            score_thr=0.1,
            min_bbox_size=0,
            max_num=500)))
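Note (illustrative sketch, not part of the commit): in_channels=5 reflects the five per-point features in the Waymo data (x, y, z, intensity, elongation), and the BEV grid again follows from range and voxel size:

assert round(2 * 74.88 / 0.32) == 468  # matches output_shape=[468, 468]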
projects/configs/_base_/models/hv_second_secfpn_kitti.py
0 → 100644
View file @ 4cd43886
voxel_size = [0.05, 0.05, 0.1]

model = dict(
    type='VoxelNet',
    voxel_layer=dict(
        max_num_points=5,
        point_cloud_range=[0, -40, -3, 70.4, 40, 1],
        voxel_size=voxel_size,
        max_voxels=(16000, 40000)),
    voxel_encoder=dict(type='HardSimpleVFE'),
    middle_encoder=dict(
        type='SparseEncoder',
        in_channels=4,
        sparse_shape=[41, 1600, 1408],
        order=('conv', 'norm', 'act')),
    backbone=dict(
        type='SECOND',
        in_channels=256,
        layer_nums=[5, 5],
        layer_strides=[1, 2],
        out_channels=[128, 256]),
    neck=dict(
        type='SECONDFPN',
        in_channels=[128, 256],
        upsample_strides=[1, 2],
        out_channels=[256, 256]),
    bbox_head=dict(
        type='Anchor3DHead',
        num_classes=3,
        in_channels=512,
        feat_channels=512,
        use_direction_classifier=True,
        anchor_generator=dict(
            type='Anchor3DRangeGenerator',
            ranges=[
                [0, -40.0, -0.6, 70.4, 40.0, -0.6],
                [0, -40.0, -0.6, 70.4, 40.0, -0.6],
                [0, -40.0, -1.78, 70.4, 40.0, -1.78],
            ],
            sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
            rotations=[0, 1.57],
            reshape_out=False),
        diff_rad_by_sin=True,
        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
        loss_dir=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
    # model training and testing settings
    train_cfg=dict(
        assigner=[
            dict(  # for Pedestrian
                type='MaxIoUAssigner',
                iou_calculator=dict(type='BboxOverlapsNearest3D'),
                pos_iou_thr=0.35,
                neg_iou_thr=0.2,
                min_pos_iou=0.2,
                ignore_iof_thr=-1),
            dict(  # for Cyclist
                type='MaxIoUAssigner',
                iou_calculator=dict(type='BboxOverlapsNearest3D'),
                pos_iou_thr=0.35,
                neg_iou_thr=0.2,
                min_pos_iou=0.2,
                ignore_iof_thr=-1),
            dict(  # for Car
                type='MaxIoUAssigner',
                iou_calculator=dict(type='BboxOverlapsNearest3D'),
                pos_iou_thr=0.6,
                neg_iou_thr=0.45,
                min_pos_iou=0.45,
                ignore_iof_thr=-1),
        ],
        allowed_border=0,
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        use_rotate_nms=True,
        nms_across_levels=False,
        nms_thr=0.01,
        score_thr=0.1,
        min_bbox_size=0,
        nms_pre=100,
        max_num=50))
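Note (illustrative sketch, not part of the commit): SparseEncoder takes sparse_shape as [D, H, W] = [z, y, x]; in these SECOND configs the z dimension carries one extra slice (40 + 1 = 41). A minimal sanity check:

pc_range = [0, -40, -3, 70.4, 40, 1]
voxel_size = [0.05, 0.05, 0.1]
nz = round((pc_range[5] - pc_range[2]) / voxel_size[2])  # 4 / 0.1 = 40
ny = round((pc_range[4] - pc_range[1]) / voxel_size[1])  # 80 / 0.05 = 1600
nx = round((pc_range[3] - pc_range[0]) / voxel_size[0])  # 70.4 / 0.05 = 1408
assert [nz + 1, ny, nx] == [41, 1600, 1408]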