Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
lishj6
BEVFomer
Commits
4cd43886
Commit
4cd43886
authored
Sep 01, 2025
by
lishj6
🏸
Browse files
init
parent
a9a1fe81
Changes
207
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1386 additions
and
0 deletions
+1386
-0
projects/configs/_base_/models/hv_second_secfpn_waymo.py
projects/configs/_base_/models/hv_second_secfpn_waymo.py
+100
-0
projects/configs/_base_/models/imvotenet_image.py
projects/configs/_base_/models/imvotenet_image.py
+108
-0
projects/configs/_base_/models/mask_rcnn_r50_fpn.py
projects/configs/_base_/models/mask_rcnn_r50_fpn.py
+124
-0
projects/configs/_base_/models/paconv_cuda_ssg.py
projects/configs/_base_/models/paconv_cuda_ssg.py
+7
-0
projects/configs/_base_/models/paconv_ssg.py
projects/configs/_base_/models/paconv_ssg.py
+49
-0
projects/configs/_base_/models/parta2.py
projects/configs/_base_/models/parta2.py
+201
-0
projects/configs/_base_/models/pointnet2_msg.py
projects/configs/_base_/models/pointnet2_msg.py
+28
-0
projects/configs/_base_/models/pointnet2_ssg.py
projects/configs/_base_/models/pointnet2_ssg.py
+35
-0
projects/configs/_base_/models/votenet.py
projects/configs/_base_/models/votenet.py
+73
-0
projects/configs/_base_/schedules/cosine.py
projects/configs/_base_/schedules/cosine.py
+20
-0
projects/configs/_base_/schedules/cyclic_20e.py
projects/configs/_base_/schedules/cyclic_20e.py
+24
-0
projects/configs/_base_/schedules/cyclic_40e.py
projects/configs/_base_/schedules/cyclic_40e.py
+31
-0
projects/configs/_base_/schedules/mmdet_schedule_1x.py
projects/configs/_base_/schedules/mmdet_schedule_1x.py
+11
-0
projects/configs/_base_/schedules/schedule_2x.py
projects/configs/_base_/schedules/schedule_2x.py
+14
-0
projects/configs/_base_/schedules/schedule_3x.py
projects/configs/_base_/schedules/schedule_3x.py
+9
-0
projects/configs/_base_/schedules/seg_cosine_150e.py
projects/configs/_base_/schedules/seg_cosine_150e.py
+9
-0
projects/configs/_base_/schedules/seg_cosine_200e.py
projects/configs/_base_/schedules/seg_cosine_200e.py
+9
-0
projects/configs/_base_/schedules/seg_cosine_50e.py
projects/configs/_base_/schedules/seg_cosine_50e.py
+9
-0
projects/configs/bevformer/bevformer_base.py
projects/configs/bevformer/bevformer_base.py
+257
-0
projects/configs/bevformer/bevformer_small.py
projects/configs/bevformer/bevformer_small.py
+268
-0
No files found.
projects/configs/_base_/models/hv_second_secfpn_waymo.py
0 → 100644
View file @
4cd43886
# model settings
# Voxel size for voxel encoder
# Usually voxel size is changed consistently with the point cloud range
# If point cloud range is modified, do remember to change all related
# keys in the config.
voxel_size = [0.08, 0.08, 0.1]

model = dict(
    type='VoxelNet',
    voxel_layer=dict(
        max_num_points=10,
        point_cloud_range=[-76.8, -51.2, -2, 76.8, 51.2, 4],
        voxel_size=voxel_size,
        max_voxels=(80000, 90000)),
    voxel_encoder=dict(type='HardSimpleVFE', num_features=5),
    middle_encoder=dict(
        type='SparseEncoder',
        in_channels=5,
        sparse_shape=[61, 1280, 1920],
        order=('conv', 'norm', 'act')),
    backbone=dict(
        type='SECOND',
        in_channels=384,
        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
        layer_nums=[5, 5],
        layer_strides=[1, 2],
        out_channels=[128, 256]),
    neck=dict(
        type='SECONDFPN',
        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
        in_channels=[128, 256],
        upsample_strides=[1, 2],
        out_channels=[256, 256]),
    bbox_head=dict(
        type='Anchor3DHead',
        num_classes=3,
        in_channels=512,
        feat_channels=512,
        use_direction_classifier=True,
        anchor_generator=dict(
            type='AlignedAnchor3DRangeGenerator',
            ranges=[[-76.8, -51.2, -0.0345, 76.8, 51.2, -0.0345],
                    [-76.8, -51.2, 0, 76.8, 51.2, 0],
                    [-76.8, -51.2, -0.1188, 76.8, 51.2, -0.1188]],
            sizes=[
                [2.08, 4.73, 1.77],  # car
                [0.84, 0.91, 1.74],  # pedestrian
                [0.84, 1.81, 1.77]   # cyclist
            ],
            rotations=[0, 1.57],
            reshape_out=False),
        diff_rad_by_sin=True,
        dir_offset=0.7854,  # pi/4
        dir_limit_offset=0,
        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=7),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
        loss_dir=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
    # model training and testing settings
    train_cfg=dict(
        assigner=[
            dict(  # car
                type='MaxIoUAssigner',
                iou_calculator=dict(type='BboxOverlapsNearest3D'),
                pos_iou_thr=0.55,
                neg_iou_thr=0.4,
                min_pos_iou=0.4,
                ignore_iof_thr=-1),
            dict(  # pedestrian
                type='MaxIoUAssigner',
                iou_calculator=dict(type='BboxOverlapsNearest3D'),
                pos_iou_thr=0.5,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                ignore_iof_thr=-1),
            dict(  # cyclist
                type='MaxIoUAssigner',
                iou_calculator=dict(type='BboxOverlapsNearest3D'),
                pos_iou_thr=0.5,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                ignore_iof_thr=-1)
        ],
        allowed_border=0,
        code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0],
        pos_weight=-1,
        debug=False),
    test_cfg=dict(
        use_rotate_nms=True,
        nms_across_levels=False,
        nms_pre=4096,
        nms_thr=0.25,
        score_thr=0.1,
        min_bbox_size=0,
        max_num=500))
projects/configs/_base_/models/imvotenet_image.py
0 → 100644
View file @
4cd43886
model = dict(
    type='ImVoteNet',
    img_backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=False),
        norm_eval=True,
        style='caffe'),
    img_neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    img_rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[8],
            ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
    img_roi_head=dict(
        type='StandardRoIHead',
        bbox_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        bbox_head=dict(
            type='Shared2FCBBoxHead',
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=10,
            bbox_coder=dict(
                type='DeltaXYWHBBoxCoder',
                target_means=[0., 0., 0., 0.],
                target_stds=[0.1, 0.1, 0.2, 0.2]),
            reg_class_agnostic=False,
            loss_cls=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
            loss_bbox=dict(type='L1Loss', loss_weight=1.0))),
    # model training and testing settings
    train_cfg=dict(
        img_rpn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                match_low_quality=True,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False),
            allowed_border=-1,
            pos_weight=-1,
            debug=False),
        img_rpn_proposal=dict(
            nms_across_levels=False,
            nms_pre=2000,
            nms_post=1000,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        img_rcnn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.5,
                match_low_quality=False,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            pos_weight=-1,
            debug=False)),
    test_cfg=dict(
        img_rpn=dict(
            nms_across_levels=False,
            nms_pre=1000,
            nms_post=1000,
            max_per_img=1000,
            nms=dict(type='nms', iou_threshold=0.7),
            min_bbox_size=0),
        img_rcnn=dict(
            score_thr=0.05,
            nms=dict(type='nms', iou_threshold=0.5),
            max_per_img=100)))
projects/configs/_base_/models/mask_rcnn_r50_fpn.py
0 → 100644
View file @
4cd43886
# model settings
model = dict(
    type='MaskRCNN',
    pretrained='torchvision://resnet50',
    backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(0, 1, 2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN', requires_grad=True),
        norm_eval=True,
        style='pytorch'),
    neck=dict(
        type='FPN',
        in_channels=[256, 512, 1024, 2048],
        out_channels=256,
        num_outs=5),
    rpn_head=dict(
        type='RPNHead',
        in_channels=256,
        feat_channels=256,
        anchor_generator=dict(
            type='AnchorGenerator',
            scales=[8],
            ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64]),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[1.0, 1.0, 1.0, 1.0]),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
    roi_head=dict(
        type='StandardRoIHead',
        bbox_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        bbox_head=dict(
            type='Shared2FCBBoxHead',
            in_channels=256,
            fc_out_channels=1024,
            roi_feat_size=7,
            num_classes=80,
            bbox_coder=dict(
                type='DeltaXYWHBBoxCoder',
                target_means=[0., 0., 0., 0.],
                target_stds=[0.1, 0.1, 0.2, 0.2]),
            reg_class_agnostic=False,
            loss_cls=dict(
                type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
            loss_bbox=dict(type='L1Loss', loss_weight=1.0)),
        mask_roi_extractor=dict(
            type='SingleRoIExtractor',
            roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0),
            out_channels=256,
            featmap_strides=[4, 8, 16, 32]),
        mask_head=dict(
            type='FCNMaskHead',
            num_convs=4,
            in_channels=256,
            conv_out_channels=256,
            num_classes=80,
            loss_mask=dict(
                type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
    # model training and testing settings
    train_cfg=dict(
        rpn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.7,
                neg_iou_thr=0.3,
                min_pos_iou=0.3,
                match_low_quality=True,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=256,
                pos_fraction=0.5,
                neg_pos_ub=-1,
                add_gt_as_proposals=False),
            allowed_border=-1,
            pos_weight=-1,
            debug=False),
        rpn_proposal=dict(
            nms_across_levels=False,
            nms_pre=2000,
            nms_post=1000,
            max_num=1000,
            nms_thr=0.7,
            min_bbox_size=0),
        rcnn=dict(
            assigner=dict(
                type='MaxIoUAssigner',
                pos_iou_thr=0.5,
                neg_iou_thr=0.5,
                min_pos_iou=0.5,
                match_low_quality=True,
                ignore_iof_thr=-1),
            sampler=dict(
                type='RandomSampler',
                num=512,
                pos_fraction=0.25,
                neg_pos_ub=-1,
                add_gt_as_proposals=True),
            mask_size=28,
            pos_weight=-1,
            debug=False)),
    test_cfg=dict(
        rpn=dict(
            nms_across_levels=False,
            nms_pre=1000,
            nms_post=1000,
            max_num=1000,
            nms_thr=0.7,
            min_bbox_size=0),
        rcnn=dict(
            score_thr=0.05,
            nms=dict(type='nms', iou_threshold=0.5),
            max_per_img=100,
            mask_thr_binary=0.5)))
projects/configs/_base_/models/paconv_cuda_ssg.py
0 → 100644
View file @
4cd43886
# Inherit the PAConv SSG segmentor and swap in the CUDA implementation
# of the set-abstraction module with a smaller ScoreNet MLP.
_base_ = './paconv_ssg.py'

model = dict(
    backbone=dict(
        sa_cfg=dict(
            type='PAConvCUDASAModule',
            scorenet_cfg=dict(mlp_channels=[8, 16, 16]))))
projects/configs/_base_/models/paconv_ssg.py
0 → 100644
View file @
4cd43886
# model settings
model = dict(
    type='EncoderDecoder3D',
    backbone=dict(
        type='PointNet2SASSG',
        in_channels=9,  # [xyz, rgb, normalized_xyz]
        num_points=(1024, 256, 64, 16),
        radius=(None, None, None, None),  # use kNN instead of ball query
        num_samples=(32, 32, 32, 32),
        sa_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256),
                     (256, 256, 512)),
        fp_channels=(),
        norm_cfg=dict(type='BN2d', momentum=0.1),
        sa_cfg=dict(
            type='PAConvSAModule',
            pool_mod='max',
            use_xyz=True,
            normalize_xyz=False,
            paconv_num_kernels=[16, 16, 16],
            paconv_kernel_input='w_neighbor',
            scorenet_input='w_neighbor_dist',
            scorenet_cfg=dict(
                mlp_channels=[16, 16, 16],
                score_norm='softmax',
                temp_factor=1.0,
                last_bn=False))),
    decode_head=dict(
        type='PAConvHead',
        # PAConv model's decoder takes skip connections from the backbone.
        # Different from PointNet++, it also concats input features in the
        # last level of the decoder, leading to `128 + 6` as the channel
        # number.
        fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128),
                     (128 + 6, 128, 128, 128)),
        channels=128,
        dropout_ratio=0.5,
        conv_cfg=dict(type='Conv1d'),
        norm_cfg=dict(type='BN1d'),
        act_cfg=dict(type='ReLU'),
        loss_decode=dict(
            type='CrossEntropyLoss',
            use_sigmoid=False,
            class_weight=None,  # should be modified with dataset
            loss_weight=1.0)),
    # correlation loss to regularize PAConv's kernel weights
    loss_regularization=dict(
        type='PAConvRegularizationLoss', reduction='sum', loss_weight=10.0),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='slide'))
projects/configs/_base_/models/parta2.py
0 → 100644
View file @
4cd43886
# model settings
voxel_size = [0.05, 0.05, 0.1]
point_cloud_range = [0, -40, -3, 70.4, 40, 1]

model = dict(
    type='PartA2',
    voxel_layer=dict(
        max_num_points=5,  # max_points_per_voxel
        point_cloud_range=point_cloud_range,
        voxel_size=voxel_size,
        max_voxels=(16000, 40000)  # (training, testing) max_voxels
    ),
    voxel_encoder=dict(type='HardSimpleVFE'),
    middle_encoder=dict(
        type='SparseUNet',
        in_channels=4,
        sparse_shape=[41, 1600, 1408],
        order=('conv', 'norm', 'act')),
    backbone=dict(
        type='SECOND',
        in_channels=256,
        layer_nums=[5, 5],
        layer_strides=[1, 2],
        out_channels=[128, 256]),
    neck=dict(
        type='SECONDFPN',
        in_channels=[128, 256],
        upsample_strides=[1, 2],
        out_channels=[256, 256]),
    rpn_head=dict(
        type='PartA2RPNHead',
        num_classes=3,
        in_channels=512,
        feat_channels=512,
        use_direction_classifier=True,
        anchor_generator=dict(
            type='Anchor3DRangeGenerator',
            ranges=[[0, -40.0, -0.6, 70.4, 40.0, -0.6],
                    [0, -40.0, -0.6, 70.4, 40.0, -0.6],
                    [0, -40.0, -1.78, 70.4, 40.0, -1.78]],
            sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
            rotations=[0, 1.57],
            reshape_out=False),
        diff_rad_by_sin=True,
        assigner_per_size=True,
        assign_per_class=True,
        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
        loss_dir=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
    roi_head=dict(
        type='PartAggregationROIHead',
        num_classes=3,
        semantic_head=dict(
            type='PointwiseSemanticHead',
            in_channels=16,
            extra_width=0.2,
            seg_score_thr=0.3,
            num_classes=3,
            loss_seg=dict(
                type='FocalLoss',
                use_sigmoid=True,
                reduction='sum',
                gamma=2.0,
                alpha=0.25,
                loss_weight=1.0),
            loss_part=dict(
                type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
        seg_roi_extractor=dict(
            type='Single3DRoIAwareExtractor',
            roi_layer=dict(
                type='RoIAwarePool3d',
                out_size=14,
                max_pts_per_voxel=128,
                mode='max')),
        part_roi_extractor=dict(
            type='Single3DRoIAwareExtractor',
            roi_layer=dict(
                type='RoIAwarePool3d',
                out_size=14,
                max_pts_per_voxel=128,
                mode='avg')),
        bbox_head=dict(
            type='PartA2BboxHead',
            num_classes=3,
            seg_in_channels=16,
            part_in_channels=4,
            seg_conv_channels=[64, 64],
            part_conv_channels=[64, 64],
            merge_conv_channels=[128, 128],
            down_conv_channels=[128, 256],
            bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
            shared_fc_channels=[256, 512, 512, 512],
            cls_channels=[256, 256],
            reg_channels=[256, 256],
            dropout_ratio=0.1,
            roi_feat_size=14,
            with_corner_loss=True,
            loss_bbox=dict(
                type='SmoothL1Loss',
                beta=1.0 / 9.0,
                reduction='sum',
                loss_weight=1.0),
            loss_cls=dict(
                type='CrossEntropyLoss',
                use_sigmoid=True,
                reduction='sum',
                loss_weight=1.0))),
    # model training and testing settings
    train_cfg=dict(
        rpn=dict(
            assigner=[
                dict(  # for Pedestrian
                    type='MaxIoUAssigner',
                    iou_calculator=dict(type='BboxOverlapsNearest3D'),
                    pos_iou_thr=0.5,
                    neg_iou_thr=0.35,
                    min_pos_iou=0.35,
                    ignore_iof_thr=-1),
                dict(  # for Cyclist
                    type='MaxIoUAssigner',
                    iou_calculator=dict(type='BboxOverlapsNearest3D'),
                    pos_iou_thr=0.5,
                    neg_iou_thr=0.35,
                    min_pos_iou=0.35,
                    ignore_iof_thr=-1),
                dict(  # for Car
                    type='MaxIoUAssigner',
                    iou_calculator=dict(type='BboxOverlapsNearest3D'),
                    pos_iou_thr=0.6,
                    neg_iou_thr=0.45,
                    min_pos_iou=0.45,
                    ignore_iof_thr=-1)
            ],
            allowed_border=0,
            pos_weight=-1,
            debug=False),
        rpn_proposal=dict(
            nms_pre=9000,
            nms_post=512,
            max_num=512,
            nms_thr=0.8,
            score_thr=0,
            use_rotate_nms=False),
        rcnn=dict(
            assigner=[
                dict(  # for Pedestrian
                    type='MaxIoUAssigner',
                    iou_calculator=dict(
                        type='BboxOverlaps3D', coordinate='lidar'),
                    pos_iou_thr=0.55,
                    neg_iou_thr=0.55,
                    min_pos_iou=0.55,
                    ignore_iof_thr=-1),
                dict(  # for Cyclist
                    type='MaxIoUAssigner',
                    iou_calculator=dict(
                        type='BboxOverlaps3D', coordinate='lidar'),
                    pos_iou_thr=0.55,
                    neg_iou_thr=0.55,
                    min_pos_iou=0.55,
                    ignore_iof_thr=-1),
                dict(  # for Car
                    type='MaxIoUAssigner',
                    iou_calculator=dict(
                        type='BboxOverlaps3D', coordinate='lidar'),
                    pos_iou_thr=0.55,
                    neg_iou_thr=0.55,
                    min_pos_iou=0.55,
                    ignore_iof_thr=-1)
            ],
            sampler=dict(
                type='IoUNegPiecewiseSampler',
                num=128,
                pos_fraction=0.55,
                neg_piece_fractions=[0.8, 0.2],
                neg_iou_piece_thrs=[0.55, 0.1],
                neg_pos_ub=-1,
                add_gt_as_proposals=False,
                return_iou=True),
            cls_pos_thr=0.75,
            cls_neg_thr=0.25)),
    test_cfg=dict(
        rpn=dict(
            nms_pre=1024,
            nms_post=100,
            max_num=100,
            nms_thr=0.7,
            score_thr=0,
            use_rotate_nms=True),
        rcnn=dict(
            use_rotate_nms=True,
            use_raw_score=True,
            nms_thr=0.01,
            score_thr=0.1)))
projects/configs/_base_/models/pointnet2_msg.py
0 → 100644
View file @
4cd43886
_base_ = './pointnet2_ssg.py'

# model settings
model = dict(
    backbone=dict(
        _delete_=True,
        type='PointNet2SAMSG',
        in_channels=6,  # [xyz, rgb], should be modified with dataset
        num_points=(1024, 256, 64, 16),
        radii=((0.05, 0.1), (0.1, 0.2), (0.2, 0.4), (0.4, 0.8)),
        num_samples=((16, 32), (16, 32), (16, 32), (16, 32)),
        sa_channels=(((16, 16, 32), (32, 32, 64)),
                     ((64, 64, 128), (64, 96, 128)),
                     ((128, 196, 256), (128, 196, 256)),
                     ((256, 256, 512), (256, 384, 512))),
        aggregation_channels=(None, None, None, None),
        fps_mods=(('D-FPS'), ('D-FPS'), ('D-FPS'), ('D-FPS')),
        fps_sample_range_lists=((-1), (-1), (-1), (-1)),
        dilated_group=(False, False, False, False),
        out_indices=(0, 1, 2, 3),
        sa_cfg=dict(
            type='PointSAModuleMSG',
            pool_mod='max',
            use_xyz=True,
            normalize_xyz=False)),
    decode_head=dict(
        fp_channels=((1536, 256, 256), (512, 256, 256), (352, 256, 128),
                     (128, 128, 128, 128))))
projects/configs/_base_/models/pointnet2_ssg.py
0 → 100644
View file @
4cd43886
# model settings
model = dict(
    type='EncoderDecoder3D',
    backbone=dict(
        type='PointNet2SASSG',
        in_channels=6,  # [xyz, rgb], should be modified with dataset
        num_points=(1024, 256, 64, 16),
        radius=(0.1, 0.2, 0.4, 0.8),
        num_samples=(32, 32, 32, 32),
        sa_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256),
                     (256, 256, 512)),
        fp_channels=(),
        norm_cfg=dict(type='BN2d'),
        sa_cfg=dict(
            type='PointSAModule',
            pool_mod='max',
            use_xyz=True,
            normalize_xyz=False)),
    decode_head=dict(
        type='PointNet2Head',
        fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128),
                     (128, 128, 128, 128)),
        channels=128,
        dropout_ratio=0.5,
        conv_cfg=dict(type='Conv1d'),
        norm_cfg=dict(type='BN1d'),
        act_cfg=dict(type='ReLU'),
        loss_decode=dict(
            type='CrossEntropyLoss',
            use_sigmoid=False,
            class_weight=None,  # should be modified with dataset
            loss_weight=1.0)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='slide'))
projects/configs/_base_/models/votenet.py
0 → 100644
View file @
4cd43886
model = dict(
    type='VoteNet',
    backbone=dict(
        type='PointNet2SASSG',
        in_channels=4,
        num_points=(2048, 1024, 512, 256),
        radius=(0.2, 0.4, 0.8, 1.2),
        num_samples=(64, 32, 16, 16),
        sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),
                     (128, 128, 256)),
        fp_channels=((256, 256), (256, 256)),
        norm_cfg=dict(type='BN2d'),
        sa_cfg=dict(
            type='PointSAModule',
            pool_mod='max',
            use_xyz=True,
            normalize_xyz=True)),
    bbox_head=dict(
        type='VoteHead',
        vote_module_cfg=dict(
            in_channels=256,
            vote_per_seed=1,
            gt_per_seed=3,
            conv_channels=(256, 256),
            conv_cfg=dict(type='Conv1d'),
            norm_cfg=dict(type='BN1d'),
            norm_feats=True,
            vote_loss=dict(
                type='ChamferDistance',
                mode='l1',
                reduction='none',
                loss_dst_weight=10.0)),
        vote_aggregation_cfg=dict(
            type='PointSAModule',
            num_point=256,
            radius=0.3,
            num_sample=16,
            mlp_channels=[256, 128, 128, 128],
            use_xyz=True,
            normalize_xyz=True),
        pred_layer_cfg=dict(
            in_channels=128, shared_conv_channels=(128, 128), bias=True),
        conv_cfg=dict(type='Conv1d'),
        norm_cfg=dict(type='BN1d'),
        objectness_loss=dict(
            type='CrossEntropyLoss',
            class_weight=[0.2, 0.8],
            reduction='sum',
            loss_weight=5.0),
        center_loss=dict(
            type='ChamferDistance',
            mode='l2',
            reduction='sum',
            loss_src_weight=10.0,
            loss_dst_weight=10.0),
        dir_class_loss=dict(
            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
        dir_res_loss=dict(
            type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
        size_class_loss=dict(
            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
        size_res_loss=dict(
            type='SmoothL1Loss', reduction='sum', loss_weight=10.0 / 3.0),
        semantic_loss=dict(
            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
    # model training and testing settings
    train_cfg=dict(
        pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote'),
    test_cfg=dict(
        sample_mod='seed',
        nms_thr=0.25,
        score_thr=0.05,
        per_class_proposal=True))
projects/configs/_base_/schedules/cosine.py
0 → 100644
View file @
4cd43886
# This schedule is mainly used by models with dynamic voxelization.
# optimizer
lr = 0.003  # max learning rate
optimizer = dict(
    type='AdamW',
    lr=lr,
    betas=(0.95, 0.99),  # the momentum changes during training
    weight_decay=0.001)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))

lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=1000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)

momentum_config = None

runner = dict(type='EpochBasedRunner', max_epochs=40)
projects/configs/_base_/schedules/cyclic_20e.py
0 → 100644
View file @
4cd43886
# For the nuScenes dataset, we usually evaluate the model at the end of
# training. Since the models are trained by 24 epochs by default, we set
# the evaluation interval to be 20. Please change the interval accordingly
# if you do not use a default schedule.
# optimizer
# This schedule is mainly used by models on the nuScenes dataset.
optimizer = dict(type='AdamW', lr=1e-4, weight_decay=0.01)
# max_norm=10 is better for SECOND
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
lr_config = dict(
    policy='cyclic',
    target_ratio=(10, 1e-4),
    cyclic_times=1,
    step_ratio_up=0.4,
)
momentum_config = dict(
    policy='cyclic',
    target_ratio=(0.85 / 0.95, 1),
    cyclic_times=1,
    step_ratio_up=0.4,
)
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=20)
projects/configs/_base_/schedules/cyclic_40e.py
0 → 100644
View file @
4cd43886
# This schedule is usually used by models trained on the KITTI dataset.
# The learning rate set in the cyclic schedule is the initial learning rate
# rather than the max learning rate. Since the target_ratio is (10, 1e-4),
# the learning rate will change from 0.0018 to 0.018, then go to 0.0018*1e-4
lr = 0.0018
# The optimizer follows the setting in SECOND.Pytorch, but here we use
# the official AdamW optimizer implemented by PyTorch.
optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
# We use cyclic learning rate and momentum schedule following SECOND.Pytorch
# https://github.com/traveller59/second.pytorch/blob/3aba19c9688274f75ebb5e576f65cfe54773c021/torchplus/train/learning_schedules_fastai.py#L69 # noqa
# We implement them in mmcv, for more details, please refer to
# https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/lr_updater.py#L327 # noqa
# https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/momentum_updater.py#L130 # noqa
lr_config = dict(
    policy='cyclic',
    target_ratio=(10, 1e-4),
    cyclic_times=1,
    step_ratio_up=0.4,
)
momentum_config = dict(
    policy='cyclic',
    target_ratio=(0.85 / 0.95, 1),
    cyclic_times=1,
    step_ratio_up=0.4,
)
# Although the max_epochs is 40, this schedule is usually used with
# RepeatDataset with repeat ratio N, thus the actual max epoch
# number could be Nx40
runner = dict(type='EpochBasedRunner', max_epochs=40)
projects/configs/_base_/schedules/mmdet_schedule_1x.py
0 → 100644
View file @
4cd43886
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[8, 11])
runner = dict(type='EpochBasedRunner', max_epochs=12)
projects/configs/_base_/schedules/schedule_2x.py
0 → 100644
View file @
4cd43886
# optimizer
# This schedule is mainly used by models on the nuScenes dataset.
optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01)
# max_norm=10 is better for SECOND
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=1000,
    warmup_ratio=1.0 / 1000,
    step=[20, 23])
momentum_config = None
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=24)
projects/configs/_base_/schedules/schedule_3x.py
0 → 100644
View file @
4cd43886
# optimizer
# This schedule is mainly used by models on indoor datasets,
# e.g., VoteNet on SUNRGBD and ScanNet
lr = 0.008  # max learning rate
optimizer = dict(type='AdamW', lr=lr, weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict(policy='step', warmup=None, step=[24, 32])
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=36)
projects/configs/_base_/schedules/seg_cosine_150e.py
0 → 100644
View file @
4cd43886
# optimizer
# This schedule is mainly used on the S3DIS dataset in segmentation task.
optimizer = dict(type='SGD', lr=0.2, weight_decay=0.0001, momentum=0.9)
optimizer_config = dict(grad_clip=None)
lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=0.002)
momentum_config = None

# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=150)
projects/configs/_base_/schedules/seg_cosine_200e.py
0 → 100644
View file @
4cd43886
# optimizer
# This schedule is mainly used on the ScanNet dataset in segmentation task.
optimizer = dict(type='Adam', lr=0.001, weight_decay=0.01)
optimizer_config = dict(grad_clip=None)
lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5)
momentum_config = None

# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=200)
projects/configs/_base_/schedules/seg_cosine_50e.py
0 → 100644
View file @
4cd43886
# optimizer
# This schedule is mainly used on the S3DIS dataset in segmentation task.
optimizer = dict(type='Adam', lr=0.001, weight_decay=0.001)
optimizer_config = dict(grad_clip=None)
lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5)
momentum_config = None

# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=50)
projects/configs/bevformer/bevformer_base.py
0 → 100644
View file @
4cd43886
_base_
=
[
'../datasets/custom_nus-3d.py'
,
'../_base_/default_runtime.py'
]
#
plugin
=
True
plugin_dir
=
'projects/mmdet3d_plugin/'
# If point cloud range is changed, the models should also change their point
# cloud range accordingly
point_cloud_range
=
[
-
51.2
,
-
51.2
,
-
5.0
,
51.2
,
51.2
,
3.0
]
voxel_size
=
[
0.2
,
0.2
,
8
]
img_norm_cfg
=
dict
(
mean
=
[
103.530
,
116.280
,
123.675
],
std
=
[
1.0
,
1.0
,
1.0
],
to_rgb
=
False
)
# For nuScenes we usually do 10-class detection
class_names
=
[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
]
input_modality
=
dict
(
use_lidar
=
False
,
use_camera
=
True
,
use_radar
=
False
,
use_map
=
False
,
use_external
=
True
)
_dim_
=
256
_pos_dim_
=
_dim_
//
2
_ffn_dim_
=
_dim_
*
2
_num_levels_
=
4
bev_h_
=
200
bev_w_
=
200
queue_length
=
4
# each sequence contains `queue_length` frames.
model
=
dict
(
type
=
'BEVFormer'
,
use_grid_mask
=
True
,
video_test_mode
=
True
,
img_backbone
=
dict
(
type
=
'ResNet'
,
depth
=
101
,
num_stages
=
4
,
out_indices
=
(
1
,
2
,
3
),
frozen_stages
=
1
,
norm_cfg
=
dict
(
type
=
'BN2d'
,
requires_grad
=
False
),
norm_eval
=
True
,
style
=
'caffe'
,
dcn
=
dict
(
type
=
'DCNv2'
,
deform_groups
=
1
,
fallback_on_stride
=
False
),
# original DCNv2 will print log when perform load_state_dict
stage_with_dcn
=
(
False
,
False
,
True
,
True
)),
img_neck
=
dict
(
type
=
'FPN'
,
in_channels
=
[
512
,
1024
,
2048
],
out_channels
=
_dim_
,
start_level
=
0
,
add_extra_convs
=
'on_output'
,
num_outs
=
4
,
relu_before_extra_convs
=
True
),
pts_bbox_head
=
dict
(
type
=
'BEVFormerHead'
,
bev_h
=
bev_h_
,
bev_w
=
bev_w_
,
num_query
=
900
,
num_classes
=
10
,
in_channels
=
_dim_
,
sync_cls_avg_factor
=
True
,
with_box_refine
=
True
,
as_two_stage
=
False
,
transformer
=
dict
(
type
=
'PerceptionTransformer'
,
rotate_prev_bev
=
True
,
use_shift
=
True
,
use_can_bus
=
True
,
embed_dims
=
_dim_
,
encoder
=
dict
(
type
=
'BEVFormerEncoder'
,
num_layers
=
6
,
pc_range
=
point_cloud_range
,
num_points_in_pillar
=
4
,
return_intermediate
=
False
,
transformerlayers
=
dict
(
type
=
'BEVFormerLayer'
,
attn_cfgs
=
[
dict
(
type
=
'TemporalSelfAttention'
,
embed_dims
=
_dim_
,
num_levels
=
1
),
dict
(
type
=
'SpatialCrossAttention'
,
pc_range
=
point_cloud_range
,
deformable_attention
=
dict
(
type
=
'MSDeformableAttention3D'
,
embed_dims
=
_dim_
,
num_points
=
8
,
num_levels
=
_num_levels_
),
embed_dims
=
_dim_
,
)
],
feedforward_channels
=
_ffn_dim_
,
ffn_dropout
=
0.1
,
operation_order
=
(
'self_attn'
,
'norm'
,
'cross_attn'
,
'norm'
,
'ffn'
,
'norm'
))),
decoder
=
dict
(
type
=
'DetectionTransformerDecoder'
,
num_layers
=
6
,
return_intermediate
=
True
,
transformerlayers
=
dict
(
type
=
'DetrTransformerDecoderLayer'
,
attn_cfgs
=
[
dict
(
type
=
'MultiheadAttention'
,
embed_dims
=
_dim_
,
num_heads
=
8
,
dropout
=
0.1
),
dict
(
type
=
'CustomMSDeformableAttention'
,
embed_dims
=
_dim_
,
num_levels
=
1
),
],
feedforward_channels
=
_ffn_dim_
,
ffn_dropout
=
0.1
,
operation_order
=
(
'self_attn'
,
'norm'
,
'cross_attn'
,
'norm'
,
'ffn'
,
'norm'
)))),
bbox_coder
=
dict
(
type
=
'NMSFreeCoder'
,
post_center_range
=
[
-
61.2
,
-
61.2
,
-
10.0
,
61.2
,
61.2
,
10.0
],
pc_range
=
point_cloud_range
,
max_num
=
300
,
voxel_size
=
voxel_size
,
num_classes
=
10
),
positional_encoding
=
dict
(
type
=
'LearnedPositionalEncoding'
,
num_feats
=
_pos_dim_
,
row_num_embed
=
bev_h_
,
col_num_embed
=
bev_w_
,
),
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
2.0
),
loss_bbox
=
dict
(
type
=
'L1Loss'
,
loss_weight
=
0.25
),
loss_iou
=
dict
(
type
=
'GIoULoss'
,
loss_weight
=
0.0
)),
# model training and testing settings
train_cfg
=
dict
(
pts
=
dict
(
grid_size
=
[
512
,
512
,
1
],
voxel_size
=
voxel_size
,
point_cloud_range
=
point_cloud_range
,
out_size_factor
=
4
,
assigner
=
dict
(
type
=
'HungarianAssigner3D'
,
cls_cost
=
dict
(
type
=
'FocalLossCost'
,
weight
=
2.0
),
reg_cost
=
dict
(
type
=
'BBox3DL1Cost'
,
weight
=
0.25
),
iou_cost
=
dict
(
type
=
'IoUCost'
,
weight
=
0.0
),
# Fake cost. This is just to make it compatible with DETR head.
pc_range
=
point_cloud_range
))))
dataset_type
=
'CustomNuScenesDataset'
data_root
=
'data/nuscenes/'
file_client_args
=
dict
(
backend
=
'disk'
)
train_pipeline
=
[
dict
(
type
=
'LoadMultiViewImageFromFiles'
,
to_float32
=
True
),
dict
(
type
=
'PhotoMetricDistortionMultiViewImage'
),
dict
(
type
=
'LoadAnnotations3D'
,
with_bbox_3d
=
True
,
with_label_3d
=
True
,
with_attr_label
=
False
),
dict
(
type
=
'ObjectRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'ObjectNameFilter'
,
classes
=
class_names
),
dict
(
type
=
'NormalizeMultiviewImage'
,
**
img_norm_cfg
),
dict
(
type
=
'PadMultiViewImage'
,
size_divisor
=
32
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
type
=
'CustomCollect3D'
,
keys
=
[
'gt_bboxes_3d'
,
'gt_labels_3d'
,
'img'
])
]
test_pipeline
=
[
dict
(
type
=
'LoadMultiViewImageFromFiles'
,
to_float32
=
True
),
dict
(
type
=
'NormalizeMultiviewImage'
,
**
img_norm_cfg
),
dict
(
type
=
'PadMultiViewImage'
,
size_divisor
=
32
),
dict
(
type
=
'MultiScaleFlipAug3D'
,
img_scale
=
(
1600
,
900
),
pts_scale_ratio
=
1
,
flip
=
False
,
transforms
=
[
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
,
with_label
=
False
),
dict
(
type
=
'CustomCollect3D'
,
keys
=
[
'img'
])
])
]
data
=
dict
(
samples_per_gpu
=
1
,
workers_per_gpu
=
0
,
train
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'nuscenes_infos_temporal_train.pkl'
,
pipeline
=
train_pipeline
,
classes
=
class_names
,
modality
=
input_modality
,
test_mode
=
False
,
use_valid_flag
=
True
,
bev_size
=
(
bev_h_
,
bev_w_
),
queue_length
=
queue_length
,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d
=
'LiDAR'
),
val
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'nuscenes_infos_temporal_val.pkl'
,
pipeline
=
test_pipeline
,
bev_size
=
(
bev_h_
,
bev_w_
),
classes
=
class_names
,
modality
=
input_modality
,
samples_per_gpu
=
1
),
test
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'nuscenes_infos_temporal_val.pkl'
,
pipeline
=
test_pipeline
,
bev_size
=
(
bev_h_
,
bev_w_
),
classes
=
class_names
,
modality
=
input_modality
),
shuffler_sampler
=
dict
(
type
=
'DistributedGroupSampler'
),
nonshuffler_sampler
=
dict
(
type
=
'DistributedSampler'
)
)
optimizer
=
dict
(
type
=
'AdamW'
,
lr
=
2e-4
,
paramwise_cfg
=
dict
(
custom_keys
=
{
'img_backbone'
:
dict
(
lr_mult
=
0.1
),
}),
weight_decay
=
0.01
)
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
35
,
norm_type
=
2
))
# learning policy
lr_config
=
dict
(
policy
=
'CosineAnnealing'
,
warmup
=
'linear'
,
warmup_iters
=
500
,
warmup_ratio
=
1.0
/
3
,
min_lr_ratio
=
1e-3
)
total_epochs
=
24
evaluation
=
dict
(
interval
=
1
,
pipeline
=
test_pipeline
)
runner
=
dict
(
type
=
'EpochBasedRunner'
,
max_epochs
=
total_epochs
)
load_from
=
'ckpts/r101_dcn_fcos3d_pretrain.pth'
log_config
=
dict
(
interval
=
50
,
hooks
=
[
dict
(
type
=
'TextLoggerHook'
),
dict
(
type
=
'TensorboardLoggerHook'
)
])
checkpoint_config
=
dict
(
interval
=
8
)
projects/configs/bevformer/bevformer_small.py
0 → 100644
View file @
4cd43886
# BEvFormer-small consumes at lease 10500M GPU memory
# compared to bevformer_base, bevformer_small has
# smaller BEV: 200*200 -> 150*150
# less encoder layers: 6 -> 3
# smaller input size: 1600*900 -> (1600*900)*0.8
# multi-scale feautres -> single scale features (C5)
# with_cp of backbone = True
_base_
=
[
'../datasets/custom_nus-3d.py'
,
'../_base_/default_runtime.py'
]
#
plugin
=
True
plugin_dir
=
'projects/mmdet3d_plugin/'
# If point cloud range is changed, the models should also change their point
# cloud range accordingly
point_cloud_range
=
[
-
51.2
,
-
51.2
,
-
5.0
,
51.2
,
51.2
,
3.0
]
voxel_size
=
[
0.2
,
0.2
,
8
]
img_norm_cfg
=
dict
(
mean
=
[
103.530
,
116.280
,
123.675
],
std
=
[
1.0
,
1.0
,
1.0
],
to_rgb
=
False
)
# For nuScenes we usually do 10-class detection
class_names
=
[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
]
input_modality
=
dict
(
use_lidar
=
False
,
use_camera
=
True
,
use_radar
=
False
,
use_map
=
False
,
use_external
=
True
)
_dim_
=
256
_pos_dim_
=
_dim_
//
2
_ffn_dim_
=
_dim_
*
2
_num_levels_
=
1
bev_h_
=
150
bev_w_
=
150
queue_length
=
3
# each sequence contains `queue_length` frames.
model
=
dict
(
type
=
'BEVFormer'
,
use_grid_mask
=
True
,
video_test_mode
=
True
,
img_backbone
=
dict
(
type
=
'ResNet'
,
depth
=
101
,
num_stages
=
4
,
out_indices
=
(
3
,),
frozen_stages
=
1
,
norm_cfg
=
dict
(
type
=
'BN2d'
,
requires_grad
=
False
),
norm_eval
=
True
,
style
=
'caffe'
,
with_cp
=
True
,
# using checkpoint to save GPU memory
dcn
=
dict
(
type
=
'DCNv2'
,
deform_groups
=
1
,
fallback_on_stride
=
False
),
# original DCNv2 will print log when perform load_state_dict
stage_with_dcn
=
(
False
,
False
,
True
,
True
)),
img_neck
=
dict
(
type
=
'FPN'
,
in_channels
=
[
2048
],
out_channels
=
_dim_
,
start_level
=
0
,
add_extra_convs
=
'on_output'
,
num_outs
=
_num_levels_
,
relu_before_extra_convs
=
True
),
pts_bbox_head
=
dict
(
type
=
'BEVFormerHead'
,
bev_h
=
bev_h_
,
bev_w
=
bev_w_
,
num_query
=
900
,
num_classes
=
10
,
in_channels
=
_dim_
,
sync_cls_avg_factor
=
True
,
with_box_refine
=
True
,
as_two_stage
=
False
,
transformer
=
dict
(
type
=
'PerceptionTransformer'
,
rotate_prev_bev
=
True
,
use_shift
=
True
,
use_can_bus
=
True
,
embed_dims
=
_dim_
,
encoder
=
dict
(
type
=
'BEVFormerEncoder'
,
num_layers
=
3
,
pc_range
=
point_cloud_range
,
num_points_in_pillar
=
4
,
return_intermediate
=
False
,
transformerlayers
=
dict
(
type
=
'BEVFormerLayer'
,
attn_cfgs
=
[
dict
(
type
=
'TemporalSelfAttention'
,
embed_dims
=
_dim_
,
num_levels
=
1
),
dict
(
type
=
'SpatialCrossAttention'
,
pc_range
=
point_cloud_range
,
deformable_attention
=
dict
(
type
=
'MSDeformableAttention3D'
,
embed_dims
=
_dim_
,
num_points
=
8
,
num_levels
=
_num_levels_
),
embed_dims
=
_dim_
,
)
],
feedforward_channels
=
_ffn_dim_
,
ffn_dropout
=
0.1
,
operation_order
=
(
'self_attn'
,
'norm'
,
'cross_attn'
,
'norm'
,
'ffn'
,
'norm'
))),
decoder
=
dict
(
type
=
'DetectionTransformerDecoder'
,
num_layers
=
6
,
return_intermediate
=
True
,
transformerlayers
=
dict
(
type
=
'DetrTransformerDecoderLayer'
,
attn_cfgs
=
[
dict
(
type
=
'MultiheadAttention'
,
embed_dims
=
_dim_
,
num_heads
=
8
,
dropout
=
0.1
),
dict
(
type
=
'CustomMSDeformableAttention'
,
embed_dims
=
_dim_
,
num_levels
=
1
),
],
feedforward_channels
=
_ffn_dim_
,
ffn_dropout
=
0.1
,
operation_order
=
(
'self_attn'
,
'norm'
,
'cross_attn'
,
'norm'
,
'ffn'
,
'norm'
)))),
bbox_coder
=
dict
(
type
=
'NMSFreeCoder'
,
post_center_range
=
[
-
61.2
,
-
61.2
,
-
10.0
,
61.2
,
61.2
,
10.0
],
pc_range
=
point_cloud_range
,
max_num
=
300
,
voxel_size
=
voxel_size
,
num_classes
=
10
),
positional_encoding
=
dict
(
type
=
'LearnedPositionalEncoding'
,
num_feats
=
_pos_dim_
,
row_num_embed
=
bev_h_
,
col_num_embed
=
bev_w_
,
),
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
2.0
),
loss_bbox
=
dict
(
type
=
'L1Loss'
,
loss_weight
=
0.25
),
loss_iou
=
dict
(
type
=
'GIoULoss'
,
loss_weight
=
0.0
)),
# model training and testing settings
train_cfg
=
dict
(
pts
=
dict
(
grid_size
=
[
512
,
512
,
1
],
voxel_size
=
voxel_size
,
point_cloud_range
=
point_cloud_range
,
out_size_factor
=
4
,
assigner
=
dict
(
type
=
'HungarianAssigner3D'
,
cls_cost
=
dict
(
type
=
'FocalLossCost'
,
weight
=
2.0
),
reg_cost
=
dict
(
type
=
'BBox3DL1Cost'
,
weight
=
0.25
),
iou_cost
=
dict
(
type
=
'IoUCost'
,
weight
=
0.0
),
# Fake cost. This is just to make it compatible with DETR head.
pc_range
=
point_cloud_range
))))
dataset_type
=
'CustomNuScenesDataset'
data_root
=
'data/nuscenes/'
file_client_args
=
dict
(
backend
=
'disk'
)
train_pipeline
=
[
dict
(
type
=
'LoadMultiViewImageFromFiles'
,
to_float32
=
True
),
dict
(
type
=
'PhotoMetricDistortionMultiViewImage'
),
dict
(
type
=
'LoadAnnotations3D'
,
with_bbox_3d
=
True
,
with_label_3d
=
True
,
with_attr_label
=
False
),
dict
(
type
=
'ObjectRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'ObjectNameFilter'
,
classes
=
class_names
),
dict
(
type
=
'NormalizeMultiviewImage'
,
**
img_norm_cfg
),
dict
(
type
=
'RandomScaleImageMultiViewImage'
,
scales
=
[
0.8
]),
dict
(
type
=
'PadMultiViewImage'
,
size_divisor
=
32
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
type
=
'CustomCollect3D'
,
keys
=
[
'gt_bboxes_3d'
,
'gt_labels_3d'
,
'img'
])
]
test_pipeline
=
[
dict
(
type
=
'LoadMultiViewImageFromFiles'
,
to_float32
=
True
),
dict
(
type
=
'NormalizeMultiviewImage'
,
**
img_norm_cfg
),
# dict(type='PadMultiViewImage', size_divisor=32),
dict
(
type
=
'MultiScaleFlipAug3D'
,
img_scale
=
(
1600
,
900
),
pts_scale_ratio
=
1
,
flip
=
False
,
transforms
=
[
dict
(
type
=
'RandomScaleImageMultiViewImage'
,
scales
=
[
0.8
]),
dict
(
type
=
'PadMultiViewImage'
,
size_divisor
=
32
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
,
with_label
=
False
),
dict
(
type
=
'CustomCollect3D'
,
keys
=
[
'img'
])
])
]
data
=
dict
(
samples_per_gpu
=
1
,
workers_per_gpu
=
4
,
train
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'nuscenes_infos_temporal_train.pkl'
,
pipeline
=
train_pipeline
,
classes
=
class_names
,
modality
=
input_modality
,
test_mode
=
False
,
use_valid_flag
=
True
,
bev_size
=
(
bev_h_
,
bev_w_
),
queue_length
=
queue_length
,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d
=
'LiDAR'
),
val
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'nuscenes_infos_temporal_val.pkl'
,
pipeline
=
test_pipeline
,
bev_size
=
(
bev_h_
,
bev_w_
),
classes
=
class_names
,
modality
=
input_modality
,
samples_per_gpu
=
1
),
test
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'nuscenes_infos_temporal_val.pkl'
,
pipeline
=
test_pipeline
,
bev_size
=
(
bev_h_
,
bev_w_
),
classes
=
class_names
,
modality
=
input_modality
),
shuffler_sampler
=
dict
(
type
=
'DistributedGroupSampler'
),
nonshuffler_sampler
=
dict
(
type
=
'DistributedSampler'
)
)
optimizer
=
dict
(
type
=
'AdamW'
,
lr
=
2e-4
,
paramwise_cfg
=
dict
(
custom_keys
=
{
'img_backbone'
:
dict
(
lr_mult
=
0.1
),
}),
weight_decay
=
0.01
)
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
35
,
norm_type
=
2
))
# learning policy
lr_config
=
dict
(
policy
=
'CosineAnnealing'
,
warmup
=
'linear'
,
warmup_iters
=
500
,
warmup_ratio
=
1.0
/
3
,
min_lr_ratio
=
1e-3
)
total_epochs
=
24
evaluation
=
dict
(
interval
=
1
,
pipeline
=
test_pipeline
)
runner
=
dict
(
type
=
'EpochBasedRunner'
,
max_epochs
=
total_epochs
)
load_from
=
'ckpts/r101_dcn_fcos3d_pretrain.pth'
log_config
=
dict
(
interval
=
50
,
hooks
=
[
dict
(
type
=
'TextLoggerHook'
),
dict
(
type
=
'TensorboardLoggerHook'
)
])
checkpoint_config
=
dict
(
interval
=
1
)
Prev
1
2
3
4
5
6
7
…
11
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment