Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dcnv3
Commits
b64d9ca3
Unverified
Commit
b64d9ca3
authored
Apr 17, 2023
by
Wenhai Wang
Committed by
GitHub
Apr 17, 2023
Browse files
Merge pull request #105 from zhiqi-li/occupancy
support occupancy prediction
parents
bdd98bcb
df3c64a9
Changes
160
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1644 additions
and
0 deletions
+1644
-0
autonomous_driving/occupancy_prediction/projects/configs/_base_/models/paconv_ssg.py
...y_prediction/projects/configs/_base_/models/paconv_ssg.py
+49
-0
autonomous_driving/occupancy_prediction/projects/configs/_base_/models/parta2.py
...pancy_prediction/projects/configs/_base_/models/parta2.py
+201
-0
autonomous_driving/occupancy_prediction/projects/configs/_base_/models/pointnet2_msg.py
...rediction/projects/configs/_base_/models/pointnet2_msg.py
+28
-0
autonomous_driving/occupancy_prediction/projects/configs/_base_/models/pointnet2_ssg.py
...rediction/projects/configs/_base_/models/pointnet2_ssg.py
+35
-0
autonomous_driving/occupancy_prediction/projects/configs/_base_/models/votenet.py
...ancy_prediction/projects/configs/_base_/models/votenet.py
+73
-0
autonomous_driving/occupancy_prediction/projects/configs/_base_/schedules/cosine.py
...cy_prediction/projects/configs/_base_/schedules/cosine.py
+20
-0
autonomous_driving/occupancy_prediction/projects/configs/_base_/schedules/cyclic_20e.py
...rediction/projects/configs/_base_/schedules/cyclic_20e.py
+24
-0
autonomous_driving/occupancy_prediction/projects/configs/_base_/schedules/cyclic_40e.py
...rediction/projects/configs/_base_/schedules/cyclic_40e.py
+31
-0
autonomous_driving/occupancy_prediction/projects/configs/_base_/schedules/mmdet_schedule_1x.py
...on/projects/configs/_base_/schedules/mmdet_schedule_1x.py
+11
-0
autonomous_driving/occupancy_prediction/projects/configs/_base_/schedules/schedule_2x.py
...ediction/projects/configs/_base_/schedules/schedule_2x.py
+14
-0
autonomous_driving/occupancy_prediction/projects/configs/_base_/schedules/schedule_3x.py
...ediction/projects/configs/_base_/schedules/schedule_3x.py
+9
-0
autonomous_driving/occupancy_prediction/projects/configs/_base_/schedules/seg_cosine_150e.py
...tion/projects/configs/_base_/schedules/seg_cosine_150e.py
+9
-0
autonomous_driving/occupancy_prediction/projects/configs/_base_/schedules/seg_cosine_200e.py
...tion/projects/configs/_base_/schedules/seg_cosine_200e.py
+9
-0
autonomous_driving/occupancy_prediction/projects/configs/_base_/schedules/seg_cosine_50e.py
...ction/projects/configs/_base_/schedules/seg_cosine_50e.py
+9
-0
autonomous_driving/occupancy_prediction/projects/configs/bevformer/.ipynb_checkpoints/bevformer_small_occ-checkpoint.py
...rmer/.ipynb_checkpoints/bevformer_small_occ-checkpoint.py
+241
-0
autonomous_driving/occupancy_prediction/projects/configs/bevformer/bevformer_base_occ.py
...ediction/projects/configs/bevformer/bevformer_base_occ.py
+242
-0
autonomous_driving/occupancy_prediction/projects/configs/bevformer/bevformer_intern-s_occ.py
...tion/projects/configs/bevformer/bevformer_intern-s_occ.py
+246
-0
autonomous_driving/occupancy_prediction/projects/configs/bevformer/bevformer_small_occ.py
...diction/projects/configs/bevformer/bevformer_small_occ.py
+241
-0
autonomous_driving/occupancy_prediction/projects/configs/datasets/custom_nus-3d.py
...ncy_prediction/projects/configs/datasets/custom_nus-3d.py
+141
-0
autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/__init__.py
.../occupancy_prediction/projects/mmdet3d_plugin/__init__.py
+11
-0
No files found.
autonomous_driving/occupancy_prediction/projects/configs/_base_/models/paconv_ssg.py
0 → 100644
View file @
b64d9ca3
# model settings
model = dict(
    type='EncoderDecoder3D',
    backbone=dict(
        type='PointNet2SASSG',
        in_channels=9,  # [xyz, rgb, normalized_xyz]
        num_points=(1024, 256, 64, 16),
        radius=(None, None, None, None),  # use kNN instead of ball query
        num_samples=(32, 32, 32, 32),
        sa_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256),
                     (256, 256, 512)),
        fp_channels=(),
        norm_cfg=dict(type='BN2d', momentum=0.1),
        sa_cfg=dict(
            type='PAConvSAModule',
            pool_mod='max',
            use_xyz=True,
            normalize_xyz=False,
            paconv_num_kernels=[16, 16, 16],
            paconv_kernel_input='w_neighbor',
            scorenet_input='w_neighbor_dist',
            scorenet_cfg=dict(
                mlp_channels=[16, 16, 16],
                score_norm='softmax',
                temp_factor=1.0,
                last_bn=False))),
    decode_head=dict(
        type='PAConvHead',
        # PAConv model's decoder takes skip connections from backbone.
        # Different from PointNet++, it also concats input features in the
        # last level of decoder, leading to `128 + 6` as the channel number.
        fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128),
                     (128 + 6, 128, 128, 128)),
        channels=128,
        dropout_ratio=0.5,
        conv_cfg=dict(type='Conv1d'),
        norm_cfg=dict(type='BN1d'),
        act_cfg=dict(type='ReLU'),
        loss_decode=dict(
            type='CrossEntropyLoss',
            use_sigmoid=False,
            class_weight=None,  # should be modified with dataset
            loss_weight=1.0)),
    # correlation loss to regularize PAConv's kernel weights
    loss_regularization=dict(
        type='PAConvRegularizationLoss', reduction='sum', loss_weight=10.0),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='slide'))
autonomous_driving/occupancy_prediction/projects/configs/_base_/models/parta2.py
0 → 100644
View file @
b64d9ca3
# model settings
voxel_size = [0.05, 0.05, 0.1]
point_cloud_range = [0, -40, -3, 70.4, 40, 1]

model = dict(
    type='PartA2',
    voxel_layer=dict(
        max_num_points=5,  # max_points_per_voxel
        point_cloud_range=point_cloud_range,
        voxel_size=voxel_size,
        max_voxels=(16000, 40000)  # (training, testing) max_voxels
    ),
    voxel_encoder=dict(type='HardSimpleVFE'),
    middle_encoder=dict(
        type='SparseUNet',
        in_channels=4,
        sparse_shape=[41, 1600, 1408],
        order=('conv', 'norm', 'act')),
    backbone=dict(
        type='SECOND',
        in_channels=256,
        layer_nums=[5, 5],
        layer_strides=[1, 2],
        out_channels=[128, 256]),
    neck=dict(
        type='SECONDFPN',
        in_channels=[128, 256],
        upsample_strides=[1, 2],
        out_channels=[256, 256]),
    rpn_head=dict(
        type='PartA2RPNHead',
        num_classes=3,
        in_channels=512,
        feat_channels=512,
        use_direction_classifier=True,
        anchor_generator=dict(
            type='Anchor3DRangeGenerator',
            ranges=[[0, -40.0, -0.6, 70.4, 40.0, -0.6],
                    [0, -40.0, -0.6, 70.4, 40.0, -0.6],
                    [0, -40.0, -1.78, 70.4, 40.0, -1.78]],
            sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
            rotations=[0, 1.57],
            reshape_out=False),
        diff_rad_by_sin=True,
        assigner_per_size=True,
        assign_per_class=True,
        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
        loss_cls=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
        loss_dir=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
    roi_head=dict(
        type='PartAggregationROIHead',
        num_classes=3,
        semantic_head=dict(
            type='PointwiseSemanticHead',
            in_channels=16,
            extra_width=0.2,
            seg_score_thr=0.3,
            num_classes=3,
            loss_seg=dict(
                type='FocalLoss',
                use_sigmoid=True,
                reduction='sum',
                gamma=2.0,
                alpha=0.25,
                loss_weight=1.0),
            loss_part=dict(
                type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
        seg_roi_extractor=dict(
            type='Single3DRoIAwareExtractor',
            roi_layer=dict(
                type='RoIAwarePool3d',
                out_size=14,
                max_pts_per_voxel=128,
                mode='max')),
        part_roi_extractor=dict(
            type='Single3DRoIAwareExtractor',
            roi_layer=dict(
                type='RoIAwarePool3d',
                out_size=14,
                max_pts_per_voxel=128,
                mode='avg')),
        bbox_head=dict(
            type='PartA2BboxHead',
            num_classes=3,
            seg_in_channels=16,
            part_in_channels=4,
            seg_conv_channels=[64, 64],
            part_conv_channels=[64, 64],
            merge_conv_channels=[128, 128],
            down_conv_channels=[128, 256],
            bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
            shared_fc_channels=[256, 512, 512, 512],
            cls_channels=[256, 256],
            reg_channels=[256, 256],
            dropout_ratio=0.1,
            roi_feat_size=14,
            with_corner_loss=True,
            loss_bbox=dict(
                type='SmoothL1Loss',
                beta=1.0 / 9.0,
                reduction='sum',
                loss_weight=1.0),
            loss_cls=dict(
                type='CrossEntropyLoss',
                use_sigmoid=True,
                reduction='sum',
                loss_weight=1.0))),
    # model training and testing settings
    train_cfg=dict(
        rpn=dict(
            assigner=[
                dict(  # for Pedestrian
                    type='MaxIoUAssigner',
                    iou_calculator=dict(type='BboxOverlapsNearest3D'),
                    pos_iou_thr=0.5,
                    neg_iou_thr=0.35,
                    min_pos_iou=0.35,
                    ignore_iof_thr=-1),
                dict(  # for Cyclist
                    type='MaxIoUAssigner',
                    iou_calculator=dict(type='BboxOverlapsNearest3D'),
                    pos_iou_thr=0.5,
                    neg_iou_thr=0.35,
                    min_pos_iou=0.35,
                    ignore_iof_thr=-1),
                dict(  # for Car
                    type='MaxIoUAssigner',
                    iou_calculator=dict(type='BboxOverlapsNearest3D'),
                    pos_iou_thr=0.6,
                    neg_iou_thr=0.45,
                    min_pos_iou=0.45,
                    ignore_iof_thr=-1)
            ],
            allowed_border=0,
            pos_weight=-1,
            debug=False),
        rpn_proposal=dict(
            nms_pre=9000,
            nms_post=512,
            max_num=512,
            nms_thr=0.8,
            score_thr=0,
            use_rotate_nms=False),
        rcnn=dict(
            assigner=[
                dict(  # for Pedestrian
                    type='MaxIoUAssigner',
                    iou_calculator=dict(
                        type='BboxOverlaps3D', coordinate='lidar'),
                    pos_iou_thr=0.55,
                    neg_iou_thr=0.55,
                    min_pos_iou=0.55,
                    ignore_iof_thr=-1),
                dict(  # for Cyclist
                    type='MaxIoUAssigner',
                    iou_calculator=dict(
                        type='BboxOverlaps3D', coordinate='lidar'),
                    pos_iou_thr=0.55,
                    neg_iou_thr=0.55,
                    min_pos_iou=0.55,
                    ignore_iof_thr=-1),
                dict(  # for Car
                    type='MaxIoUAssigner',
                    iou_calculator=dict(
                        type='BboxOverlaps3D', coordinate='lidar'),
                    pos_iou_thr=0.55,
                    neg_iou_thr=0.55,
                    min_pos_iou=0.55,
                    ignore_iof_thr=-1)
            ],
            sampler=dict(
                type='IoUNegPiecewiseSampler',
                num=128,
                pos_fraction=0.55,
                neg_piece_fractions=[0.8, 0.2],
                neg_iou_piece_thrs=[0.55, 0.1],
                neg_pos_ub=-1,
                add_gt_as_proposals=False,
                return_iou=True),
            cls_pos_thr=0.75,
            cls_neg_thr=0.25)),
    test_cfg=dict(
        rpn=dict(
            nms_pre=1024,
            nms_post=100,
            max_num=100,
            nms_thr=0.7,
            score_thr=0,
            use_rotate_nms=True),
        rcnn=dict(
            use_rotate_nms=True,
            use_raw_score=True,
            nms_thr=0.01,
            score_thr=0.1)))
autonomous_driving/occupancy_prediction/projects/configs/_base_/models/pointnet2_msg.py
0 → 100644
View file @
b64d9ca3
_base_ = './pointnet2_ssg.py'

# model settings
model = dict(
    backbone=dict(
        _delete_=True,
        type='PointNet2SAMSG',
        in_channels=6,  # [xyz, rgb], should be modified with dataset
        num_points=(1024, 256, 64, 16),
        radii=((0.05, 0.1), (0.1, 0.2), (0.2, 0.4), (0.4, 0.8)),
        num_samples=((16, 32), (16, 32), (16, 32), (16, 32)),
        sa_channels=(((16, 16, 32), (32, 32, 64)),
                     ((64, 64, 128), (64, 96, 128)),
                     ((128, 196, 256), (128, 196, 256)),
                     ((256, 256, 512), (256, 384, 512))),
        aggregation_channels=(None, None, None, None),
        fps_mods=(('D-FPS'), ('D-FPS'), ('D-FPS'), ('D-FPS')),
        fps_sample_range_lists=((-1), (-1), (-1), (-1)),
        dilated_group=(False, False, False, False),
        out_indices=(0, 1, 2, 3),
        sa_cfg=dict(
            type='PointSAModuleMSG',
            pool_mod='max',
            use_xyz=True,
            normalize_xyz=False)),
    decode_head=dict(
        fp_channels=((1536, 256, 256), (512, 256, 256), (352, 256, 128),
                     (128, 128, 128, 128))))
autonomous_driving/occupancy_prediction/projects/configs/_base_/models/pointnet2_ssg.py
0 → 100644
View file @
b64d9ca3
# model settings
model = dict(
    type='EncoderDecoder3D',
    backbone=dict(
        type='PointNet2SASSG',
        in_channels=6,  # [xyz, rgb], should be modified with dataset
        num_points=(1024, 256, 64, 16),
        radius=(0.1, 0.2, 0.4, 0.8),
        num_samples=(32, 32, 32, 32),
        sa_channels=((32, 32, 64), (64, 64, 128), (128, 128, 256),
                     (256, 256, 512)),
        fp_channels=(),
        norm_cfg=dict(type='BN2d'),
        sa_cfg=dict(
            type='PointSAModule',
            pool_mod='max',
            use_xyz=True,
            normalize_xyz=False)),
    decode_head=dict(
        type='PointNet2Head',
        fp_channels=((768, 256, 256), (384, 256, 256), (320, 256, 128),
                     (128, 128, 128, 128)),
        channels=128,
        dropout_ratio=0.5,
        conv_cfg=dict(type='Conv1d'),
        norm_cfg=dict(type='BN1d'),
        act_cfg=dict(type='ReLU'),
        loss_decode=dict(
            type='CrossEntropyLoss',
            use_sigmoid=False,
            class_weight=None,  # should be modified with dataset
            loss_weight=1.0)),
    # model training and testing settings
    train_cfg=dict(),
    test_cfg=dict(mode='slide'))
autonomous_driving/occupancy_prediction/projects/configs/_base_/models/votenet.py
0 → 100644
View file @
b64d9ca3
model = dict(
    type='VoteNet',
    backbone=dict(
        type='PointNet2SASSG',
        in_channels=4,
        num_points=(2048, 1024, 512, 256),
        radius=(0.2, 0.4, 0.8, 1.2),
        num_samples=(64, 32, 16, 16),
        sa_channels=((64, 64, 128), (128, 128, 256), (128, 128, 256),
                     (128, 128, 256)),
        fp_channels=((256, 256), (256, 256)),
        norm_cfg=dict(type='BN2d'),
        sa_cfg=dict(
            type='PointSAModule',
            pool_mod='max',
            use_xyz=True,
            normalize_xyz=True)),
    bbox_head=dict(
        type='VoteHead',
        vote_module_cfg=dict(
            in_channels=256,
            vote_per_seed=1,
            gt_per_seed=3,
            conv_channels=(256, 256),
            conv_cfg=dict(type='Conv1d'),
            norm_cfg=dict(type='BN1d'),
            norm_feats=True,
            vote_loss=dict(
                type='ChamferDistance',
                mode='l1',
                reduction='none',
                loss_dst_weight=10.0)),
        vote_aggregation_cfg=dict(
            type='PointSAModule',
            num_point=256,
            radius=0.3,
            num_sample=16,
            mlp_channels=[256, 128, 128, 128],
            use_xyz=True,
            normalize_xyz=True),
        pred_layer_cfg=dict(
            in_channels=128, shared_conv_channels=(128, 128), bias=True),
        conv_cfg=dict(type='Conv1d'),
        norm_cfg=dict(type='BN1d'),
        objectness_loss=dict(
            type='CrossEntropyLoss',
            class_weight=[0.2, 0.8],
            reduction='sum',
            loss_weight=5.0),
        center_loss=dict(
            type='ChamferDistance',
            mode='l2',
            reduction='sum',
            loss_src_weight=10.0,
            loss_dst_weight=10.0),
        dir_class_loss=dict(
            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
        dir_res_loss=dict(
            type='SmoothL1Loss', reduction='sum', loss_weight=10.0),
        size_class_loss=dict(
            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0),
        size_res_loss=dict(
            type='SmoothL1Loss', reduction='sum', loss_weight=10.0 / 3.0),
        semantic_loss=dict(
            type='CrossEntropyLoss', reduction='sum', loss_weight=1.0)),
    # model training and testing settings
    train_cfg=dict(
        pos_distance_thr=0.3, neg_distance_thr=0.6, sample_mod='vote'),
    test_cfg=dict(
        sample_mod='seed',
        nms_thr=0.25,
        score_thr=0.05,
        per_class_proposal=True))
autonomous_driving/occupancy_prediction/projects/configs/_base_/schedules/cosine.py
0 → 100644
View file @
b64d9ca3
# This schedule is mainly used by models with dynamic voxelization
# optimizer
lr = 0.003  # max learning rate
optimizer = dict(
    type='AdamW',
    lr=lr,
    betas=(0.95, 0.99),  # the momentum is changed during training
    weight_decay=0.001)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=1000,
    warmup_ratio=1.0 / 10,
    min_lr_ratio=1e-5)
momentum_config = None
runner = dict(type='EpochBasedRunner', max_epochs=40)
autonomous_driving/occupancy_prediction/projects/configs/_base_/schedules/cyclic_20e.py
0 → 100644
View file @
b64d9ca3
# For nuScenes dataset, we usually evaluate the model at the end of training.
# Since the models are trained by 24 epochs by default, we set evaluation
# interval to be 20. Please change the interval accordingly if you do not
# use a default schedule.
# optimizer
# This schedule is mainly used by models on nuScenes dataset
optimizer = dict(type='AdamW', lr=1e-4, weight_decay=0.01)
# max_norm=10 is better for SECOND
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
lr_config = dict(
    policy='cyclic',
    target_ratio=(10, 1e-4),
    cyclic_times=1,
    step_ratio_up=0.4,
)
momentum_config = dict(
    policy='cyclic',
    target_ratio=(0.85 / 0.95, 1),
    cyclic_times=1,
    step_ratio_up=0.4,
)
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=20)
autonomous_driving/occupancy_prediction/projects/configs/_base_/schedules/cyclic_40e.py
0 → 100644
View file @
b64d9ca3
# The schedule is usually used by models trained on KITTI dataset
# The learning rate set in the cyclic schedule is the initial learning rate
# rather than the max learning rate. Since the target_ratio is (10, 1e-4),
# the learning rate will change from 0.0018 to 0.018, then go to 0.0018*1e-4
lr = 0.0018
# The optimizer follows the setting in SECOND.Pytorch, but here we use
# the official AdamW optimizer implemented by PyTorch.
optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
# We use cyclic learning rate and momentum schedule following SECOND.Pytorch
# https://github.com/traveller59/second.pytorch/blob/3aba19c9688274f75ebb5e576f65cfe54773c021/torchplus/train/learning_schedules_fastai.py#L69 # noqa
# We implement them in mmcv, for more details, please refer to
# https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/lr_updater.py#L327 # noqa
# https://github.com/open-mmlab/mmcv/blob/f48241a65aebfe07db122e9db320c31b685dc674/mmcv/runner/hooks/momentum_updater.py#L130 # noqa
lr_config = dict(
    policy='cyclic',
    target_ratio=(10, 1e-4),
    cyclic_times=1,
    step_ratio_up=0.4,
)
momentum_config = dict(
    policy='cyclic',
    target_ratio=(0.85 / 0.95, 1),
    cyclic_times=1,
    step_ratio_up=0.4,
)
# Although the max_epochs is 40, this schedule is usually used with
# RepeatDataset with repeat ratio N, thus the actual max epoch
# number could be Nx40
runner = dict(type='EpochBasedRunner', max_epochs=40)
autonomous_driving/occupancy_prediction/projects/configs/_base_/schedules/mmdet_schedule_1x.py
0 → 100644
View file @
b64d9ca3
# optimizer
optimizer = dict(type='SGD', lr=0.02, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=None)
# learning policy
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=0.001,
    step=[8, 11])
runner = dict(type='EpochBasedRunner', max_epochs=12)
autonomous_driving/occupancy_prediction/projects/configs/_base_/schedules/schedule_2x.py
0 → 100644
View file @
b64d9ca3
# optimizer
# This schedule is mainly used by models on nuScenes dataset
optimizer = dict(type='AdamW', lr=0.001, weight_decay=0.01)
# max_norm=10 is better for SECOND
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
lr_config = dict(
    policy='step',
    warmup='linear',
    warmup_iters=1000,
    warmup_ratio=1.0 / 1000,
    step=[20, 23])
momentum_config = None
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=24)
autonomous_driving/occupancy_prediction/projects/configs/_base_/schedules/schedule_3x.py
0 → 100644
View file @
b64d9ca3
# optimizer
# This schedule is mainly used by models on indoor dataset,
# e.g., VoteNet on SUNRGBD and ScanNet
lr = 0.008  # max learning rate
optimizer = dict(type='AdamW', lr=lr, weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict(policy='step', warmup=None, step=[24, 32])
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=36)
autonomous_driving/occupancy_prediction/projects/configs/_base_/schedules/seg_cosine_150e.py
0 → 100644
View file @
b64d9ca3
# optimizer
# This schedule is mainly used on S3DIS dataset in segmentation task
optimizer = dict(type='SGD', lr=0.2, weight_decay=0.0001, momentum=0.9)
optimizer_config = dict(grad_clip=None)
lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=0.002)
momentum_config = None
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=150)
autonomous_driving/occupancy_prediction/projects/configs/_base_/schedules/seg_cosine_200e.py
0 → 100644
View file @
b64d9ca3
# optimizer
# This schedule is mainly used on ScanNet dataset in segmentation task
optimizer = dict(type='Adam', lr=0.001, weight_decay=0.01)
optimizer_config = dict(grad_clip=None)
lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5)
momentum_config = None
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=200)
autonomous_driving/occupancy_prediction/projects/configs/_base_/schedules/seg_cosine_50e.py
0 → 100644
View file @
b64d9ca3
# optimizer
# This schedule is mainly used on S3DIS dataset in segmentation task
optimizer = dict(type='Adam', lr=0.001, weight_decay=0.001)
optimizer_config = dict(grad_clip=None)
lr_config = dict(policy='CosineAnnealing', warmup=None, min_lr=1e-5)
momentum_config = None
# runtime settings
runner = dict(type='EpochBasedRunner', max_epochs=50)
autonomous_driving/occupancy_prediction/projects/configs/bevformer/.ipynb_checkpoints/bevformer_small_occ-checkpoint.py
0 → 100644
View file @
b64d9ca3
_base_ = ['../datasets/custom_nus-3d.py', '../_base_/default_runtime.py']
#
plugin = True
plugin_dir = 'projects/mmdet3d_plugin/'

# If point cloud range is changed, the models should also change their point
# cloud range accordingly
point_cloud_range = [-40, -40, -1.0, 40, 40, 5.4]
voxel_size = [0.2, 0.2, 8]

img_norm_cfg = dict(
    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
# For nuScenes we usually do 10-class detection
class_names = [
    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
]

input_modality = dict(
    use_lidar=False,
    use_camera=True,
    use_radar=False,
    use_map=False,
    use_external=True)

_dim_ = 256
_pos_dim_ = _dim_ // 2
_ffn_dim_ = _dim_ * 2
_num_levels_ = 2
bev_h_ = 200
bev_w_ = 200
queue_length = 4  # each sequence contains `queue_length` frames.

model = dict(
    type='BEVFormerOcc',
    use_grid_mask=True,
    video_test_mode=True,
    img_backbone=dict(
        type='ResNet',
        depth=50,
        num_stages=4,
        out_indices=(2, 3),
        frozen_stages=1,
        norm_cfg=dict(type='BN2d', requires_grad=False),
        norm_eval=True,
        style='caffe',
        dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),
        # original DCNv2 will print log when perform load_state_dict
        stage_with_dcn=(False, False, True, True)),
    img_neck=dict(
        type='FPN',
        in_channels=[1024, 2048],
        out_channels=_dim_,
        start_level=0,
        add_extra_convs='on_output',
        num_outs=_num_levels_,
        relu_before_extra_convs=True),
    pts_bbox_head=dict(
        type='BEVFormerOccHead',
        pc_range=point_cloud_range,
        bev_h=bev_h_,
        bev_w=bev_w_,
        num_classes=18,
        in_channels=_dim_,
        sync_cls_avg_factor=True,
        with_box_refine=True,
        as_two_stage=False,
        # loss_occ=dict(
        #     type='FocalLoss',
        #     use_sigmoid=False,
        #     gamma=2.0,
        #     alpha=0.25,
        #     loss_weight=10.0),
        use_mask=False,
        loss_occ=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
        transformer=dict(
            type='TransformerOcc',
            pillar_h=16,
            num_classes=18,
            norm_cfg=dict(type='BN', ),
            norm_cfg_3d=dict(type='BN3d', ),
            use_3d=True,
            use_conv=False,
            rotate_prev_bev=True,
            use_shift=True,
            use_can_bus=True,
            embed_dims=_dim_,
            encoder=dict(
                type='BEVFormerEncoder',
                num_layers=1,
                pc_range=point_cloud_range,
                num_points_in_pillar=8,
                return_intermediate=False,
                transformerlayers=dict(
                    type='BEVFormerLayer',
                    attn_cfgs=[
                        dict(
                            type='TemporalSelfAttention',
                            embed_dims=_dim_,
                            num_levels=1),
                        dict(
                            type='SpatialCrossAttention',
                            pc_range=point_cloud_range,
                            deformable_attention=dict(
                                type='MSDeformableAttention3D',
                                embed_dims=_dim_,
                                num_points=8,
                                num_levels=_num_levels_),
                            embed_dims=_dim_,
                        )
                    ],
                    feedforward_channels=_ffn_dim_,
                    ffn_dropout=0.1,
                    operation_order=('self_attn', 'norm', 'cross_attn',
                                     'norm', 'ffn', 'norm'))),
        ),
        positional_encoding=dict(
            type='LearnedPositionalEncoding',
            num_feats=_pos_dim_,
            row_num_embed=bev_h_,
            col_num_embed=bev_w_,
        ),
        # model training and testing settings
        train_cfg=dict(
            pts=dict(
                grid_size=[512, 512, 1],
                voxel_size=voxel_size,
                point_cloud_range=point_cloud_range,
                out_size_factor=4,
                assigner=dict(
                    type='HungarianAssigner3D',
                    cls_cost=dict(type='FocalLossCost', weight=2.0),
                    reg_cost=dict(type='BBox3DL1Cost', weight=0.25),
                    iou_cost=dict(
                        type='IoUCost',
                        weight=0.0),  # Fake cost. This is just to make it
                    # compatible with DETR head.
                    pc_range=point_cloud_range)))))

dataset_type = 'NuSceneOcc'
data_root = 'data/occ3d-nus/'
file_client_args = dict(backend='disk')
occ_gt_data_root = 'data/occ3d-nus'

train_pipeline = [
    dict(type='LoadMultiViewImageFromFiles', to_float32=True),
    dict(type='LoadOccGTFromFile', data_root=occ_gt_data_root),
    dict(type='PhotoMetricDistortionMultiViewImage'),
    dict(
        type='LoadAnnotations3D',
        with_bbox_3d=True,
        with_label_3d=True,
        with_attr_label=False),
    dict(type='RandomScaleImageMultiViewImage', scales=[0.2]),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectNameFilter', classes=class_names),
    dict(type='NormalizeMultiviewImage', **img_norm_cfg),
    dict(type='PadMultiViewImage', size_divisor=32),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
    dict(
        type='CustomCollect3D',
        keys=['img', 'voxel_semantics', 'mask_lidar', 'mask_camera'])
]

test_pipeline = [
    dict(type='LoadMultiViewImageFromFiles', to_float32=True),
    dict(type='LoadOccGTFromFile', data_root=occ_gt_data_root),
    dict(type='NormalizeMultiviewImage', **img_norm_cfg),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1600, 900),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(type='RandomScaleImageMultiViewImage', scales=[0.8]),
            dict(type='PadMultiViewImage', size_divisor=32),
            dict(
                type='DefaultFormatBundle3D',
                class_names=class_names,
                with_label=False),
            dict(type='CustomCollect3D', keys=['img'])
        ])
]

data = dict(
    samples_per_gpu=1,
    workers_per_gpu=0,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'occ_infos_temporal_train.pkl',
        pipeline=train_pipeline,
        classes=class_names,
        modality=input_modality,
        test_mode=False,
        use_valid_flag=True,
        bev_size=(bev_h_, bev_w_),
        queue_length=queue_length,
        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
        # and box_type_3d='Depth' in sunrgbd and scannet dataset.
        box_type_3d='LiDAR'),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'occ_infos_temporal_val.pkl',
        pipeline=test_pipeline,
        bev_size=(bev_h_, bev_w_),
        classes=class_names,
        modality=input_modality,
        samples_per_gpu=1),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'occ_infos_temporal_val.pkl',
        pipeline=test_pipeline,
        bev_size=(bev_h_, bev_w_),
        classes=class_names,
        modality=input_modality),
    shuffler_sampler=dict(type='DistributedGroupSampler'),
    nonshuffler_sampler=dict(type='DistributedSampler'))

optimizer = dict(
    type='AdamW',
    lr=2e-4,
    paramwise_cfg=dict(custom_keys={
        'img_backbone': dict(lr_mult=0.1),
    }),
    weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    min_lr_ratio=1e-3)
total_epochs = 24
evaluation = dict(interval=1, pipeline=test_pipeline)

runner = dict(type='EpochBasedRunner', max_epochs=total_epochs)
# load_from = 'ckpts/r101_dcn_fcos3d_pretrain.pth'
log_config = dict(
    interval=50,
    hooks=[dict(type='TextLoggerHook'),
           dict(type='TensorboardLoggerHook')])
checkpoint_config = dict(interval=1)
autonomous_driving/occupancy_prediction/projects/configs/bevformer/bevformer_base_occ.py
0 → 100644
View file @
b64d9ca3
_base_
=
[
'../datasets/custom_nus-3d.py'
,
'../_base_/default_runtime.py'
]
#
plugin
=
True
plugin_dir
=
'projects/mmdet3d_plugin/'
# If point cloud range is changed, the models should also change their point
# cloud range accordingly
point_cloud_range
=
[
-
40
,
-
40
,
-
1.0
,
40
,
40
,
5.4
]
voxel_size
=
[
0.2
,
0.2
,
8
]
img_norm_cfg
=
dict
(
mean
=
[
103.530
,
116.280
,
123.675
],
std
=
[
1.0
,
1.0
,
1.0
],
to_rgb
=
False
)
# For nuScenes we usually do 10-class detection
class_names
=
[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
]
input_modality
=
dict
(
use_lidar
=
False
,
use_camera
=
True
,
use_radar
=
False
,
use_map
=
False
,
use_external
=
True
)
_dim_
=
256
_pos_dim_
=
_dim_
//
2
_ffn_dim_
=
_dim_
*
2
_num_levels_
=
4
bev_h_
=
200
bev_w_
=
200
queue_length
=
4
# each sequence contains `queue_length` frames.
model
=
dict
(
type
=
'BEVFormerOcc'
,
use_grid_mask
=
True
,
video_test_mode
=
True
,
img_backbone
=
dict
(
type
=
'ResNet'
,
depth
=
101
,
num_stages
=
4
,
out_indices
=
(
1
,
2
,
3
),
frozen_stages
=
1
,
norm_cfg
=
dict
(
type
=
'BN2d'
,
requires_grad
=
False
),
norm_eval
=
True
,
style
=
'caffe'
,
dcn
=
dict
(
type
=
'DCNv2'
,
deform_groups
=
1
,
fallback_on_stride
=
False
),
# original DCNv2 will print log when perform load_state_dict
stage_with_dcn
=
(
False
,
False
,
True
,
True
)),
img_neck
=
dict
(
type
=
'FPN'
,
in_channels
=
[
512
,
1024
,
2048
],
out_channels
=
_dim_
,
start_level
=
0
,
add_extra_convs
=
'on_output'
,
num_outs
=
4
,
relu_before_extra_convs
=
True
),
pts_bbox_head
=
dict
(
type
=
'BEVFormerOccHead'
,
pc_range
=
point_cloud_range
,
bev_h
=
bev_h_
,
bev_w
=
bev_w_
,
num_classes
=
18
,
in_channels
=
_dim_
,
sync_cls_avg_factor
=
True
,
with_box_refine
=
True
,
as_two_stage
=
False
,
# loss_occ=dict(
# type='FocalLoss',
# use_sigmoid=False,
# gamma=2.0,
# alpha=0.25,
# loss_weight=10.0),
use_mask
=
False
,
loss_occ
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
False
,
loss_weight
=
1.0
),
transformer
=
dict
(
type
=
'TransformerOcc'
,
pillar_h
=
16
,
num_classes
=
18
,
norm_cfg
=
dict
(
type
=
'BN'
,
),
norm_cfg_3d
=
dict
(
type
=
'BN3d'
,
),
use_3d
=
True
,
use_conv
=
False
,
rotate_prev_bev
=
True
,
use_shift
=
True
,
use_can_bus
=
True
,
embed_dims
=
_dim_
,
encoder
=
dict
(
type
=
'BEVFormerEncoder'
,
num_layers
=
4
,
pc_range
=
point_cloud_range
,
num_points_in_pillar
=
8
,
return_intermediate
=
False
,
transformerlayers
=
dict
(
type
=
'BEVFormerLayer'
,
attn_cfgs
=
[
dict
(
type
=
'TemporalSelfAttention'
,
embed_dims
=
_dim_
,
num_levels
=
1
),
dict
(
type
=
'SpatialCrossAttention'
,
pc_range
=
point_cloud_range
,
deformable_attention
=
dict
(
type
=
'MSDeformableAttention3D'
,
embed_dims
=
_dim_
,
num_points
=
8
,
num_levels
=
_num_levels_
),
embed_dims
=
_dim_
,
)
],
feedforward_channels
=
_ffn_dim_
,
ffn_dropout
=
0.1
,
operation_order
=
(
'self_attn'
,
'norm'
,
'cross_attn'
,
'norm'
,
'ffn'
,
'norm'
))),
),
positional_encoding
=
dict
(
type
=
'LearnedPositionalEncoding'
,
num_feats
=
_pos_dim_
,
row_num_embed
=
bev_h_
,
col_num_embed
=
bev_w_
,
),
# model training and testing settings
train_cfg
=
dict
(
pts
=
dict
(
grid_size
=
[
512
,
512
,
1
],
voxel_size
=
voxel_size
,
point_cloud_range
=
point_cloud_range
,
out_size_factor
=
4
,
assigner
=
dict
(
type
=
'HungarianAssigner3D'
,
cls_cost
=
dict
(
type
=
'FocalLossCost'
,
weight
=
2.0
),
reg_cost
=
dict
(
type
=
'BBox3DL1Cost'
,
weight
=
0.25
),
iou_cost
=
dict
(
type
=
'IoUCost'
,
weight
=
0.0
),
# Fake cost. This is just to make it compatible with DETR head.
pc_range
=
point_cloud_range
)))))
dataset_type
=
'NuSceneOcc'
data_root
=
'data/occ3d-nus/'
file_client_args
=
dict
(
backend
=
'disk'
)
occ_gt_data_root
=
'data/occ3d-nus'
train_pipeline
=
[
dict
(
type
=
'LoadMultiViewImageFromFiles'
,
to_float32
=
True
),
dict
(
type
=
'LoadOccGTFromFile'
,
data_root
=
occ_gt_data_root
),
dict
(
type
=
'PhotoMetricDistortionMultiViewImage'
),
dict
(
type
=
'LoadAnnotations3D'
,
with_bbox_3d
=
True
,
with_label_3d
=
True
,
with_attr_label
=
False
),
dict
(
type
=
'ObjectRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'ObjectNameFilter'
,
classes
=
class_names
),
dict
(
type
=
'NormalizeMultiviewImage'
,
**
img_norm_cfg
),
dict
(
type
=
'PadMultiViewImage'
,
size_divisor
=
32
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
type
=
'CustomCollect3D'
,
keys
=
[
'img'
,
'voxel_semantics'
,
'mask_lidar'
,
'mask_camera'
]
)
]
test_pipeline
=
[
dict
(
type
=
'LoadMultiViewImageFromFiles'
,
to_float32
=
True
),
dict
(
type
=
'LoadOccGTFromFile'
,
data_root
=
occ_gt_data_root
),
dict
(
type
=
'NormalizeMultiviewImage'
,
**
img_norm_cfg
),
dict
(
type
=
'PadMultiViewImage'
,
size_divisor
=
32
),
dict
(
type
=
'MultiScaleFlipAug3D'
,
img_scale
=
(
1600
,
900
),
pts_scale_ratio
=
1
,
flip
=
False
,
transforms
=
[
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
,
with_label
=
False
),
dict
(
type
=
'CustomCollect3D'
,
keys
=
[
'img'
])
])
]
data
=
dict
(
samples_per_gpu
=
1
,
workers_per_gpu
=
0
,
train
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'occ_infos_temporal_train.pkl'
,
pipeline
=
train_pipeline
,
classes
=
class_names
,
modality
=
input_modality
,
test_mode
=
False
,
use_valid_flag
=
True
,
bev_size
=
(
bev_h_
,
bev_w_
),
queue_length
=
queue_length
,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d
=
'LiDAR'
),
val
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'occ_infos_temporal_val.pkl'
,
pipeline
=
test_pipeline
,
bev_size
=
(
bev_h_
,
bev_w_
),
classes
=
class_names
,
modality
=
input_modality
,
samples_per_gpu
=
1
),
test
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'occ_infos_temporal_val.pkl'
,
pipeline
=
test_pipeline
,
bev_size
=
(
bev_h_
,
bev_w_
),
classes
=
class_names
,
modality
=
input_modality
),
shuffler_sampler
=
dict
(
type
=
'DistributedGroupSampler'
),
nonshuffler_sampler
=
dict
(
type
=
'DistributedSampler'
)
)
optimizer
=
dict
(
type
=
'AdamW'
,
lr
=
2e-4
,
paramwise_cfg
=
dict
(
custom_keys
=
{
'img_backbone'
:
dict
(
lr_mult
=
0.1
),
}),
weight_decay
=
0.01
)
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
35
,
norm_type
=
2
))
# learning policy
lr_config
=
dict
(
policy
=
'CosineAnnealing'
,
warmup
=
'linear'
,
warmup_iters
=
500
,
warmup_ratio
=
1.0
/
3
,
min_lr_ratio
=
1e-3
)
total_epochs
=
24
evaluation
=
dict
(
interval
=
1
,
pipeline
=
test_pipeline
)
runner
=
dict
(
type
=
'EpochBasedRunner'
,
max_epochs
=
total_epochs
)
load_from
=
'ckpts/r101_dcn_fcos3d_pretrain.pth'
log_config
=
dict
(
interval
=
50
,
hooks
=
[
dict
(
type
=
'TextLoggerHook'
),
dict
(
type
=
'TensorboardLoggerHook'
)
])
checkpoint_config
=
dict
(
interval
=
1
)
autonomous_driving/occupancy_prediction/projects/configs/bevformer/bevformer_intern-s_occ.py
0 → 100644
View file @
b64d9ca3
# BEVFormer occupancy-prediction config with an InternImage-S (DCNv3) image
# backbone; 200x200 BEV grid, Occ3D-nuScenes annotations.
_base_ = ['../datasets/custom_nus-3d.py', '../_base_/default_runtime.py']

plugin = True
plugin_dir = 'projects/mmdet3d_plugin/'

# If point cloud range is changed, the models should also change their point
# cloud range accordingly.
point_cloud_range = [-40, -40, -1.0, 40, 40, 5.4]
voxel_size = [0.2, 0.2, 8]

# Torchvision-style RGB normalization matching the InternImage pretraining.
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)

# For nuScenes we usually do 10-class detection.
class_names = [
    'car', 'truck', 'construction_vehicle', 'bus', 'trailer', 'barrier',
    'motorcycle', 'bicycle', 'pedestrian', 'traffic_cone'
]

input_modality = dict(
    use_lidar=False,
    use_camera=True,
    use_radar=False,
    use_map=False,
    use_external=True)

_dim_ = 256              # shared transformer embedding width
_pos_dim_ = _dim_ // 2   # per-axis feats of the learned positional encoding
_ffn_dim_ = _dim_ * 2
_num_levels_ = 4         # FPN levels fed to spatial cross attention
bev_h_ = 200
bev_w_ = 200
queue_length = 4         # each sequence contains `queue_length` frames

# COCO Mask R-CNN pretrained InternImage-S weights.
pretrained = 'https://huggingface.co/OpenGVLab/InternImage/resolve/main/mask_rcnn_internimage_s_fpn_3x_coco.pth'

model = dict(
    type='BEVFormerOcc',
    use_grid_mask=True,
    video_test_mode=True,
    img_backbone=dict(
        # _delete_ drops the backbone inherited from _base_ entirely.
        _delete_=True,
        type='InternImage',
        core_op='DCNv3',
        channels=80,
        depths=[4, 4, 21, 4],
        groups=[5, 10, 20, 40],
        mlp_ratio=4.,
        drop_path_rate=0.3,
        norm_layer='LN',
        layer_scale=1.0,
        offset_scale=1.0,
        post_norm=True,
        with_cp=True,  # gradient checkpointing to save memory
        out_indices=(1, 2, 3),
        init_cfg=dict(type='Pretrained', checkpoint=pretrained)),
    img_neck=dict(
        type='FPN',
        in_channels=[160, 320, 640],
        out_channels=_dim_,
        start_level=0,
        add_extra_convs='on_output',
        num_outs=4,
        relu_before_extra_convs=True),
    pts_bbox_head=dict(
        type='BEVFormerOccHead',
        pc_range=point_cloud_range,
        bev_h=bev_h_,
        bev_w=bev_w_,
        num_classes=18,  # number of occupancy classes
        in_channels=_dim_,
        sync_cls_avg_factor=True,
        with_box_refine=True,
        as_two_stage=False,
        use_mask=False,
        loss_occ=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
        transformer=dict(
            type='TransformerOcc',
            pillar_h=16,
            num_classes=18,
            norm_cfg=dict(type='BN', ),
            norm_cfg_3d=dict(type='BN3d', ),
            use_3d=True,
            use_conv=False,
            rotate_prev_bev=True,
            use_shift=True,
            use_can_bus=True,
            embed_dims=_dim_,
            encoder=dict(
                type='BEVFormerEncoder',
                num_layers=4,
                pc_range=point_cloud_range,
                num_points_in_pillar=8,
                return_intermediate=False,
                transformerlayers=dict(
                    type='BEVFormerLayer',
                    attn_cfgs=[
                        dict(
                            type='TemporalSelfAttention',
                            embed_dims=_dim_,
                            num_levels=1),
                        dict(
                            type='SpatialCrossAttention',
                            pc_range=point_cloud_range,
                            deformable_attention=dict(
                                type='MSDeformableAttention3D',
                                embed_dims=_dim_,
                                num_points=8,
                                num_levels=_num_levels_),
                            embed_dims=_dim_),
                    ],
                    feedforward_channels=_ffn_dim_,
                    ffn_dropout=0.1,
                    operation_order=('self_attn', 'norm', 'cross_attn',
                                     'norm', 'ffn', 'norm')))),
        positional_encoding=dict(
            type='LearnedPositionalEncoding',
            num_feats=_pos_dim_,
            row_num_embed=bev_h_,
            col_num_embed=bev_w_),
        # model training and testing settings
        # NOTE(review): train_cfg is nested inside pts_bbox_head here (not at
        # model level) — this mirrors the original nesting exactly.
        train_cfg=dict(pts=dict(
            grid_size=[512, 512, 1],
            voxel_size=voxel_size,
            point_cloud_range=point_cloud_range,
            out_size_factor=4,
            assigner=dict(
                type='HungarianAssigner3D',
                cls_cost=dict(type='FocalLossCost', weight=2.0),
                reg_cost=dict(type='BBox3DL1Cost', weight=0.25),
                # Fake cost. This is just to make it compatible with DETR head.
                iou_cost=dict(type='IoUCost', weight=0.0),
                pc_range=point_cloud_range)))))

dataset_type = 'NuSceneOcc'
data_root = 'data/occ3d-nus/'
file_client_args = dict(backend='disk')
occ_gt_data_root = 'data/occ3d-nus'

train_pipeline = [
    dict(type='LoadMultiViewImageFromFiles', to_float32=True),
    dict(type='LoadOccGTFromFile', data_root=occ_gt_data_root),
    dict(type='PhotoMetricDistortionMultiViewImage'),
    dict(
        type='LoadAnnotations3D',
        with_bbox_3d=True,
        with_label_3d=True,
        with_attr_label=False),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectNameFilter', classes=class_names),
    dict(type='NormalizeMultiviewImage', **img_norm_cfg),
    dict(type='PadMultiViewImage', size_divisor=32),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
    dict(
        type='CustomCollect3D',
        keys=['img', 'voxel_semantics', 'mask_lidar', 'mask_camera']),
]

test_pipeline = [
    dict(type='LoadMultiViewImageFromFiles', to_float32=True),
    dict(type='LoadOccGTFromFile', data_root=occ_gt_data_root),
    dict(type='NormalizeMultiviewImage', **img_norm_cfg),
    dict(type='PadMultiViewImage', size_divisor=32),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1600, 900),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='DefaultFormatBundle3D',
                class_names=class_names,
                with_label=False),
            dict(type='CustomCollect3D', keys=['img']),
        ]),
]

data = dict(
    samples_per_gpu=1,
    workers_per_gpu=6,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'occ_infos_temporal_train.pkl',
        pipeline=train_pipeline,
        classes=class_names,
        modality=input_modality,
        test_mode=False,
        use_valid_flag=True,
        bev_size=(bev_h_, bev_w_),
        queue_length=queue_length,
        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
        # and box_type_3d='Depth' in sunrgbd and scannet dataset.
        box_type_3d='LiDAR'),
    val=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'occ_infos_temporal_val.pkl',
        pipeline=test_pipeline,
        bev_size=(bev_h_, bev_w_),
        classes=class_names,
        modality=input_modality,
        samples_per_gpu=1),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'occ_infos_temporal_val.pkl',
        pipeline=test_pipeline,
        bev_size=(bev_h_, bev_w_),
        classes=class_names,
        modality=input_modality),
    shuffler_sampler=dict(type='DistributedGroupSampler'),
    nonshuffler_sampler=dict(type='DistributedSampler'))

# Optimization: AdamW with InternImage's layer-wise decay constructor.
optimizer = dict(
    type='AdamW',
    lr=2e-4,
    weight_decay=0.05,
    constructor='CustomLayerDecayOptimizerConstructor',
    paramwise_cfg=dict(
        num_layers=33,          # total blocks: sum(depths)
        layer_decay_rate=1.0,   # 1.0 => no per-layer LR decay
        depths=[4, 4, 21, 4]))
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))

# learning policy
lr_config = dict(
    policy='CosineAnnealing',
    warmup='linear',
    warmup_iters=500,
    warmup_ratio=1.0 / 3,
    min_lr_ratio=1e-3)

total_epochs = 24
evaluation = dict(interval=1, pipeline=test_pipeline)
runner = dict(type='EpochBasedRunner', max_epochs=total_epochs)

log_config = dict(
    interval=50,
    hooks=[
        dict(type='TextLoggerHook'),
        dict(type='TensorboardLoggerHook'),
    ])
checkpoint_config = dict(interval=1)
autonomous_driving/occupancy_prediction/projects/configs/bevformer/bevformer_small_occ.py
0 → 100644
View file @
b64d9ca3
_base_
=
[
'../datasets/custom_nus-3d.py'
,
'../_base_/default_runtime.py'
]
#
plugin
=
True
plugin_dir
=
'projects/mmdet3d_plugin/'
# If point cloud range is changed, the models should also change their point
# cloud range accordingly
point_cloud_range
=
[
-
40
,
-
40
,
-
1.0
,
40
,
40
,
5.4
]
voxel_size
=
[
0.2
,
0.2
,
8
]
img_norm_cfg
=
dict
(
mean
=
[
103.530
,
116.280
,
123.675
],
std
=
[
1.0
,
1.0
,
1.0
],
to_rgb
=
False
)
# For nuScenes we usually do 10-class detection
class_names
=
[
'car'
,
'truck'
,
'construction_vehicle'
,
'bus'
,
'trailer'
,
'barrier'
,
'motorcycle'
,
'bicycle'
,
'pedestrian'
,
'traffic_cone'
]
input_modality
=
dict
(
use_lidar
=
False
,
use_camera
=
True
,
use_radar
=
False
,
use_map
=
False
,
use_external
=
True
)
_dim_
=
256
_pos_dim_
=
_dim_
//
2
_ffn_dim_
=
_dim_
*
2
_num_levels_
=
2
bev_h_
=
200
bev_w_
=
200
queue_length
=
4
# each sequence contains `queue_length` frames.
model
=
dict
(
type
=
'BEVFormerOcc'
,
use_grid_mask
=
True
,
video_test_mode
=
True
,
img_backbone
=
dict
(
type
=
'ResNet'
,
depth
=
50
,
num_stages
=
4
,
out_indices
=
(
2
,
3
),
frozen_stages
=
1
,
norm_cfg
=
dict
(
type
=
'BN2d'
,
requires_grad
=
False
),
norm_eval
=
True
,
style
=
'caffe'
,
dcn
=
dict
(
type
=
'DCNv2'
,
deform_groups
=
1
,
fallback_on_stride
=
False
),
# original DCNv2 will print log when perform load_state_dict
stage_with_dcn
=
(
False
,
False
,
True
,
True
)),
img_neck
=
dict
(
type
=
'FPN'
,
in_channels
=
[
1024
,
2048
],
out_channels
=
_dim_
,
start_level
=
0
,
add_extra_convs
=
'on_output'
,
num_outs
=
_num_levels_
,
relu_before_extra_convs
=
True
),
pts_bbox_head
=
dict
(
type
=
'BEVFormerOccHead'
,
pc_range
=
point_cloud_range
,
bev_h
=
bev_h_
,
bev_w
=
bev_w_
,
num_classes
=
18
,
in_channels
=
_dim_
,
sync_cls_avg_factor
=
True
,
with_box_refine
=
True
,
as_two_stage
=
False
,
# loss_occ=dict(
# type='FocalLoss',
# use_sigmoid=False,
# gamma=2.0,
# alpha=0.25,
# loss_weight=10.0),
use_mask
=
False
,
loss_occ
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
False
,
loss_weight
=
1.0
),
transformer
=
dict
(
type
=
'TransformerOcc'
,
pillar_h
=
16
,
num_classes
=
18
,
norm_cfg
=
dict
(
type
=
'BN'
,
),
norm_cfg_3d
=
dict
(
type
=
'BN3d'
,
),
use_3d
=
True
,
use_conv
=
False
,
rotate_prev_bev
=
True
,
use_shift
=
True
,
use_can_bus
=
True
,
embed_dims
=
_dim_
,
encoder
=
dict
(
type
=
'BEVFormerEncoder'
,
num_layers
=
1
,
pc_range
=
point_cloud_range
,
num_points_in_pillar
=
8
,
return_intermediate
=
False
,
transformerlayers
=
dict
(
type
=
'BEVFormerLayer'
,
attn_cfgs
=
[
dict
(
type
=
'TemporalSelfAttention'
,
embed_dims
=
_dim_
,
num_levels
=
1
),
dict
(
type
=
'SpatialCrossAttention'
,
pc_range
=
point_cloud_range
,
deformable_attention
=
dict
(
type
=
'MSDeformableAttention3D'
,
embed_dims
=
_dim_
,
num_points
=
8
,
num_levels
=
_num_levels_
),
embed_dims
=
_dim_
,
)
],
feedforward_channels
=
_ffn_dim_
,
ffn_dropout
=
0.1
,
operation_order
=
(
'self_attn'
,
'norm'
,
'cross_attn'
,
'norm'
,
'ffn'
,
'norm'
))),
),
positional_encoding
=
dict
(
type
=
'LearnedPositionalEncoding'
,
num_feats
=
_pos_dim_
,
row_num_embed
=
bev_h_
,
col_num_embed
=
bev_w_
,
),
# model training and testing settings
train_cfg
=
dict
(
pts
=
dict
(
grid_size
=
[
512
,
512
,
1
],
voxel_size
=
voxel_size
,
point_cloud_range
=
point_cloud_range
,
out_size_factor
=
4
,
assigner
=
dict
(
type
=
'HungarianAssigner3D'
,
cls_cost
=
dict
(
type
=
'FocalLossCost'
,
weight
=
2.0
),
reg_cost
=
dict
(
type
=
'BBox3DL1Cost'
,
weight
=
0.25
),
iou_cost
=
dict
(
type
=
'IoUCost'
,
weight
=
0.0
),
# Fake cost. This is just to make it compatible with DETR head.
pc_range
=
point_cloud_range
)))))
dataset_type
=
'NuSceneOcc'
data_root
=
'data/occ3d-nus/'
file_client_args
=
dict
(
backend
=
'disk'
)
occ_gt_data_root
=
'data/occ3d-nus'
train_pipeline
=
[
dict
(
type
=
'LoadMultiViewImageFromFiles'
,
to_float32
=
True
),
dict
(
type
=
'LoadOccGTFromFile'
,
data_root
=
occ_gt_data_root
),
dict
(
type
=
'PhotoMetricDistortionMultiViewImage'
),
dict
(
type
=
'LoadAnnotations3D'
,
with_bbox_3d
=
True
,
with_label_3d
=
True
,
with_attr_label
=
False
),
dict
(
type
=
'RandomScaleImageMultiViewImage'
,
scales
=
[
0.2
]),
dict
(
type
=
'ObjectRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'ObjectNameFilter'
,
classes
=
class_names
),
dict
(
type
=
'NormalizeMultiviewImage'
,
**
img_norm_cfg
),
dict
(
type
=
'PadMultiViewImage'
,
size_divisor
=
32
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
type
=
'CustomCollect3D'
,
keys
=
[
'img'
,
'voxel_semantics'
,
'mask_lidar'
,
'mask_camera'
]
)
]
test_pipeline
=
[
dict
(
type
=
'LoadMultiViewImageFromFiles'
,
to_float32
=
True
),
dict
(
type
=
'LoadOccGTFromFile'
,
data_root
=
occ_gt_data_root
),
dict
(
type
=
'NormalizeMultiviewImage'
,
**
img_norm_cfg
),
dict
(
type
=
'MultiScaleFlipAug3D'
,
img_scale
=
(
1600
,
900
),
pts_scale_ratio
=
1
,
flip
=
False
,
transforms
=
[
dict
(
type
=
'RandomScaleImageMultiViewImage'
,
scales
=
[
0.8
]),
dict
(
type
=
'PadMultiViewImage'
,
size_divisor
=
32
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
,
with_label
=
False
),
dict
(
type
=
'CustomCollect3D'
,
keys
=
[
'img'
])
])
]
data
=
dict
(
samples_per_gpu
=
1
,
workers_per_gpu
=
4
,
train
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'occ_infos_temporal_train.pkl'
,
pipeline
=
train_pipeline
,
classes
=
class_names
,
modality
=
input_modality
,
test_mode
=
False
,
use_valid_flag
=
True
,
bev_size
=
(
bev_h_
,
bev_w_
),
queue_length
=
queue_length
,
# we use box_type_3d='LiDAR' in kitti and nuscenes dataset
# and box_type_3d='Depth' in sunrgbd and scannet dataset.
box_type_3d
=
'LiDAR'
),
val
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'occ_infos_temporal_val.pkl'
,
pipeline
=
test_pipeline
,
bev_size
=
(
bev_h_
,
bev_w_
),
classes
=
class_names
,
modality
=
input_modality
,
samples_per_gpu
=
1
),
test
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'occ_infos_temporal_val.pkl'
,
pipeline
=
test_pipeline
,
bev_size
=
(
bev_h_
,
bev_w_
),
classes
=
class_names
,
modality
=
input_modality
),
shuffler_sampler
=
dict
(
type
=
'DistributedGroupSampler'
),
nonshuffler_sampler
=
dict
(
type
=
'DistributedSampler'
)
)
optimizer
=
dict
(
type
=
'AdamW'
,
lr
=
2e-4
,
paramwise_cfg
=
dict
(
custom_keys
=
{
'img_backbone'
:
dict
(
lr_mult
=
0.1
),
}),
weight_decay
=
0.01
)
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
35
,
norm_type
=
2
))
# learning policy
lr_config
=
dict
(
policy
=
'CosineAnnealing'
,
warmup
=
'linear'
,
warmup_iters
=
500
,
warmup_ratio
=
1.0
/
3
,
min_lr_ratio
=
1e-3
)
total_epochs
=
24
evaluation
=
dict
(
interval
=
1
,
pipeline
=
test_pipeline
)
runner
=
dict
(
type
=
'EpochBasedRunner'
,
max_epochs
=
total_epochs
)
# load_from = 'ckpts/r101_dcn_fcos3d_pretrain.pth'
log_config
=
dict
(
interval
=
50
,
hooks
=
[
dict
(
type
=
'TextLoggerHook'
),
dict
(
type
=
'TensorboardLoggerHook'
)
])
checkpoint_config
=
dict
(
interval
=
1
)
autonomous_driving/occupancy_prediction/projects/configs/datasets/custom_nus-3d.py
0 → 100644
View file @
b64d9ca3
# Shared nuScenes 3D-detection dataset config (LiDAR modality) used as a
# _base_ by the occupancy configs in this project.

# If point cloud range is changed, the models should also change their point
# cloud range accordingly.
point_cloud_range = [-50, -50, -5, 50, 50, 3]

# For nuScenes we usually do 10-class detection.
class_names = [
    'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
    'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
]

dataset_type = 'NuScenesDataset_eval_modified'
data_root = 'data/nuscenes/'

# Input modality for nuScenes dataset, this is consistent with the submission
# format which requires the information in input_modality.
input_modality = dict(
    use_lidar=True,
    use_camera=False,
    use_radar=False,
    use_map=False,
    use_external=False)

file_client_args = dict(backend='disk')
# Uncomment the following if use ceph or other file clients.
# See https://mmcv.readthedocs.io/en/latest/api.html#mmcv.fileio.FileClient
# for more details.
# file_client_args = dict(
#     backend='petrel',
#     path_mapping=dict({
#         './data/nuscenes/': 's3://nuscenes/nuscenes/',
#         'data/nuscenes/': 's3://nuscenes/nuscenes/'
#     }))

train_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        file_client_args=file_client_args),
    dict(
        type='LoadPointsFromMultiSweeps',
        sweeps_num=10,
        file_client_args=file_client_args),
    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
    dict(
        type='GlobalRotScaleTrans',
        rot_range=[-0.3925, 0.3925],
        scale_ratio_range=[0.95, 1.05],
        translation_std=[0, 0, 0]),
    dict(type='RandomFlip3D', flip_ratio_bev_horizontal=0.5),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectNameFilter', classes=class_names),
    dict(type='PointShuffle'),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
]

test_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        file_client_args=file_client_args),
    dict(
        type='LoadPointsFromMultiSweeps',
        sweeps_num=10,
        file_client_args=file_client_args),
    dict(
        type='MultiScaleFlipAug3D',
        img_scale=(1333, 800),
        pts_scale_ratio=1,
        flip=False,
        transforms=[
            dict(
                type='GlobalRotScaleTrans',
                rot_range=[0, 0],
                scale_ratio_range=[1., 1.],
                translation_std=[0, 0, 0]),
            dict(type='RandomFlip3D'),
            dict(
                type='PointsRangeFilter',
                point_cloud_range=point_cloud_range),
            dict(
                type='DefaultFormatBundle3D',
                class_names=class_names,
                with_label=False),
            dict(type='Collect3D', keys=['points']),
        ]),
]

# construct a pipeline for data and gt loading in show function
# please keep its loading function consistent with test_pipeline (e.g. client)
eval_pipeline = [
    dict(
        type='LoadPointsFromFile',
        coord_type='LIDAR',
        load_dim=5,
        use_dim=5,
        file_client_args=file_client_args),
    dict(
        type='LoadPointsFromMultiSweeps',
        sweeps_num=10,
        file_client_args=file_client_args),
    dict(
        type='DefaultFormatBundle3D',
        class_names=class_names,
        with_label=False),
    dict(type='Collect3D', keys=['points']),
]

data = dict(
    samples_per_gpu=4,
    workers_per_gpu=4,
    train=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'nuscenes_infos_train.pkl',
        pipeline=train_pipeline,
        classes=class_names,
        modality=input_modality,
        test_mode=False,
        # we use box_type_3d='LiDAR' in kitti and nuscenes dataset
        # and box_type_3d='Depth' in sunrgbd and scannet dataset.
        box_type_3d='LiDAR'),
    val=dict(
        type=dataset_type,
        # NOTE(review): unlike train/test, no data_root is set here, so the
        # dataset falls back to its default — confirm this is intentional.
        ann_file=data_root + 'nuscenes_infos_val.pkl',
        pipeline=test_pipeline,
        classes=class_names,
        modality=input_modality,
        test_mode=True,
        box_type_3d='LiDAR'),
    test=dict(
        type=dataset_type,
        data_root=data_root,
        ann_file=data_root + 'nuscenes_infos_val.pkl',
        pipeline=test_pipeline,
        classes=class_names,
        modality=input_modality,
        test_mode=True,
        box_type_3d='LiDAR'))

# For nuScenes dataset, we usually evaluate the model at the end of training.
# Since the models are trained by 24 epochs by default, we set evaluation
# interval to be 24. Please change the interval accordingly if you do not
# use a default schedule.
evaluation = dict(interval=24, pipeline=eval_pipeline)
autonomous_driving/occupancy_prediction/projects/mmdet3d_plugin/__init__.py
0 → 100644
View file @
b64d9ca3
from
.core.bbox.assigners.hungarian_assigner_3d
import
HungarianAssigner3D
from
.core.bbox.coders.nms_free_coder
import
NMSFreeCoder
from
.core.bbox.match_costs
import
BBox3DL1Cost
from
.core.evaluation.eval_hooks
import
CustomDistEvalHook
from
.datasets.pipelines
import
(
PhotoMetricDistortionMultiViewImage
,
PadMultiViewImage
,
NormalizeMultiviewImage
,
CustomCollect3D
)
from
.models.backbones.vovnet
import
VoVNet
from
.models.utils
import
*
from
.models.opt.adamw
import
AdamW2
from
.bevformer
import
*
Prev
1
2
3
4
5
6
7
8
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment