OpenDAS / mmdetection3d — Commits

Commit ce79da2e, authored Jun 17, 2020 by zhangwenwei

    Merge branch 'add-tta' into 'master'

    Support test time augmentation

    See merge request open-mmlab/mmdet.3d!70

Parents: f6e95edd, 3c5ff9fa
Changes: 72 — showing 20 changed files with 250 additions and 1170 deletions (+250 −1170)
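In short, this merge wires test-time augmentation (TTA) through the codebase: the config test pipelines are wrapped in a `MultiScaleFlipAug3D` block that can scale and flip the point cloud at test time, a new `mmdet3d/core/post_processing/merge_augs.py` merges the detections from the augmented views, `bbox3d_mapping_back` maps boxes from an augmented view back to the original frame, and `img_meta` is renamed to `img_metas`. The same sweep also deduplicates several PointPillars/SECOND configs into `_base_` files, which accounts for most of the deletions.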
Changed files:

configs/parta2/hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-car.py               +24  −10
configs/pointpillars/hv_pointpillars_fpn_sbn-all_4x8_2x_nus-3d.py            +1   −121
configs/pointpillars/hv_pointpillars_secfpn_6x8_160e_kitti-3d-3class.py      +46  −175
configs/pointpillars/hv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py         +32  −130
configs/pointpillars/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d.py         +8   −90
configs/regnet/hv_pointpillars_regnet-400mf_fpn_sbn-all_4x8_2x_nus-3d.py     +7   −103
configs/regnet/hv_pointpillars_regnet-400mf_secfpn_sbn-all_4x8_2x_nus-3d.py  +5   −94
configs/second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py                   +4   −236
configs/second/hv_second_secfpn_6x8_80e_kitti-3d-car.py                      +9   −190
configs/votenet/votenet_16x8_sunrgbd-3d-10class.py                           +0   −10
configs/votenet/votenet_8x8_scannet-3d-18class.py                            +1   −1
docs/tutorials/data_pipeline.md                                              +1   −1
mmdet3d/apis/train.py                                                        +2   −2
mmdet3d/core/bbox/__init__.py                                                +4   −3
mmdet3d/core/bbox/structures/__init__.py                                     +2   −1
mmdet3d/core/bbox/structures/base_box3d.py                                   +4   −1
mmdet3d/core/bbox/transforms.py                                              +10  −0
mmdet3d/core/post_processing/__init__.py                                     +2   −1
mmdet3d/core/post_processing/box3d_nms.py                                    +1   −1
mmdet3d/core/post_processing/merge_augs.py (new file)                        +87  −0
configs/parta2/hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-car.py (+24 −10)
-------------------------------------------------------------------------

@@ -180,14 +180,14 @@ train_pipeline = [
     dict(
         type='ObjectNoise',
         num_try=100,
-        loc_noise_std=[1.0, 1.0, 0.5],
+        translation_std=[1.0, 1.0, 0.5],
         global_rot_range=[0.0, 0.0],
-        rot_uniform_noise=[-0.78539816, 0.78539816]),
+        rot_range=[-0.78539816, 0.78539816]),
     dict(type='RandomFlip3D', flip_ratio=0.5),
     dict(
-        type='GlobalRotScale',
-        rot_uniform_noise=[-0.78539816, 0.78539816],
-        scaling_uniform_noise=[0.95, 1.05]),
+        type='GlobalRotScaleTrans',
+        rot_range=[-0.78539816, 0.78539816],
+        scale_ratio_range=[0.95, 1.05]),
     dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
     dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
+    dict(type='ObjectNameFilter', classes=class_names),
@@ -197,12 +197,26 @@ train_pipeline = [
 ]
 test_pipeline = [
     dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
-    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
-    dict(
-        type='DefaultFormatBundle3D',
-        class_names=class_names,
-        with_label=False),
-    dict(type='Collect3D', keys=['points'])
+    dict(
+        type='MultiScaleFlipAug3D',
+        img_scale=(1333, 800),
+        pts_scale_ratio=1,
+        flip=False,
+        transforms=[
+            dict(
+                type='GlobalRotScaleTrans',
+                rot_range=[0, 0],
+                scale_ratio_range=[1., 1.],
+                translation_std=[0, 0, 0]),
+            dict(type='RandomFlip3D'),
+            dict(
+                type='PointsRangeFilter',
+                point_cloud_range=point_cloud_range),
+            dict(
+                type='DefaultFormatBundle3D',
+                class_names=class_names,
+                with_label=False),
+            dict(type='Collect3D', keys=['points'])
+        ])
 ]
 data = dict(
...
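The substantive change in every config below is this `MultiScaleFlipAug3D` wrapper: with `pts_scale_ratio=1` and `flip=False` it emits a single un-augmented view, but the same block can enumerate scaled/flipped copies of each test sample. A minimal standalone sketch of what such a wrapper does — `tta_views` is a hypothetical helper for illustration, not the mmdet3d implementation:

    import numpy as np


    def tta_views(points, pts_scale_ratios=(1.0, ), flip=False):
        """Expand one point cloud into the augmented views a
        MultiScaleFlipAug3D-style wrapper would feed the detector.

        Each (scale, flip) combination becomes one view, and the applied
        parameters are recorded in the metas so the merge step can undo
        them later (cf. bbox3d_mapping_back / merge_aug_bboxes_3d below).
        """
        flip_states = [False, True] if flip else [False]
        views = []
        for scale in pts_scale_ratios:
            for flipped in flip_states:
                pts = points.copy()
                pts[:, :3] *= scale        # GlobalRotScaleTrans with rot_range=[0, 0]
                if flipped:
                    pts[:, 1] = -pts[:, 1]  # horizontal flip in bird's-eye view
                views.append(
                    dict(points=pts,
                         meta=dict(pcd_scale_factor=scale, pcd_flip=flipped)))
        return views


    points = np.random.rand(1000, 4).astype(np.float32)  # x, y, z, intensity
    # With the config above (pts_scale_ratio=1, flip=False) this is one view,
    # i.e. TTA degenerates to a plain single-pass test.
    assert len(tta_views(points)) == 1
    assert len(tta_views(points, (0.95, 1.0, 1.05), flip=True)) == 6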
configs/pointpillars/hv_pointpillars_fpn_sbn-all_4x8_2x_nus-3d.py (+1 −121)
----------------------------------------------------------------------------

 _base_ = [
     '../_base_/models/pointpillars_second_fpn.py',
     '../_base_/datasets/nus-3d.py',
     '../_base_/schedules/schedule_2x.py',
     '../_base_/default_runtime.py'
 ]
-# model settings
-voxel_size = [0.25, 0.25, 8]
-point_cloud_range = [-50, -50, -5, 50, 50, 3]
-class_names = [
-    'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
-    'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
-]
-model = dict(
-    type='MVXFasterRCNNV2',
-    pts_voxel_layer=dict(
-        max_num_points=64,  # max_points_per_voxel
-        point_cloud_range=point_cloud_range,  # velodyne coordinates, x, y, z
-        voxel_size=voxel_size,
-        max_voxels=(30000, 40000)),  # (training, testing) max_voxels
-    pts_voxel_encoder=dict(
-        type='HardVFE',
-        in_channels=4,
-        feat_channels=[64, 64],
-        with_distance=False,
-        voxel_size=voxel_size,
-        with_cluster_center=True,
-        with_voxel_center=True,
-        point_cloud_range=point_cloud_range,
-        norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)),
-    pts_middle_encoder=dict(
-        type='PointPillarsScatter',
-        in_channels=64,
-        output_shape=[400, 400]),  # checked from PointCloud3D
-    pts_backbone=dict(
-        type='SECOND',
-        in_channels=64,
-        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
-        layer_nums=[3, 5, 5],
-        layer_strides=[2, 2, 2],
-        out_channels=[64, 128, 256]),
-    pts_neck=dict(
-        type='FPN',
-        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
-        act_cfg=dict(type='ReLU'),
-        in_channels=[64, 128, 256],
-        out_channels=256,
-        start_level=0,
-        num_outs=3),
-    pts_bbox_head=dict(
-        type='Anchor3DHead',
-        num_classes=10,
-        in_channels=256,
-        feat_channels=256,
-        use_direction_classifier=True,
-        anchor_generator=dict(
-            type='AlignedAnchor3DRangeGenerator',
-            ranges=[[-50, -50, -1.8, 50, 50, -1.8]],
-            scales=[1, 2, 4],
-            sizes=[
-                [0.8660, 2.5981, 1.],  # 1.5 / sqrt(3)
-                [0.5774, 1.7321, 1.],  # 1 / sqrt(3)
-                [1., 1., 1.],
-                [0.4, 0.4, 1],
-            ],
-            custom_values=[0, 0],
-            rotations=[0, 1.57],
-            reshape_out=True),
-        assigner_per_size=False,
-        diff_rad_by_sin=True,
-        dir_offset=0.7854,  # pi/4
-        dir_limit_offset=0,
-        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9),
-        loss_cls=dict(type='FocalLoss', use_sigmoid=True, gamma=2.0,
-                      alpha=0.25, loss_weight=1.0),
-        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
-        loss_dir=dict(type='CrossEntropyLoss', use_sigmoid=False,
-                      loss_weight=0.2)))
-# model training and testing settings
-train_cfg = dict(
-    pts=dict(
-        assigner=dict(  # for Car
-            type='MaxIoUAssigner',
-            iou_calculator=dict(type='BboxOverlapsNearest3D'),
-            pos_iou_thr=0.6, neg_iou_thr=0.3, min_pos_iou=0.3,
-            ignore_iof_thr=-1),
-        allowed_border=0,
-        code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],
-        pos_weight=-1,
-        debug=False))
-test_cfg = dict(
-    pts=dict(
-        use_rotate_nms=True,
-        nms_across_levels=False,
-        nms_pre=1000,
-        nms_thr=0.2,
-        score_thr=0.05,
-        min_bbox_size=0,
-        max_num=500
-        # soft-nms is also supported for rcnn testing
-        # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
-    ))
 # dataset settings
 input_modality = dict(
     use_lidar=True,
     use_camera=False,
     use_radar=False,
     use_map=False,
     use_external=False)
 data = dict(
     train=dict(modality=input_modality),
     val=dict(modality=input_modality),
     test=dict(modality=input_modality))
 evaluation = dict(interval=24)
configs/pointpillars/hv_pointpillars_secfpn_6x8_160e_kitti-3d-3class.py (+46 −175)
-----------------------------------------------------------------------------------

-# model settings
-point_cloud_range = [0, -39.68, -3, 69.12, 39.68, 1]
-voxel_size = [0.16, 0.16, 4]
-model = dict(
-    type='VoxelNet',
-    voxel_layer=dict(
-        max_num_points=32,
-        point_cloud_range=point_cloud_range,
-        voxel_size=voxel_size,
-        max_voxels=(16000, 40000)),
-    voxel_encoder=dict(
-        type='PillarFeatureNet',
-        in_channels=4,
-        feat_channels=[64],
-        with_distance=False,
-        voxel_size=voxel_size,
-        point_cloud_range=point_cloud_range),
-    middle_encoder=dict(
-        type='PointPillarsScatter', in_channels=64, output_shape=[496, 432]),
-    backbone=dict(
-        type='SECOND',
-        in_channels=64,
-        layer_nums=[3, 5, 5],
-        layer_strides=[2, 2, 2],
-        out_channels=[64, 128, 256]),
-    neck=dict(
-        type='SECONDFPN',
-        in_channels=[64, 128, 256],
-        upsample_strides=[1, 2, 4],
-        out_channels=[128, 128, 128]),
-    bbox_head=dict(
-        type='Anchor3DHead',
-        num_classes=3,
-        in_channels=384,
-        feat_channels=384,
-        use_direction_classifier=True,
-        anchor_generator=dict(
-            type='Anchor3DRangeGenerator',
-            ranges=[
-                [0, -39.68, -0.6, 70.4, 39.68, -0.6],
-                [0, -39.68, -0.6, 70.4, 39.68, -0.6],
-                [0, -39.68, -1.78, 70.4, 39.68, -1.78],
-            ],
-            sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
-            rotations=[0, 1.57],
-            reshape_out=False),
-        diff_rad_by_sin=True,
-        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
-        loss_cls=dict(type='FocalLoss', use_sigmoid=True, gamma=2.0,
-                      alpha=0.25, loss_weight=1.0),
-        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
-        loss_dir=dict(type='CrossEntropyLoss', use_sigmoid=False,
-                      loss_weight=0.2)))
-# model training and testing settings
-train_cfg = dict(
-    assigner=[
-        dict(  # for Pedestrian
-            type='MaxIoUAssigner',
-            iou_calculator=dict(type='BboxOverlapsNearest3D'),
-            pos_iou_thr=0.5, neg_iou_thr=0.35, min_pos_iou=0.35,
-            ignore_iof_thr=-1),
-        dict(  # for Cyclist
-            type='MaxIoUAssigner',
-            iou_calculator=dict(type='BboxOverlapsNearest3D'),
-            pos_iou_thr=0.5, neg_iou_thr=0.35, min_pos_iou=0.35,
-            ignore_iof_thr=-1),
-        dict(  # for Car
-            type='MaxIoUAssigner',
-            iou_calculator=dict(type='BboxOverlapsNearest3D'),
-            pos_iou_thr=0.6, neg_iou_thr=0.45, min_pos_iou=0.45,
-            ignore_iof_thr=-1),
-    ],
-    allowed_border=0,
-    pos_weight=-1,
-    debug=False)
-test_cfg = dict(
-    use_rotate_nms=True,
-    nms_across_levels=False,
-    nms_thr=0.01,
-    score_thr=0.1,
-    min_bbox_size=0,
-    nms_pre=100,
-    max_num=50)
+_base_ = [
+    '../_base_/models/hv_pointpillars_secfpn.py',
+    '../_base_/datasets/kitti-3d-3class.py',
+    '../_base_/schedules/cyclic_40e.py',
+    '../_base_/default_runtime.py'
+]
+point_cloud_range = [0, -39.68, -3, 69.12, 39.68, 1]
 # dataset settings
 dataset_type = 'KittiDataset'
 data_root = 'data/kitti/'
 class_names = ['Pedestrian', 'Cyclist', 'Car']
 input_modality = dict(use_lidar=True, use_camera=False)
+# PointPillars adopts different sampling strategies among classes
 db_sampler = dict(
     data_root=data_root,
     info_path=data_root + 'kitti_dbinfos_train.pkl',
     ...
@@ -108,6 +20,7 @@ db_sampler = dict(
     classes=class_names,
     sample_groups=dict(Car=15, Pedestrian=10, Cyclist=10))
+# PointPillars uses different augmentation hyper parameters
 train_pipeline = [
     dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
     dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True),
     ...
@@ -115,14 +28,14 @@ train_pipeline = [
     dict(
         type='ObjectNoise',
         num_try=100,
-        loc_noise_std=[0.25, 0.25, 0.25],
+        translation_std=[0.25, 0.25, 0.25],
         global_rot_range=[0.0, 0.0],
-        rot_uniform_noise=[-0.15707963267, 0.15707963267]),
+        rot_range=[-0.15707963267, 0.15707963267]),
     dict(type='RandomFlip3D', flip_ratio=0.5),
     dict(
-        type='GlobalRotScale',
-        rot_uniform_noise=[-0.78539816, 0.78539816],
-        scaling_uniform_noise=[0.95, 1.05]),
+        type='GlobalRotScaleTrans',
+        rot_range=[-0.78539816, 0.78539816],
+        scale_ratio_range=[0.95, 1.05]),
     dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
     dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
     dict(type='PointShuffle'),
@@ -131,86 +44,44 @@ train_pipeline = [
 ]
 test_pipeline = [
     dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
-    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
-    dict(
-        type='DefaultFormatBundle3D',
-        class_names=class_names,
-        with_label=False),
-    dict(type='Collect3D', keys=['points'])
+    dict(
+        type='MultiScaleFlipAug3D',
+        img_scale=(1333, 800),
+        pts_scale_ratio=1,
+        flip=False,
+        transforms=[
+            dict(
+                type='GlobalRotScaleTrans',
+                rot_range=[0, 0],
+                scale_ratio_range=[1., 1.],
+                translation_std=[0, 0, 0]),
+            dict(type='RandomFlip3D'),
+            dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
+            dict(
+                type='DefaultFormatBundle3D',
+                class_names=class_names,
+                with_label=False),
+            dict(type='Collect3D', keys=['points'])
+        ])
 ]
 data = dict(
-    samples_per_gpu=6,
-    workers_per_gpu=4,
-    train=dict(
-        type='RepeatDataset',
-        times=2,
-        dataset=dict(
-            type=dataset_type, data_root=data_root,
-            ann_file=data_root + 'kitti_infos_train.pkl', split='training',
-            pts_prefix='velodyne_reduced', pipeline=train_pipeline,
-            modality=input_modality, classes=class_names, test_mode=False)),
-    val=dict(
-        type=dataset_type, data_root=data_root,
-        ann_file=data_root + 'kitti_infos_val.pkl', split='training',
-        pts_prefix='velodyne_reduced', pipeline=test_pipeline,
-        modality=input_modality, classes=class_names, test_mode=True),
-    test=dict(
-        type=dataset_type, data_root=data_root,
-        ann_file=data_root + 'kitti_infos_val.pkl', split='training',
-        pts_prefix='velodyne_reduced', pipeline=test_pipeline,
-        modality=input_modality, classes=class_names, test_mode=True))
+    train=dict(dataset=dict(pipeline=train_pipeline, classes=class_names)),
+    val=dict(pipeline=test_pipeline, classes=class_names),
+    test=dict(pipeline=test_pipeline, classes=class_names))
+# In practice PointPillars also uses a different schedule
 # optimizer
-lr = 0.001  # max learning rate
-optimizer = dict(
-    type='AdamW',
-    lr=lr,
-    betas=(0.95, 0.99),  # the momentum is changed during training
-    weight_decay=0.01)
+lr = 0.001
+optimizer = dict(lr=lr)
+# max_norm=35 is slightly better than 10 for PointPillars in the earlier
+# development of the codebase thus we keep the setting. But we do not
+# specifically tune this parameter.
 optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
-# learning policy
-lr_config = dict(policy='cyclic', target_ratio=(10, 1e-4), cyclic_times=1,
-                 step_ratio_up=0.4)
-momentum_config = dict(policy='cyclic', target_ratio=(0.85 / 0.95, 1),
-                       cyclic_times=1, step_ratio_up=0.4)
-checkpoint_config = dict(interval=1)
+# Use evaluation interval=2 to reduce the number of evaluation times
+evaluation = dict(interval=2)
-# yapf:disable
-log_config = dict(
-    interval=50,
-    hooks=[dict(type='TextLoggerHook'), dict(type='TensorboardLoggerHook')])
-# yapf:enable
-# runtime settings
+# PointPillars usually needs a longer schedule than SECOND, so we simply double
+# the training schedule. Do remind that since we use RepeatDataset and the
+# repeat factor is 2, we actually train 160 epochs.
 total_epochs = 80
-dist_params = dict(backend='nccl')
-log_level = 'INFO'
-work_dir = './work_dirs/pp_secfpn_80e'
-load_from = None
-resume_from = None
-workflow = [('train', 1)]
configs/pointpillars/hv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py (+32 −130)
--------------------------------------------------------------------------------

-# model settings
-voxel_size = [0.16, 0.16, 4]
+_base_ = './hv_pointpillars_secfpn_6x8_160e_kitti-3d-3class.py'
 point_cloud_range = [0, -39.68, -3, 69.12, 39.68, 1]
 model = dict(
-    type='VoxelNet',
-    voxel_layer=dict(
-        max_num_points=64,
-        point_cloud_range=point_cloud_range,
-        voxel_size=voxel_size,
-        max_voxels=(12000, 20000)),
-    voxel_encoder=dict(
-        type='PillarFeatureNet',
-        in_channels=4,
-        feat_channels=[64],
-        with_distance=False,
-        voxel_size=voxel_size,
-        point_cloud_range=point_cloud_range),
-    middle_encoder=dict(
-        type='PointPillarsScatter', in_channels=64, output_shape=[496, 432]),
-    backbone=dict(
-        type='SECOND',
-        in_channels=64,
-        layer_nums=[3, 5, 5],
-        layer_strides=[2, 2, 2],
-        out_channels=[64, 128, 256]),
-    neck=dict(
-        type='SECONDFPN',
-        in_channels=[64, 128, 256],
-        upsample_strides=[1, 2, 4],
-        out_channels=[128, 128, 128]),
     bbox_head=dict(
-        type='Anchor3DHead',
         num_classes=1,
-        in_channels=384,
-        feat_channels=384,
-        use_direction_classifier=True,
         anchor_generator=dict(
+            _delete_=True,
             type='Anchor3DRangeGenerator',
             ranges=[[0, -39.68, -1.78, 69.12, 39.68, -1.78]],
             sizes=[[1.6, 3.9, 1.56]],
             rotations=[0, 1.57],
-            reshape_out=True),
-        diff_rad_by_sin=True,
-        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
-        loss_cls=dict(type='FocalLoss', use_sigmoid=True, gamma=2.0,
-                      alpha=0.25, loss_weight=1.0),
-        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
-        loss_dir=dict(type='CrossEntropyLoss', use_sigmoid=False,
-                      loss_weight=0.2)))
+            reshape_out=True)))
 # model training and testing settings
 train_cfg = dict(
+    _delete_=True,
     assigner=dict(
         type='MaxIoUAssigner',
         iou_calculator=dict(type='BboxOverlapsNearest3D'),
         ...
@@ -63,20 +26,11 @@ train_cfg = dict(
     allowed_border=0,
     pos_weight=-1,
     debug=False)
-test_cfg = dict(
-    use_rotate_nms=True,
-    nms_across_levels=False,
-    nms_thr=0.01,
-    score_thr=0.1,
-    min_bbox_size=0,
-    nms_pre=100,
-    max_num=50)
 # dataset settings
-dataset_type = 'KittiDataset'
-data_root = 'data/kitti/'
 class_names = ['Car']
-input_modality = dict(use_lidar=True, use_camera=False)
 db_sampler = dict(
     data_root=data_root,
     info_path=data_root + 'kitti_dbinfos_train.pkl',
     ...
@@ -93,14 +47,14 @@ train_pipeline = [
     dict(
         type='ObjectNoise',
         num_try=100,
-        loc_noise_std=[0.25, 0.25, 0.25],
+        translation_std=[0.25, 0.25, 0.25],
         global_rot_range=[0.0, 0.0],
-        rot_uniform_noise=[-0.15707963267, 0.15707963267]),
+        rot_range=[-0.15707963267, 0.15707963267]),
     dict(type='RandomFlip3D', flip_ratio=0.5),
     dict(
-        type='GlobalRotScale',
-        rot_uniform_noise=[-0.78539816, 0.78539816],
-        scaling_uniform_noise=[0.95, 1.05]),
+        type='GlobalRotScaleTrans',
+        rot_range=[-0.78539816, 0.78539816],
+        scale_ratio_range=[0.95, 1.05]),
     dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
     dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
     dict(type='PointShuffle'),
@@ -109,84 +63,32 @@ train_pipeline = [
 ]
 test_pipeline = [
     dict(type='LoadPointsFromFile', load_dim=4, use_dim=4),
-    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
-    dict(type='DefaultFormatBundle3D', class_names=class_names,
-         with_label=False),
-    dict(type='Collect3D', keys=['points'])
+    dict(
+        type='MultiScaleFlipAug3D',
+        img_scale=(1333, 800),
+        pts_scale_ratio=1,
+        flip=False,
+        transforms=[
+            dict(
+                type='GlobalRotScaleTrans',
+                rot_range=[0, 0],
+                scale_ratio_range=[1., 1.],
+                translation_std=[0, 0, 0]),
+            dict(type='RandomFlip3D'),
+            dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
+            dict(
+                type='DefaultFormatBundle3D',
+                class_names=class_names,
+                with_label=False),
+            dict(type='Collect3D', keys=['points'])
+        ])
 ]
 data = dict(
-    samples_per_gpu=6,
-    workers_per_gpu=4,
-    train=dict(
-        type='RepeatDataset',
-        times=2,
-        dataset=dict(
-            type=dataset_type, data_root=data_root,
-            ann_file=data_root + 'kitti_infos_train.pkl', split='training',
-            pts_prefix='velodyne_reduced', pipeline=train_pipeline,
-            modality=input_modality, classes=class_names, test_mode=False)),
-    val=dict(
-        type=dataset_type, data_root=data_root,
-        ann_file=data_root + 'kitti_infos_val.pkl', split='training',
-        pts_prefix='velodyne_reduced', pipeline=test_pipeline,
-        modality=input_modality, classes=class_names, test_mode=True),
-    test=dict(
-        type=dataset_type, data_root=data_root,
-        ann_file=data_root + 'kitti_infos_val.pkl', split='training',
-        pts_prefix='velodyne_reduced', pipeline=test_pipeline,
-        modality=input_modality, classes=class_names, test_mode=True))
-# optimizer
-lr = 0.001  # max learning rate
-optimizer = dict(type='AdamW', lr=lr,
-                 betas=(0.95, 0.99),  # the momentum is changed during training
-                 weight_decay=0.01)
-optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
-# learning policy
-lr_config = dict(policy='cyclic', target_ratio=(10, 1e-4), cyclic_times=1,
-                 step_ratio_up=0.4)
-momentum_config = dict(policy='cyclic', target_ratio=(0.85 / 0.95, 1),
-                       cyclic_times=1, step_ratio_up=0.4)
-checkpoint_config = dict(interval=1)
-evaluation = dict(interval=1)
-# yapf:disable
-log_config = dict(
-    interval=50,
-    hooks=[dict(type='TextLoggerHook'), dict(type='TensorboardLoggerHook')])
-# yapf:enable
-# runtime settings
-total_epochs = 80
-dist_params = dict(backend='nccl')
-log_level = 'INFO'
-work_dir = './work_dirs/pp_secfpn_80e'
-load_from = None
-resume_from = None
-workflow = [('train', 1)]
+    train=dict(dataset=dict(pipeline=train_pipeline, classes=class_names)),
+    val=dict(pipeline=test_pipeline, classes=class_names),
+    test=dict(pipeline=test_pipeline, classes=class_names))
configs/pointpillars/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d.py (+8 −90)
------------------------------------------------------------------------------

 _base_ = [
-    '../_base_/datasets/nus-3d.py', '../_base_/schedules/schedule_2x.py',
-    '../_base_/default_runtime.py'
+    '../_base_/models/pointpillars_second_fpn.py',
+    '../_base_/datasets/nus-3d.py',
+    '../_base_/schedules/schedule_2x.py',
+    '../_base_/default_runtime.py',
 ]
-# model settings
-voxel_size = [0.25, 0.25, 8]
-point_cloud_range = [-50, -50, -5, 50, 50, 3]
-class_names = [
-    'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
-    'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
-]
 model = dict(
-    type='MVXFasterRCNNV2',
-    pts_voxel_layer=dict(
-        max_num_points=64,
-        point_cloud_range=point_cloud_range,
-        voxel_size=voxel_size,
-        max_voxels=(30000, 40000)),
-    pts_voxel_encoder=dict(
-        type='HardVFE',
-        in_channels=4,
-        feat_channels=[64, 64],
-        with_distance=False,
-        voxel_size=voxel_size,
-        with_cluster_center=True,
-        with_voxel_center=True,
-        point_cloud_range=point_cloud_range,
-        norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)),
-    pts_middle_encoder=dict(
-        type='PointPillarsScatter', in_channels=64, output_shape=[400, 400]),
-    pts_backbone=dict(
-        type='SECOND',
-        in_channels=64,
-        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
-        layer_nums=[3, 5, 5],
-        layer_strides=[2, 2, 2],
-        out_channels=[64, 128, 256]),
     pts_neck=dict(
+        _delete_=True,
         type='SECONDFPN',
         norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
         in_channels=[64, 128, 256],
         upsample_strides=[1, 2, 4],
         out_channels=[128, 128, 128]),
     pts_bbox_head=dict(
-        type='Anchor3DHead',
-        num_classes=10,
         in_channels=384,
         feat_channels=384,
-        use_direction_classifier=True,
         anchor_generator=dict(
-            type='Anchor3DRangeGenerator',
+            _delete_=True,
+            type='AlignedAnchor3DRangeGenerator',
             ranges=[
                 [-49.6, -49.6, -1.80032795, 49.6, 49.6, -1.80032795],
                 [-49.6, -49.6, -1.74440365, 49.6, 49.6, -1.74440365],
                 ...
@@ -69,56 +39,4 @@ model = dict(
             ],
             custom_values=[0, 0],
             rotations=[0, 1.57],
-            reshape_out=True),
-        assigner_per_size=False,
-        diff_rad_by_sin=True,
-        dir_offset=0.7854,  # pi/4
-        dir_limit_offset=0,
-        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9),
-        loss_cls=dict(type='FocalLoss', use_sigmoid=True, gamma=2.0,
-                      alpha=0.25, loss_weight=1.0),
-        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
-        loss_dir=dict(type='CrossEntropyLoss', use_sigmoid=False,
-                      loss_weight=0.2)))
-# model training and testing settings
-train_cfg = dict(
-    pts=dict(
-        assigner=dict(  # for Car
-            type='MaxIoUAssigner',
-            iou_calculator=dict(type='BboxOverlapsNearest3D'),
-            pos_iou_thr=0.6, neg_iou_thr=0.3, min_pos_iou=0.3,
-            ignore_iof_thr=-1),
-        allowed_border=0,
-        code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],
-        pos_weight=-1,
-        debug=False))
-test_cfg = dict(
-    pts=dict(
-        use_rotate_nms=True,
-        nms_across_levels=False,
-        nms_pre=1000,
-        nms_thr=0.2,
-        score_thr=0.05,
-        min_bbox_size=0,
-        max_num=500))
-# dataset settings
-input_modality = dict(
-    use_lidar=True,
-    use_camera=False,
-    use_radar=False,
-    use_map=False,
-    use_external=False)
-data = dict(
-    train=dict(modality=input_modality),
-    val=dict(modality=input_modality),
-    test=dict(modality=input_modality))
-evaluation = dict(interval=24)
+            reshape_out=True)))
configs/regnet/hv_pointpillars_regnet-400mf_fpn_sbn-all_4x8_2x_nus-3d.py (+7 −103)
-----------------------------------------------------------------------------------

 _base_ = [
-    '../_base_/datasets/nus-3d.py', '../_base_/schedules/schedule_2x.py',
-    '../_base_/default_runtime.py'
+    '../_base_/models/pointpillars_second_fpn.py',
+    '../_base_/datasets/nus-3d.py',
+    '../_base_/schedules/schedule_2x.py',
+    '../_base_/default_runtime.py',
 ]
-# model settings
-voxel_size = [0.25, 0.25, 8]
-point_cloud_range = [-50, -50, -5, 50, 50, 3]
-class_names = [
-    'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
-    'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
-]
 model = dict(
-    type='MVXFasterRCNNV2',
     pretrained=dict(pts='open-mmlab://regnetx_400mf'),
-    pts_voxel_layer=dict(
-        max_num_points=64,
-        point_cloud_range=point_cloud_range,
-        voxel_size=voxel_size,
-        max_voxels=(30000, 40000)),
-    pts_voxel_encoder=dict(
-        type='HardVFE',
-        in_channels=4,
-        feat_channels=[64, 64],
-        with_distance=False,
-        voxel_size=voxel_size,
-        with_cluster_center=True,
-        with_voxel_center=True,
-        point_cloud_range=point_cloud_range,
-        norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)),
-    pts_middle_encoder=dict(
-        type='PointPillarsScatter', in_channels=64, output_shape=[400, 400]),
     pts_backbone=dict(
+        _delete_=True,
         type='NoStemRegNet',
         arch=dict(w0=24, wa=24.48, wm=2.54, group_w=16, depth=22, bot_mul=1.0),
         out_indices=(1, 2, 3),
         frozen_stages=-1,
         strides=(1, 2, 2, 2),
         base_channels=64,
         stem_channels=64,
         norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
         norm_eval=False,
         style='pytorch'),
-    pts_neck=dict(
-        type='FPN',
-        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
-        act_cfg=dict(type='ReLU'),
-        in_channels=[64, 160, 384],
-        out_channels=256,
-        start_level=0,
-        num_outs=3),
-    pts_bbox_head=dict(
-        type='Anchor3DHead',
-        num_classes=10,
-        in_channels=256,
-        feat_channels=256,
-        use_direction_classifier=True,
-        anchor_generator=dict(
-            type='AlignedAnchor3DRangeGenerator',
-            ranges=[[-50, -50, -1.8, 50, 50, -1.8]],
-            scales=[1, 2, 4],
-            sizes=[
-                [0.8660, 2.5981, 1.],  # 1.5 / sqrt(3)
-                [0.5774, 1.7321, 1.],  # 1 / sqrt(3)
-                [1., 1., 1.],
-                [0.4, 0.4, 1],
-            ],
-            custom_values=[0, 0],
-            rotations=[0, 1.57],
-            reshape_out=True),
-        assigner_per_size=False,
-        diff_rad_by_sin=True,
-        dir_offset=0.7854,  # pi/4
-        dir_limit_offset=0,
-        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9),
-        loss_cls=dict(type='FocalLoss', use_sigmoid=True, gamma=2.0,
-                      alpha=0.25, loss_weight=1.0),
-        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
-        loss_dir=dict(type='CrossEntropyLoss', use_sigmoid=False,
-                      loss_weight=0.2)))
-# model training and testing settings
-train_cfg = dict(
-    pts=dict(
-        assigner=dict(
-            type='MaxIoUAssigner',
-            iou_calculator=dict(type='BboxOverlapsNearest3D'),
-            pos_iou_thr=0.6, neg_iou_thr=0.3, min_pos_iou=0.3,
-            ignore_iof_thr=-1),
-        allowed_border=0,
-        code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],
-        pos_weight=-1,
-        debug=False))
-test_cfg = dict(
-    pts=dict(
-        use_rotate_nms=True,
-        nms_across_levels=False,
-        nms_pre=1000,
-        nms_thr=0.2,
-        score_thr=0.05,
-        min_bbox_size=0,
-        max_num=500))
-# dataset settings
-input_modality = dict(
-    use_lidar=True,
-    use_depth=False,
-    use_lidar_intensity=True,
-    use_camera=False)
-data = dict(
-    train=dict(modality=input_modality),
-    val=dict(modality=input_modality),
-    test=dict(modality=input_modality))
-evaluation = dict(interval=24)
+    pts_neck=dict(in_channels=[64, 160, 384]))
configs/regnet/hv_pointpillars_regnet-400mf_secfpn_sbn-all_4x8_2x_nus-3d.py (+5 −94)
-------------------------------------------------------------------------------------

-_base_ = [
-    '../_base_/datasets/nus-3d.py', '../_base_/schedules/schedule_2x.py',
-    '../_base_/default_runtime.py'
-]
+_base_ = './hv_pointpillars_regnet-400mf_fpn_sbn-all_4x8_2x_nus-3d.py'
-# model settings
-voxel_size = [0.25, 0.25, 8]
-point_cloud_range = [-50, -50, -5, 50, 50, 3]
-class_names = [
-    'car', 'truck', 'trailer', 'bus', 'construction_vehicle', 'bicycle',
-    'motorcycle', 'pedestrian', 'traffic_cone', 'barrier'
-]
 model = dict(
-    type='MVXFasterRCNNV2',
-    pretrained=dict(pts='open-mmlab://regnetx_400mf'),
-    pts_voxel_layer=dict(
-        max_num_points=64,
-        point_cloud_range=point_cloud_range,
-        voxel_size=voxel_size,
-        max_voxels=(30000, 40000)),
-    pts_voxel_encoder=dict(
-        type='HardVFE',
-        in_channels=4,
-        feat_channels=[64, 64],
-        with_distance=False,
-        voxel_size=voxel_size,
-        with_cluster_center=True,
-        with_voxel_center=True,
-        point_cloud_range=point_cloud_range,
-        norm_cfg=dict(type='naiveSyncBN1d', eps=1e-3, momentum=0.01)),
-    pts_middle_encoder=dict(
-        type='PointPillarsScatter', in_channels=64, output_shape=[400, 400]),
-    pts_backbone=dict(
-        type='NoStemRegNet',
-        arch=dict(w0=24, wa=24.48, wm=2.54, group_w=16, depth=22, bot_mul=1.0),
-        out_indices=(1, 2, 3),
-        frozen_stages=-1,
-        strides=(1, 2, 2, 2),
-        base_channels=64,
-        norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
-        norm_eval=False,
-        style='pytorch'),
     pts_neck=dict(
         type='SECONDFPN',
+        _delete_=True,
         norm_cfg=dict(type='naiveSyncBN2d', eps=1e-3, momentum=0.01),
         in_channels=[64, 160, 384],
         upsample_strides=[1, 2, 4],
         out_channels=[128, 128, 128]),
     pts_bbox_head=dict(
-        type='Anchor3DHead',
-        num_classes=10,
         in_channels=384,
         feat_channels=384,
-        use_direction_classifier=True,
         anchor_generator=dict(
-            type='Anchor3DRangeGenerator',
+            _delete_=True,
+            type='AlignedAnchor3DRangeGenerator',
             ranges=[
                 [-49.6, -49.6, -1.80032795, 49.6, 49.6, -1.80032795],
                 [-49.6, -49.6, -1.74440365, 49.6, 49.6, -1.74440365],
                 ...
@@ -73,56 +35,5 @@ model = dict(
             ],
             custom_values=[0, 0],
             rotations=[0, 1.57],
-            reshape_out=True),
-        assigner_per_size=False,
-        diff_rad_by_sin=True,
-        dir_offset=0.7854,  # pi/4
-        dir_limit_offset=0,
-        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9),
-        loss_cls=dict(type='FocalLoss', use_sigmoid=True, gamma=2.0,
-                      alpha=0.25, loss_weight=1.0),
-        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
-        loss_dir=dict(type='CrossEntropyLoss', use_sigmoid=False,
-                      loss_weight=0.2)))
+            reshape_out=True)))
-# model training and testing settings
-train_cfg = dict(
-    pts=dict(
-        assigner=dict(  # for Car
-            type='MaxIoUAssigner',
-            iou_calculator=dict(type='BboxOverlapsNearest3D'),
-            pos_iou_thr=0.6, neg_iou_thr=0.3, min_pos_iou=0.3,
-            ignore_iof_thr=-1),
-        allowed_border=0,
-        code_weight=[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.2, 0.2],
-        pos_weight=-1,
-        debug=False))
-test_cfg = dict(
-    pts=dict(
-        use_rotate_nms=True,
-        nms_across_levels=False,
-        nms_pre=1000,
-        nms_thr=0.2,
-        score_thr=0.05,
-        min_bbox_size=0,
-        max_num=500))
-# dataset settings
-input_modality = dict(
-    use_lidar=True,
-    use_depth=False,
-    use_lidar_intensity=True,
-    use_camera=False,
-)
-data = dict(
-    train=dict(modality=input_modality),
-    val=dict(modality=input_modality),
-    test=dict(modality=input_modality))
-evaluation = dict(interval=24)
configs/second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py (+4 −236)
---------------------------------------------------------------------

The whole config is replaced by `_base_` inheritance:

+_base_ = [
+    '../_base_/models/hv_second_secfpn.py',
+    '../_base_/datasets/kitti-3d-3class.py',
+    '../_base_/schedules/cyclic_40e.py',
+    '../_base_/default_runtime.py'
+]
-# model settings
-voxel_size = [0.05, 0.05, 0.1]
-point_cloud_range = [0, -40, -3, 70.4, 40, 1]
-model = dict(
-    type='VoxelNet',
-    voxel_layer=dict(
-        max_num_points=5,
-        point_cloud_range=point_cloud_range,
-        voxel_size=voxel_size,
-        max_voxels=(16000, 40000)),  # (training, testing) max_voxels
-    voxel_encoder=dict(type='HardSimpleVFE'),
-    middle_encoder=dict(
-        type='SparseEncoder',
-        in_channels=4,
-        sparse_shape=[41, 1600, 1408],
-        order=('conv', 'norm', 'act')),
-    backbone=dict(
-        type='SECOND',
-        in_channels=256,
-        layer_nums=[5, 5],
-        layer_strides=[1, 2],
-        out_channels=[128, 256]),
-    neck=dict(
-        type='SECONDFPN',
-        in_channels=[128, 256],
-        upsample_strides=[1, 2],
-        out_channels=[256, 256]),
-    bbox_head=dict(
-        type='Anchor3DHead',
-        num_classes=3,
-        in_channels=512,
-        feat_channels=512,
-        use_direction_classifier=True,
-        anchor_generator=dict(
-            type='Anchor3DRangeGenerator',
-            ranges=[
-                [0, -40.0, -0.6, 70.4, 40.0, -0.6],
-                [0, -40.0, -0.6, 70.4, 40.0, -0.6],
-                [0, -40.0, -1.78, 70.4, 40.0, -1.78],
-            ],
-            sizes=[[0.6, 0.8, 1.73], [0.6, 1.76, 1.73], [1.6, 3.9, 1.56]],
-            rotations=[0, 1.57],
-            reshape_out=False),
-        diff_rad_by_sin=True,
-        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
-        loss_cls=dict(type='FocalLoss', use_sigmoid=True, gamma=2.0,
-                      alpha=0.25, loss_weight=1.0),
-        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
-        loss_dir=dict(type='CrossEntropyLoss', use_sigmoid=False,
-                      loss_weight=0.2)))
-# model training and testing settings
-train_cfg = dict(
-    assigner=[
-        dict(  # for Pedestrian
-            type='MaxIoUAssigner',
-            iou_calculator=dict(type='BboxOverlapsNearest3D'),
-            pos_iou_thr=0.35, neg_iou_thr=0.2, min_pos_iou=0.2,
-            ignore_iof_thr=-1),
-        dict(  # for Cyclist
-            type='MaxIoUAssigner',
-            iou_calculator=dict(type='BboxOverlapsNearest3D'),
-            pos_iou_thr=0.35, neg_iou_thr=0.2, min_pos_iou=0.2,
-            ignore_iof_thr=-1),
-        dict(  # for Car
-            type='MaxIoUAssigner',
-            iou_calculator=dict(type='BboxOverlapsNearest3D'),
-            pos_iou_thr=0.6, neg_iou_thr=0.45, min_pos_iou=0.45,
-            ignore_iof_thr=-1),
-    ],
-    allowed_border=0,
-    pos_weight=-1,
-    debug=False)
-test_cfg = dict(
-    use_rotate_nms=True,
-    nms_across_levels=False,
-    nms_thr=0.01,
-    score_thr=0.1,
-    min_bbox_size=0,
-    nms_pre=100,
-    max_num=50)
-# dataset settings
-dataset_type = 'KittiDataset'
-data_root = 'data/kitti/'
-class_names = ['Pedestrian', 'Cyclist', 'Car']
-input_modality = dict(use_lidar=True, use_camera=False)
-db_sampler = dict(
-    data_root=data_root,
-    info_path=data_root + 'kitti_dbinfos_train.pkl',
-    rate=1.0,
-    object_rot_range=[0.0, 0.0],
-    prepare=dict(
-        filter_by_difficulty=[-1],
-        filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)),
-    classes=class_names,
-    sample_groups=dict(Car=12, Pedestrian=6, Cyclist=6))
-file_client_args = dict(backend='disk')
-# file_client_args = dict(
-#     backend='petrel', path_mapping=dict(data='s3://kitti_data/'))
-train_pipeline = [
-    dict(type='LoadPointsFromFile', load_dim=4, use_dim=4,
-         file_client_args=file_client_args),
-    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True,
-         file_client_args=file_client_args),
-    dict(type='ObjectSample', db_sampler=db_sampler),
-    dict(
-        type='ObjectNoise',
-        num_try=100,
-        loc_noise_std=[1.0, 1.0, 0.5],
-        global_rot_range=[0.0, 0.0],
-        rot_uniform_noise=[-0.78539816, 0.78539816]),
-    dict(type='RandomFlip3D', flip_ratio=0.5),
-    dict(
-        type='GlobalRotScale',
-        rot_uniform_noise=[-0.78539816, 0.78539816],
-        scaling_uniform_noise=[0.95, 1.05]),
-    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
-    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
-    dict(type='PointShuffle'),
-    dict(type='DefaultFormatBundle3D', class_names=class_names),
-    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
-]
-test_pipeline = [
-    dict(type='LoadPointsFromFile', load_dim=4, use_dim=4,
-         file_client_args=file_client_args),
-    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
-    dict(type='DefaultFormatBundle3D', class_names=class_names,
-         with_label=False),
-    dict(type='Collect3D', keys=['points']),
-]
-data = dict(
-    samples_per_gpu=6,
-    workers_per_gpu=4,
-    train=dict(
-        type='RepeatDataset',
-        times=2,
-        dataset=dict(
-            type=dataset_type, data_root=data_root,
-            ann_file=data_root + 'kitti_infos_train.pkl', split='training',
-            pts_prefix='velodyne_reduced', pipeline=train_pipeline,
-            modality=input_modality, classes=class_names, test_mode=False)),
-    val=dict(
-        type=dataset_type, data_root=data_root,
-        ann_file=data_root + 'kitti_infos_val.pkl', split='training',
-        pts_prefix='velodyne_reduced', pipeline=test_pipeline,
-        modality=input_modality, classes=class_names, test_mode=True),
-    test=dict(
-        type=dataset_type, data_root=data_root,
-        ann_file=data_root + 'kitti_infos_val.pkl', split='training',
-        pts_prefix='velodyne_reduced', pipeline=test_pipeline,
-        modality=input_modality, classes=class_names, test_mode=True))
-# optimizer
-lr = 0.0018  # max learning rate
-optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
-optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
-lr_config = dict(policy='cyclic', target_ratio=(10, 1e-4), cyclic_times=1,
-                 step_ratio_up=0.4)
-momentum_config = dict(policy='cyclic', target_ratio=(0.85 / 0.95, 1),
-                       cyclic_times=1, step_ratio_up=0.4)
-checkpoint_config = dict(interval=1)
-evaluation = dict(interval=1)
-# yapf:disable
-log_config = dict(
-    interval=50,
-    hooks=[dict(type='TextLoggerHook'), dict(type='TensorboardLoggerHook')])
-# yapf:enable
-# runtime settings
-total_epochs = 40
-dist_params = dict(backend='nccl')
-log_level = 'INFO'
-work_dir = './work_dirs/sec_secfpn_80e'
-load_from = None
-resume_from = None
-workflow = [('train', 1)]
configs/second/hv_second_secfpn_6x8_80e_kitti-3d-car.py (+9 −190)
------------------------------------------------------------------

-# model settings
-voxel_size = [0.05, 0.05, 0.1]
-point_cloud_range = [0, -40, -3, 70.4, 40, 1]  # velodyne coordinates, x, y, z
+_base_ = [
+    '../_base_/models/hv_second_secfpn.py',
+    '../_base_/datasets/kitti-3d-car.py',
+    '../_base_/schedules/cyclic_40e.py',
+    '../_base_/default_runtime.py'
+]
+point_cloud_range = [0, -40, -3, 70.4, 40, 1]
 model = dict(
-    type='VoxelNet',
-    voxel_layer=dict(
-        max_num_points=5,  # max_points_per_voxel
-        point_cloud_range=point_cloud_range,
-        voxel_size=voxel_size,
-        max_voxels=(16000, 40000)),  # (training, testing) max_voxels
-    voxel_encoder=dict(type='HardSimpleVFE'),
-    middle_encoder=dict(
-        type='SparseEncoder',
-        in_channels=4,
-        sparse_shape=[41, 1600, 1408],
-        order=('conv', 'norm', 'act')),
-    backbone=dict(
-        type='SECOND',
-        in_channels=256,
-        layer_nums=[5, 5],
-        layer_strides=[1, 2],
-        out_channels=[128, 256]),
-    neck=dict(
-        type='SECONDFPN',
-        in_channels=[128, 256],
-        upsample_strides=[1, 2],
-        out_channels=[256, 256]),
     bbox_head=dict(
-        type='Anchor3DHead',
         num_classes=1,
-        in_channels=512,
-        feat_channels=512,
-        use_direction_classifier=True,
         anchor_generator=dict(
+            _delete_=True,
             type='Anchor3DRangeGenerator',
             ranges=[[0, -40.0, -1.78, 70.4, 40.0, -1.78]],
             sizes=[[1.6, 3.9, 1.56]],
             rotations=[0, 1.57],
-            reshape_out=True),
-        diff_rad_by_sin=True,
-        bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
-        loss_cls=dict(type='FocalLoss', use_sigmoid=True, gamma=2.0,
-                      alpha=0.25, loss_weight=1.0),
-        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
-        loss_dir=dict(type='CrossEntropyLoss', use_sigmoid=False,
-                      loss_weight=0.2)))
+            reshape_out=True)))
 # model training and testing settings
 train_cfg = dict(
+    _delete_=True,
     assigner=dict(
         type='MaxIoUAssigner',
         iou_calculator=dict(type='BboxOverlapsNearest3D'),
         ...
@@ -66,146 +28,3 @@ train_cfg = dict(
     allowed_border=0,
     pos_weight=-1,
     debug=False)
-test_cfg = dict(
-    use_rotate_nms=True,
-    nms_across_levels=False,
-    nms_thr=0.01,
-    score_thr=0.1,
-    min_bbox_size=0,
-    nms_pre=100,
-    max_num=50)
-# dataset settings
-dataset_type = 'KittiDataset'
-data_root = 'data/kitti/'
-class_names = ['Car']
-input_modality = dict(use_lidar=True, use_camera=False)
-db_sampler = dict(
-    data_root=data_root,
-    info_path=data_root + 'kitti_dbinfos_train.pkl',
-    rate=1.0,
-    object_rot_range=[0.0, 0.0],
-    prepare=dict(
-        filter_by_difficulty=[-1],
-        filter_by_min_points=dict(Car=5)),
-    classes=class_names,
-    sample_groups=dict(Car=15))
-file_client_args = dict(backend='disk')
-# file_client_args = dict(
-#     backend='petrel', path_mapping=dict(data='s3://kitti_data/'))
-train_pipeline = [
-    dict(type='LoadPointsFromFile', load_dim=4, use_dim=4,
-         file_client_args=file_client_args),
-    dict(type='LoadAnnotations3D', with_bbox_3d=True, with_label_3d=True,
-         file_client_args=file_client_args),
-    dict(type='ObjectSample', db_sampler=db_sampler),
-    dict(
-        type='ObjectNoise',
-        num_try=100,
-        loc_noise_std=[1.0, 1.0, 0.5],
-        global_rot_range=[0.0, 0.0],
-        rot_uniform_noise=[-0.78539816, 0.78539816]),
-    dict(type='RandomFlip3D', flip_ratio=0.5),
-    dict(
-        type='GlobalRotScale',
-        rot_uniform_noise=[-0.78539816, 0.78539816],
-        scaling_uniform_noise=[0.95, 1.05]),
-    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
-    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
-    dict(type='PointShuffle'),
-    dict(type='DefaultFormatBundle3D', class_names=class_names),
-    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
-]
-test_pipeline = [
-    dict(type='LoadPointsFromFile', load_dim=4, use_dim=4,
-         file_client_args=file_client_args),
-    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
-    dict(type='DefaultFormatBundle3D', class_names=class_names,
-         with_label=False),
-    dict(type='Collect3D', keys=['points']),
-]
-data = dict(
-    samples_per_gpu=6,
-    workers_per_gpu=4,
-    train=dict(
-        type='RepeatDataset',
-        times=2,
-        dataset=dict(
-            type=dataset_type, data_root=data_root,
-            ann_file=data_root + 'kitti_infos_train.pkl', split='training',
-            pts_prefix='velodyne_reduced', pipeline=train_pipeline,
-            modality=input_modality, classes=class_names, test_mode=False)),
-    val=dict(
-        type=dataset_type, data_root=data_root,
-        ann_file=data_root + 'kitti_infos_val.pkl', split='training',
-        pts_prefix='velodyne_reduced', pipeline=test_pipeline,
-        modality=input_modality, classes=class_names, test_mode=True),
-    test=dict(
-        type=dataset_type, data_root=data_root,
-        ann_file=data_root + 'kitti_infos_val.pkl', split='training',
-        pts_prefix='velodyne_reduced', pipeline=test_pipeline,
-        modality=input_modality, classes=class_names, test_mode=True))
-# optimizer
-lr = 0.0018  # max learning rate
-optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
-optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
-lr_config = dict(policy='cyclic', target_ratio=(10, 1e-4), cyclic_times=1,
-                 step_ratio_up=0.4)
-momentum_config = dict(policy='cyclic', target_ratio=(0.85 / 0.95, 1),
-                       cyclic_times=1, step_ratio_up=0.4)
-checkpoint_config = dict(interval=1)
-evaluation = dict(interval=1)
-# yapf:disable
-log_config = dict(
-    interval=50,
-    hooks=[dict(type='TextLoggerHook'), dict(type='TensorboardLoggerHook')])
-# yapf:enable
-# runtime settings
-total_epochs = 40
-dist_params = dict(backend='nccl')
-log_level = 'INFO'
-work_dir = './work_dirs/sec_secfpn_80e'
-load_from = None
-resume_from = None
-workflow = [('train', 1)]
configs/votenet/votenet_16x8_sunrgbd-3d-10class.py (+0 −10)
------------------------------------------------------------

@@ -19,13 +19,3 @@ model = dict(
             [0.404671, 1.071108, 1.688889],
             [0.76584, 1.398258, 0.472728]]),
     ))
 # optimizer
-# yapf:disable
-log_config = dict(
-    interval=30,
-    hooks=[
-        dict(type='TextLoggerHook'),
-        dict(type='TensorboardLoggerHook')
-    ])
-# yapf:enable
configs/votenet/votenet_8x8_scannet-3d-18class.py (+1 −1)
----------------------------------------------------------

@@ -34,7 +34,7 @@ model = dict(
 # optimizer
 # yapf:disable
 log_config = dict(
-    interval=50,
+    interval=30,
     hooks=[
         dict(type='TextLoggerHook'),
         dict(type='TensorboardLoggerHook')
...
docs/tutorials/data_pipeline.md (+1 −1)
----------------------------------------

@@ -117,7 +117,7 @@ For each operation, we list the related dict fields that are added/updated/removed.
 - update: img, proposals, gt_bboxes, gt_bboxes_ignore, gt_labels, gt_masks, gt_semantic_seg

 `Collect`
-- add: img_meta (the keys of img_meta is specified by `meta_keys`)
+- add: img_metas (the keys of img_metas is specified by `meta_keys`)
 - remove: all other keys except for those specified by `keys`

 ### Test time augmentation
...
mmdet3d/apis/train.py (+2 −2)
------------------------------

@@ -28,8 +28,8 @@ def batch_processor(model, data, train_mode):
     losses = model(**data)
     loss, log_vars = parse_losses(losses)

-    if 'img_meta' in data:
-        num_samples = len(data['img_meta'].data)
+    if 'img_metas' in data:
+        num_samples = len(data['img_metas'].data)
     else:
         num_samples = len(data['img'].data)
     outputs = dict(loss=loss, log_vars=log_vars, num_samples=num_samples)
...
mmdet3d/core/bbox/__init__.py (+4 −3)
--------------------------------------

@@ -8,8 +8,9 @@ from .samplers import (BaseSampler, CombinedSampler,
                        InstanceBalancedPosSampler, IoUBalancedNegSampler,
                        PseudoSampler, RandomSampler, SamplingResult)
 from .structures import (BaseInstance3DBoxes, Box3DMode, CameraInstance3DBoxes,
-                         DepthInstance3DBoxes, LiDARInstance3DBoxes)
-from .transforms import (bbox3d2result, bbox3d2roi,
+                         DepthInstance3DBoxes, LiDARInstance3DBoxes,
+                         xywhr2xyxyr)
+from .transforms import (bbox3d2result, bbox3d2roi, bbox3d_mapping_back,
                          box3d_to_corner3d_upright_depth,
                          boxes3d_to_bev_torch_lidar)
...
@@ -27,5 +28,5 @@ __all__ = [
     'bbox_overlaps_3d', 'Box3DMode', 'LiDARInstance3DBoxes',
     'CameraInstance3DBoxes', 'bbox3d2roi', 'bbox3d2result',
     'box3d_to_corner3d_upright_depth', 'DepthInstance3DBoxes',
-    'BaseInstance3DBoxes'
+    'BaseInstance3DBoxes', 'bbox3d_mapping_back', 'xywhr2xyxyr'
 ]
mmdet3d/core/bbox/structures/__init__.py (+2 −1)
-------------------------------------------------

@@ -3,8 +3,9 @@ from .box_3d_mode import Box3DMode
 from .cam_box3d import CameraInstance3DBoxes
 from .depth_box3d import DepthInstance3DBoxes
 from .lidar_box3d import LiDARInstance3DBoxes
+from .utils import xywhr2xyxyr

 __all__ = [
     'Box3DMode', 'BaseInstance3DBoxes', 'LiDARInstance3DBoxes',
-    'CameraInstance3DBoxes', 'DepthInstance3DBoxes'
+    'CameraInstance3DBoxes', 'DepthInstance3DBoxes', 'xywhr2xyxyr'
 ]
mmdet3d/core/bbox/structures/base_box3d.py (+4 −1)
---------------------------------------------------

@@ -334,7 +334,10 @@ class BaseInstance3DBoxes(object):
         # use torch.cat (v.s. layers.cat)
         # so the returned boxes never share storage with input
-        cat_boxes = cls(torch.cat([b.tensor for b in boxes_list], dim=0))
+        cat_boxes = cls(
+            torch.cat([b.tensor for b in boxes_list], dim=0),
+            box_dim=boxes_list[0].tensor.shape[1],
+            with_yaw=boxes_list[0].with_yaw)
         return cat_boxes

     def to(self, device):
...
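This fix matters once TTA concatenates boxes across views: the nuScenes configs above use `code_size=9` (two extra velocity components appended via `custom_values`), and rebuilding the concatenated tensor through `cls(...)` with the default box dimension would presumably mishandle those extra columns. Propagating `box_dim` and `with_yaw` from the first element keeps the merged boxes consistent with their inputs.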
mmdet3d/core/bbox/transforms.py (+10 −0)
-----------------------------------------

 import torch


+def bbox3d_mapping_back(bboxes, scale_factor, flip):
+    """Map bboxes from testing scale to original image scale"""
+    new_bboxes = bboxes.clone()
+    if flip:
+        new_bboxes.flip()
+    new_bboxes.scale(1 / scale_factor)
+    return new_bboxes
+
+
 def transform_lidar_to_cam(boxes_lidar):
     """
     Only transform format, not exactly in camera coords
...
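`bbox3d_mapping_back` is the inverse of the test-time augmentation: it un-flips and then un-scales, returning boxes detected in an augmented view to the original point cloud frame. A toy check of the inversion — `ToyBoxes` is a hypothetical stand-in exposing only the `clone`/`flip`/`scale` methods the function relies on, and the function body is copied verbatim from the diff above so the snippet runs standalone:

    def bbox3d_mapping_back(bboxes, scale_factor, flip):
        new_bboxes = bboxes.clone()
        if flip:
            new_bboxes.flip()
        new_bboxes.scale(1 / scale_factor)
        return new_bboxes


    class ToyBoxes:
        """Hypothetical stand-in for BaseInstance3DBoxes (centers only)."""

        def __init__(self, centers):
            self.centers = [list(c) for c in centers]

        def clone(self):
            return ToyBoxes(self.centers)

        def flip(self):  # horizontal flip in bird's-eye view: y -> -y
            for c in self.centers:
                c[1] = -c[1]

        def scale(self, factor):
            for c in self.centers:
                c[:] = [v * factor for v in c]


    # A box detected in a view that was scaled by 1.05 and flipped:
    aug_box = ToyBoxes([[10.5, -2.1, -1.05]])
    orig = bbox3d_mapping_back(aug_box, scale_factor=1.05, flip=True)
    print(orig.centers)  # [[10.0, 2.0, -1.0]] -- back in the original frame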
mmdet3d/core/post_processing/__init__.py (+2 −1)
-------------------------------------------------

@@ -2,9 +2,10 @@ from mmdet.core.post_processing import (merge_aug_bboxes, merge_aug_masks,
                                         merge_aug_proposals, merge_aug_scores,
                                         multiclass_nms)
 from .box3d_nms import aligned_3d_nms, box3d_multiclass_nms
+from .merge_augs import merge_aug_bboxes_3d

 __all__ = [
     'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes',
     'merge_aug_scores', 'merge_aug_masks', 'box3d_multiclass_nms',
-    'aligned_3d_nms'
+    'aligned_3d_nms', 'merge_aug_bboxes_3d'
 ]
mmdet3d/core/post_processing/box3d_nms.py (+1 −1)
--------------------------------------------------

@@ -61,7 +61,7 @@ def box3d_multiclass_nms(mlvl_bboxes,
     else:
         bboxes = mlvl_scores.new_zeros((0, mlvl_bboxes.size(-1)))
         scores = mlvl_scores.new_zeros((0, ))
-        labels = mlvl_scores.new_zeros((0, mlvl_scores.size(-1)))
+        labels = mlvl_scores.new_zeros((0, ), dtype=torch.long)
         dir_scores = mlvl_scores.new_zeros((0, ))
     return bboxes, scores, labels, dir_scores
...
mmdet3d/core/post_processing/merge_augs.py (new file, 0 → 100644) (+87 −0)
----------------------------------------------------------------------------

import torch

from mmdet3d.ops.iou3d.iou3d_utils import nms_gpu, nms_normal_gpu
from ..bbox import bbox3d2result, bbox3d_mapping_back, xywhr2xyxyr


def merge_aug_bboxes_3d(aug_results, img_metas, test_cfg):
    """Merge augmented detection 3D bboxes and scores.

    Args:
        aug_results (list[dict]): The dict of detection results.
            The dict contains the following keys

            - boxes_3d (:obj:`BaseInstance3DBoxes`): detection bbox
            - scores_3d (torch.Tensor): detection scores
            - labels_3d (torch.Tensor): predicted box labels
        img_metas (list[dict]): Meta information of each sample
        test_cfg (dict): Test config.

    Returns:
        dict: bbox results in cpu mode, containing the merged results

            - boxes_3d (:obj:`BaseInstance3DBoxes`): merged detection bbox
            - scores_3d (torch.Tensor): merged detection scores
            - labels_3d (torch.Tensor): merged predicted box labels
    """
    assert len(aug_results) == len(img_metas), \
        '"aug_results" should have the same length as "img_metas", got len(' \
        f'aug_results)={len(aug_results)} and len(img_metas)={len(img_metas)}'

    recovered_bboxes = []
    recovered_scores = []
    recovered_labels = []

    for bboxes, img_info in zip(aug_results, img_metas):
        scale_factor = img_info[0]['pcd_scale_factor']
        flip = img_info[0]['pcd_flip']
        recovered_scores.append(bboxes['scores_3d'])
        recovered_labels.append(bboxes['labels_3d'])
        bboxes = bbox3d_mapping_back(bboxes['boxes_3d'], scale_factor, flip)
        recovered_bboxes.append(bboxes)

    aug_bboxes = recovered_bboxes[0].cat(recovered_bboxes)
    aug_bboxes_for_nms = xywhr2xyxyr(aug_bboxes.bev)
    aug_scores = torch.cat(recovered_scores, dim=0)
    aug_labels = torch.cat(recovered_labels, dim=0)

    # TODO: use a more elegant way to deal with nms
    if test_cfg.use_rotate_nms:
        nms_func = nms_gpu
    else:
        nms_func = nms_normal_gpu

    merged_bboxes = []
    merged_scores = []
    merged_labels = []

    # Apply multi-class nms when merging bboxes
    if len(aug_labels) == 0:
        return bbox3d2result(aug_bboxes, aug_scores, aug_labels)

    for class_id in range(torch.max(aug_labels).item() + 1):
        class_inds = (aug_labels == class_id)
        bboxes_i = aug_bboxes[class_inds]
        bboxes_nms_i = aug_bboxes_for_nms[class_inds, :]
        scores_i = aug_scores[class_inds]
        labels_i = aug_labels[class_inds]
        if len(bboxes_nms_i) == 0:
            continue
        selected = nms_func(bboxes_nms_i, scores_i, test_cfg.nms_thr)

        merged_bboxes.append(bboxes_i[selected, :])
        merged_scores.append(scores_i[selected])
        merged_labels.append(labels_i[selected])

    merged_bboxes = merged_bboxes[0].cat(merged_bboxes)
    merged_scores = torch.cat(merged_scores, dim=0)
    merged_labels = torch.cat(merged_labels, dim=0)

    _, order = merged_scores.sort(0, descending=True)
    num = min(test_cfg.max_num, len(aug_bboxes))
    order = order[:num]

    merged_bboxes = merged_bboxes[order]
    merged_scores = merged_scores[order]
    merged_labels = merged_labels[order]

    return bbox3d2result(merged_bboxes, merged_scores, merged_labels)
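The merge pattern above — pool detections from all mapped-back views, run NMS per class, then sort by score and cap at `test_cfg.max_num` — can be exercised without the CUDA ops. A CPU-only sketch on axis-aligned 2D boxes; `simple_iou_nms` is a hypothetical stand-in for the rotated `nms_gpu`, and real calls go through `merge_aug_bboxes_3d` with `BaseInstance3DBoxes` inputs:

    import torch


    def simple_iou_nms(boxes, scores, thr):
        """Greedy NMS on (x1, y1, x2, y2) boxes; returns kept local indices."""
        order = scores.argsort(descending=True)
        keep = []
        while order.numel() > 0:
            i = order[0].item()
            keep.append(i)
            if order.numel() == 1:
                break
            rest = order[1:]
            lt = torch.max(boxes[i, :2], boxes[rest, :2])
            rb = torch.min(boxes[i, 2:], boxes[rest, 2:])
            wh = (rb - lt).clamp(min=0)
            inter = wh[:, 0] * wh[:, 1]
            area_i = (boxes[i, 2] - boxes[i, 0]) * (boxes[i, 3] - boxes[i, 1])
            area_r = (boxes[rest, 2] - boxes[rest, 0]) * \
                     (boxes[rest, 3] - boxes[rest, 1])
            iou = inter / (area_i + area_r - inter)
            order = rest[iou <= thr]
        return torch.tensor(keep, dtype=torch.long)


    # Detections pooled from two TTA views (already mapped back): the
    # near-duplicate pair collapses to the higher-scoring box, per class.
    boxes = torch.tensor([[0., 0., 2., 2.], [0.1, 0., 2., 2.1], [5., 5., 7., 7.]])
    scores = torch.tensor([0.9, 0.8, 0.7])
    labels = torch.tensor([0, 0, 1])

    merged = []
    for cls_id in range(int(labels.max()) + 1):
        inds = (labels == cls_id).nonzero(as_tuple=True)[0]
        keep = simple_iou_nms(boxes[inds], scores[inds], thr=0.5)
        merged.append(inds[keep])
    merged = torch.cat(merged)

    # Final score sort and cap, mirroring test_cfg.max_num above:
    order = scores[merged].argsort(descending=True)
    merged = merged[order][:50]
    print(boxes[merged], scores[merged], labels[merged])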