Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
mmdetection3d
Commits
3c5ff9fa
Commit
3c5ff9fa
authored
Jun 17, 2020
by
zhangwenwei
Browse files
Support test time augmentation
parent
f6e95edd
Changes
72
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
250 additions
and
1170 deletions
+250
-1170
configs/parta2/hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-car.py
...gs/parta2/hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-car.py
+24
-10
configs/pointpillars/hv_pointpillars_fpn_sbn-all_4x8_2x_nus-3d.py
...pointpillars/hv_pointpillars_fpn_sbn-all_4x8_2x_nus-3d.py
+1
-121
configs/pointpillars/hv_pointpillars_secfpn_6x8_160e_kitti-3d-3class.py
...illars/hv_pointpillars_secfpn_6x8_160e_kitti-3d-3class.py
+46
-175
configs/pointpillars/hv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py
...ntpillars/hv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py
+32
-130
configs/pointpillars/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d.py
...ntpillars/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d.py
+8
-90
configs/regnet/hv_pointpillars_regnet-400mf_fpn_sbn-all_4x8_2x_nus-3d.py
...hv_pointpillars_regnet-400mf_fpn_sbn-all_4x8_2x_nus-3d.py
+7
-103
configs/regnet/hv_pointpillars_regnet-400mf_secfpn_sbn-all_4x8_2x_nus-3d.py
...pointpillars_regnet-400mf_secfpn_sbn-all_4x8_2x_nus-3d.py
+5
-94
configs/second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py
configs/second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py
+4
-236
configs/second/hv_second_secfpn_6x8_80e_kitti-3d-car.py
configs/second/hv_second_secfpn_6x8_80e_kitti-3d-car.py
+9
-190
configs/votenet/votenet_16x8_sunrgbd-3d-10class.py
configs/votenet/votenet_16x8_sunrgbd-3d-10class.py
+0
-10
configs/votenet/votenet_8x8_scannet-3d-18class.py
configs/votenet/votenet_8x8_scannet-3d-18class.py
+1
-1
docs/tutorials/data_pipeline.md
docs/tutorials/data_pipeline.md
+1
-1
mmdet3d/apis/train.py
mmdet3d/apis/train.py
+2
-2
mmdet3d/core/bbox/__init__.py
mmdet3d/core/bbox/__init__.py
+4
-3
mmdet3d/core/bbox/structures/__init__.py
mmdet3d/core/bbox/structures/__init__.py
+2
-1
mmdet3d/core/bbox/structures/base_box3d.py
mmdet3d/core/bbox/structures/base_box3d.py
+4
-1
mmdet3d/core/bbox/transforms.py
mmdet3d/core/bbox/transforms.py
+10
-0
mmdet3d/core/post_processing/__init__.py
mmdet3d/core/post_processing/__init__.py
+2
-1
mmdet3d/core/post_processing/box3d_nms.py
mmdet3d/core/post_processing/box3d_nms.py
+1
-1
mmdet3d/core/post_processing/merge_augs.py
mmdet3d/core/post_processing/merge_augs.py
+87
-0
No files found.
configs/parta2/hv_PartA2_secfpn_2x8_cyclic_80e_kitti-3d-car.py
View file @
3c5ff9fa
...
@@ -180,14 +180,14 @@ train_pipeline = [
...
@@ -180,14 +180,14 @@ train_pipeline = [
dict
(
dict
(
type
=
'ObjectNoise'
,
type
=
'ObjectNoise'
,
num_try
=
100
,
num_try
=
100
,
loc_noise
_std
=
[
1.0
,
1.0
,
0.5
],
translation
_std
=
[
1.0
,
1.0
,
0.5
],
global_rot_range
=
[
0.0
,
0.0
],
global_rot_range
=
[
0.0
,
0.0
],
rot_
uniform_nois
e
=
[
-
0.78539816
,
0.78539816
]),
rot_
rang
e
=
[
-
0.78539816
,
0.78539816
]),
dict
(
type
=
'RandomFlip3D'
,
flip_ratio
=
0.5
),
dict
(
type
=
'RandomFlip3D'
,
flip_ratio
=
0.5
),
dict
(
dict
(
type
=
'GlobalRotScale'
,
type
=
'GlobalRotScale
Trans
'
,
rot_
uniform_nois
e
=
[
-
0.78539816
,
0.78539816
],
rot_
rang
e
=
[
-
0.78539816
,
0.78539816
],
scal
ing_uniform_nois
e
=
[
0.95
,
1.05
]),
scal
e_ratio_rang
e
=
[
0.95
,
1.05
]),
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'ObjectRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'ObjectRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'ObjectNameFilter'
,
classes
=
class_names
),
dict
(
type
=
'ObjectNameFilter'
,
classes
=
class_names
),
...
@@ -197,12 +197,26 @@ train_pipeline = [
...
@@ -197,12 +197,26 @@ train_pipeline = [
]
]
test_pipeline
=
[
test_pipeline
=
[
dict
(
type
=
'LoadPointsFromFile'
,
load_dim
=
4
,
use_dim
=
4
),
dict
(
type
=
'LoadPointsFromFile'
,
load_dim
=
4
,
use_dim
=
4
),
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
dict
(
type
=
'DefaultFormatBundle3D'
,
type
=
'MultiScaleFlipAug3D'
,
class_names
=
class_names
,
img_scale
=
(
1333
,
800
),
with_label
=
False
),
pts_scale_ratio
=
1
,
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
])
flip
=
False
,
transforms
=
[
dict
(
type
=
'GlobalRotScaleTrans'
,
rot_range
=
[
0
,
0
],
scale_ratio_range
=
[
1.
,
1.
],
translation_std
=
[
0
,
0
,
0
]),
dict
(
type
=
'RandomFlip3D'
),
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
,
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
])
])
]
]
data
=
dict
(
data
=
dict
(
...
...
configs/pointpillars/hv_pointpillars_fpn_sbn-all_4x8_2x_nus-3d.py
View file @
3c5ff9fa
_base_
=
[
_base_
=
[
'../_base_/models/pointpillars_second_fpn.py'
,
'../_base_/datasets/nus-3d.py'
,
'../_base_/schedules/schedule_2x.py'
,
'../_base_/datasets/nus-3d.py'
,
'../_base_/schedules/schedule_2x.py'
,
'../_base_/default_runtime.py'
'../_base_/default_runtime.py'
]
]
# model settings
voxel_size
=
[
0.25
,
0.25
,
8
]
point_cloud_range
=
[
-
50
,
-
50
,
-
5
,
50
,
50
,
3
]
class_names
=
[
'car'
,
'truck'
,
'trailer'
,
'bus'
,
'construction_vehicle'
,
'bicycle'
,
'motorcycle'
,
'pedestrian'
,
'traffic_cone'
,
'barrier'
]
model
=
dict
(
type
=
'MVXFasterRCNNV2'
,
pts_voxel_layer
=
dict
(
max_num_points
=
64
,
# max_points_per_voxel
point_cloud_range
=
point_cloud_range
,
# velodyne coordinates, x, y, z
voxel_size
=
voxel_size
,
max_voxels
=
(
30000
,
40000
),
# (training, testing) max_voxels
),
pts_voxel_encoder
=
dict
(
type
=
'HardVFE'
,
in_channels
=
4
,
feat_channels
=
[
64
,
64
],
with_distance
=
False
,
voxel_size
=
voxel_size
,
with_cluster_center
=
True
,
with_voxel_center
=
True
,
point_cloud_range
=
point_cloud_range
,
norm_cfg
=
dict
(
type
=
'naiveSyncBN1d'
,
eps
=
1e-3
,
momentum
=
0.01
)),
pts_middle_encoder
=
dict
(
type
=
'PointPillarsScatter'
,
in_channels
=
64
,
output_shape
=
[
400
,
400
],
# checked from PointCloud3D
),
pts_backbone
=
dict
(
type
=
'SECOND'
,
in_channels
=
64
,
norm_cfg
=
dict
(
type
=
'naiveSyncBN2d'
,
eps
=
1e-3
,
momentum
=
0.01
),
layer_nums
=
[
3
,
5
,
5
],
layer_strides
=
[
2
,
2
,
2
],
out_channels
=
[
64
,
128
,
256
],
),
pts_neck
=
dict
(
type
=
'FPN'
,
norm_cfg
=
dict
(
type
=
'naiveSyncBN2d'
,
eps
=
1e-3
,
momentum
=
0.01
),
act_cfg
=
dict
(
type
=
'ReLU'
),
in_channels
=
[
64
,
128
,
256
],
out_channels
=
256
,
start_level
=
0
,
num_outs
=
3
,
),
pts_bbox_head
=
dict
(
type
=
'Anchor3DHead'
,
num_classes
=
10
,
in_channels
=
256
,
feat_channels
=
256
,
use_direction_classifier
=
True
,
anchor_generator
=
dict
(
type
=
'AlignedAnchor3DRangeGenerator'
,
ranges
=
[[
-
50
,
-
50
,
-
1.8
,
50
,
50
,
-
1.8
]],
scales
=
[
1
,
2
,
4
],
sizes
=
[
[
0.8660
,
2.5981
,
1.
],
# 1.5/sqrt(3)
[
0.5774
,
1.7321
,
1.
],
# 1/sqrt(3)
[
1.
,
1.
,
1.
],
[
0.4
,
0.4
,
1
],
],
custom_values
=
[
0
,
0
],
rotations
=
[
0
,
1.57
],
reshape_out
=
True
),
assigner_per_size
=
False
,
diff_rad_by_sin
=
True
,
dir_offset
=
0.7854
,
# pi/4
dir_limit_offset
=
0
,
bbox_coder
=
dict
(
type
=
'DeltaXYZWLHRBBoxCoder'
,
code_size
=
9
),
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
1.0
/
9.0
,
loss_weight
=
1.0
),
loss_dir
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
False
,
loss_weight
=
0.2
)))
# model training and testing settings
train_cfg
=
dict
(
pts
=
dict
(
assigner
=
dict
(
# for Car
type
=
'MaxIoUAssigner'
,
iou_calculator
=
dict
(
type
=
'BboxOverlapsNearest3D'
),
pos_iou_thr
=
0.6
,
neg_iou_thr
=
0.3
,
min_pos_iou
=
0.3
,
ignore_iof_thr
=-
1
),
allowed_border
=
0
,
code_weight
=
[
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
0.2
,
0.2
],
pos_weight
=-
1
,
debug
=
False
))
test_cfg
=
dict
(
pts
=
dict
(
use_rotate_nms
=
True
,
nms_across_levels
=
False
,
nms_pre
=
1000
,
nms_thr
=
0.2
,
score_thr
=
0.05
,
min_bbox_size
=
0
,
max_num
=
500
# soft-nms is also supported for rcnn testing
# e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
))
# dataset settings
input_modality
=
dict
(
use_lidar
=
True
,
use_camera
=
False
,
use_radar
=
False
,
use_map
=
False
,
use_external
=
False
)
data
=
dict
(
train
=
dict
(
modality
=
input_modality
),
val
=
dict
(
modality
=
input_modality
),
test
=
dict
(
modality
=
input_modality
))
evaluation
=
dict
(
interval
=
24
)
configs/pointpillars/hv_pointpillars_secfpn_6x8_160e_kitti-3d-3class.py
View file @
3c5ff9fa
# model settings
_base_
=
[
point_cloud_range
=
[
0
,
-
39.68
,
-
3
,
69.12
,
39.68
,
1
]
'../_base_/models/hv_pointpillars_secfpn.py'
,
voxel_size
=
[
0.16
,
0.16
,
4
]
'../_base_/datasets/kitti-3d-3class.py'
,
model
=
dict
(
'../_base_/schedules/cyclic_40e.py'
,
'../_base_/default_runtime.py'
type
=
'VoxelNet'
,
]
voxel_layer
=
dict
(
max_num_points
=
32
,
point_cloud_range
=
point_cloud_range
,
voxel_size
=
voxel_size
,
max_voxels
=
(
16000
,
40000
)),
voxel_encoder
=
dict
(
type
=
'PillarFeatureNet'
,
in_channels
=
4
,
feat_channels
=
[
64
],
with_distance
=
False
,
voxel_size
=
voxel_size
,
point_cloud_range
=
point_cloud_range
),
middle_encoder
=
dict
(
type
=
'PointPillarsScatter'
,
in_channels
=
64
,
output_shape
=
[
496
,
432
]),
backbone
=
dict
(
type
=
'SECOND'
,
in_channels
=
64
,
layer_nums
=
[
3
,
5
,
5
],
layer_strides
=
[
2
,
2
,
2
],
out_channels
=
[
64
,
128
,
256
]),
neck
=
dict
(
type
=
'SECONDFPN'
,
in_channels
=
[
64
,
128
,
256
],
upsample_strides
=
[
1
,
2
,
4
],
out_channels
=
[
128
,
128
,
128
]),
bbox_head
=
dict
(
type
=
'Anchor3DHead'
,
num_classes
=
3
,
in_channels
=
384
,
feat_channels
=
384
,
use_direction_classifier
=
True
,
anchor_generator
=
dict
(
type
=
'Anchor3DRangeGenerator'
,
ranges
=
[
[
0
,
-
39.68
,
-
0.6
,
70.4
,
39.68
,
-
0.6
],
[
0
,
-
39.68
,
-
0.6
,
70.4
,
39.68
,
-
0.6
],
[
0
,
-
39.68
,
-
1.78
,
70.4
,
39.68
,
-
1.78
],
],
sizes
=
[[
0.6
,
0.8
,
1.73
],
[
0.6
,
1.76
,
1.73
],
[
1.6
,
3.9
,
1.56
]],
rotations
=
[
0
,
1.57
],
reshape_out
=
False
),
diff_rad_by_sin
=
True
,
bbox_coder
=
dict
(
type
=
'DeltaXYZWLHRBBoxCoder'
),
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
1.0
/
9.0
,
loss_weight
=
2.0
),
loss_dir
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
False
,
loss_weight
=
0.2
)))
# model training and testing settings
train_cfg
=
dict
(
assigner
=
[
dict
(
# for Pedestrian
type
=
'MaxIoUAssigner'
,
iou_calculator
=
dict
(
type
=
'BboxOverlapsNearest3D'
),
pos_iou_thr
=
0.5
,
neg_iou_thr
=
0.35
,
min_pos_iou
=
0.35
,
ignore_iof_thr
=-
1
),
dict
(
# for Cyclist
type
=
'MaxIoUAssigner'
,
iou_calculator
=
dict
(
type
=
'BboxOverlapsNearest3D'
),
pos_iou_thr
=
0.5
,
neg_iou_thr
=
0.35
,
min_pos_iou
=
0.35
,
ignore_iof_thr
=-
1
),
dict
(
# for Car
type
=
'MaxIoUAssigner'
,
iou_calculator
=
dict
(
type
=
'BboxOverlapsNearest3D'
),
pos_iou_thr
=
0.6
,
neg_iou_thr
=
0.45
,
min_pos_iou
=
0.45
,
ignore_iof_thr
=-
1
),
],
allowed_border
=
0
,
pos_weight
=-
1
,
debug
=
False
)
test_cfg
=
dict
(
use_rotate_nms
=
True
,
nms_across_levels
=
False
,
nms_thr
=
0.01
,
score_thr
=
0.1
,
min_bbox_size
=
0
,
nms_pre
=
100
,
max_num
=
50
)
point_cloud_range
=
[
0
,
-
39.68
,
-
3
,
69.12
,
39.68
,
1
]
# dataset settings
# dataset settings
dataset_type
=
'KittiDataset'
data_root
=
'data/kitti/'
data_root
=
'data/kitti/'
class_names
=
[
'Pedestrian'
,
'Cyclist'
,
'Car'
]
class_names
=
[
'Pedestrian'
,
'Cyclist'
,
'Car'
]
input_modality
=
dict
(
use_lidar
=
True
,
use_camera
=
Fal
se
)
# PointPillars adopted a different sampling strategies among clas
se
s
db_sampler
=
dict
(
db_sampler
=
dict
(
data_root
=
data_root
,
data_root
=
data_root
,
info_path
=
data_root
+
'kitti_dbinfos_train.pkl'
,
info_path
=
data_root
+
'kitti_dbinfos_train.pkl'
,
...
@@ -108,6 +20,7 @@ db_sampler = dict(
...
@@ -108,6 +20,7 @@ db_sampler = dict(
classes
=
class_names
,
classes
=
class_names
,
sample_groups
=
dict
(
Car
=
15
,
Pedestrian
=
10
,
Cyclist
=
10
))
sample_groups
=
dict
(
Car
=
15
,
Pedestrian
=
10
,
Cyclist
=
10
))
# PointPillars uses different augmentation hyper parameters
train_pipeline
=
[
train_pipeline
=
[
dict
(
type
=
'LoadPointsFromFile'
,
load_dim
=
4
,
use_dim
=
4
),
dict
(
type
=
'LoadPointsFromFile'
,
load_dim
=
4
,
use_dim
=
4
),
dict
(
type
=
'LoadAnnotations3D'
,
with_bbox_3d
=
True
,
with_label_3d
=
True
),
dict
(
type
=
'LoadAnnotations3D'
,
with_bbox_3d
=
True
,
with_label_3d
=
True
),
...
@@ -115,14 +28,14 @@ train_pipeline = [
...
@@ -115,14 +28,14 @@ train_pipeline = [
dict
(
dict
(
type
=
'ObjectNoise'
,
type
=
'ObjectNoise'
,
num_try
=
100
,
num_try
=
100
,
loc_noise
_std
=
[
0.25
,
0.25
,
0.25
],
translation
_std
=
[
0.25
,
0.25
,
0.25
],
global_rot_range
=
[
0.0
,
0.0
],
global_rot_range
=
[
0.0
,
0.0
],
rot_
uniform_nois
e
=
[
-
0.15707963267
,
0.15707963267
]),
rot_
rang
e
=
[
-
0.15707963267
,
0.15707963267
]),
dict
(
type
=
'RandomFlip3D'
,
flip_ratio
=
0.5
),
dict
(
type
=
'RandomFlip3D'
,
flip_ratio
=
0.5
),
dict
(
dict
(
type
=
'GlobalRotScale'
,
type
=
'GlobalRotScale
Trans
'
,
rot_
uniform_nois
e
=
[
-
0.78539816
,
0.78539816
],
rot_
rang
e
=
[
-
0.78539816
,
0.78539816
],
scal
ing_uniform_nois
e
=
[
0.95
,
1.05
]),
scal
e_ratio_rang
e
=
[
0.95
,
1.05
]),
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'ObjectRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'ObjectRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'PointShuffle'
),
dict
(
type
=
'PointShuffle'
),
...
@@ -131,86 +44,44 @@ train_pipeline = [
...
@@ -131,86 +44,44 @@ train_pipeline = [
]
]
test_pipeline
=
[
test_pipeline
=
[
dict
(
type
=
'LoadPointsFromFile'
,
load_dim
=
4
,
use_dim
=
4
),
dict
(
type
=
'LoadPointsFromFile'
,
load_dim
=
4
,
use_dim
=
4
),
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
dict
(
type
=
'DefaultFormatBundle3D'
,
type
=
'MultiScaleFlipAug3D'
,
class_names
=
class_names
,
img_scale
=
(
1333
,
800
),
with_label
=
False
),
pts_scale_ratio
=
1
,
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
])
flip
=
False
,
transforms
=
[
dict
(
type
=
'GlobalRotScaleTrans'
,
rot_range
=
[
0
,
0
],
scale_ratio_range
=
[
1.
,
1.
],
translation_std
=
[
0
,
0
,
0
]),
dict
(
type
=
'RandomFlip3D'
),
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
,
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
])
])
]
]
data
=
dict
(
data
=
dict
(
samples_per_gpu
=
6
,
train
=
dict
(
dataset
=
dict
(
pipeline
=
train_pipeline
,
classes
=
class_names
)),
workers_per_gpu
=
4
,
val
=
dict
(
pipeline
=
test_pipeline
,
classes
=
class_names
),
train
=
dict
(
test
=
dict
(
pipeline
=
test_pipeline
,
classes
=
class_names
))
type
=
'RepeatDataset'
,
times
=
2
,
# In practice PointPillars also uses a different schedule
dataset
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'kitti_infos_train.pkl'
,
split
=
'training'
,
pts_prefix
=
'velodyne_reduced'
,
pipeline
=
train_pipeline
,
modality
=
input_modality
,
classes
=
class_names
,
test_mode
=
False
)),
val
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'kitti_infos_val.pkl'
,
split
=
'training'
,
pts_prefix
=
'velodyne_reduced'
,
pipeline
=
test_pipeline
,
modality
=
input_modality
,
classes
=
class_names
,
test_mode
=
True
),
test
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'kitti_infos_val.pkl'
,
split
=
'training'
,
pts_prefix
=
'velodyne_reduced'
,
pipeline
=
test_pipeline
,
modality
=
input_modality
,
classes
=
class_names
,
test_mode
=
True
))
# optimizer
# optimizer
lr
=
0.001
# max learning rate
lr
=
0.001
optimizer
=
dict
(
optimizer
=
dict
(
lr
=
lr
)
type
=
'AdamW'
,
# max_norm=35 is slightly better than 10 for PointPillars in the earlier
lr
=
lr
,
# development of the codebase, thus we keep the setting. But we do not
betas
=
(
0.95
,
0.99
),
# the momentum is changed during training
# specifically tune this parameter.
weight_decay
=
0.01
)
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
35
,
norm_type
=
2
))
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
35
,
norm_type
=
2
))
# learning policy
# Use evaluation interval=2 to reduce the number of evaluation times
lr_config
=
dict
(
policy
=
'cyclic'
,
target_ratio
=
(
10
,
1e-4
),
cyclic_times
=
1
,
step_ratio_up
=
0.4
,
)
momentum_config
=
dict
(
policy
=
'cyclic'
,
target_ratio
=
(
0.85
/
0.95
,
1
),
cyclic_times
=
1
,
step_ratio_up
=
0.4
,
)
checkpoint_config
=
dict
(
interval
=
1
)
evaluation
=
dict
(
interval
=
2
)
evaluation
=
dict
(
interval
=
2
)
# yapf:disable
# PointPillars usually needs a longer schedule than SECOND, so we simply double
log_config
=
dict
(
# the training schedule. Note that since we use RepeatDataset and
interval
=
50
,
# repeat factor is 2, so we actually train 160 epochs.
hooks
=
[
dict
(
type
=
'TextLoggerHook'
),
dict
(
type
=
'TensorboardLoggerHook'
)
])
# yapf:enable
# runtime settings
total_epochs
=
80
total_epochs
=
80
dist_params
=
dict
(
backend
=
'nccl'
)
log_level
=
'INFO'
work_dir
=
'./work_dirs/pp_secfpn_80e'
load_from
=
None
resume_from
=
None
workflow
=
[(
'train'
,
1
)]
configs/pointpillars/hv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py
View file @
3c5ff9fa
# model settings
# model settings
voxel_size
=
[
0.16
,
0.16
,
4
]
_base_
=
'./hv_pointpillars_secfpn_6x8_160e_kitti-3d-3class.py'
point_cloud_range
=
[
0
,
-
39.68
,
-
3
,
69.12
,
39.68
,
1
]
point_cloud_range
=
[
0
,
-
39.68
,
-
3
,
69.12
,
39.68
,
1
]
model
=
dict
(
model
=
dict
(
type
=
'VoxelNet'
,
voxel_layer
=
dict
(
max_num_points
=
64
,
point_cloud_range
=
point_cloud_range
,
voxel_size
=
voxel_size
,
max_voxels
=
(
12000
,
20000
)),
voxel_encoder
=
dict
(
type
=
'PillarFeatureNet'
,
in_channels
=
4
,
feat_channels
=
[
64
],
with_distance
=
False
,
voxel_size
=
voxel_size
,
point_cloud_range
=
point_cloud_range
),
middle_encoder
=
dict
(
type
=
'PointPillarsScatter'
,
in_channels
=
64
,
output_shape
=
[
496
,
432
]),
backbone
=
dict
(
type
=
'SECOND'
,
in_channels
=
64
,
layer_nums
=
[
3
,
5
,
5
],
layer_strides
=
[
2
,
2
,
2
],
out_channels
=
[
64
,
128
,
256
]),
neck
=
dict
(
type
=
'SECONDFPN'
,
in_channels
=
[
64
,
128
,
256
],
upsample_strides
=
[
1
,
2
,
4
],
out_channels
=
[
128
,
128
,
128
]),
bbox_head
=
dict
(
bbox_head
=
dict
(
type
=
'Anchor3DHead'
,
type
=
'Anchor3DHead'
,
num_classes
=
1
,
num_classes
=
1
,
in_channels
=
384
,
feat_channels
=
384
,
use_direction_classifier
=
True
,
anchor_generator
=
dict
(
anchor_generator
=
dict
(
_delete_
=
True
,
type
=
'Anchor3DRangeGenerator'
,
type
=
'Anchor3DRangeGenerator'
,
ranges
=
[[
0
,
-
39.68
,
-
1.78
,
69.12
,
39.68
,
-
1.78
]],
ranges
=
[[
0
,
-
39.68
,
-
1.78
,
69.12
,
39.68
,
-
1.78
]],
sizes
=
[[
1.6
,
3.9
,
1.56
]],
sizes
=
[[
1.6
,
3.9
,
1.56
]],
rotations
=
[
0
,
1.57
],
rotations
=
[
0
,
1.57
],
reshape_out
=
True
),
reshape_out
=
True
)))
diff_rad_by_sin
=
True
,
bbox_coder
=
dict
(
type
=
'DeltaXYZWLHRBBoxCoder'
),
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
1.0
/
9.0
,
loss_weight
=
2.0
),
loss_dir
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
False
,
loss_weight
=
0.2
)))
# model training and testing settings
# model training and testing settings
train_cfg
=
dict
(
train_cfg
=
dict
(
_delete_
=
True
,
assigner
=
dict
(
assigner
=
dict
(
type
=
'MaxIoUAssigner'
,
type
=
'MaxIoUAssigner'
,
iou_calculator
=
dict
(
type
=
'BboxOverlapsNearest3D'
),
iou_calculator
=
dict
(
type
=
'BboxOverlapsNearest3D'
),
...
@@ -63,20 +26,11 @@ train_cfg = dict(
...
@@ -63,20 +26,11 @@ train_cfg = dict(
allowed_border
=
0
,
allowed_border
=
0
,
pos_weight
=-
1
,
pos_weight
=-
1
,
debug
=
False
)
debug
=
False
)
test_cfg
=
dict
(
use_rotate_nms
=
True
,
nms_across_levels
=
False
,
nms_thr
=
0.01
,
score_thr
=
0.1
,
min_bbox_size
=
0
,
nms_pre
=
100
,
max_num
=
50
)
# dataset settings
# dataset settings
dataset_type
=
'KittiDataset'
dataset_type
=
'KittiDataset'
data_root
=
'data/kitti/'
data_root
=
'data/kitti/'
class_names
=
[
'Car'
]
class_names
=
[
'Car'
]
input_modality
=
dict
(
use_lidar
=
True
,
use_camera
=
False
)
db_sampler
=
dict
(
db_sampler
=
dict
(
data_root
=
data_root
,
data_root
=
data_root
,
info_path
=
data_root
+
'kitti_dbinfos_train.pkl'
,
info_path
=
data_root
+
'kitti_dbinfos_train.pkl'
,
...
@@ -93,14 +47,14 @@ train_pipeline = [
...
@@ -93,14 +47,14 @@ train_pipeline = [
dict
(
dict
(
type
=
'ObjectNoise'
,
type
=
'ObjectNoise'
,
num_try
=
100
,
num_try
=
100
,
loc_noise
_std
=
[
0.25
,
0.25
,
0.25
],
translation
_std
=
[
0.25
,
0.25
,
0.25
],
global_rot_range
=
[
0.0
,
0.0
],
global_rot_range
=
[
0.0
,
0.0
],
rot_
uniform_nois
e
=
[
-
0.15707963267
,
0.15707963267
]),
rot_
rang
e
=
[
-
0.15707963267
,
0.15707963267
]),
dict
(
type
=
'RandomFlip3D'
,
flip_ratio
=
0.5
),
dict
(
type
=
'RandomFlip3D'
,
flip_ratio
=
0.5
),
dict
(
dict
(
type
=
'GlobalRotScale'
,
type
=
'GlobalRotScale
Trans
'
,
rot_
uniform_nois
e
=
[
-
0.78539816
,
0.78539816
],
rot_
rang
e
=
[
-
0.78539816
,
0.78539816
],
scal
ing_uniform_nois
e
=
[
0.95
,
1.05
]),
scal
e_ratio_rang
e
=
[
0.95
,
1.05
]),
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'ObjectRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'ObjectRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'PointShuffle'
),
dict
(
type
=
'PointShuffle'
),
...
@@ -109,84 +63,32 @@ train_pipeline = [
...
@@ -109,84 +63,32 @@ train_pipeline = [
]
]
test_pipeline
=
[
test_pipeline
=
[
dict
(
type
=
'LoadPointsFromFile'
,
load_dim
=
4
,
use_dim
=
4
),
dict
(
type
=
'LoadPointsFromFile'
,
load_dim
=
4
,
use_dim
=
4
),
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
dict
(
type
=
'DefaultFormatBundle3D'
,
type
=
'MultiScaleFlipAug3D'
,
class_names
=
class_names
,
img_scale
=
(
1333
,
800
),
with_label
=
False
),
pts_scale_ratio
=
1
,
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
])
flip
=
False
,
transforms
=
[
dict
(
type
=
'GlobalRotScaleTrans'
,
rot_range
=
[
0
,
0
],
scale_ratio_range
=
[
1.
,
1.
],
translation_std
=
[
0
,
0
,
0
]),
dict
(
type
=
'RandomFlip3D'
),
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
,
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
])
])
]
]
data
=
dict
(
data
=
dict
(
samples_per_gpu
=
6
,
workers_per_gpu
=
4
,
train
=
dict
(
train
=
dict
(
type
=
'RepeatDataset'
,
type
=
'RepeatDataset'
,
times
=
2
,
times
=
2
,
dataset
=
dict
(
dataset
=
dict
(
pipeline
=
train_pipeline
,
classes
=
class_names
)),
type
=
dataset_type
,
val
=
dict
(
pipeline
=
test_pipeline
,
classes
=
class_names
),
data_root
=
data_root
,
test
=
dict
(
pipeline
=
test_pipeline
,
classes
=
class_names
))
ann_file
=
data_root
+
'kitti_infos_train.pkl'
,
split
=
'training'
,
pts_prefix
=
'velodyne_reduced'
,
pipeline
=
train_pipeline
,
modality
=
input_modality
,
classes
=
class_names
,
test_mode
=
False
)),
val
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'kitti_infos_val.pkl'
,
split
=
'training'
,
pts_prefix
=
'velodyne_reduced'
,
pipeline
=
test_pipeline
,
modality
=
input_modality
,
classes
=
class_names
,
test_mode
=
True
),
test
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'kitti_infos_val.pkl'
,
split
=
'training'
,
pts_prefix
=
'velodyne_reduced'
,
pipeline
=
test_pipeline
,
modality
=
input_modality
,
classes
=
class_names
,
test_mode
=
True
))
# optimizer
lr
=
0.001
# max learning rate
optimizer
=
dict
(
type
=
'AdamW'
,
lr
=
lr
,
betas
=
(
0.95
,
0.99
),
# the momentum is changed during training
weight_decay
=
0.01
)
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
35
,
norm_type
=
2
))
# learning policy
lr_config
=
dict
(
policy
=
'cyclic'
,
target_ratio
=
(
10
,
1e-4
),
cyclic_times
=
1
,
step_ratio_up
=
0.4
)
momentum_config
=
dict
(
policy
=
'cyclic'
,
target_ratio
=
(
0.85
/
0.95
,
1
),
cyclic_times
=
1
,
step_ratio_up
=
0.4
)
checkpoint_config
=
dict
(
interval
=
1
)
evaluation
=
dict
(
interval
=
1
)
# yapf:disable
log_config
=
dict
(
interval
=
50
,
hooks
=
[
dict
(
type
=
'TextLoggerHook'
),
dict
(
type
=
'TensorboardLoggerHook'
)
])
# yapf:enable
# runtime settings
total_epochs
=
80
dist_params
=
dict
(
backend
=
'nccl'
)
log_level
=
'INFO'
work_dir
=
'./work_dirs/pp_secfpn_80e'
load_from
=
None
resume_from
=
None
workflow
=
[(
'train'
,
1
)]
configs/pointpillars/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d.py
View file @
3c5ff9fa
_base_
=
[
_base_
=
[
'../_base_/datasets/nus-3d.py'
,
'../_base_/schedules/schedule_2x.py'
,
'../_base_/models/pointpillars_second_fpn.py'
,
'../_base_/default_runtime.py'
'../_base_/datasets/nus-3d.py'
,
'../_base_/schedules/schedule_2x.py'
,
'../_base_/default_runtime.py'
,
]
]
# model settings
# model settings
voxel_size
=
[
0.25
,
0.25
,
8
]
point_cloud_range
=
[
-
50
,
-
50
,
-
5
,
50
,
50
,
3
]
class_names
=
[
'car'
,
'truck'
,
'trailer'
,
'bus'
,
'construction_vehicle'
,
'bicycle'
,
'motorcycle'
,
'pedestrian'
,
'traffic_cone'
,
'barrier'
]
model
=
dict
(
model
=
dict
(
type
=
'MVXFasterRCNNV2'
,
pts_voxel_layer
=
dict
(
max_num_points
=
64
,
point_cloud_range
=
point_cloud_range
,
voxel_size
=
voxel_size
,
max_voxels
=
(
30000
,
40000
)),
pts_voxel_encoder
=
dict
(
type
=
'HardVFE'
,
in_channels
=
4
,
feat_channels
=
[
64
,
64
],
with_distance
=
False
,
voxel_size
=
voxel_size
,
with_cluster_center
=
True
,
with_voxel_center
=
True
,
point_cloud_range
=
point_cloud_range
,
norm_cfg
=
dict
(
type
=
'naiveSyncBN1d'
,
eps
=
1e-3
,
momentum
=
0.01
)),
pts_middle_encoder
=
dict
(
type
=
'PointPillarsScatter'
,
in_channels
=
64
,
output_shape
=
[
400
,
400
]),
pts_backbone
=
dict
(
type
=
'SECOND'
,
in_channels
=
64
,
norm_cfg
=
dict
(
type
=
'naiveSyncBN2d'
,
eps
=
1e-3
,
momentum
=
0.01
),
layer_nums
=
[
3
,
5
,
5
],
layer_strides
=
[
2
,
2
,
2
],
out_channels
=
[
64
,
128
,
256
]),
pts_neck
=
dict
(
pts_neck
=
dict
(
_delete_
=
True
,
type
=
'SECONDFPN'
,
type
=
'SECONDFPN'
,
norm_cfg
=
dict
(
type
=
'naiveSyncBN2d'
,
eps
=
1e-3
,
momentum
=
0.01
),
norm_cfg
=
dict
(
type
=
'naiveSyncBN2d'
,
eps
=
1e-3
,
momentum
=
0.01
),
in_channels
=
[
64
,
128
,
256
],
in_channels
=
[
64
,
128
,
256
],
upsample_strides
=
[
1
,
2
,
4
],
upsample_strides
=
[
1
,
2
,
4
],
out_channels
=
[
128
,
128
,
128
]),
out_channels
=
[
128
,
128
,
128
]),
pts_bbox_head
=
dict
(
pts_bbox_head
=
dict
(
type
=
'Anchor3DHead'
,
num_classes
=
10
,
in_channels
=
384
,
in_channels
=
384
,
feat_channels
=
384
,
feat_channels
=
384
,
use_direction_classifier
=
True
,
anchor_generator
=
dict
(
anchor_generator
=
dict
(
type
=
'Anchor3DRangeGenerator'
,
_delete_
=
True
,
type
=
'AlignedAnchor3DRangeGenerator'
,
ranges
=
[
ranges
=
[
[
-
49.6
,
-
49.6
,
-
1.80032795
,
49.6
,
49.6
,
-
1.80032795
],
[
-
49.6
,
-
49.6
,
-
1.80032795
,
49.6
,
49.6
,
-
1.80032795
],
[
-
49.6
,
-
49.6
,
-
1.74440365
,
49.6
,
49.6
,
-
1.74440365
],
[
-
49.6
,
-
49.6
,
-
1.74440365
,
49.6
,
49.6
,
-
1.74440365
],
...
@@ -69,56 +39,4 @@ model = dict(
...
@@ -69,56 +39,4 @@ model = dict(
],
],
custom_values
=
[
0
,
0
],
custom_values
=
[
0
,
0
],
rotations
=
[
0
,
1.57
],
rotations
=
[
0
,
1.57
],
reshape_out
=
True
),
reshape_out
=
True
)))
assigner_per_size
=
False
,
diff_rad_by_sin
=
True
,
dir_offset
=
0.7854
,
# pi/4
dir_limit_offset
=
0
,
bbox_coder
=
dict
(
type
=
'DeltaXYZWLHRBBoxCoder'
,
code_size
=
9
),
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
1.0
/
9.0
,
loss_weight
=
1.0
),
loss_dir
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
False
,
loss_weight
=
0.2
)))
# model training and testing settings
train_cfg
=
dict
(
pts
=
dict
(
assigner
=
dict
(
# for Car
type
=
'MaxIoUAssigner'
,
iou_calculator
=
dict
(
type
=
'BboxOverlapsNearest3D'
),
pos_iou_thr
=
0.6
,
neg_iou_thr
=
0.3
,
min_pos_iou
=
0.3
,
ignore_iof_thr
=-
1
),
allowed_border
=
0
,
code_weight
=
[
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
0.2
,
0.2
],
pos_weight
=-
1
,
debug
=
False
))
test_cfg
=
dict
(
pts
=
dict
(
use_rotate_nms
=
True
,
nms_across_levels
=
False
,
nms_pre
=
1000
,
nms_thr
=
0.2
,
score_thr
=
0.05
,
min_bbox_size
=
0
,
max_num
=
500
))
# dataset settings
input_modality
=
dict
(
use_lidar
=
True
,
use_camera
=
False
,
use_radar
=
False
,
use_map
=
False
,
use_external
=
False
)
data
=
dict
(
train
=
dict
(
modality
=
input_modality
),
val
=
dict
(
modality
=
input_modality
),
test
=
dict
(
modality
=
input_modality
))
evaluation
=
dict
(
interval
=
24
)
configs/regnet/hv_pointpillars_regnet-400mf_fpn_sbn-all_4x8_2x_nus-3d.py
View file @
3c5ff9fa
_base_
=
[
_base_
=
[
'../_base_/datasets/nus-3d.py'
,
'../_base_/schedules/schedule_2x.py'
,
'../_base_/models/pointpillars_second_fpn.py'
,
'../_base_/default_runtime.py'
'../_base_/datasets/nus-3d.py'
,
'../_base_/schedules/schedule_2x.py'
,
'../_base_/default_runtime.py'
,
]
]
# model settings
# model settings
voxel_size
=
[
0.25
,
0.25
,
8
]
point_cloud_range
=
[
-
50
,
-
50
,
-
5
,
50
,
50
,
3
]
class_names
=
[
'car'
,
'truck'
,
'trailer'
,
'bus'
,
'construction_vehicle'
,
'bicycle'
,
'motorcycle'
,
'pedestrian'
,
'traffic_cone'
,
'barrier'
]
model
=
dict
(
model
=
dict
(
type
=
'MVXFasterRCNNV2'
,
type
=
'MVXFasterRCNNV2'
,
pretrained
=
dict
(
pts
=
'open-mmlab://regnetx_400mf'
),
pretrained
=
dict
(
pts
=
'open-mmlab://regnetx_400mf'
),
pts_voxel_layer
=
dict
(
max_num_points
=
64
,
point_cloud_range
=
point_cloud_range
,
voxel_size
=
voxel_size
,
max_voxels
=
(
30000
,
40000
)),
pts_voxel_encoder
=
dict
(
type
=
'HardVFE'
,
in_channels
=
4
,
feat_channels
=
[
64
,
64
],
with_distance
=
False
,
voxel_size
=
voxel_size
,
with_cluster_center
=
True
,
with_voxel_center
=
True
,
point_cloud_range
=
point_cloud_range
,
norm_cfg
=
dict
(
type
=
'naiveSyncBN1d'
,
eps
=
1e-3
,
momentum
=
0.01
)),
pts_middle_encoder
=
dict
(
type
=
'PointPillarsScatter'
,
in_channels
=
64
,
output_shape
=
[
400
,
400
]),
pts_backbone
=
dict
(
pts_backbone
=
dict
(
_delete_
=
True
,
type
=
'NoStemRegNet'
,
type
=
'NoStemRegNet'
,
arch
=
dict
(
w0
=
24
,
wa
=
24.48
,
wm
=
2.54
,
group_w
=
16
,
depth
=
22
,
bot_mul
=
1.0
),
arch
=
dict
(
w0
=
24
,
wa
=
24.48
,
wm
=
2.54
,
group_w
=
16
,
depth
=
22
,
bot_mul
=
1.0
),
out_indices
=
(
1
,
2
,
3
),
out_indices
=
(
1
,
2
,
3
),
frozen_stages
=-
1
,
frozen_stages
=-
1
,
strides
=
(
1
,
2
,
2
,
2
),
strides
=
(
1
,
2
,
2
,
2
),
base_channels
=
64
,
base_channels
=
64
,
stem_channels
=
64
,
norm_cfg
=
dict
(
type
=
'naiveSyncBN2d'
,
eps
=
1e-3
,
momentum
=
0.01
),
norm_cfg
=
dict
(
type
=
'naiveSyncBN2d'
,
eps
=
1e-3
,
momentum
=
0.01
),
norm_eval
=
False
,
norm_eval
=
False
,
style
=
'pytorch'
),
style
=
'pytorch'
),
pts_neck
=
dict
(
pts_neck
=
dict
(
in_channels
=
[
64
,
160
,
384
]))
type
=
'FPN'
,
norm_cfg
=
dict
(
type
=
'naiveSyncBN2d'
,
eps
=
1e-3
,
momentum
=
0.01
),
act_cfg
=
dict
(
type
=
'ReLU'
),
in_channels
=
[
64
,
160
,
384
],
out_channels
=
256
,
start_level
=
0
,
num_outs
=
3
),
pts_bbox_head
=
dict
(
type
=
'Anchor3DHead'
,
num_classes
=
10
,
in_channels
=
256
,
feat_channels
=
256
,
use_direction_classifier
=
True
,
anchor_generator
=
dict
(
type
=
'AlignedAnchor3DRangeGenerator'
,
ranges
=
[[
-
50
,
-
50
,
-
1.8
,
50
,
50
,
-
1.8
]],
scales
=
[
1
,
2
,
4
],
sizes
=
[
[
0.8660
,
2.5981
,
1.
],
# 1.5/sqrt(3)
[
0.5774
,
1.7321
,
1.
],
# 1/sqrt(3)
[
1.
,
1.
,
1.
],
[
0.4
,
0.4
,
1
],
],
custom_values
=
[
0
,
0
],
rotations
=
[
0
,
1.57
],
reshape_out
=
True
),
assigner_per_size
=
False
,
diff_rad_by_sin
=
True
,
dir_offset
=
0.7854
,
# pi/4
dir_limit_offset
=
0
,
bbox_coder
=
dict
(
type
=
'DeltaXYZWLHRBBoxCoder'
,
code_size
=
9
),
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
1.0
/
9.0
,
loss_weight
=
1.0
),
loss_dir
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
False
,
loss_weight
=
0.2
)))
# model training and testing settings
train_cfg
=
dict
(
pts
=
dict
(
assigner
=
dict
(
type
=
'MaxIoUAssigner'
,
iou_calculator
=
dict
(
type
=
'BboxOverlapsNearest3D'
),
pos_iou_thr
=
0.6
,
neg_iou_thr
=
0.3
,
min_pos_iou
=
0.3
,
ignore_iof_thr
=-
1
),
allowed_border
=
0
,
code_weight
=
[
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
0.2
,
0.2
],
pos_weight
=-
1
,
debug
=
False
))
test_cfg
=
dict
(
pts
=
dict
(
use_rotate_nms
=
True
,
nms_across_levels
=
False
,
nms_pre
=
1000
,
nms_thr
=
0.2
,
score_thr
=
0.05
,
min_bbox_size
=
0
,
max_num
=
500
))
# dataset settings
input_modality
=
dict
(
use_lidar
=
True
,
use_depth
=
False
,
use_lidar_intensity
=
True
,
use_camera
=
False
)
data
=
dict
(
train
=
dict
(
modality
=
input_modality
),
val
=
dict
(
modality
=
input_modality
),
test
=
dict
(
modality
=
input_modality
))
evaluation
=
dict
(
interval
=
24
)
configs/regnet/hv_pointpillars_regnet-400mf_secfpn_sbn-all_4x8_2x_nus-3d.py
View file @
3c5ff9fa
_base_
=
[
_base_
=
'./hv_pointpillars_regnet-400mf_fpn_sbn-all_4x8_2x_nus-3d.py'
'../_base_/datasets/nus-3d.py'
,
'../_base_/schedules/schedule_2x.py'
,
'../_base_/default_runtime.py'
]
# model settings
# model settings
voxel_size
=
[
0.25
,
0.25
,
8
]
point_cloud_range
=
[
-
50
,
-
50
,
-
5
,
50
,
50
,
3
]
class_names
=
[
'car'
,
'truck'
,
'trailer'
,
'bus'
,
'construction_vehicle'
,
'bicycle'
,
'motorcycle'
,
'pedestrian'
,
'traffic_cone'
,
'barrier'
]
model
=
dict
(
model
=
dict
(
type
=
'MVXFasterRCNNV2'
,
pretrained
=
dict
(
pts
=
'open-mmlab://regnetx_400mf'
),
pts_voxel_layer
=
dict
(
max_num_points
=
64
,
point_cloud_range
=
point_cloud_range
,
voxel_size
=
voxel_size
,
max_voxels
=
(
30000
,
40000
)),
pts_voxel_encoder
=
dict
(
type
=
'HardVFE'
,
in_channels
=
4
,
feat_channels
=
[
64
,
64
],
with_distance
=
False
,
voxel_size
=
voxel_size
,
with_cluster_center
=
True
,
with_voxel_center
=
True
,
point_cloud_range
=
point_cloud_range
,
norm_cfg
=
dict
(
type
=
'naiveSyncBN1d'
,
eps
=
1e-3
,
momentum
=
0.01
)),
pts_middle_encoder
=
dict
(
type
=
'PointPillarsScatter'
,
in_channels
=
64
,
output_shape
=
[
400
,
400
]),
pts_backbone
=
dict
(
type
=
'NoStemRegNet'
,
arch
=
dict
(
w0
=
24
,
wa
=
24.48
,
wm
=
2.54
,
group_w
=
16
,
depth
=
22
,
bot_mul
=
1.0
),
out_indices
=
(
1
,
2
,
3
),
frozen_stages
=-
1
,
strides
=
(
1
,
2
,
2
,
2
),
base_channels
=
64
,
norm_cfg
=
dict
(
type
=
'naiveSyncBN2d'
,
eps
=
1e-3
,
momentum
=
0.01
),
norm_eval
=
False
,
style
=
'pytorch'
),
pts_neck
=
dict
(
pts_neck
=
dict
(
type
=
'SECONDFPN'
,
type
=
'SECONDFPN'
,
_delete_
=
True
,
norm_cfg
=
dict
(
type
=
'naiveSyncBN2d'
,
eps
=
1e-3
,
momentum
=
0.01
),
norm_cfg
=
dict
(
type
=
'naiveSyncBN2d'
,
eps
=
1e-3
,
momentum
=
0.01
),
in_channels
=
[
64
,
160
,
384
],
in_channels
=
[
64
,
160
,
384
],
upsample_strides
=
[
1
,
2
,
4
],
upsample_strides
=
[
1
,
2
,
4
],
out_channels
=
[
128
,
128
,
128
]),
out_channels
=
[
128
,
128
,
128
]),
pts_bbox_head
=
dict
(
pts_bbox_head
=
dict
(
type
=
'Anchor3DHead'
,
type
=
'Anchor3DHead'
,
num_classes
=
10
,
in_channels
=
384
,
in_channels
=
384
,
feat_channels
=
384
,
feat_channels
=
384
,
use_direction_classifier
=
True
,
anchor_generator
=
dict
(
anchor_generator
=
dict
(
type
=
'Anchor3DRangeGenerator'
,
_delete_
=
True
,
type
=
'AlignedAnchor3DRangeGenerator'
,
ranges
=
[
ranges
=
[
[
-
49.6
,
-
49.6
,
-
1.80032795
,
49.6
,
49.6
,
-
1.80032795
],
[
-
49.6
,
-
49.6
,
-
1.80032795
,
49.6
,
49.6
,
-
1.80032795
],
[
-
49.6
,
-
49.6
,
-
1.74440365
,
49.6
,
49.6
,
-
1.74440365
],
[
-
49.6
,
-
49.6
,
-
1.74440365
,
49.6
,
49.6
,
-
1.74440365
],
...
@@ -73,56 +35,5 @@ model = dict(
...
@@ -73,56 +35,5 @@ model = dict(
],
],
custom_values
=
[
0
,
0
],
custom_values
=
[
0
,
0
],
rotations
=
[
0
,
1.57
],
rotations
=
[
0
,
1.57
],
reshape_out
=
True
),
reshape_out
=
True
)))
assigner_per_size
=
False
,
diff_rad_by_sin
=
True
,
dir_offset
=
0.7854
,
# pi/4
dir_limit_offset
=
0
,
bbox_coder
=
dict
(
type
=
'DeltaXYZWLHRBBoxCoder'
,
code_size
=
9
),
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
1.0
/
9.0
,
loss_weight
=
1.0
),
loss_dir
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
False
,
loss_weight
=
0.2
)))
# model training and testing settings
# model training and testing settings
train_cfg
=
dict
(
pts
=
dict
(
assigner
=
dict
(
# for Car
type
=
'MaxIoUAssigner'
,
iou_calculator
=
dict
(
type
=
'BboxOverlapsNearest3D'
),
pos_iou_thr
=
0.6
,
neg_iou_thr
=
0.3
,
min_pos_iou
=
0.3
,
ignore_iof_thr
=-
1
),
allowed_border
=
0
,
code_weight
=
[
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
1.0
,
0.2
,
0.2
],
pos_weight
=-
1
,
debug
=
False
))
test_cfg
=
dict
(
pts
=
dict
(
use_rotate_nms
=
True
,
nms_across_levels
=
False
,
nms_pre
=
1000
,
nms_thr
=
0.2
,
score_thr
=
0.05
,
min_bbox_size
=
0
,
max_num
=
500
))
# dataset settings
input_modality
=
dict
(
use_lidar
=
True
,
use_depth
=
False
,
use_lidar_intensity
=
True
,
use_camera
=
False
,
)
data
=
dict
(
train
=
dict
(
modality
=
input_modality
),
val
=
dict
(
modality
=
input_modality
),
test
=
dict
(
modality
=
input_modality
))
evaluation
=
dict
(
interval
=
24
)
configs/second/hv_second_secfpn_6x8_80e_kitti-3d-3class.py
View file @
3c5ff9fa
# model settings
_base_
=
[
voxel_size
=
[
0.05
,
0.05
,
0.1
]
'../_base_/models/hv_second_secfpn.py'
,
point_cloud_range
=
[
0
,
-
40
,
-
3
,
70.4
,
40
,
1
]
'../_base_/datasets/kitti-3d-3class.py'
,
'../_base_/schedules/cyclic_40e.py'
,
'../_base_/default_runtime.py'
model
=
dict
(
type
=
'VoxelNet'
,
voxel_layer
=
dict
(
max_num_points
=
5
,
point_cloud_range
=
point_cloud_range
,
voxel_size
=
voxel_size
,
max_voxels
=
(
16000
,
40000
),
# (training, testing) max_voxels
),
voxel_encoder
=
dict
(
type
=
'HardSimpleVFE'
),
middle_encoder
=
dict
(
type
=
'SparseEncoder'
,
in_channels
=
4
,
sparse_shape
=
[
41
,
1600
,
1408
],
order
=
(
'conv'
,
'norm'
,
'act'
)),
backbone
=
dict
(
type
=
'SECOND'
,
in_channels
=
256
,
layer_nums
=
[
5
,
5
],
layer_strides
=
[
1
,
2
],
out_channels
=
[
128
,
256
],
),
neck
=
dict
(
type
=
'SECONDFPN'
,
in_channels
=
[
128
,
256
],
upsample_strides
=
[
1
,
2
],
out_channels
=
[
256
,
256
],
),
bbox_head
=
dict
(
type
=
'Anchor3DHead'
,
num_classes
=
3
,
in_channels
=
512
,
feat_channels
=
512
,
use_direction_classifier
=
True
,
anchor_generator
=
dict
(
type
=
'Anchor3DRangeGenerator'
,
ranges
=
[
[
0
,
-
40.0
,
-
0.6
,
70.4
,
40.0
,
-
0.6
],
[
0
,
-
40.0
,
-
0.6
,
70.4
,
40.0
,
-
0.6
],
[
0
,
-
40.0
,
-
1.78
,
70.4
,
40.0
,
-
1.78
],
],
sizes
=
[[
0.6
,
0.8
,
1.73
],
[
0.6
,
1.76
,
1.73
],
[
1.6
,
3.9
,
1.56
]],
rotations
=
[
0
,
1.57
],
reshape_out
=
False
),
diff_rad_by_sin
=
True
,
bbox_coder
=
dict
(
type
=
'DeltaXYZWLHRBBoxCoder'
),
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
1.0
/
9.0
,
loss_weight
=
2.0
),
loss_dir
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
False
,
loss_weight
=
0.2
),
),
)
# model training and testing settings
train_cfg
=
dict
(
assigner
=
[
dict
(
# for Pedestrian
type
=
'MaxIoUAssigner'
,
iou_calculator
=
dict
(
type
=
'BboxOverlapsNearest3D'
),
pos_iou_thr
=
0.35
,
neg_iou_thr
=
0.2
,
min_pos_iou
=
0.2
,
ignore_iof_thr
=-
1
),
dict
(
# for Cyclist
type
=
'MaxIoUAssigner'
,
iou_calculator
=
dict
(
type
=
'BboxOverlapsNearest3D'
),
pos_iou_thr
=
0.35
,
neg_iou_thr
=
0.2
,
min_pos_iou
=
0.2
,
ignore_iof_thr
=-
1
),
dict
(
# for Car
type
=
'MaxIoUAssigner'
,
iou_calculator
=
dict
(
type
=
'BboxOverlapsNearest3D'
),
pos_iou_thr
=
0.6
,
neg_iou_thr
=
0.45
,
min_pos_iou
=
0.45
,
ignore_iof_thr
=-
1
),
],
allowed_border
=
0
,
pos_weight
=-
1
,
debug
=
False
)
test_cfg
=
dict
(
use_rotate_nms
=
True
,
nms_across_levels
=
False
,
nms_thr
=
0.01
,
score_thr
=
0.1
,
min_bbox_size
=
0
,
nms_pre
=
100
,
max_num
=
50
)
# dataset settings
dataset_type
=
'KittiDataset'
data_root
=
'data/kitti/'
class_names
=
[
'Pedestrian'
,
'Cyclist'
,
'Car'
]
input_modality
=
dict
(
use_lidar
=
True
,
use_camera
=
False
)
db_sampler
=
dict
(
data_root
=
data_root
,
info_path
=
data_root
+
'kitti_dbinfos_train.pkl'
,
rate
=
1.0
,
object_rot_range
=
[
0.0
,
0.0
],
prepare
=
dict
(
filter_by_difficulty
=
[
-
1
],
filter_by_min_points
=
dict
(
Car
=
5
,
Pedestrian
=
10
,
Cyclist
=
10
,
)),
classes
=
class_names
,
sample_groups
=
dict
(
Car
=
12
,
Pedestrian
=
6
,
Cyclist
=
6
,
))
file_client_args
=
dict
(
backend
=
'disk'
)
# file_client_args = dict(
# backend='petrel', path_mapping=dict(data='s3://kitti_data/'))
train_pipeline
=
[
dict
(
type
=
'LoadPointsFromFile'
,
load_dim
=
4
,
use_dim
=
4
,
file_client_args
=
file_client_args
),
dict
(
type
=
'LoadAnnotations3D'
,
with_bbox_3d
=
True
,
with_label_3d
=
True
,
file_client_args
=
file_client_args
),
dict
(
type
=
'ObjectSample'
,
db_sampler
=
db_sampler
),
dict
(
type
=
'ObjectNoise'
,
num_try
=
100
,
loc_noise_std
=
[
1.0
,
1.0
,
0.5
],
global_rot_range
=
[
0.0
,
0.0
],
rot_uniform_noise
=
[
-
0.78539816
,
0.78539816
]),
dict
(
type
=
'RandomFlip3D'
,
flip_ratio
=
0.5
),
dict
(
type
=
'GlobalRotScale'
,
rot_uniform_noise
=
[
-
0.78539816
,
0.78539816
],
scaling_uniform_noise
=
[
0.95
,
1.05
]),
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'ObjectRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'PointShuffle'
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes_3d'
,
'gt_labels_3d'
]),
]
]
test_pipeline
=
[
dict
(
type
=
'LoadPointsFromFile'
,
load_dim
=
4
,
use_dim
=
4
,
file_client_args
=
file_client_args
),
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
,
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
]),
]
data
=
dict
(
samples_per_gpu
=
6
,
workers_per_gpu
=
4
,
train
=
dict
(
type
=
'RepeatDataset'
,
times
=
2
,
dataset
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'kitti_infos_train.pkl'
,
split
=
'training'
,
pts_prefix
=
'velodyne_reduced'
,
pipeline
=
train_pipeline
,
modality
=
input_modality
,
classes
=
class_names
,
test_mode
=
False
)),
val
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'kitti_infos_val.pkl'
,
split
=
'training'
,
pts_prefix
=
'velodyne_reduced'
,
pipeline
=
test_pipeline
,
modality
=
input_modality
,
classes
=
class_names
,
test_mode
=
True
),
test
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'kitti_infos_val.pkl'
,
split
=
'training'
,
pts_prefix
=
'velodyne_reduced'
,
pipeline
=
test_pipeline
,
modality
=
input_modality
,
classes
=
class_names
,
test_mode
=
True
))
# optimizer
lr
=
0.0018
# max learning rate
optimizer
=
dict
(
type
=
'AdamW'
,
lr
=
lr
,
betas
=
(
0.95
,
0.99
),
weight_decay
=
0.01
)
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
10
,
norm_type
=
2
))
lr_config
=
dict
(
policy
=
'cyclic'
,
target_ratio
=
(
10
,
1e-4
),
cyclic_times
=
1
,
step_ratio_up
=
0.4
,
)
momentum_config
=
dict
(
policy
=
'cyclic'
,
target_ratio
=
(
0.85
/
0.95
,
1
),
cyclic_times
=
1
,
step_ratio_up
=
0.4
,
)
checkpoint_config
=
dict
(
interval
=
1
)
evaluation
=
dict
(
interval
=
1
)
# yapf:disable
log_config
=
dict
(
interval
=
50
,
hooks
=
[
dict
(
type
=
'TextLoggerHook'
),
dict
(
type
=
'TensorboardLoggerHook'
)
])
# yapf:enable
# runtime settings
total_epochs
=
40
dist_params
=
dict
(
backend
=
'nccl'
)
log_level
=
'INFO'
work_dir
=
'./work_dirs/sec_secfpn_80e'
load_from
=
None
resume_from
=
None
workflow
=
[(
'train'
,
1
)]
configs/second/hv_second_secfpn_6x8_80e_kitti-3d-car.py
View file @
3c5ff9fa
# model settings
_base_
=
[
voxel_size
=
[
0.05
,
0.05
,
0.1
]
'../_base_/models/hv_second_secfpn.py'
,
point_cloud_range
=
[
0
,
-
40
,
-
3
,
70.4
,
40
,
1
]
# velodyne coordinates, x, y, z
'../_base_/datasets/kitti-3d-car.py'
,
'../_base_/schedules/cyclic_40e.py'
,
'../_base_/default_runtime.py'
]
point_cloud_range
=
[
0
,
-
40
,
-
3
,
70.4
,
40
,
1
]
model
=
dict
(
model
=
dict
(
type
=
'VoxelNet'
,
voxel_layer
=
dict
(
max_num_points
=
5
,
# max_points_per_voxel
point_cloud_range
=
point_cloud_range
,
voxel_size
=
voxel_size
,
max_voxels
=
(
16000
,
40000
),
# (training, testing) max_voxels
),
voxel_encoder
=
dict
(
type
=
'HardSimpleVFE'
),
middle_encoder
=
dict
(
type
=
'SparseEncoder'
,
in_channels
=
4
,
sparse_shape
=
[
41
,
1600
,
1408
],
order
=
(
'conv'
,
'norm'
,
'act'
)),
backbone
=
dict
(
type
=
'SECOND'
,
in_channels
=
256
,
layer_nums
=
[
5
,
5
],
layer_strides
=
[
1
,
2
],
out_channels
=
[
128
,
256
],
),
neck
=
dict
(
type
=
'SECONDFPN'
,
in_channels
=
[
128
,
256
],
upsample_strides
=
[
1
,
2
],
out_channels
=
[
256
,
256
],
),
bbox_head
=
dict
(
bbox_head
=
dict
(
type
=
'Anchor3DHead'
,
type
=
'Anchor3DHead'
,
num_classes
=
1
,
num_classes
=
1
,
in_channels
=
512
,
feat_channels
=
512
,
use_direction_classifier
=
True
,
anchor_generator
=
dict
(
anchor_generator
=
dict
(
_delete_
=
True
,
type
=
'Anchor3DRangeGenerator'
,
type
=
'Anchor3DRangeGenerator'
,
ranges
=
[[
0
,
-
40.0
,
-
1.78
,
70.4
,
40.0
,
-
1.78
]],
ranges
=
[[
0
,
-
40.0
,
-
1.78
,
70.4
,
40.0
,
-
1.78
]],
sizes
=
[[
1.6
,
3.9
,
1.56
]],
sizes
=
[[
1.6
,
3.9
,
1.56
]],
rotations
=
[
0
,
1.57
],
rotations
=
[
0
,
1.57
],
reshape_out
=
True
),
reshape_out
=
True
)))
diff_rad_by_sin
=
True
,
bbox_coder
=
dict
(
type
=
'DeltaXYZWLHRBBoxCoder'
),
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
1.0
/
9.0
,
loss_weight
=
2.0
),
loss_dir
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
False
,
loss_weight
=
0.2
),
),
)
# model training and testing settings
# model training and testing settings
train_cfg
=
dict
(
train_cfg
=
dict
(
_delete_
=
True
,
assigner
=
dict
(
assigner
=
dict
(
type
=
'MaxIoUAssigner'
,
type
=
'MaxIoUAssigner'
,
iou_calculator
=
dict
(
type
=
'BboxOverlapsNearest3D'
),
iou_calculator
=
dict
(
type
=
'BboxOverlapsNearest3D'
),
...
@@ -66,146 +28,3 @@ train_cfg = dict(
...
@@ -66,146 +28,3 @@ train_cfg = dict(
allowed_border
=
0
,
allowed_border
=
0
,
pos_weight
=-
1
,
pos_weight
=-
1
,
debug
=
False
)
debug
=
False
)
test_cfg
=
dict
(
use_rotate_nms
=
True
,
nms_across_levels
=
False
,
nms_thr
=
0.01
,
score_thr
=
0.1
,
min_bbox_size
=
0
,
nms_pre
=
100
,
max_num
=
50
)
# dataset settings
dataset_type
=
'KittiDataset'
data_root
=
'data/kitti/'
class_names
=
[
'Car'
]
input_modality
=
dict
(
use_lidar
=
True
,
use_camera
=
False
)
db_sampler
=
dict
(
data_root
=
data_root
,
info_path
=
data_root
+
'kitti_dbinfos_train.pkl'
,
rate
=
1.0
,
object_rot_range
=
[
0.0
,
0.0
],
prepare
=
dict
(
filter_by_difficulty
=
[
-
1
],
filter_by_min_points
=
dict
(
Car
=
5
),
),
classes
=
class_names
,
sample_groups
=
dict
(
Car
=
15
),
)
file_client_args
=
dict
(
backend
=
'disk'
)
# file_client_args = dict(
# backend='petrel', path_mapping=dict(data='s3://kitti_data/'))
train_pipeline
=
[
dict
(
type
=
'LoadPointsFromFile'
,
load_dim
=
4
,
use_dim
=
4
,
file_client_args
=
file_client_args
),
dict
(
type
=
'LoadAnnotations3D'
,
with_bbox_3d
=
True
,
with_label_3d
=
True
,
file_client_args
=
file_client_args
),
dict
(
type
=
'ObjectSample'
,
db_sampler
=
db_sampler
),
dict
(
type
=
'ObjectNoise'
,
num_try
=
100
,
loc_noise_std
=
[
1.0
,
1.0
,
0.5
],
global_rot_range
=
[
0.0
,
0.0
],
rot_uniform_noise
=
[
-
0.78539816
,
0.78539816
]),
dict
(
type
=
'RandomFlip3D'
,
flip_ratio
=
0.5
),
dict
(
type
=
'GlobalRotScale'
,
rot_uniform_noise
=
[
-
0.78539816
,
0.78539816
],
scaling_uniform_noise
=
[
0.95
,
1.05
]),
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'ObjectRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'PointShuffle'
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes_3d'
,
'gt_labels_3d'
]),
]
test_pipeline
=
[
dict
(
type
=
'LoadPointsFromFile'
,
load_dim
=
4
,
use_dim
=
4
,
file_client_args
=
file_client_args
),
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
,
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
]),
]
data
=
dict
(
samples_per_gpu
=
6
,
workers_per_gpu
=
4
,
train
=
dict
(
type
=
'RepeatDataset'
,
times
=
2
,
dataset
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'kitti_infos_train.pkl'
,
split
=
'training'
,
pts_prefix
=
'velodyne_reduced'
,
pipeline
=
train_pipeline
,
modality
=
input_modality
,
classes
=
class_names
,
test_mode
=
False
)),
val
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'kitti_infos_val.pkl'
,
split
=
'training'
,
pts_prefix
=
'velodyne_reduced'
,
pipeline
=
test_pipeline
,
modality
=
input_modality
,
classes
=
class_names
,
test_mode
=
True
),
test
=
dict
(
type
=
dataset_type
,
data_root
=
data_root
,
ann_file
=
data_root
+
'kitti_infos_val.pkl'
,
split
=
'training'
,
pts_prefix
=
'velodyne_reduced'
,
pipeline
=
test_pipeline
,
modality
=
input_modality
,
classes
=
class_names
,
test_mode
=
True
))
# optimizer
lr
=
0.0018
# max learning rate
optimizer
=
dict
(
type
=
'AdamW'
,
lr
=
lr
,
betas
=
(
0.95
,
0.99
),
weight_decay
=
0.01
)
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
10
,
norm_type
=
2
))
lr_config
=
dict
(
policy
=
'cyclic'
,
target_ratio
=
(
10
,
1e-4
),
cyclic_times
=
1
,
step_ratio_up
=
0.4
,
)
momentum_config
=
dict
(
policy
=
'cyclic'
,
target_ratio
=
(
0.85
/
0.95
,
1
),
cyclic_times
=
1
,
step_ratio_up
=
0.4
,
)
checkpoint_config
=
dict
(
interval
=
1
)
evaluation
=
dict
(
interval
=
1
)
# yapf:disable
log_config
=
dict
(
interval
=
50
,
hooks
=
[
dict
(
type
=
'TextLoggerHook'
),
dict
(
type
=
'TensorboardLoggerHook'
)
])
# yapf:enable
# runtime settings
total_epochs
=
40
dist_params
=
dict
(
backend
=
'nccl'
)
log_level
=
'INFO'
work_dir
=
'./work_dirs/sec_secfpn_80e'
load_from
=
None
resume_from
=
None
workflow
=
[(
'train'
,
1
)]
configs/votenet/votenet_16x8_sunrgbd-3d-10class.py
View file @
3c5ff9fa
...
@@ -19,13 +19,3 @@ model = dict(
...
@@ -19,13 +19,3 @@ model = dict(
[
0.404671
,
1.071108
,
1.688889
],
[
0.76584
,
1.398258
,
0.472728
]
[
0.404671
,
1.071108
,
1.688889
],
[
0.76584
,
1.398258
,
0.472728
]
]),
]),
))
))
# optimizer
# yapf:disable
log_config
=
dict
(
interval
=
30
,
hooks
=
[
dict
(
type
=
'TextLoggerHook'
),
dict
(
type
=
'TensorboardLoggerHook'
)
])
# yapf:enable
configs/votenet/votenet_8x8_scannet-3d-18class.py
View file @
3c5ff9fa
...
@@ -34,7 +34,7 @@ model = dict(
...
@@ -34,7 +34,7 @@ model = dict(
# optimizer
# optimizer
# yapf:disable
# yapf:disable
log_config
=
dict
(
log_config
=
dict
(
interval
=
5
0
,
interval
=
3
0
,
hooks
=
[
hooks
=
[
dict
(
type
=
'TextLoggerHook'
),
dict
(
type
=
'TextLoggerHook'
),
dict
(
type
=
'TensorboardLoggerHook'
)
dict
(
type
=
'TensorboardLoggerHook'
)
...
...
docs/tutorials/data_pipeline.md
View file @
3c5ff9fa
...
@@ -117,7 +117,7 @@ For each operation, we list the related dict fields that are added/updated/remov
...
@@ -117,7 +117,7 @@ For each operation, we list the related dict fields that are added/updated/remov
-
update: img, proposals, gt_bboxes, gt_bboxes_ignore, gt_labels, gt_masks, gt_semantic_seg
-
update: img, proposals, gt_bboxes, gt_bboxes_ignore, gt_labels, gt_masks, gt_semantic_seg
`Collect`
`Collect`
-
add: img_meta (the keys of img_meta is specified by
`meta_keys`
)
-
add: img_metas (the keys of img_metas are specified by
`meta_keys`
)
-
remove: all other keys except for those specified by
`keys`
-
remove: all other keys except for those specified by
`keys`
### Test time augmentation
### Test time augmentation
...
...
mmdet3d/apis/train.py
View file @
3c5ff9fa
...
@@ -28,8 +28,8 @@ def batch_processor(model, data, train_mode):
...
@@ -28,8 +28,8 @@ def batch_processor(model, data, train_mode):
losses
=
model
(
**
data
)
losses
=
model
(
**
data
)
loss
,
log_vars
=
parse_losses
(
losses
)
loss
,
log_vars
=
parse_losses
(
losses
)
if
'img_meta'
in
data
:
if
'img_meta
s
'
in
data
:
num_samples
=
len
(
data
[
'img_meta'
].
data
)
num_samples
=
len
(
data
[
'img_meta
s
'
].
data
)
else
:
else
:
num_samples
=
len
(
data
[
'img'
].
data
)
num_samples
=
len
(
data
[
'img'
].
data
)
outputs
=
dict
(
loss
=
loss
,
log_vars
=
log_vars
,
num_samples
=
num_samples
)
outputs
=
dict
(
loss
=
loss
,
log_vars
=
log_vars
,
num_samples
=
num_samples
)
...
...
mmdet3d/core/bbox/__init__.py
View file @
3c5ff9fa
...
@@ -8,8 +8,9 @@ from .samplers import (BaseSampler, CombinedSampler,
...
@@ -8,8 +8,9 @@ from .samplers import (BaseSampler, CombinedSampler,
InstanceBalancedPosSampler
,
IoUBalancedNegSampler
,
InstanceBalancedPosSampler
,
IoUBalancedNegSampler
,
PseudoSampler
,
RandomSampler
,
SamplingResult
)
PseudoSampler
,
RandomSampler
,
SamplingResult
)
from
.structures
import
(
BaseInstance3DBoxes
,
Box3DMode
,
CameraInstance3DBoxes
,
from
.structures
import
(
BaseInstance3DBoxes
,
Box3DMode
,
CameraInstance3DBoxes
,
DepthInstance3DBoxes
,
LiDARInstance3DBoxes
)
DepthInstance3DBoxes
,
LiDARInstance3DBoxes
,
from
.transforms
import
(
bbox3d2result
,
bbox3d2roi
,
xywhr2xyxyr
)
from
.transforms
import
(
bbox3d2result
,
bbox3d2roi
,
bbox3d_mapping_back
,
box3d_to_corner3d_upright_depth
,
box3d_to_corner3d_upright_depth
,
boxes3d_to_bev_torch_lidar
)
boxes3d_to_bev_torch_lidar
)
...
@@ -27,5 +28,5 @@ __all__ = [
...
@@ -27,5 +28,5 @@ __all__ = [
'bbox_overlaps_3d'
,
'Box3DMode'
,
'LiDARInstance3DBoxes'
,
'bbox_overlaps_3d'
,
'Box3DMode'
,
'LiDARInstance3DBoxes'
,
'CameraInstance3DBoxes'
,
'bbox3d2roi'
,
'bbox3d2result'
,
'CameraInstance3DBoxes'
,
'bbox3d2roi'
,
'bbox3d2result'
,
'box3d_to_corner3d_upright_depth'
,
'DepthInstance3DBoxes'
,
'box3d_to_corner3d_upright_depth'
,
'DepthInstance3DBoxes'
,
'BaseInstance3DBoxes'
'BaseInstance3DBoxes'
,
'bbox3d_mapping_back'
,
'xywhr2xyxyr'
]
]
mmdet3d/core/bbox/structures/__init__.py
View file @
3c5ff9fa
...
@@ -3,8 +3,9 @@ from .box_3d_mode import Box3DMode
...
@@ -3,8 +3,9 @@ from .box_3d_mode import Box3DMode
from
.cam_box3d
import
CameraInstance3DBoxes
from
.cam_box3d
import
CameraInstance3DBoxes
from
.depth_box3d
import
DepthInstance3DBoxes
from
.depth_box3d
import
DepthInstance3DBoxes
from
.lidar_box3d
import
LiDARInstance3DBoxes
from
.lidar_box3d
import
LiDARInstance3DBoxes
from
.utils
import
xywhr2xyxyr
__all__
=
[
__all__
=
[
'Box3DMode'
,
'BaseInstance3DBoxes'
,
'LiDARInstance3DBoxes'
,
'Box3DMode'
,
'BaseInstance3DBoxes'
,
'LiDARInstance3DBoxes'
,
'CameraInstance3DBoxes'
,
'DepthInstance3DBoxes'
'CameraInstance3DBoxes'
,
'DepthInstance3DBoxes'
,
'xywhr2xyxyr'
]
]
mmdet3d/core/bbox/structures/base_box3d.py
View file @
3c5ff9fa
...
@@ -334,7 +334,10 @@ class BaseInstance3DBoxes(object):
...
@@ -334,7 +334,10 @@ class BaseInstance3DBoxes(object):
# use torch.cat (v.s. layers.cat)
# use torch.cat (v.s. layers.cat)
# so the returned boxes never share storage with input
# so the returned boxes never share storage with input
cat_boxes
=
cls
(
torch
.
cat
([
b
.
tensor
for
b
in
boxes_list
],
dim
=
0
))
cat_boxes
=
cls
(
torch
.
cat
([
b
.
tensor
for
b
in
boxes_list
],
dim
=
0
),
box_dim
=
boxes_list
[
0
].
tensor
.
shape
[
1
],
with_yaw
=
boxes_list
[
0
].
with_yaw
)
return
cat_boxes
return
cat_boxes
def
to
(
self
,
device
):
def
to
(
self
,
device
):
...
...
mmdet3d/core/bbox/transforms.py
View file @
3c5ff9fa
import
torch
import
torch
def bbox3d_mapping_back(bboxes, scale_factor, flip):
    """Map 3D bboxes from the test-time augmented frame back to the original.

    The input is cloned first, so the caller's boxes are never modified.
    The augmentation is undone on the clone: the flip is reverted (if one
    was applied) and the boxes are rescaled by ``1 / scale_factor``.

    Args:
        bboxes: Boxes object exposing ``clone()``, ``flip()`` and
            ``scale(factor)``.
            NOTE(review): presumably a ``BaseInstance3DBoxes`` subclass --
            confirm against callers.
        scale_factor (float): Scale that was applied to the point cloud at
            test time. Must be non-zero, since it is inverted here.
        flip (bool): Whether the sample was flipped at test time.

    Returns:
        The cloned boxes mapped back to the original frame.
    """
    new_bboxes = bboxes.clone()
    # The return values of flip()/scale() are discarded, so this function
    # relies on both methods mutating ``new_bboxes`` in place.
    if flip:
        new_bboxes.flip()
    new_bboxes.scale(1 / scale_factor)
    return new_bboxes
def
transform_lidar_to_cam
(
boxes_lidar
):
def
transform_lidar_to_cam
(
boxes_lidar
):
"""
"""
Only transform format, not exactly in camera coords
Only transform format, not exactly in camera coords
...
...
mmdet3d/core/post_processing/__init__.py
View file @
3c5ff9fa
...
@@ -2,9 +2,10 @@ from mmdet.core.post_processing import (merge_aug_bboxes, merge_aug_masks,
...
@@ -2,9 +2,10 @@ from mmdet.core.post_processing import (merge_aug_bboxes, merge_aug_masks,
merge_aug_proposals
,
merge_aug_scores
,
merge_aug_proposals
,
merge_aug_scores
,
multiclass_nms
)
multiclass_nms
)
from
.box3d_nms
import
aligned_3d_nms
,
box3d_multiclass_nms
from
.box3d_nms
import
aligned_3d_nms
,
box3d_multiclass_nms
from
.merge_augs
import
merge_aug_bboxes_3d
__all__
=
[
__all__
=
[
'multiclass_nms'
,
'merge_aug_proposals'
,
'merge_aug_bboxes'
,
'multiclass_nms'
,
'merge_aug_proposals'
,
'merge_aug_bboxes'
,
'merge_aug_scores'
,
'merge_aug_masks'
,
'box3d_multiclass_nms'
,
'merge_aug_scores'
,
'merge_aug_masks'
,
'box3d_multiclass_nms'
,
'aligned_3d_nms'
'aligned_3d_nms'
,
'merge_aug_bboxes_3d'
]
]
mmdet3d/core/post_processing/box3d_nms.py
View file @
3c5ff9fa
...
@@ -61,7 +61,7 @@ def box3d_multiclass_nms(mlvl_bboxes,
...
@@ -61,7 +61,7 @@ def box3d_multiclass_nms(mlvl_bboxes,
else
:
else
:
bboxes
=
mlvl_scores
.
new_zeros
((
0
,
mlvl_bboxes
.
size
(
-
1
)))
bboxes
=
mlvl_scores
.
new_zeros
((
0
,
mlvl_bboxes
.
size
(
-
1
)))
scores
=
mlvl_scores
.
new_zeros
((
0
,
))
scores
=
mlvl_scores
.
new_zeros
((
0
,
))
labels
=
mlvl_scores
.
new_zeros
((
0
,
mlvl_scores
.
size
(
-
1
))
)
labels
=
mlvl_scores
.
new_zeros
((
0
,
),
dtype
=
torch
.
long
)
dir_scores
=
mlvl_scores
.
new_zeros
((
0
,
))
dir_scores
=
mlvl_scores
.
new_zeros
((
0
,
))
return
bboxes
,
scores
,
labels
,
dir_scores
return
bboxes
,
scores
,
labels
,
dir_scores
...
...
mmdet3d/core/post_processing/merge_augs.py
0 → 100644
View file @
3c5ff9fa
import
torch
from
mmdet3d.ops.iou3d.iou3d_utils
import
nms_gpu
,
nms_normal_gpu
from
..bbox
import
bbox3d2result
,
bbox3d_mapping_back
,
xywhr2xyxyr
def merge_aug_bboxes_3d(aug_results, img_metas, test_cfg):
    """Merge 3D detections obtained from several test-time augmentations.

    Each augmented result is mapped back with ``bbox3d_mapping_back`` using
    the ``pcd_scale_factor`` / ``pcd_flip`` entries of its meta info, all
    results are concatenated, per-class NMS is run on the BEV boxes, and the
    survivors are sorted by score and truncated.

    Args:
        aug_results (list[dict]): One detection dict per augmentation, with
            the keys

            - boxes_3d (:obj:BaseInstance3DBoxes): detection bbox
            - scores_3d (torch.Tensor): detection scores
            - labels_3d (torch.Tensor): predicted box labels
        img_metas (list[list[dict]]): Meta information per augmentation;
            only the first element of each entry is read.
        test_cfg (dict): Test config. The attributes ``use_rotate_nms``,
            ``nms_thr`` and ``max_num`` are accessed.

    Returns:
        dict: The output of ``bbox3d2result`` for the merged detections
            (documented upstream as bbox results in cpu mode), containing

            - boxes_3d (:obj:BaseInstance3DBoxes): merged detection bbox
            - scores_3d (torch.Tensor): merged detection scores
            - labels_3d (torch.Tensor): merged predicted box labels
    """
    assert len(aug_results) == len(img_metas), (
        '"aug_results" should have the same length as "img_metas", got len('
        f'aug_results)={len(aug_results)} and len(img_metas)={len(img_metas)}')

    # Bring every augmented prediction back to the original frame.
    restored_boxes, score_chunks, label_chunks = [], [], []
    for result, metas in zip(aug_results, img_metas):
        meta = metas[0]
        scale_factor = meta['pcd_scale_factor']
        flip = meta['pcd_flip']
        score_chunks.append(result['scores_3d'])
        label_chunks.append(result['labels_3d'])
        restored_boxes.append(
            bbox3d_mapping_back(result['boxes_3d'], scale_factor, flip))

    aug_bboxes = restored_boxes[0].cat(restored_boxes)
    aug_bboxes_for_nms = xywhr2xyxyr(aug_bboxes.bev)
    aug_scores = torch.cat(score_chunks, dim=0)
    aug_labels = torch.cat(label_chunks, dim=0)

    # TODO: use a more elegant way to deal with nms
    nms_func = nms_gpu if test_cfg.use_rotate_nms else nms_normal_gpu

    # Nothing was detected in any augmentation: return the empty result.
    if len(aug_labels) == 0:
        return bbox3d2result(aug_bboxes, aug_scores, aug_labels)

    # Apply multi-class nms when merging bboxes: one NMS pass per label id
    # in [0, max label].
    kept_boxes, kept_scores, kept_labels = [], [], []
    num_label_ids = torch.max(aug_labels).item() + 1
    for class_id in range(num_label_ids):
        class_mask = (aug_labels == class_id)
        class_boxes_for_nms = aug_bboxes_for_nms[class_mask, :]
        if len(class_boxes_for_nms) == 0:
            # No prediction carries this label.
            continue

        class_boxes = aug_bboxes[class_mask]
        class_scores = aug_scores[class_mask]
        class_labels = aug_labels[class_mask]

        selected = nms_func(class_boxes_for_nms, class_scores,
                            test_cfg.nms_thr)
        kept_boxes.append(class_boxes[selected, :])
        kept_scores.append(class_scores[selected])
        kept_labels.append(class_labels[selected])

    merged_bboxes = kept_boxes[0].cat(kept_boxes)
    merged_scores = torch.cat(kept_scores, dim=0)
    merged_labels = torch.cat(kept_labels, dim=0)

    # Keep the highest-scoring detections. The cap is computed from the
    # pre-NMS box count, which can only make the slice longer than needed.
    top_k = min(test_cfg.max_num, len(aug_bboxes))
    order = merged_scores.sort(0, descending=True)[1][:top_k]

    return bbox3d2result(merged_bboxes[order], merged_scores[order],
                         merged_labels[order])
Prev
1
2
3
4
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment