Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
mmdetection3d
Commits
e0d892c7
Commit
e0d892c7
authored
May 08, 2020
by
liyinhao
Browse files
Merge branch 'master_temp' into indoor_loading
# Conflicts: # tools/data_converter/sunrgbd_data_utils.py
parents
929ebfe8
f584b970
Changes
111
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
607 additions
and
192 deletions
+607
-192
.gitlab-ci.yml
.gitlab-ci.yml
+2
-2
.pre-commit-config.yaml
.pre-commit-config.yaml
+1
-1
configs/kitti/dv_mvx-v2_second_secfpn_fpn-fusion_adamw_2x8_80e_kitti-3d-3class.py
...second_secfpn_fpn-fusion_adamw_2x8_80e_kitti-3d-3class.py
+12
-9
configs/kitti/dv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py
...igs/kitti/dv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py
+13
-10
configs/kitti/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class.py
.../kitti/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class.py
+13
-10
configs/kitti/dv_second_secfpn_6x8_80e_kitti-3d-car.py
configs/kitti/dv_second_secfpn_6x8_80e_kitti-3d-car.py
+14
-11
configs/kitti/faster_rcnn_r50_fpn_caffe_1x_kitti-2d-3class_coco-3x-pretrain.py
...rcnn_r50_fpn_caffe_1x_kitti-2d-3class_coco-3x-pretrain.py
+14
-9
configs/kitti/hv_PartA2_secfpn_4x8_cosine_80e_kitti-3d-3class.py
.../kitti/hv_PartA2_secfpn_4x8_cosine_80e_kitti-3d-3class.py
+217
-0
configs/kitti/hv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py
...igs/kitti/hv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py
+13
-10
configs/kitti/hv_second_secfpn_6x8_80e_kitti-3d-car.py
configs/kitti/hv_second_secfpn_6x8_80e_kitti-3d-car.py
+13
-10
configs/nus/faster_rcnn_r50_fpn_caffe_2x8_1x_nus.py
configs/nus/faster_rcnn_r50_fpn_caffe_2x8_1x_nus.py
+21
-11
configs/nus/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d.py
configs/nus/hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d.py
+29
-38
configs/nus/retinanet_r50_fpn_caffe_2x8_1x_nus.py
configs/nus/retinanet_r50_fpn_caffe_2x8_1x_nus.py
+11
-8
docs/GETTING_STARTED.md
docs/GETTING_STARTED.md
+3
-3
mmdet3d/apis/__init__.py
mmdet3d/apis/__init__.py
+2
-4
mmdet3d/apis/train.py
mmdet3d/apis/train.py
+93
-0
mmdet3d/core/anchor/__init__.py
mmdet3d/core/anchor/__init__.py
+4
-14
mmdet3d/core/anchor/anchor_3d_generator.py
mmdet3d/core/anchor/anchor_3d_generator.py
+129
-39
mmdet3d/core/bbox/__init__.py
mmdet3d/core/bbox/__init__.py
+2
-2
mmdet3d/core/bbox/assign_sampling.py
mmdet3d/core/bbox/assign_sampling.py
+1
-1
No files found.
.gitlab-ci.yml
View file @
e0d892c7
variables
:
PYTORCH_IMAGE
:
registry.sensetime.com/eig-research/pytorch:
pytorch
1.3.1-cuda10.1-devel
PYTORCH_IMAGE
:
registry.sensetime.com/eig-research/pytorch:1.3.1-cuda10.1-
cudnn7-
devel
stages
:
-
linting
...
...
@@ -26,7 +26,7 @@ before_script:
script
:
-
echo "Start building..."
-
pip install "git+https://github.com/cocodataset/cocoapi.git#subdirectory=PythonAPI"
-
pip install git+https://github.com/open-mmlab/mmdetection.git
@v2.0
-
pip install git+https://github.com/open-mmlab/mmdetection.git
-
python -c "import mmdet; print(mmdet.__version__)"
-
pip install -v -e .[all]
-
python -c "import mmdet3d; print(mmdet3d.__version__)"
...
...
.pre-commit-config.yaml
View file @
e0d892c7
...
...
@@ -12,7 +12,7 @@ repos:
hooks
:
-
id
:
isort
-
repo
:
https://github.com/pre-commit/mirrors-yapf
rev
:
v0.
29
.0
rev
:
v0.
30
.0
hooks
:
-
id
:
yapf
-
repo
:
https://github.com/pre-commit/pre-commit-hooks
...
...
configs/kitti/dv_mvx-v2_second_secfpn_fpn-fusion_adamw_2x8_80e_kitti-3d-3class.py
View file @
e0d892c7
...
...
@@ -72,18 +72,21 @@ model = dict(
feat_channels
=
512
,
use_direction_classifier
=
True
,
encode_bg_as_zeros
=
True
,
anchor_range
=
[
[
0
,
-
40.0
,
-
0.6
,
70.4
,
40.0
,
-
0.6
],
[
0
,
-
40.0
,
-
0.6
,
70.4
,
40.0
,
-
0.6
],
[
0
,
-
40.0
,
-
1.78
,
70.4
,
40.0
,
-
1.78
],
],
anchor_generator
=
dict
(
type
=
'Anchor3DRangeGenerator'
,
ranges
=
[
[
0
,
-
40.0
,
-
0.6
,
70.4
,
40.0
,
-
0.6
],
[
0
,
-
40.0
,
-
0.6
,
70.4
,
40.0
,
-
0.6
],
[
0
,
-
40.0
,
-
1.78
,
70.4
,
40.0
,
-
1.78
],
],
strides
=
[
2
],
sizes
=
[[
0.6
,
0.8
,
1.73
],
[
0.6
,
1.76
,
1.73
],
[
1.6
,
3.9
,
1.56
]],
rotations
=
[
0
,
1.57
],
reshape_out
=
False
),
assigner_per_size
=
True
,
anchor_strides
=
[
2
],
anchor_sizes
=
[[
0.6
,
0.8
,
1.73
],
[
0.6
,
1.76
,
1.73
],
[
1.6
,
3.9
,
1.56
]],
anchor_rotations
=
[
0
,
1.57
],
diff_rad_by_sin
=
True
,
assign_per_class
=
True
,
bbox_coder
=
dict
(
type
=
'
Residual3D
BoxCoder'
,
),
bbox_coder
=
dict
(
type
=
'
DeltaXYZWLHRB
BoxCoder'
,
),
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
...
...
configs/kitti/dv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py
View file @
e0d892c7
...
...
@@ -43,12 +43,15 @@ model = dict(
feat_channels
=
384
,
use_direction_classifier
=
True
,
encode_bg_as_zeros
=
True
,
anchor_range
=
[
0
,
-
39.68
,
-
1.78
,
69.12
,
39.68
,
-
1.78
],
anchor_strides
=
[
2
],
anchor_sizes
=
[[
1.6
,
3.9
,
1.56
]],
anchor_rotations
=
[
0
,
1.57
],
anchor_generator
=
dict
(
type
=
'Anchor3DRangeGenerator'
,
ranges
=
[[
0
,
-
39.68
,
-
1.78
,
69.12
,
39.68
,
-
1.78
]],
strides
=
[
2
],
sizes
=
[[
1.6
,
3.9
,
1.56
]],
rotations
=
[
0
,
1.57
],
reshape_out
=
True
),
diff_rad_by_sin
=
True
,
bbox_coder
=
dict
(
type
=
'
Residual3D
BoxCoder'
,
),
bbox_coder
=
dict
(
type
=
'
DeltaXYZWLHRB
BoxCoder'
,
),
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
...
...
@@ -116,7 +119,7 @@ train_pipeline = [
loc_noise_std
=
[
0.25
,
0.25
,
0.25
],
global_rot_range
=
[
0.0
,
0.0
],
rot_uniform_noise
=
[
-
0.15707963267
,
0.15707963267
]),
dict
(
type
=
'
Points
RandomFlip'
,
flip_ratio
=
0.5
),
dict
(
type
=
'RandomFlip
3D
'
,
flip_ratio
=
0.5
),
dict
(
type
=
'GlobalRotScale'
,
rot_uniform_noise
=
[
-
0.78539816
,
0.78539816
],
...
...
@@ -125,7 +128,7 @@ train_pipeline = [
dict
(
type
=
'ObjectRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'PointShuffle'
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes'
,
'gt_labels'
]),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes
_3d
'
,
'gt_labels
_3d
'
]),
]
test_pipeline
=
[
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
...
...
@@ -133,7 +136,7 @@ test_pipeline = [
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
,
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes'
]),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes
_3d
'
]),
]
data
=
dict
(
...
...
@@ -174,13 +177,13 @@ optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config
=
dict
(
policy
=
'cyclic'
,
target_ratio
=
[
10
,
1e-4
]
,
target_ratio
=
(
10
,
1e-4
)
,
cyclic_times
=
1
,
step_ratio_up
=
0.4
,
)
momentum_config
=
dict
(
policy
=
'cyclic'
,
target_ratio
=
[
0.85
/
0.95
,
1
]
,
target_ratio
=
(
0.85
/
0.95
,
1
)
,
cyclic_times
=
1
,
step_ratio_up
=
0.4
,
)
...
...
configs/kitti/dv_second_secfpn_2x8_cosine_80e_kitti-3d-3class.py
View file @
e0d892c7
...
...
@@ -41,18 +41,21 @@ model = dict(
feat_channels
=
512
,
use_direction_classifier
=
True
,
encode_bg_as_zeros
=
True
,
anchor_range
=
[
[
0
,
-
40.0
,
-
0.6
,
70.4
,
40.0
,
-
0.6
],
[
0
,
-
40.0
,
-
0.6
,
70.4
,
40.0
,
-
0.6
],
[
0
,
-
40.0
,
-
1.78
,
70.4
,
40.0
,
-
1.78
],
],
anchor_strides
=
[
2
],
anchor_sizes
=
[[
0.6
,
0.8
,
1.73
],
[
0.6
,
1.76
,
1.73
],
[
1.6
,
3.9
,
1.56
]],
anchor_rotations
=
[
0
,
1.57
],
anchor_generator
=
dict
(
type
=
'Anchor3DRangeGenerator'
,
ranges
=
[
[
0
,
-
40.0
,
-
0.6
,
70.4
,
40.0
,
-
0.6
],
[
0
,
-
40.0
,
-
0.6
,
70.4
,
40.0
,
-
0.6
],
[
0
,
-
40.0
,
-
1.78
,
70.4
,
40.0
,
-
1.78
],
],
strides
=
[
2
],
sizes
=
[[
0.6
,
0.8
,
1.73
],
[
0.6
,
1.76
,
1.73
],
[
1.6
,
3.9
,
1.56
]],
rotations
=
[
0
,
1.57
],
reshape_out
=
False
),
diff_rad_by_sin
=
True
,
assigner_per_size
=
True
,
assign_per_class
=
True
,
bbox_coder
=
dict
(
type
=
'
Residual3D
BoxCoder'
,
),
bbox_coder
=
dict
(
type
=
'
DeltaXYZWLHRB
BoxCoder'
,
),
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
...
...
@@ -113,7 +116,7 @@ input_modality = dict(
use_lidar
=
True
,
use_depth
=
False
,
use_lidar_intensity
=
True
,
use_camera
=
Fals
e
,
use_camera
=
Tru
e
,
)
db_sampler
=
dict
(
root_path
=
data_root
,
...
...
configs/kitti/dv_second_secfpn_6x8_80e_kitti-3d-car.py
View file @
e0d892c7
...
...
@@ -41,12 +41,15 @@ model = dict(
feat_channels
=
512
,
use_direction_classifier
=
True
,
encode_bg_as_zeros
=
True
,
anchor_range
=
[
0
,
-
40.0
,
-
1.78
,
70.4
,
40.0
,
-
1.78
],
anchor_strides
=
[
2
],
anchor_sizes
=
[[
1.6
,
3.9
,
1.56
]],
anchor_rotations
=
[
0
,
1.57
],
anchor_generator
=
dict
(
type
=
'Anchor3DRangeGenerator'
,
ranges
=
[[
0
,
-
40.0
,
-
1.78
,
70.4
,
40.0
,
-
1.78
]],
strides
=
[
2
],
sizes
=
[[
1.6
,
3.9
,
1.56
]],
rotations
=
[
0
,
1.57
],
reshape_out
=
True
),
diff_rad_by_sin
=
True
,
bbox_coder
=
dict
(
type
=
'
Residual3D
BoxCoder'
,
),
bbox_coder
=
dict
(
type
=
'
DeltaXYZWLHRB
BoxCoder'
),
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
...
...
@@ -91,7 +94,7 @@ input_modality = dict(
use_lidar
=
True
,
use_depth
=
False
,
use_lidar_intensity
=
True
,
use_camera
=
Fals
e
,
use_camera
=
Tru
e
,
)
db_sampler
=
dict
(
root_path
=
data_root
,
...
...
@@ -113,7 +116,7 @@ train_pipeline = [
loc_noise_std
=
[
1.0
,
1.0
,
0.5
],
global_rot_range
=
[
0.0
,
0.0
],
rot_uniform_noise
=
[
-
0.78539816
,
0.78539816
]),
dict
(
type
=
'
Points
RandomFlip'
,
flip_ratio
=
0.5
),
dict
(
type
=
'RandomFlip
3D
'
,
flip_ratio
=
0.5
),
dict
(
type
=
'GlobalRotScale'
,
rot_uniform_noise
=
[
-
0.78539816
,
0.78539816
],
...
...
@@ -122,7 +125,7 @@ train_pipeline = [
dict
(
type
=
'ObjectRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'PointShuffle'
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes'
,
'gt_labels'
]),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes
_3d
'
,
'gt_labels
_3d
'
]),
]
test_pipeline
=
[
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
...
...
@@ -130,7 +133,7 @@ test_pipeline = [
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
,
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes'
]),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes
_3d
'
]),
]
data
=
dict
(
...
...
@@ -170,13 +173,13 @@ optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
10
,
norm_type
=
2
))
lr_config
=
dict
(
policy
=
'cyclic'
,
target_ratio
=
[
10
,
1e-4
]
,
target_ratio
=
(
10
,
1e-4
)
,
cyclic_times
=
1
,
step_ratio_up
=
0.4
,
)
momentum_config
=
dict
(
policy
=
'cyclic'
,
target_ratio
=
[
0.85
/
0.95
,
1
]
,
target_ratio
=
(
0.85
/
0.95
,
1
)
,
cyclic_times
=
1
,
step_ratio_up
=
0.4
,
)
...
...
configs/kitti/faster_rcnn_r50_fpn_caffe_1x_kitti-2d-3class_coco-3x-pretrain.py
View file @
e0d892c7
...
...
@@ -2,8 +2,7 @@
norm_cfg
=
dict
(
type
=
'BN'
,
requires_grad
=
False
)
model
=
dict
(
type
=
'FasterRCNN'
,
pretrained
=
(
'./pretrain_detectron/'
'ImageNetPretrained/MSRA/resnet50_msra.pth'
),
pretrained
=
(
'open-mmlab://resnet50_caffe_bgr'
),
backbone
=
dict
(
type
=
'ResNet'
,
depth
=
50
,
...
...
@@ -22,11 +21,15 @@ model = dict(
type
=
'RPNHead'
,
in_channels
=
256
,
feat_channels
=
256
,
anchor_scales
=
[
8
],
anchor_ratios
=
[
1
/
3
,
0.5
,
1.0
,
2.0
,
3.0
],
anchor_strides
=
[
4
,
8
,
16
,
32
,
64
],
target_means
=
[.
0
,
.
0
,
.
0
,
.
0
],
target_stds
=
[
1.0
,
1.0
,
1.0
,
1.0
],
anchor_generator
=
dict
(
type
=
'AnchorGenerator'
,
scales
=
[
8
],
ratios
=
[
0.5
,
1.0
,
2.0
],
strides
=
[
4
,
8
,
16
,
32
,
64
]),
bbox_coder
=
dict
(
type
=
'DeltaXYWHBBoxCoder'
,
target_means
=
[.
0
,
.
0
,
.
0
,
.
0
],
target_stds
=
[
1.0
,
1.0
,
1.0
,
1.0
]),
loss_cls
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
True
,
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'L1Loss'
,
loss_weight
=
1.0
)),
...
...
@@ -43,8 +46,10 @@ model = dict(
fc_out_channels
=
1024
,
roi_feat_size
=
7
,
num_classes
=
80
,
target_means
=
[
0.
,
0.
,
0.
,
0.
],
target_stds
=
[
0.1
,
0.1
,
0.2
,
0.2
],
bbox_coder
=
dict
(
type
=
'DeltaXYWHBBoxCoder'
,
target_means
=
[
0.
,
0.
,
0.
,
0.
],
target_stds
=
[
0.1
,
0.1
,
0.2
,
0.2
]),
reg_class_agnostic
=
False
,
loss_cls
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
False
,
loss_weight
=
1.0
),
...
...
configs/kitti/hv_PartA2_secfpn_4x8_cosine_80e_kitti-3d-3class.py
0 → 100644
View file @
e0d892c7
# model settings
voxel_size
=
[
0.05
,
0.05
,
0.1
]
point_cloud_range
=
[
0
,
-
40
,
-
3
,
70.4
,
40
,
1
]
# velodyne coordinates, x, y, z
model
=
dict
(
type
=
'PartA2'
,
voxel_layer
=
dict
(
max_num_points
=
5
,
# max_points_per_voxel
point_cloud_range
=
point_cloud_range
,
voxel_size
=
voxel_size
,
max_voxels
=
(
16000
,
40000
)
# (training, testing) max_coxels
),
voxel_encoder
=
dict
(
type
=
'VoxelFeatureExtractorV3'
),
middle_encoder
=
dict
(
type
=
'SparseUNet'
,
in_channels
=
4
,
output_shape
=
[
41
,
1600
,
1408
],
pre_act
=
False
,
),
backbone
=
dict
(
type
=
'SECOND'
,
in_channels
=
256
,
layer_nums
=
[
5
,
5
],
layer_strides
=
[
1
,
2
],
num_filters
=
[
128
,
256
]),
neck
=
dict
(
type
=
'SECONDFPN'
,
in_channels
=
[
128
,
256
],
upsample_strides
=
[
1
,
2
],
num_upsample_filters
=
[
256
,
256
]),
rpn_head
=
dict
(
type
=
'PartA2RPNHead'
,
class_name
=
[
'Pedestrian'
,
'Cyclist'
,
'Car'
],
in_channels
=
512
,
feat_channels
=
512
,
use_direction_classifier
=
True
,
encode_bg_as_zeros
=
True
,
anchor_generator
=
dict
(
type
=
'Anchor3DRangeGenerator'
,
ranges
=
[[
0
,
-
40.0
,
-
0.6
,
70.4
,
40.0
,
-
0.6
],
[
0
,
-
40.0
,
-
0.6
,
70.4
,
40.0
,
-
0.6
],
[
0
,
-
40.0
,
-
1.78
,
70.4
,
40.0
,
-
1.78
]],
strides
=
[
2
],
sizes
=
[[
0.6
,
0.8
,
1.73
],
[
0.6
,
1.76
,
1.73
],
[
1.6
,
3.9
,
1.56
]],
rotations
=
[
0
,
1.57
],
reshape_out
=
False
),
diff_rad_by_sin
=
True
,
assigner_per_size
=
True
,
assign_per_class
=
True
,
bbox_coder
=
dict
(
type
=
'DeltaXYZWLHRBBoxCoder'
),
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
1.0
/
9.0
,
loss_weight
=
2.0
),
loss_dir
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
False
,
loss_weight
=
0.2
),
))
# model training and testing settings
train_cfg
=
dict
(
rpn
=
dict
(
assigner
=
[
dict
(
# for Pedestrian
type
=
'MaxIoUAssigner'
,
iou_calculator
=
dict
(
type
=
'BboxOverlapsNearest3D'
),
pos_iou_thr
=
0.5
,
neg_iou_thr
=
0.35
,
min_pos_iou
=
0.35
,
ignore_iof_thr
=-
1
),
dict
(
# for Cyclist
type
=
'MaxIoUAssigner'
,
iou_calculator
=
dict
(
type
=
'BboxOverlapsNearest3D'
),
pos_iou_thr
=
0.5
,
neg_iou_thr
=
0.35
,
min_pos_iou
=
0.35
,
ignore_iof_thr
=-
1
),
dict
(
# for Car
type
=
'MaxIoUAssigner'
,
iou_calculator
=
dict
(
type
=
'BboxOverlapsNearest3D'
),
pos_iou_thr
=
0.6
,
neg_iou_thr
=
0.45
,
min_pos_iou
=
0.45
,
ignore_iof_thr
=-
1
),
],
allowed_border
=
0
,
pos_weight
=-
1
,
debug
=
False
),
rpn_proposal
=
dict
(
nms_pre
=
9000
,
nms_post
=
512
,
nms_thr
=
0.8
,
score_thr
=
0
,
use_rotate_nms
=
False
),
)
test_cfg
=
dict
(
rpn
=
dict
(
nms_pre
=
1024
,
max_per_img
=
100
,
use_rotate_nms
=
True
,
nms_across_levels
=
False
,
nms_thr
=
0.7
,
score_thr
=
0
))
# dataset settings
dataset_type
=
'KittiDataset'
data_root
=
'data/kitti/'
class_names
=
[
'Pedestrian'
,
'Cyclist'
,
'Car'
]
img_norm_cfg
=
dict
(
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
to_rgb
=
True
)
input_modality
=
dict
(
use_lidar
=
True
,
use_depth
=
False
,
use_lidar_intensity
=
True
,
use_camera
=
True
)
db_sampler
=
dict
(
root_path
=
data_root
,
info_path
=
data_root
+
'kitti_dbinfos_train.pkl'
,
rate
=
1.0
,
use_road_plane
=
False
,
object_rot_range
=
[
0.0
,
0.0
],
prepare
=
dict
(
filter_by_difficulty
=
[
-
1
],
filter_by_min_points
=
dict
(
Car
=
5
,
Pedestrian
=
10
,
Cyclist
=
10
)),
sample_groups
=
dict
(
Car
=
12
,
Pedestrian
=
6
,
Cyclist
=
6
),
)
train_pipeline
=
[
dict
(
type
=
'ObjectSample'
,
db_sampler
=
db_sampler
),
dict
(
type
=
'ObjectNoise'
,
num_try
=
100
,
loc_noise_std
=
[
0
,
0
,
0
],
global_rot_range
=
[
0.0
,
0.0
],
rot_uniform_noise
=
[
-
0.39269908
,
0.39269908
]),
dict
(
type
=
'RandomFlip3D'
,
flip_ratio
=
0.5
),
dict
(
type
=
'GlobalRotScale'
,
rot_uniform_noise
=
[
-
0.78539816
,
0.78539816
],
scaling_uniform_noise
=
[
0.95
,
1.05
],
trans_normal_noise
=
[
0.2
,
0.2
,
0.2
]),
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'ObjectRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'PointShuffle'
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes_3d'
,
'gt_labels_3d'
]),
]
test_pipeline
=
[
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
,
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes_3d'
]),
]
data
=
dict
(
samples_per_gpu
=
2
,
workers_per_gpu
=
2
,
train
=
dict
(
type
=
dataset_type
,
root_path
=
data_root
,
ann_file
=
data_root
+
'kitti_infos_train.pkl'
,
split
=
'training'
,
training
=
True
,
pipeline
=
train_pipeline
,
modality
=
input_modality
,
class_names
=
class_names
,
with_label
=
True
),
val
=
dict
(
type
=
dataset_type
,
root_path
=
data_root
,
ann_file
=
data_root
+
'kitti_infos_val.pkl'
,
split
=
'training'
,
pipeline
=
test_pipeline
,
modality
=
input_modality
,
class_names
=
class_names
,
with_label
=
True
),
test
=
dict
(
type
=
dataset_type
,
root_path
=
data_root
,
ann_file
=
data_root
+
'kitti_infos_val.pkl'
,
split
=
'testing'
,
pipeline
=
test_pipeline
,
modality
=
input_modality
,
class_names
=
class_names
,
with_label
=
True
))
# optimizer
lr
=
0.003
# max learning rate
optimizer
=
dict
(
type
=
'AdamW'
,
lr
=
lr
,
betas
=
(
0.95
,
0.99
),
# the momentum is change during training
weight_decay
=
0.001
)
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
10
,
norm_type
=
2
))
lr_config
=
dict
(
policy
=
'cosine'
,
warmup
=
'linear'
,
warmup_iters
=
1000
,
warmup_ratio
=
1.0
/
10
,
target_lr
=
1e-5
,
as_ratio
=
True
)
momentum_config
=
None
checkpoint_config
=
dict
(
interval
=
1
)
# yapf:disable
log_config
=
dict
(
interval
=
50
,
hooks
=
[
dict
(
type
=
'TextLoggerHook'
),
dict
(
type
=
'TensorboardLoggerHook'
)
])
# yapf:enable
# runtime settings
total_epochs
=
80
dist_params
=
dict
(
backend
=
'nccl'
,
port
=
29502
)
log_level
=
'INFO'
work_dir
=
'./work_dirs/parta2_secfpn_80e'
load_from
=
None
resume_from
=
None
workflow
=
[(
'train'
,
1
)]
configs/kitti/hv_pointpillars_secfpn_6x8_160e_kitti-3d-car.py
View file @
e0d892c7
...
...
@@ -42,12 +42,15 @@ model = dict(
feat_channels
=
384
,
use_direction_classifier
=
True
,
encode_bg_as_zeros
=
True
,
anchor_range
=
[
0
,
-
39.68
,
-
1.78
,
69.12
,
39.68
,
-
1.78
],
anchor_strides
=
[
2
],
anchor_sizes
=
[[
1.6
,
3.9
,
1.56
]],
anchor_rotations
=
[
0
,
1.57
],
anchor_generator
=
dict
(
type
=
'Anchor3DRangeGenerator'
,
ranges
=
[[
0
,
-
39.68
,
-
1.78
,
69.12
,
39.68
,
-
1.78
]],
strides
=
[
2
],
sizes
=
[[
1.6
,
3.9
,
1.56
]],
rotations
=
[
0
,
1.57
],
reshape_out
=
True
),
diff_rad_by_sin
=
True
,
bbox_coder
=
dict
(
type
=
'
Residual3D
BoxCoder'
,
),
bbox_coder
=
dict
(
type
=
'
DeltaXYZWLHRB
BoxCoder'
),
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
...
...
@@ -113,7 +116,7 @@ train_pipeline = [
loc_noise_std
=
[
0.25
,
0.25
,
0.25
],
global_rot_range
=
[
0.0
,
0.0
],
rot_uniform_noise
=
[
-
0.15707963267
,
0.15707963267
]),
dict
(
type
=
'
Points
RandomFlip'
,
flip_ratio
=
0.5
),
dict
(
type
=
'RandomFlip
3D
'
,
flip_ratio
=
0.5
),
dict
(
type
=
'GlobalRotScale'
,
rot_uniform_noise
=
[
-
0.78539816
,
0.78539816
],
...
...
@@ -122,7 +125,7 @@ train_pipeline = [
dict
(
type
=
'ObjectRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'PointShuffle'
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes'
,
'gt_labels'
]),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes
_3d
'
,
'gt_labels
_3d
'
]),
]
test_pipeline
=
[
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
...
...
@@ -130,7 +133,7 @@ test_pipeline = [
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
,
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes'
]),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes
_3d
'
]),
]
data
=
dict
(
...
...
@@ -175,13 +178,13 @@ optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
# learning policy
lr_config
=
dict
(
policy
=
'cyclic'
,
target_ratio
=
[
10
,
1e-4
]
,
target_ratio
=
(
10
,
1e-4
)
,
cyclic_times
=
1
,
step_ratio_up
=
0.4
,
)
momentum_config
=
dict
(
policy
=
'cyclic'
,
target_ratio
=
[
0.85
/
0.95
,
1
]
,
target_ratio
=
(
0.85
/
0.95
,
1
)
,
cyclic_times
=
1
,
step_ratio_up
=
0.4
,
)
...
...
configs/kitti/hv_second_secfpn_6x8_80e_kitti-3d-car.py
View file @
e0d892c7
...
...
@@ -41,12 +41,15 @@ model = dict(
feat_channels
=
512
,
use_direction_classifier
=
True
,
encode_bg_as_zeros
=
True
,
anchor_range
=
[
0
,
-
40.0
,
-
1.78
,
70.4
,
40.0
,
-
1.78
],
anchor_strides
=
[
2
],
anchor_sizes
=
[[
1.6
,
3.9
,
1.56
]],
anchor_rotations
=
[
0
,
1.57
],
anchor_generator
=
dict
(
type
=
'Anchor3DRangeGenerator'
,
ranges
=
[[
0
,
-
40.0
,
-
1.78
,
70.4
,
40.0
,
-
1.78
]],
strides
=
[
2
],
sizes
=
[[
1.6
,
3.9
,
1.56
]],
rotations
=
[
0
,
1.57
],
reshape_out
=
True
),
diff_rad_by_sin
=
True
,
bbox_coder
=
dict
(
type
=
'
Residual3D
BoxCoder'
,
),
bbox_coder
=
dict
(
type
=
'
DeltaXYZWLHRB
BoxCoder'
),
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
...
...
@@ -111,7 +114,7 @@ train_pipeline = [
loc_noise_std
=
[
1.0
,
1.0
,
0.5
],
global_rot_range
=
[
0.0
,
0.0
],
rot_uniform_noise
=
[
-
0.78539816
,
0.78539816
]),
dict
(
type
=
'
Points
RandomFlip'
,
flip_ratio
=
0.5
),
dict
(
type
=
'RandomFlip
3D
'
,
flip_ratio
=
0.5
),
dict
(
type
=
'GlobalRotScale'
,
rot_uniform_noise
=
[
-
0.78539816
,
0.78539816
],
...
...
@@ -120,7 +123,7 @@ train_pipeline = [
dict
(
type
=
'ObjectRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'PointShuffle'
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes'
,
'gt_labels'
]),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes
_3d
'
,
'gt_labels
_3d
'
]),
]
test_pipeline
=
[
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
...
...
@@ -128,7 +131,7 @@ test_pipeline = [
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
,
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes'
]),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes
_3d
'
]),
]
data
=
dict
(
...
...
@@ -168,13 +171,13 @@ optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
10
,
norm_type
=
2
))
lr_config
=
dict
(
policy
=
'cyclic'
,
target_ratio
=
[
10
,
1e-4
]
,
target_ratio
=
(
10
,
1e-4
)
,
cyclic_times
=
1
,
step_ratio_up
=
0.4
,
)
momentum_config
=
dict
(
policy
=
'cyclic'
,
target_ratio
=
[
0.85
/
0.95
,
1
]
,
target_ratio
=
(
0.85
/
0.95
,
1
)
,
cyclic_times
=
1
,
step_ratio_up
=
0.4
,
)
...
...
configs/nus/faster_rcnn_r50_fpn_caffe_2x8_1x_nus.py
View file @
e0d892c7
...
...
@@ -2,8 +2,7 @@
norm_cfg
=
dict
(
type
=
'BN'
,
requires_grad
=
False
)
model
=
dict
(
type
=
'FasterRCNN'
,
pretrained
=
(
'./pretrain_detectron/'
'ImageNetPretrained/MSRA/resnet50_msra.pth'
),
pretrained
=
(
'open-mmlab://resnet50_caffe_bgr'
),
backbone
=
dict
(
type
=
'ResNet'
,
depth
=
50
,
...
...
@@ -22,11 +21,15 @@ model = dict(
type
=
'RPNHead'
,
in_channels
=
256
,
feat_channels
=
256
,
anchor_scales
=
[
8
],
anchor_ratios
=
[
0.5
,
1.0
,
2.0
],
anchor_strides
=
[
4
,
8
,
16
,
32
,
64
],
target_means
=
[.
0
,
.
0
,
.
0
,
.
0
],
target_stds
=
[
1.0
,
1.0
,
1.0
,
1.0
],
anchor_generator
=
dict
(
type
=
'AnchorGenerator'
,
scales
=
[
8
],
ratios
=
[
0.5
,
1.0
,
2.0
],
strides
=
[
4
,
8
,
16
,
32
,
64
]),
bbox_coder
=
dict
(
type
=
'DeltaXYWHBBoxCoder'
,
target_means
=
[.
0
,
.
0
,
.
0
,
.
0
],
target_stds
=
[
1.0
,
1.0
,
1.0
,
1.0
]),
loss_cls
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
True
,
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'L1Loss'
,
loss_weight
=
1.0
)),
...
...
@@ -42,9 +45,11 @@ model = dict(
in_channels
=
256
,
fc_out_channels
=
1024
,
roi_feat_size
=
7
,
num_classes
=
80
,
target_means
=
[
0.
,
0.
,
0.
,
0.
],
target_stds
=
[
0.1
,
0.1
,
0.2
,
0.2
],
num_classes
=
10
,
bbox_coder
=
dict
(
type
=
'DeltaXYWHBBoxCoder'
,
target_means
=
[
0.
,
0.
,
0.
,
0.
],
target_stds
=
[
0.1
,
0.1
,
0.2
,
0.2
]),
reg_class_agnostic
=
False
,
loss_cls
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
False
,
loss_weight
=
1.0
),
...
...
@@ -105,12 +110,14 @@ test_cfg = dict(
# e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
# dataset settings
dataset_type
=
'
NuScenes2D
Dataset'
dataset_type
=
'
Coco
Dataset'
data_root
=
'data/nuscenes/'
# Values to be used for image normalization (BGR order)
# Default mean pixel values are from ImageNet: [103.53, 116.28, 123.675]
# When using pre-trained models in Detectron1 or any MSRA models,
# std has been absorbed into its conv1 weights, so the std needs to be set 1.
classes
=
(
'car'
,
'truck'
,
'trailer'
,
'bus'
,
'construction_vehicle'
,
'bicycle'
,
'motorcycle'
,
'pedestrian'
,
'traffic_cone'
,
'barrier'
)
img_norm_cfg
=
dict
(
mean
=
[
103.530
,
116.280
,
123.675
],
std
=
[
1.0
,
1.0
,
1.0
],
to_rgb
=
False
)
train_pipeline
=
[
...
...
@@ -147,14 +154,17 @@ data = dict(
workers_per_gpu
=
2
,
train
=
dict
(
type
=
dataset_type
,
classes
=
classes
,
ann_file
=
data_root
+
'nuscenes_infos_train.coco.json'
,
pipeline
=
train_pipeline
),
val
=
dict
(
type
=
dataset_type
,
classes
=
classes
,
ann_file
=
data_root
+
'nuscenes_infos_val.coco.json'
,
pipeline
=
test_pipeline
),
test
=
dict
(
type
=
dataset_type
,
classes
=
classes
,
ann_file
=
data_root
+
'nuscenes_infos_val.coco.json'
,
pipeline
=
test_pipeline
))
# optimizer
...
...
configs/nus/hv_pointpillars_secfpn_sbn-all_4x8_2
0e
_nus-3d.py
→
configs/nus/hv_pointpillars_secfpn_sbn-all_4x8_2
x
_nus-3d.py
View file @
e0d892c7
...
...
@@ -51,33 +51,35 @@ model = dict(
feat_channels
=
384
,
use_direction_classifier
=
True
,
encode_bg_as_zeros
=
True
,
anchor_range
=
[
[
-
49.6
,
-
49.6
,
-
1.80032795
,
49.6
,
49.6
,
-
1.80032795
],
# car
[
-
49.6
,
-
49.6
,
-
1.74440365
,
49.6
,
49.6
,
-
1.74440365
],
# truck
[
-
49.6
,
-
49.6
,
-
1.68526504
,
49.6
,
49.6
,
-
1.68526504
],
# trailer
[
-
49.6
,
-
49.6
,
-
1.67339111
,
49.6
,
49.6
,
-
1.67339111
],
# bicycle
[
-
49.6
,
-
49.6
,
-
1.61785072
,
49.6
,
49.6
,
-
1.61785072
],
# pedestrian
[
-
49.6
,
-
49.6
,
-
1.80984986
,
49.6
,
49.6
,
-
1.80984986
],
# traffic_cone
[
-
49.6
,
-
49.6
,
-
1.763965
,
49.6
,
49.6
,
-
1.763965
],
# barrier
],
anchor_strides
=
[
2
],
anchor_sizes
=
[
[
1.95017717
,
4.60718145
,
1.72270761
],
# car
[
2.4560939
,
6.73778078
,
2.73004906
],
# truck
[
2.87427237
,
12.01320693
,
3.81509561
],
# trailer
[
0.60058911
,
1.68452161
,
1.27192197
],
# bicycle
[
0.66344886
,
0.7256437
,
1.75748069
],
# pedestrian
[
0.39694519
,
0.40359262
,
1.06232151
],
# traffic_cone
[
2.49008838
,
0.48578221
,
0.98297065
],
# barrier
],
anchor_custom_values
=
[
0
,
0
],
anchor_rotations
=
[
0
,
1.57
],
anchor_generator
=
dict
(
type
=
'Anchor3DRangeGenerator'
,
ranges
=
[
[
-
49.6
,
-
49.6
,
-
1.80032795
,
49.6
,
49.6
,
-
1.80032795
],
[
-
49.6
,
-
49.6
,
-
1.74440365
,
49.6
,
49.6
,
-
1.74440365
],
[
-
49.6
,
-
49.6
,
-
1.68526504
,
49.6
,
49.6
,
-
1.68526504
],
[
-
49.6
,
-
49.6
,
-
1.67339111
,
49.6
,
49.6
,
-
1.67339111
],
[
-
49.6
,
-
49.6
,
-
1.61785072
,
49.6
,
49.6
,
-
1.61785072
],
[
-
49.6
,
-
49.6
,
-
1.80984986
,
49.6
,
49.6
,
-
1.80984986
],
[
-
49.6
,
-
49.6
,
-
1.763965
,
49.6
,
49.6
,
-
1.763965
],
],
strides
=
[
2
],
sizes
=
[
[
1.95017717
,
4.60718145
,
1.72270761
],
# car
[
2.4560939
,
6.73778078
,
2.73004906
],
# truck
[
2.87427237
,
12.01320693
,
3.81509561
],
# trailer
[
0.60058911
,
1.68452161
,
1.27192197
],
# bicycle
[
0.66344886
,
0.7256437
,
1.75748069
],
# pedestrian
[
0.39694519
,
0.40359262
,
1.06232151
],
# traffic_cone
[
2.49008838
,
0.48578221
,
0.98297065
],
# barrier
],
custom_values
=
[
0
,
0
],
rotations
=
[
0
,
1.57
],
reshape_out
=
True
),
assigner_per_size
=
False
,
diff_rad_by_sin
=
True
,
dir_offset
=
0.7854
,
# pi/4
dir_limit_offset
=
0
,
bbox_coder
=
dict
(
type
=
'
Residual3D
BoxCoder'
,
),
bbox_coder
=
dict
(
type
=
'
DeltaXYZWLHRB
BoxCoder'
,
),
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
...
...
@@ -128,7 +130,7 @@ input_modality = dict(
use_lidar
=
True
,
use_depth
=
False
,
use_lidar_intensity
=
True
,
use_camera
=
Tru
e
,
use_camera
=
Fals
e
,
)
db_sampler
=
dict
(
root_path
=
data_root
,
...
...
@@ -154,23 +156,12 @@ train_pipeline = [
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'ObjectRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'PointShuffle'
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'Pad'
,
size_divisor
=
32
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes_3d'
,
'gt_labels_3d'
]),
]
test_pipeline
=
[
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'Resize'
,
img_scale
=
[
(
1280
,
720
),
],
multiscale_mode
=
'value'
,
keep_ratio
=
True
),
dict
(
type
=
'RandomFlip3D'
,
flip_ratio
=
0
),
dict
(
type
=
'Normalize'
,
**
img_norm_cfg
),
dict
(
type
=
'Pad'
,
size_divisor
=
32
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
,
...
...
@@ -214,7 +205,7 @@ lr_config = dict(
warmup
=
'linear'
,
warmup_iters
=
1000
,
warmup_ratio
=
1.0
/
1000
,
step
=
[
16
,
19
])
step
=
[
20
,
23
])
momentum_config
=
None
checkpoint_config
=
dict
(
interval
=
1
)
# yapf:disable
...
...
@@ -227,10 +218,10 @@ log_config = dict(
])
# yapf:enable
# runtime settings
total_epochs
=
2
0
total_epochs
=
2
4
dist_params
=
dict
(
backend
=
'nccl'
)
log_level
=
'INFO'
work_dir
=
'./work_dirs/
pp_secfpn_80e
'
work_dir
=
'./work_dirs/
hv_pointpillars_secfpn_sbn-all_4x8_2x_nus-3d
'
load_from
=
None
resume_from
=
None
workflow
=
[(
'train'
,
1
)]
configs/nus/retinanet_r50_fpn_caffe_2x8_1x_nus.py
View file @
e0d892c7
...
...
@@ -2,8 +2,7 @@
norm_cfg
=
dict
(
type
=
'BN'
,
requires_grad
=
False
)
model
=
dict
(
type
=
'RetinaNet'
,
pretrained
=
(
'./pretrain_detectron/'
'ImageNetPretrained/MSRA/resnet50_msra.pth'
),
pretrained
=
(
'open-mmlab://resnet50_caffe_bgr'
),
backbone
=
dict
(
type
=
'ResNet'
,
depth
=
50
,
...
...
@@ -26,12 +25,16 @@ model = dict(
in_channels
=
256
,
stacked_convs
=
4
,
feat_channels
=
256
,
octave_base_scale
=
4
,
scales_per_octave
=
3
,
anchor_ratios
=
[
0.5
,
1.0
,
2.0
],
anchor_strides
=
[
8
,
16
,
32
,
64
,
128
],
target_means
=
[.
0
,
.
0
,
.
0
,
.
0
],
target_stds
=
[
1.0
,
1.0
,
1.0
,
1.0
],
anchor_generator
=
dict
(
type
=
'AnchorGenerator'
,
octave_base_scale
=
4
,
scales_per_octave
=
3
,
ratios
=
[
0.5
,
1.0
,
2.0
],
strides
=
[
8
,
16
,
32
,
64
,
128
]),
bbox_coder
=
dict
(
type
=
'DeltaXYWHBBoxCoder'
,
target_means
=
[.
0
,
.
0
,
.
0
,
.
0
],
target_stds
=
[
1.0
,
1.0
,
1.0
,
1.0
]),
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
...
...
docs/GETTING_STARTED.md
View file @
e0d892c7
...
...
@@ -374,7 +374,7 @@ from .coco import CocoDataset
from .registry import DATASETS
@DATASETS.register_module
@DATASETS.register_module
()
class MyDataset(CocoDataset):
CLASSES = ('a', 'b', 'c', 'd', 'e')
...
...
@@ -444,7 +444,7 @@ from .registry import OPTIMIZERS
from torch.optim import Optimizer
@OPTIMIZERS.register_module
@OPTIMIZERS.register_module
()
class MyOptimizer(Optimizer):
```
...
...
@@ -476,7 +476,7 @@ import torch.nn as nn
from ..registry import BACKBONES
@BACKBONES.register_module
@BACKBONES.register_module
()
class MobileNet(nn.Module):
def __init__(self, arg1, arg2):
...
...
mmdet3d/apis/__init__.py
View file @
e0d892c7
from
.train
import
train_detector
from
.train
import
batch_processor
,
train_detector
__all__
=
[
'train_detector'
,
]
__all__
=
[
'batch_processor'
,
'train_detector'
]
mmdet3d/apis/train.py
View file @
e0d892c7
import
torch
from
mmcv.parallel
import
MMDataParallel
,
MMDistributedDataParallel
from
mmcv.runner
import
DistSamplerSeedHook
,
Runner
from
mmdet3d.utils
import
get_root_logger
from
mmdet.apis.train
import
parse_losses
from
mmdet.core
import
(
DistEvalHook
,
DistOptimizerHook
,
EvalHook
,
Fp16OptimizerHook
,
build_optimizer
)
from
mmdet.datasets
import
build_dataloader
,
build_dataset
def
batch_processor
(
model
,
data
,
train_mode
):
...
...
@@ -27,3 +35,88 @@ def batch_processor(model, data, train_mode):
outputs
=
dict
(
loss
=
loss
,
log_vars
=
log_vars
,
num_samples
=
num_samples
)
return
outputs
def train_detector(model,
                   dataset,
                   cfg,
                   distributed=False,
                   validate=False,
                   timestamp=None,
                   meta=None):
    """Launch detector training with the mmcv ``Runner``.

    Args:
        model (nn.Module): The detector to train.
        dataset (Dataset | list[Dataset]): Training dataset(s). A single
            dataset is wrapped into a list; one dataloader is built per
            entry to match ``cfg.workflow``.
        cfg (Config): Full training config (data, optimizer, hooks, ...).
        distributed (bool): Whether to wrap the model in
            ``MMDistributedDataParallel`` instead of ``MMDataParallel``.
        validate (bool): Whether to register an eval hook that runs on
            ``cfg.data.val`` during training.
        timestamp (str | None): Timestamp used to make the ``.log`` and
            ``.log.json`` filenames match.
        meta (dict | None): Extra meta info forwarded to the runner.
    """
    logger = get_root_logger(cfg.log_level)

    # prepare data loaders
    dataset = dataset if isinstance(dataset, (list, tuple)) else [dataset]
    data_loaders = [
        build_dataloader(
            ds,
            cfg.data.samples_per_gpu,
            cfg.data.workers_per_gpu,
            # cfg.gpus will be ignored if distributed
            len(cfg.gpu_ids),
            dist=distributed,
            seed=cfg.seed) for ds in dataset
    ]

    # put model on gpus
    if distributed:
        find_unused_parameters = cfg.get('find_unused_parameters', False)
        # Sets the `find_unused_parameters` parameter in
        # torch.nn.parallel.DistributedDataParallel
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False,
            find_unused_parameters=find_unused_parameters)
    else:
        model = MMDataParallel(
            model.cuda(cfg.gpu_ids[0]), device_ids=cfg.gpu_ids)

    # build runner
    optimizer = build_optimizer(model, cfg.optimizer)
    runner = Runner(
        model,
        batch_processor,
        optimizer,
        cfg.work_dir,
        logger=logger,
        meta=meta)
    # an ugly workaround to make the .log and .log.json filenames the same
    runner.timestamp = timestamp

    # fp16 setting: an explicit `fp16` config takes precedence over the
    # plain (dist) optimizer hook built from cfg.optimizer_config.
    fp16_cfg = cfg.get('fp16', None)
    if fp16_cfg is not None:
        optimizer_config = Fp16OptimizerHook(
            **cfg.optimizer_config, **fp16_cfg, distributed=distributed)
    elif distributed and 'type' not in cfg.optimizer_config:
        optimizer_config = DistOptimizerHook(**cfg.optimizer_config)
    else:
        optimizer_config = cfg.optimizer_config

    # register hooks
    runner.register_training_hooks(cfg.lr_config, optimizer_config,
                                   cfg.checkpoint_config, cfg.log_config,
                                   cfg.get('momentum_config', None))
    if distributed:
        # re-seed the distributed sampler each epoch so shuffling differs
        runner.register_hook(DistSamplerSeedHook())

    # register eval hooks
    if validate:
        val_dataset = build_dataset(cfg.data.val, dict(test_mode=True))
        val_dataloader = build_dataloader(
            val_dataset,
            samples_per_gpu=1,
            workers_per_gpu=cfg.data.workers_per_gpu,
            dist=distributed,
            shuffle=False)
        eval_cfg = cfg.get('evaluation', {})
        eval_hook = DistEvalHook if distributed else EvalHook
        runner.register_hook(eval_hook(val_dataloader, **eval_cfg))

    # resuming a run takes precedence over loading initial weights
    if cfg.resume_from:
        runner.resume(cfg.resume_from)
    elif cfg.load_from:
        runner.load_checkpoint(cfg.load_from)
    runner.run(data_loaders, cfg.workflow, cfg.total_epochs)
mmdet3d/core/anchor/__init__.py
View file @
e0d892c7
from
.anchor_3d_generator
import
(
AlignedAnchorGeneratorRange
,
AnchorGeneratorRange
)
from
mmdet.core.anchor
import
build_anchor_generator
from
.anchor_3d_generator
import
(
AlignedAnchor3DRangeGenerator
,
Anchor3DRangeGenerator
)
__all__
=
[
'AlignedAnchorGenerator
Range
'
,
'AnchorGenerator
Range
'
,
'AlignedAnchor
3DRange
Generator'
,
'Anchor
3DRange
Generator'
,
'build_anchor_generator'
]
def build_anchor_generator(cfg, **kwargs):
    """Build a 3D anchor generator from a config dict.

    Args:
        cfg (dict): Config whose ``type`` key names a class defined in
            ``anchor_3d_generator``; the remaining keys are passed to the
            class constructor.
        **kwargs: Default arguments merged into the constructor call.

    Returns:
        object: The instantiated anchor generator.

    Raises:
        TypeError: If ``cfg`` is not a dict.
    """
    from . import anchor_3d_generator
    import mmcv
    if isinstance(cfg, dict):
        return mmcv.runner.obj_from_dict(
            cfg, anchor_3d_generator, default_args=kwargs)
    else:
        # The previous message said "sampler", a copy-paste leftover that
        # was misleading for a function that builds anchor generators.
        raise TypeError(
            'Invalid type {} for building an anchor generator'.format(
                type(cfg)))
mmdet3d/core/anchor/anchor_3d_generator.py
View file @
e0d892c7
import
mmcv
import
torch
from
mmdet.core.anchor
import
ANCHOR_GENERATORS
class
AnchorGeneratorRange
(
object
):
@
ANCHOR_GENERATORS
.
register_module
()
class
Anchor3DRangeGenerator
(
object
):
"""3D Anchor Generator by range
This anchor generator generates anchors by the given range in different
feature levels.
Due to the convention in 3D detection, different anchor sizes are related to
different ranges for different categories. However, we find this setting
does not affect the performance much in some datasets, e.g., nuScenes.
Args:
ranges (list[list[float]]): Ranges of different anchors.
The ranges are the same across different feature levels. But may
vary for different anchor sizes if size_per_range is True.
sizes (list[list[float]]): 3D sizes of anchors.
strides (list[int]): Strides of anchors in different feature levels.
rotations (list(float)): Rotations of anchors in a feature grid.
custom_values (tuple(float)): Customized values of that anchor. For
example, in nuScenes the anchors have velocities.
reshape_out (bool): Whether to reshape the output into (Nx4)
size_per_range: Whether to use separate ranges for different sizes.
If size_per_range is True, the ranges should have the same length
as the sizes, if not, it will be duplicated.
"""
def __init__(self,
             ranges,
             sizes=[[1.6, 3.9, 1.56]],
             strides=[2],
             rotations=[0, 1.5707963],
             custom_values=(),
             reshape_out=True,
             size_per_range=True):
    """Initialize the range-based 3D anchor generator.

    Args:
        ranges (list[list[float]]): Ranges of different anchors; the same
            across feature levels, but may vary per anchor size when
            ``size_per_range`` is True.
        sizes (list[list[float]]): 3D sizes of anchors.
        strides (list[int]): Strides of anchors in different feature
            levels.
        rotations (list[float]): Rotations of anchors in a feature grid.
        custom_values (tuple[float]): Customized values appended to each
            anchor, e.g. velocities in nuScenes.
        reshape_out (bool): Whether to flatten the output anchors.
        size_per_range (bool): Whether to use a separate range per size.
    """
    # NOTE(review): the list defaults are shared across calls; they are
    # only rebound (never mutated) here, but confirm other methods do not
    # mutate ``self.sizes`` / ``self.rotations`` in place.
    assert mmcv.is_list_of(ranges, list)
    if size_per_range:
        # Duplicate a single range so len(ranges) == len(sizes) holds.
        if len(sizes) != len(ranges):
            assert len(ranges) == 1
            ranges = ranges * len(sizes)
        assert len(ranges) == len(sizes)
    else:
        # One shared range for all sizes.
        assert len(ranges) == 1
    assert mmcv.is_list_of(sizes, list)
    assert isinstance(strides, list)

    self.sizes = sizes
    self.strides = strides  # one stride per feature level
    self.ranges = ranges
    self.rotations = rotations
    self.custom_values = custom_values
    self.cached_anchors = None
    self.reshape_out = reshape_out
    self.size_per_range = size_per_range
def __repr__(self):
    """Return a multi-line summary of the generator configuration."""
    s = self.__class__.__name__ + '('
    s += f'anchor_range={self.ranges},\n'
    s += f'strides={self.strides},\n'
    s += f'sizes={self.sizes},\n'
    s += f'rotations={self.rotations},\n'
    s += f'reshape_out={self.reshape_out},\n'
    s += f'size_per_range={self.size_per_range})'
    return s
@
property
...
...
@@ -34,40 +73,68 @@ class AnchorGeneratorRange(object):
num_size
=
torch
.
tensor
(
self
.
sizes
).
reshape
(
-
1
,
3
).
size
(
0
)
return
num_rot
*
num_size
def
grid_anchors
(
self
,
feature_map_size
,
device
=
'cuda'
):
@property
def num_levels(self):
    # The number of feature levels equals the number of configured strides.
    return len(self.strides)
def grid_anchors(self, featmap_sizes, device='cuda'):
    """Generate grid anchors in multiple feature levels.

    Args:
        featmap_sizes (list[tuple]): List of feature map sizes in
            multiple feature levels.
        device (str): Device where the anchors will be put on.

    Returns:
        list[torch.Tensor]: Anchors in multiple feature levels.
            When ``self.reshape_out`` is True, each tensor is reshaped to
            [N, anchor_dim], where N = width * height * num_base_anchors;
            width and height are the sizes of the corresponding feature
            level, and num_base_anchors is the number of anchors for that
            level.
    """
    assert self.num_levels == len(featmap_sizes)
    multi_level_anchors = []
    for i in range(self.num_levels):
        anchors = self.single_level_grid_anchors(
            featmap_sizes[i], self.strides[i], device=device)
        if self.reshape_out:
            # flatten the grid dims so each row is one anchor
            anchors = anchors.reshape(-1, anchors.size(-1))
        multi_level_anchors.append(anchors)
    return multi_level_anchors
def single_level_grid_anchors(self, featmap_size, stride, device='cuda'):
    """Generate grid anchors of a single feature level.

    Args:
        featmap_size (tuple): Size of the feature map of this level.
        stride (int): Anchor stride of this level.
        device (str): Device where the anchors will be put on.

    Returns:
        torch.Tensor: Anchors of this level. When ``size_per_range`` is
            True, the per-(range, size) anchor sets are concatenated along
            dim -3.
    """
    # We reimplement the anchor generator using torch in cuda
    # torch: 0.6975 s for 1000 times
    # numpy: 4.3345 s for 1000 times
    # which is ~5 times faster than the numpy implementation
    if not self.size_per_range:
        # A single shared range covers all sizes at once.
        return self.anchors_single_range(
            featmap_size,
            self.ranges[0],
            stride,
            self.sizes,
            self.rotations,
            device=device)

    # One anchor set per (range, size) pair, concatenated afterwards.
    mr_anchors = []
    for anchor_range, anchor_size in zip(self.ranges, self.sizes):
        mr_anchors.append(
            self.anchors_single_range(
                featmap_size,
                anchor_range,
                stride,
                anchor_size,
                self.rotations,
                device=device))
    mr_anchors = torch.cat(mr_anchors, dim=-3)
    return mr_anchors
def
anchors_single_range
(
self
,
feature_size
,
anchor_range
,
sizes
=
((
1.6
,
3.9
,
1.56
),
),
rotations
=
(
0
,
3.1415927
/
2
),
stride
=
1
,
sizes
=
[[
1.6
,
3.9
,
1.56
]],
rotations
=
[
0
,
1.5707963
],
device
=
'cuda'
):
"""Generate anchors in a single range
Args:
...
...
@@ -106,7 +173,6 @@ class AnchorGeneratorRange(object):
ret
=
torch
.
cat
(
rets
,
dim
=-
1
).
permute
([
2
,
1
,
0
,
3
,
4
,
5
])
# [1, 200, 176, N, 2, 7] for kitti after permute
# ret = ret.reshape(-1, 7)
if
len
(
self
.
custom_values
)
>
0
:
custom_ndim
=
len
(
self
.
custom_values
)
...
...
@@ -117,17 +183,42 @@ class AnchorGeneratorRange(object):
return
ret
class
AlignedAnchorGeneratorRange
(
AnchorGeneratorRange
):
@
ANCHOR_GENERATORS
.
register_module
()
class
AlignedAnchor3DRangeGenerator
(
Anchor3DRangeGenerator
):
"""Aligned 3D Anchor Generator by range
This anchor generator uses a different manner to generate the positions
of anchors' centers from `Anchor3DRangeGenerator`.
Note:
The `align` means that the anchor's center is aligned with the voxel grid,
which is also the feature grid. The previous implementation of
`Anchor3DRangeGenerator` does not generate the anchors' centers according
to the voxel grid. Rather, it generates the centers by uniformly
distributing the anchors inside the minimum and maximum anchor ranges
according to the feature map sizes.
However, this makes the anchor centers not match the feature grid.
The AlignedAnchor3DRangeGenerator adds 1 to the feature map sizes
to obtain the corners of the voxel grid. It then either shifts the
coordinates to the centers of the voxel grid, or uses the left-up corners,
to distribute the anchors.
Args:
align_corner (bool): Whether to align with the corner of the voxel
grid. By default it is False and the anchor's center will be
the same as the corresponding voxel's center, which is also the
center of the corresponding feature grid.
"""
def __init__(self, align_corner=False, **kwargs):
    """Initialize the aligned range-based 3D anchor generator.

    Args:
        align_corner (bool): Whether to align the anchor with the corner
            of the voxel grid instead of the voxel center.
        **kwargs: Forwarded to ``Anchor3DRangeGenerator``.
    """
    super(AlignedAnchor3DRangeGenerator, self).__init__(**kwargs)
    self.align_corner = align_corner
def
anchors_single_range
(
self
,
feature_size
,
anchor_range
,
sizes
=
((
1.6
,
3.9
,
1.56
),
),
rotations
=
(
0
,
3.1415927
/
2
),
stride
,
sizes
=
[[
1.6
,
3.9
,
1.56
]],
rotations
=
[
0
,
1.5707963
],
device
=
'cuda'
):
"""Generate anchors in a single range
Args:
...
...
@@ -155,11 +246,11 @@ class AlignedAnchorGeneratorRange(AnchorGeneratorRange):
anchor_range
[
3
],
feature_size
[
2
]
+
1
,
device
=
device
)
sizes
=
torch
.
tensor
(
sizes
,
device
=
device
).
reshape
(
-
1
,
3
)
*
self
.
stride
sizes
=
torch
.
tensor
(
sizes
,
device
=
device
).
reshape
(
-
1
,
3
)
*
stride
rotations
=
torch
.
tensor
(
rotations
,
device
=
device
)
# shift the anchor center
if
self
.
shift_cent
er
:
if
not
self
.
align_corn
er
:
z_shift
=
(
z_centers
[
1
]
-
z_centers
[
0
])
/
2
y_shift
=
(
y_centers
[
1
]
-
y_centers
[
0
])
/
2
x_shift
=
(
x_centers
[
1
]
-
x_centers
[
0
])
/
2
...
...
@@ -187,7 +278,6 @@ class AlignedAnchorGeneratorRange(AnchorGeneratorRange):
ret
=
torch
.
cat
(
rets
,
dim
=-
1
).
permute
([
2
,
1
,
0
,
3
,
4
,
5
])
# [1, 200, 176, N, 2, 7] for kitti after permute
# ret = ret.reshape(-1, 7)
if
len
(
self
.
custom_values
)
>
0
:
custom_ndim
=
len
(
self
.
custom_values
)
...
...
mmdet3d/core/bbox/__init__.py
View file @
e0d892c7
from
.
import
box_torch_ops
from
.assigners
import
AssignResult
,
BaseAssigner
,
MaxIoUAssigner
from
.coders
import
Residual3D
BoxCoder
from
.coders
import
DeltaXYZWLHRB
BoxCoder
# from .bbox_target import bbox_target
from
.iou_calculators
import
(
BboxOverlaps3D
,
BboxOverlapsNearest3D
,
bbox_overlaps_3d
,
bbox_overlaps_nearest_3d
)
...
...
@@ -18,7 +18,7 @@ __all__ = [
'PseudoSampler'
,
'RandomSampler'
,
'InstanceBalancedPosSampler'
,
'IoUBalancedNegSampler'
,
'CombinedSampler'
,
'SamplingResult'
,
'build_assigner'
,
'build_sampler'
,
'assign_and_sample'
,
'box_torch_ops'
,
'build_bbox_coder'
,
'
Residual3D
BoxCoder'
,
'boxes3d_to_bev_torch_lidar'
,
'build_bbox_coder'
,
'
DeltaXYZWLHRB
BoxCoder'
,
'boxes3d_to_bev_torch_lidar'
,
'BboxOverlapsNearest3D'
,
'BboxOverlaps3D'
,
'bbox_overlaps_nearest_3d'
,
'bbox_overlaps_3d'
]
mmdet3d/core/bbox/assign_sampling.py
View file @
e0d892c7
...
...
@@ -14,7 +14,7 @@ def build_assigner(cfg, **kwargs):
def
build_bbox_coder
(
cfg
,
**
kwargs
):
if
isinstance
(
cfg
,
coders
.
Residual3D
BoxCoder
):
if
isinstance
(
cfg
,
coders
.
DeltaXYZWLHRB
BoxCoder
):
return
cfg
elif
isinstance
(
cfg
,
dict
):
return
mmcv
.
runner
.
obj_from_dict
(
cfg
,
coders
,
default_args
=
kwargs
)
...
...
Prev
1
2
3
4
5
6
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment