Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
mmdetection3d
Commits
885a225b
Commit
885a225b
authored
May 15, 2020
by
wuyuefeng
Committed by
zhangwenwei
May 15, 2020
Browse files
Feature parta2 roi
parent
535344de
Changes
20
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1599 additions
and
93 deletions
+1599
-93
configs/kitti/hv_PartA2_secfpn_4x8_cyclic_80e_kitti-3d-3class.py
.../kitti/hv_PartA2_secfpn_4x8_cyclic_80e_kitti-3d-3class.py
+139
-37
configs/kitti/hv_PartA2_secfpn_4x8_cyclic_80e_kitti-3d-car.py
...igs/kitti/hv_PartA2_secfpn_4x8_cyclic_80e_kitti-3d-car.py
+278
-0
mmdet3d/core/bbox/__init__.py
mmdet3d/core/bbox/__init__.py
+2
-2
mmdet3d/core/bbox/box_np_ops.py
mmdet3d/core/bbox/box_np_ops.py
+66
-0
mmdet3d/core/bbox/box_torch_ops.py
mmdet3d/core/bbox/box_torch_ops.py
+67
-0
mmdet3d/core/bbox/iou_calculators/iou3d_calculator.py
mmdet3d/core/bbox/iou_calculators/iou3d_calculator.py
+5
-0
mmdet3d/core/bbox/transforms.py
mmdet3d/core/bbox/transforms.py
+22
-0
mmdet3d/core/evaluation/kitti_utils/eval.py
mmdet3d/core/evaluation/kitti_utils/eval.py
+0
-1
mmdet3d/models/anchor_heads/parta2_rpn_head.py
mmdet3d/models/anchor_heads/parta2_rpn_head.py
+8
-12
mmdet3d/models/anchor_heads/second_head.py
mmdet3d/models/anchor_heads/second_head.py
+3
-3
mmdet3d/models/detectors/parta2.py
mmdet3d/models/detectors/parta2.py
+38
-16
mmdet3d/models/roi_heads/__init__.py
mmdet3d/models/roi_heads/__init__.py
+8
-1
mmdet3d/models/roi_heads/base_3droi_head.py
mmdet3d/models/roi_heads/base_3droi_head.py
+80
-0
mmdet3d/models/roi_heads/bbox_heads/__init__.py
mmdet3d/models/roi_heads/bbox_heads/__init__.py
+2
-1
mmdet3d/models/roi_heads/bbox_heads/parta2_bbox_head.py
mmdet3d/models/roi_heads/bbox_heads/parta2_bbox_head.py
+593
-0
mmdet3d/models/roi_heads/mask_heads/pointwise_semantic_head.py
...3d/models/roi_heads/mask_heads/pointwise_semantic_head.py
+8
-9
mmdet3d/models/roi_heads/part_aggregation_roi_head.py
mmdet3d/models/roi_heads/part_aggregation_roi_head.py
+227
-0
tests/test_config.py
tests/test_config.py
+45
-1
tests/test_heads.py
tests/test_heads.py
+7
-7
tests/test_semantic_heads.py
tests/test_semantic_heads.py
+1
-3
No files found.
configs/kitti/hv_PartA2_secfpn_4x8_c
osine
_80e_kitti-3d-3class.py
→
configs/kitti/hv_PartA2_secfpn_4x8_c
yclic
_80e_kitti-3d-3class.py
View file @
885a225b
...
...
@@ -10,13 +10,16 @@ model = dict(
voxel_size
=
voxel_size
,
max_voxels
=
(
16000
,
40000
)
# (training, testing) max_coxels
),
voxel_encoder
=
dict
(
type
=
'VoxelFeatureExtractorV3'
),
voxel_encoder
=
dict
(
type
=
'VoxelFeatureExtractorV3'
,
num_input_features
=
4
,
num_filters
=
[
4
],
with_distance
=
False
),
middle_encoder
=
dict
(
type
=
'SparseUNet'
,
in_channels
=
4
,
output_shape
=
[
41
,
1600
,
1408
],
pre_act
=
False
,
),
pre_act
=
False
),
backbone
=
dict
(
type
=
'SECOND'
,
in_channels
=
256
,
...
...
@@ -56,8 +59,65 @@ model = dict(
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
1.0
/
9.0
,
loss_weight
=
2.0
),
loss_dir
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
False
,
loss_weight
=
0.2
),
))
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
False
,
loss_weight
=
0.2
)),
roi_head
=
dict
(
type
=
'PartAggregationROIHead'
,
num_classes
=
3
,
semantic_head
=
dict
(
type
=
'PointwiseSemanticHead'
,
in_channels
=
16
,
extra_width
=
0.2
,
seg_score_thr
=
0.3
,
num_classes
=
3
,
loss_seg
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
reduction
=
'sum'
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
1.0
),
loss_part
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
True
,
loss_weight
=
1.0
)),
seg_roi_extractor
=
dict
(
type
=
'Single3DRoIAwareExtractor'
,
roi_layer
=
dict
(
type
=
'RoIAwarePool3d'
,
out_size
=
14
,
max_pts_per_voxel
=
128
,
mode
=
'max'
)),
part_roi_extractor
=
dict
(
type
=
'Single3DRoIAwareExtractor'
,
roi_layer
=
dict
(
type
=
'RoIAwarePool3d'
,
out_size
=
14
,
max_pts_per_voxel
=
128
,
mode
=
'avg'
)),
bbox_head
=
dict
(
type
=
'PartA2BboxHead'
,
num_classes
=
3
,
seg_in_channels
=
16
,
part_in_channels
=
4
,
seg_conv_channels
=
[
64
,
64
],
part_conv_channels
=
[
64
,
64
],
merge_conv_channels
=
[
128
,
128
],
down_conv_channels
=
[
128
,
256
],
bbox_coder
=
dict
(
type
=
'DeltaXYZWLHRBBoxCoder'
),
shared_fc_channels
=
[
256
,
512
,
512
,
512
],
cls_channels
=
[
256
,
256
],
reg_channels
=
[
256
,
256
],
dropout_ratio
=
0.1
,
roi_feat_size
=
14
,
with_corner_loss
=
True
,
loss_bbox
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
1.0
/
9.0
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
loss_cls
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
True
,
reduction
=
'sum'
,
loss_weight
=
1.0
))))
# model training and testing settings
train_cfg
=
dict
(
rpn
=
dict
(
...
...
@@ -82,7 +142,7 @@ train_cfg = dict(
pos_iou_thr
=
0.6
,
neg_iou_thr
=
0.45
,
min_pos_iou
=
0.45
,
ignore_iof_thr
=-
1
)
,
ignore_iof_thr
=-
1
)
],
allowed_border
=
0
,
pos_weight
=-
1
,
...
...
@@ -93,24 +153,61 @@ train_cfg = dict(
nms_thr
=
0.8
,
score_thr
=
0
,
use_rotate_nms
=
False
),
)
rcnn
=
dict
(
assigner
=
[
dict
(
# for Pedestrian
type
=
'MaxIoUAssigner'
,
iou_calculator
=
dict
(
type
=
'BboxOverlaps3D'
,
coordinate
=
'lidar'
),
pos_iou_thr
=
0.55
,
neg_iou_thr
=
0.55
,
min_pos_iou
=
0.55
,
ignore_iof_thr
=-
1
),
dict
(
# for Cyclist
type
=
'MaxIoUAssigner'
,
iou_calculator
=
dict
(
type
=
'BboxOverlaps3D'
,
coordinate
=
'lidar'
),
pos_iou_thr
=
0.55
,
neg_iou_thr
=
0.55
,
min_pos_iou
=
0.55
,
ignore_iof_thr
=-
1
),
dict
(
# for Car
type
=
'MaxIoUAssigner'
,
iou_calculator
=
dict
(
type
=
'BboxOverlaps3D'
,
coordinate
=
'lidar'
),
pos_iou_thr
=
0.55
,
neg_iou_thr
=
0.55
,
min_pos_iou
=
0.55
,
ignore_iof_thr
=-
1
)
],
sampler
=
dict
(
type
=
'IoUNegPiecewiseSampler'
,
num
=
128
,
pos_fraction
=
0.55
,
neg_piece_fractions
=
[
0.8
,
0.2
],
neg_iou_piece_thrs
=
[
0.55
,
0.1
],
neg_pos_ub
=-
1
,
add_gt_as_proposals
=
False
,
return_iou
=
True
),
cls_pos_thr
=
0.75
,
cls_neg_thr
=
0.25
))
test_cfg
=
dict
(
rpn
=
dict
(
nms_pre
=
1024
,
max_per_img
=
100
,
use_rotate_nms
=
True
,
nms_across_levels
=
False
,
nms_post
=
100
,
nms_thr
=
0.7
,
score_thr
=
0
))
score_thr
=
0
,
use_rotate_nms
=
True
),
rcnn
=
dict
(
use_rotate_nms
=
True
,
use_raw_score
=
True
,
nms_thr
=
0.01
,
score_thr
=
0.1
))
# dataset settings
dataset_type
=
'KittiDataset'
data_root
=
'data/kitti/'
class_names
=
[
'Pedestrian'
,
'Cyclist'
,
'Car'
]
img_norm_cfg
=
dict
(
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
to_rgb
=
True
)
input_modality
=
dict
(
use_lidar
=
True
,
use_depth
=
False
,
use_lidar_intensity
=
True
,
use_camera
=
True
)
use_lidar
=
False
,
use_lidar_reduced
=
True
,
use_depth
=
False
,
use_lidar_intensity
=
True
,
use_camera
=
False
)
db_sampler
=
dict
(
root_path
=
data_root
,
info_path
=
data_root
+
'kitti_dbinfos_train.pkl'
,
...
...
@@ -119,28 +216,34 @@ db_sampler = dict(
object_rot_range
=
[
0.0
,
0.0
],
prepare
=
dict
(
filter_by_difficulty
=
[
-
1
],
filter_by_min_points
=
dict
(
Car
=
5
,
Pedestrian
=
10
,
Cyclist
=
10
)),
sample_groups
=
dict
(
Car
=
12
,
Pedestrian
=
6
,
Cyclist
=
6
),
)
filter_by_min_points
=
dict
(
Car
=
5
,
Pedestrian
=
10
,
Cyclist
=
10
,
)),
sample_groups
=
dict
(
Car
=
12
,
Pedestrian
=
6
,
Cyclist
=
6
,
))
train_pipeline
=
[
dict
(
type
=
'ObjectSample'
,
db_sampler
=
db_sampler
),
dict
(
type
=
'ObjectNoise'
,
num_try
=
100
,
loc_noise_std
=
[
0
,
0
,
0
],
loc_noise_std
=
[
1.
0
,
1.
0
,
0
.5
],
global_rot_range
=
[
0.0
,
0.0
],
rot_uniform_noise
=
[
-
0.
39269908
,
0.39269908
]),
rot_uniform_noise
=
[
-
0.
78539816
,
0.78539816
]),
dict
(
type
=
'RandomFlip3D'
,
flip_ratio
=
0.5
),
dict
(
type
=
'GlobalRotScale'
,
rot_uniform_noise
=
[
-
0.78539816
,
0.78539816
],
scaling_uniform_noise
=
[
0.95
,
1.05
],
trans_normal_noise
=
[
0.2
,
0.2
,
0.2
]),
scaling_uniform_noise
=
[
0.95
,
1.05
]),
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'ObjectRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'PointShuffle'
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes_3d'
,
'gt_labels_3d'
])
,
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes_3d'
,
'gt_labels_3d'
])
]
test_pipeline
=
[
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
...
...
@@ -148,7 +251,7 @@ test_pipeline = [
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
,
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes_3d'
])
,
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes_3d'
])
]
data
=
dict
(
...
...
@@ -183,21 +286,19 @@ data = dict(
class_names
=
class_names
,
with_label
=
True
))
# optimizer
lr
=
0.003
# max learning rate
optimizer
=
dict
(
type
=
'AdamW'
,
lr
=
lr
,
betas
=
(
0.95
,
0.99
),
# the momentum is change during training
weight_decay
=
0.001
)
lr
=
0.001
# max learning rate
optimizer
=
dict
(
type
=
'AdamW'
,
lr
=
lr
,
betas
=
(
0.95
,
0.99
),
weight_decay
=
0.01
)
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
10
,
norm_type
=
2
))
lr_config
=
dict
(
policy
=
'cosine'
,
warmup
=
'linear'
,
warmup_iters
=
1000
,
warmup_ratio
=
1.0
/
10
,
target_lr
=
1e-5
,
as_ratio
=
True
)
momentum_config
=
None
policy
=
'cyclic'
,
target_ratio
=
(
10
,
1e-4
),
cyclic_times
=
1
,
step_ratio_up
=
0.4
)
momentum_config
=
dict
(
policy
=
'cyclic'
,
target_ratio
=
(
0.85
/
0.95
,
1
),
cyclic_times
=
1
,
step_ratio_up
=
0.4
)
checkpoint_config
=
dict
(
interval
=
1
)
# yapf:disable
log_config
=
dict
(
...
...
@@ -209,8 +310,9 @@ log_config = dict(
# yapf:enable
# runtime settings
total_epochs
=
80
dist_params
=
dict
(
backend
=
'nccl'
,
port
=
29502
)
dist_params
=
dict
(
backend
=
'nccl'
)
log_level
=
'INFO'
find_unused_parameters
=
True
work_dir
=
'./work_dirs/parta2_secfpn_80e'
load_from
=
None
resume_from
=
None
...
...
configs/kitti/hv_PartA2_secfpn_4x8_cyclic_80e_kitti-3d-car.py
0 → 100644
View file @
885a225b
# model settings
voxel_size
=
[
0.05
,
0.05
,
0.1
]
point_cloud_range
=
[
0
,
-
40
,
-
3
,
70.4
,
40
,
1
]
# velodyne coordinates, x, y, z
model
=
dict
(
type
=
'PartA2'
,
voxel_layer
=
dict
(
max_num_points
=
5
,
# max_points_per_voxel
point_cloud_range
=
point_cloud_range
,
voxel_size
=
voxel_size
,
max_voxels
=
(
16000
,
40000
)
# (training, testing) max_coxels
),
voxel_encoder
=
dict
(
type
=
'VoxelFeatureExtractorV3'
,
num_input_features
=
4
,
num_filters
=
[
4
],
with_distance
=
False
),
middle_encoder
=
dict
(
type
=
'SparseUNet'
,
in_channels
=
4
,
output_shape
=
[
41
,
1600
,
1408
],
pre_act
=
False
),
backbone
=
dict
(
type
=
'SECOND'
,
in_channels
=
256
,
layer_nums
=
[
5
,
5
],
layer_strides
=
[
1
,
2
],
out_channels
=
[
128
,
256
]),
neck
=
dict
(
type
=
'SECONDFPN'
,
in_channels
=
[
128
,
256
],
upsample_strides
=
[
1
,
2
],
out_channels
=
[
256
,
256
]),
rpn_head
=
dict
(
type
=
'PartA2RPNHead'
,
class_name
=
[
'Car'
],
in_channels
=
512
,
feat_channels
=
512
,
use_direction_classifier
=
True
,
encode_bg_as_zeros
=
True
,
anchor_generator
=
dict
(
type
=
'Anchor3DRangeGenerator'
,
ranges
=
[[
0
,
-
40.0
,
-
1.78
,
70.4
,
40.0
,
-
1.78
]],
strides
=
[
2
],
sizes
=
[[
1.6
,
3.9
,
1.56
]],
rotations
=
[
0
,
1.57
],
reshape_out
=
False
),
diff_rad_by_sin
=
True
,
assigner_per_size
=
True
,
assign_per_class
=
True
,
bbox_coder
=
dict
(
type
=
'DeltaXYZWLHRBBoxCoder'
),
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
1.0
/
9.0
,
loss_weight
=
2.0
),
loss_dir
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
False
,
loss_weight
=
0.2
)),
roi_head
=
dict
(
type
=
'PartAggregationROIHead'
,
num_classes
=
1
,
semantic_head
=
dict
(
type
=
'PointwiseSemanticHead'
,
in_channels
=
16
,
extra_width
=
0.2
,
seg_score_thr
=
0.3
,
num_classes
=
1
,
loss_seg
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
reduction
=
'sum'
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
1.0
),
loss_part
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
True
,
loss_weight
=
1.0
)),
seg_roi_extractor
=
dict
(
type
=
'Single3DRoIAwareExtractor'
,
roi_layer
=
dict
(
type
=
'RoIAwarePool3d'
,
out_size
=
14
,
max_pts_per_voxel
=
128
,
mode
=
'max'
)),
part_roi_extractor
=
dict
(
type
=
'Single3DRoIAwareExtractor'
,
roi_layer
=
dict
(
type
=
'RoIAwarePool3d'
,
out_size
=
14
,
max_pts_per_voxel
=
128
,
mode
=
'avg'
)),
bbox_head
=
dict
(
type
=
'PartA2BboxHead'
,
num_classes
=
1
,
seg_in_channels
=
16
,
part_in_channels
=
4
,
seg_conv_channels
=
[
64
,
64
],
part_conv_channels
=
[
64
,
64
],
merge_conv_channels
=
[
128
,
128
],
down_conv_channels
=
[
128
,
256
],
bbox_coder
=
dict
(
type
=
'DeltaXYZWLHRBBoxCoder'
),
shared_fc_channels
=
[
256
,
512
,
512
,
512
],
cls_channels
=
[
256
,
256
],
reg_channels
=
[
256
,
256
],
dropout_ratio
=
0.1
,
roi_feat_size
=
14
,
with_corner_loss
=
True
,
loss_bbox
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
1.0
/
9.0
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
loss_cls
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
True
,
reduction
=
'sum'
,
loss_weight
=
1.0
))))
# model training and testing settings
train_cfg
=
dict
(
rpn
=
dict
(
assigner
=
dict
(
# for Car
type
=
'MaxIoUAssigner'
,
iou_calculator
=
dict
(
type
=
'BboxOverlapsNearest3D'
),
pos_iou_thr
=
0.6
,
neg_iou_thr
=
0.45
,
min_pos_iou
=
0.45
,
ignore_iof_thr
=-
1
),
allowed_border
=
0
,
pos_weight
=-
1
,
debug
=
False
),
rpn_proposal
=
dict
(
nms_pre
=
9000
,
nms_post
=
512
,
nms_thr
=
0.8
,
score_thr
=
0
,
use_rotate_nms
=
False
),
rcnn
=
dict
(
assigner
=
dict
(
# for Car
type
=
'MaxIoUAssigner'
,
iou_calculator
=
dict
(
type
=
'BboxOverlaps3D'
,
coordinate
=
'lidar'
),
pos_iou_thr
=
0.55
,
neg_iou_thr
=
0.55
,
min_pos_iou
=
0.55
,
ignore_iof_thr
=-
1
),
sampler
=
dict
(
type
=
'IoUNegPiecewiseSampler'
,
num
=
128
,
pos_fraction
=
0.55
,
neg_piece_fractions
=
[
0.8
,
0.2
],
neg_iou_piece_thrs
=
[
0.55
,
0.1
],
neg_pos_ub
=-
1
,
add_gt_as_proposals
=
False
,
return_iou
=
True
),
cls_pos_thr
=
0.75
,
cls_neg_thr
=
0.25
))
test_cfg
=
dict
(
rpn
=
dict
(
nms_pre
=
1024
,
nms_post
=
100
,
nms_thr
=
0.7
,
score_thr
=
0
,
use_rotate_nms
=
True
),
rcnn
=
dict
(
use_rotate_nms
=
True
,
use_raw_score
=
True
,
nms_thr
=
0.01
,
score_thr
=
0.1
))
# dataset settings
dataset_type
=
'KittiDataset'
data_root
=
'data/kitti/'
class_names
=
[
'Car'
]
input_modality
=
dict
(
use_lidar
=
False
,
use_lidar_reduced
=
True
,
use_depth
=
False
,
use_lidar_intensity
=
True
,
use_camera
=
False
)
db_sampler
=
dict
(
root_path
=
data_root
,
info_path
=
data_root
+
'kitti_dbinfos_train.pkl'
,
rate
=
1.0
,
use_road_plane
=
False
,
object_rot_range
=
[
0.0
,
0.0
],
prepare
=
dict
(
filter_by_difficulty
=
[
-
1
],
filter_by_min_points
=
dict
(
Car
=
5
),
),
sample_groups
=
dict
(
Car
=
15
))
train_pipeline
=
[
dict
(
type
=
'ObjectSample'
,
db_sampler
=
db_sampler
),
dict
(
type
=
'ObjectNoise'
,
num_try
=
100
,
loc_noise_std
=
[
1.0
,
1.0
,
0.5
],
global_rot_range
=
[
0.0
,
0.0
],
rot_uniform_noise
=
[
-
0.78539816
,
0.78539816
]),
dict
(
type
=
'RandomFlip3D'
,
flip_ratio
=
0.5
),
dict
(
type
=
'GlobalRotScale'
,
rot_uniform_noise
=
[
-
0.78539816
,
0.78539816
],
scaling_uniform_noise
=
[
0.95
,
1.05
]),
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'ObjectRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'PointShuffle'
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes_3d'
,
'gt_labels_3d'
])
]
test_pipeline
=
[
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
,
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes_3d'
])
]
data
=
dict
(
samples_per_gpu
=
2
,
workers_per_gpu
=
2
,
train
=
dict
(
type
=
dataset_type
,
root_path
=
data_root
,
ann_file
=
data_root
+
'kitti_infos_train.pkl'
,
split
=
'training'
,
training
=
True
,
pipeline
=
train_pipeline
,
modality
=
input_modality
,
class_names
=
class_names
,
with_label
=
True
),
val
=
dict
(
type
=
dataset_type
,
root_path
=
data_root
,
ann_file
=
data_root
+
'kitti_infos_val.pkl'
,
split
=
'training'
,
pipeline
=
test_pipeline
,
modality
=
input_modality
,
class_names
=
class_names
,
with_label
=
True
),
test
=
dict
(
type
=
dataset_type
,
root_path
=
data_root
,
ann_file
=
data_root
+
'kitti_infos_val.pkl'
,
split
=
'testing'
,
pipeline
=
test_pipeline
,
modality
=
input_modality
,
class_names
=
class_names
,
with_label
=
True
))
# optimizer
lr
=
0.001
# max learning rate
optimizer
=
dict
(
type
=
'AdamW'
,
lr
=
lr
,
betas
=
(
0.95
,
0.99
),
weight_decay
=
0.01
)
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
10
,
norm_type
=
2
))
lr_config
=
dict
(
policy
=
'cyclic'
,
target_ratio
=
(
10
,
1e-4
),
cyclic_times
=
1
,
step_ratio_up
=
0.4
)
momentum_config
=
dict
(
policy
=
'cyclic'
,
target_ratio
=
(
0.85
/
0.95
,
1
),
cyclic_times
=
1
,
step_ratio_up
=
0.4
)
checkpoint_config
=
dict
(
interval
=
1
)
# yapf:disable
log_config
=
dict
(
interval
=
50
,
hooks
=
[
dict
(
type
=
'TextLoggerHook'
),
dict
(
type
=
'TensorboardLoggerHook'
)
])
# yapf:enable
# runtime settings
total_epochs
=
80
dist_params
=
dict
(
backend
=
'nccl'
)
log_level
=
'INFO'
find_unused_parameters
=
True
work_dir
=
'./work_dirs/parta2_secfpn_80e'
load_from
=
None
resume_from
=
None
workflow
=
[(
'train'
,
1
)]
mmdet3d/core/bbox/__init__.py
View file @
885a225b
...
...
@@ -8,7 +8,7 @@ from .samplers import (BaseSampler, CombinedSampler,
InstanceBalancedPosSampler
,
IoUBalancedNegSampler
,
PseudoSampler
,
RandomSampler
,
SamplingResult
)
from
.structures
import
Box3DMode
,
CameraInstance3DBoxes
,
LiDARInstance3DBoxes
from
.transforms
import
boxes3d_to_bev_torch_lidar
from
.transforms
import
bbox3d2roi
,
boxes3d_to_bev_torch_lidar
from
.assign_sampling
import
(
# isort:skip, avoid recursive imports
build_bbox_coder
,
# temporally settings
...
...
@@ -22,5 +22,5 @@ __all__ = [
'build_bbox_coder'
,
'DeltaXYZWLHRBBoxCoder'
,
'boxes3d_to_bev_torch_lidar'
,
'BboxOverlapsNearest3D'
,
'BboxOverlaps3D'
,
'bbox_overlaps_nearest_3d'
,
'bbox_overlaps_3d'
,
'Box3DMode'
,
'LiDARInstance3DBoxes'
,
'CameraInstance3DBoxes'
'CameraInstance3DBoxes'
,
'bbox3d2roi'
]
mmdet3d/core/bbox/box_np_ops.py
View file @
885a225b
...
...
@@ -566,3 +566,69 @@ def points_in_convex_polygon_jit(points, polygon, clockwise=True):
break
ret
[
i
,
j
]
=
success
return
ret
def
boxes3d_to_corners3d_lidar
(
boxes3d
,
bottom_center
=
True
):
"""convert kitti center boxes to corners
7 -------- 4
/| /|
6 -------- 5 .
| | | |
. 3 -------- 0
|/ |/
2 -------- 1
Args:
boxes3d (numpy.array): (N, 7) [x, y, z, w, l, h, ry] in LiDAR coords,
see the definition of ry in KITTI dataset
bottom_center (bool): whether z is on the bottom center of object.
Returns:
numpy.array: box corners with shape (N, 8, 3)
"""
boxes_num
=
boxes3d
.
shape
[
0
]
w
,
l
,
h
=
boxes3d
[:,
3
],
boxes3d
[:,
4
],
boxes3d
[:,
5
]
x_corners
=
np
.
array
(
[
w
/
2.
,
-
w
/
2.
,
-
w
/
2.
,
w
/
2.
,
w
/
2.
,
-
w
/
2.
,
-
w
/
2.
,
w
/
2.
],
dtype
=
np
.
float32
).
T
y_corners
=
np
.
array
(
[
-
l
/
2.
,
-
l
/
2.
,
l
/
2.
,
l
/
2.
,
-
l
/
2.
,
-
l
/
2.
,
l
/
2.
,
l
/
2.
],
dtype
=
np
.
float32
).
T
if
bottom_center
:
z_corners
=
np
.
zeros
((
boxes_num
,
8
),
dtype
=
np
.
float32
)
z_corners
[:,
4
:
8
]
=
h
.
reshape
(
boxes_num
,
1
).
repeat
(
4
,
axis
=
1
)
# (N, 8)
else
:
z_corners
=
np
.
array
([
-
h
/
2.
,
-
h
/
2.
,
-
h
/
2.
,
-
h
/
2.
,
h
/
2.
,
h
/
2.
,
h
/
2.
,
h
/
2.
],
dtype
=
np
.
float32
).
T
ry
=
boxes3d
[:,
6
]
zeros
,
ones
=
np
.
zeros
(
ry
.
size
,
dtype
=
np
.
float32
),
np
.
ones
(
ry
.
size
,
dtype
=
np
.
float32
)
rot_list
=
np
.
array
([[
np
.
cos
(
ry
),
-
np
.
sin
(
ry
),
zeros
],
[
np
.
sin
(
ry
),
np
.
cos
(
ry
),
zeros
],
[
zeros
,
zeros
,
ones
]])
# (3, 3, N)
R_list
=
np
.
transpose
(
rot_list
,
(
2
,
0
,
1
))
# (N, 3, 3)
temp_corners
=
np
.
concatenate
((
x_corners
.
reshape
(
-
1
,
8
,
1
),
y_corners
.
reshape
(
-
1
,
8
,
1
),
z_corners
.
reshape
(
-
1
,
8
,
1
)),
axis
=
2
)
# (N, 8, 3)
rotated_corners
=
np
.
matmul
(
temp_corners
,
R_list
)
# (N, 8, 3)
x_corners
=
rotated_corners
[:,
:,
0
]
y_corners
=
rotated_corners
[:,
:,
1
]
z_corners
=
rotated_corners
[:,
:,
2
]
x_loc
,
y_loc
,
z_loc
=
boxes3d
[:,
0
],
boxes3d
[:,
1
],
boxes3d
[:,
2
]
x
=
x_loc
.
reshape
(
-
1
,
1
)
+
x_corners
.
reshape
(
-
1
,
8
)
y
=
y_loc
.
reshape
(
-
1
,
1
)
+
y_corners
.
reshape
(
-
1
,
8
)
z
=
z_loc
.
reshape
(
-
1
,
1
)
+
z_corners
.
reshape
(
-
1
,
8
)
corners
=
np
.
concatenate
(
(
x
.
reshape
(
-
1
,
8
,
1
),
y
.
reshape
(
-
1
,
8
,
1
),
z
.
reshape
(
-
1
,
8
,
1
)),
axis
=
2
)
return
corners
.
astype
(
np
.
float32
)
mmdet3d/core/bbox/box_torch_ops.py
View file @
885a225b
...
...
@@ -210,3 +210,70 @@ def enlarge_box3d_lidar(boxes3d, extra_width):
large_boxes3d
[:,
3
:
6
]
+=
extra_width
*
2
large_boxes3d
[:,
2
]
-=
extra_width
# bottom center z minus extra_width
return
large_boxes3d
def
boxes3d_to_corners3d_lidar_torch
(
boxes3d
,
bottom_center
=
True
):
"""convert kitti center boxes to corners
7 -------- 4
/| /|
6 -------- 5 .
| | | |
. 3 -------- 0
|/ |/
2 -------- 1
Args:
boxes3d (FloatTensor): (N, 7) [x, y, z, w, l, h, ry] in LiDAR coords,
see the definition of ry in KITTI dataset
bottom_center (bool): whether z is on the bottom center of object.
Returns:
FloatTensor: box corners with shape (N, 8, 3)
"""
boxes_num
=
boxes3d
.
shape
[
0
]
w
,
l
,
h
=
boxes3d
[:,
3
:
4
],
boxes3d
[:,
4
:
5
],
boxes3d
[:,
5
:
6
]
ry
=
boxes3d
[:,
6
:
7
]
zeros
=
boxes3d
.
new_zeros
(
boxes_num
,
1
)
ones
=
boxes3d
.
new_ones
(
boxes_num
,
1
)
x_corners
=
torch
.
cat
(
[
w
/
2.
,
-
w
/
2.
,
-
w
/
2.
,
w
/
2.
,
w
/
2.
,
-
w
/
2.
,
-
w
/
2.
,
w
/
2.
],
dim
=
1
)
# (N, 8)
y_corners
=
torch
.
cat
(
[
-
l
/
2.
,
-
l
/
2.
,
l
/
2.
,
l
/
2.
,
-
l
/
2.
,
-
l
/
2.
,
l
/
2.
,
l
/
2.
],
dim
=
1
)
# (N, 8)
if
bottom_center
:
z_corners
=
torch
.
cat
([
zeros
,
zeros
,
zeros
,
zeros
,
h
,
h
,
h
,
h
],
dim
=
1
)
# (N, 8)
else
:
z_corners
=
torch
.
cat
([
-
h
/
2.
,
-
h
/
2.
,
-
h
/
2.
,
-
h
/
2.
,
h
/
2.
,
h
/
2.
,
h
/
2.
,
h
/
2.
],
dim
=
1
)
# (N, 8)
temp_corners
=
torch
.
cat
(
(
x_corners
.
unsqueeze
(
dim
=
2
),
y_corners
.
unsqueeze
(
dim
=
2
),
z_corners
.
unsqueeze
(
dim
=
2
)),
dim
=
2
)
# (N, 8, 3)
cosa
,
sina
=
torch
.
cos
(
ry
),
torch
.
sin
(
ry
)
raw_1
=
torch
.
cat
([
cosa
,
-
sina
,
zeros
],
dim
=
1
)
# (N, 3)
raw_2
=
torch
.
cat
([
sina
,
cosa
,
zeros
],
dim
=
1
)
# (N, 3)
raw_3
=
torch
.
cat
([
zeros
,
zeros
,
ones
],
dim
=
1
)
# (N, 3)
R
=
torch
.
cat
((
raw_1
.
unsqueeze
(
dim
=
1
),
raw_2
.
unsqueeze
(
dim
=
1
),
raw_3
.
unsqueeze
(
dim
=
1
)),
dim
=
1
)
# (N, 3, 3)
rotated_corners
=
torch
.
matmul
(
temp_corners
,
R
)
# (N, 8, 3)
x_corners
=
rotated_corners
[:,
:,
0
]
y_corners
=
rotated_corners
[:,
:,
1
]
z_corners
=
rotated_corners
[:,
:,
2
]
x_loc
,
y_loc
,
z_loc
=
boxes3d
[:,
0
],
boxes3d
[:,
1
],
boxes3d
[:,
2
]
x
=
x_loc
.
view
(
-
1
,
1
)
+
x_corners
.
view
(
-
1
,
8
)
y
=
y_loc
.
view
(
-
1
,
1
)
+
y_corners
.
view
(
-
1
,
8
)
z
=
z_loc
.
view
(
-
1
,
1
)
+
z_corners
.
view
(
-
1
,
8
)
corners
=
torch
.
cat
((
x
.
view
(
-
1
,
8
,
1
),
y
.
view
(
-
1
,
8
,
1
),
z
.
view
(
-
1
,
8
,
1
)),
dim
=
2
)
return
corners
mmdet3d/core/bbox/iou_calculators/iou3d_calculator.py
View file @
885a225b
...
...
@@ -88,6 +88,11 @@ def bbox_overlaps_3d(bboxes1, bboxes2, mode='iou', coordinate='camera'):
assert
bboxes1
.
size
(
-
1
)
==
bboxes2
.
size
(
-
1
)
==
7
assert
coordinate
in
[
'camera'
,
'lidar'
]
rows
=
bboxes1
.
size
(
0
)
cols
=
bboxes2
.
size
(
0
)
if
rows
*
cols
==
0
:
return
bboxes1
.
new
(
rows
,
cols
)
if
coordinate
==
'camera'
:
return
boxes_iou3d_gpu_camera
(
bboxes1
,
bboxes2
,
mode
)
elif
coordinate
==
'lidar'
:
...
...
mmdet3d/core/bbox/transforms.py
View file @
885a225b
...
...
@@ -47,3 +47,25 @@ def boxes3d_to_bev_torch_lidar(boxes3d):
boxes_bev
[:,
2
],
boxes_bev
[:,
3
]
=
cu
+
half_w
,
cv
+
half_l
boxes_bev
[:,
4
]
=
boxes3d
[:,
6
]
return
boxes_bev
def
bbox3d2roi
(
bbox_list
):
"""Convert a list of bboxes to roi format.
Args:
bbox_list (list[Tensor]): a list of bboxes corresponding to a batch
of images.
Returns:
Tensor: shape (n, c), [batch_ind, x, y ...]
"""
rois_list
=
[]
for
img_id
,
bboxes
in
enumerate
(
bbox_list
):
if
bboxes
.
size
(
0
)
>
0
:
img_inds
=
bboxes
.
new_full
((
bboxes
.
size
(
0
),
1
),
img_id
)
rois
=
torch
.
cat
([
img_inds
,
bboxes
],
dim
=-
1
)
else
:
rois
=
torch
.
zeros_like
(
bboxes
)
rois_list
.
append
(
rois
)
rois
=
torch
.
cat
(
rois_list
,
0
)
return
rois
mmdet3d/core/evaluation/kitti_utils/eval.py
View file @
885a225b
...
...
@@ -731,7 +731,6 @@ def kitti_eval(gt_annos,
result
+=
'aos AP:{:.2f}, {:.2f}, {:.2f}
\n
'
.
format
(
*
mAPaos
[:,
0
])
# prepare results for logger
ret_dict
[
'Overall'
]
=
dict
()
for
idx
in
range
(
3
):
postfix
=
f
'
{
difficulty
[
idx
]
}
'
if
mAP3d
is
not
None
:
...
...
mmdet3d/models/anchor_heads/parta2_rpn_head.py
View file @
885a225b
...
...
@@ -231,19 +231,15 @@ class PartA2RPNHead(SECONDHead):
labels
=
labels
[
inds
]
scores
=
scores
[
inds
]
cls_scores
=
cls_scores
[
inds
]
dir_scores
=
dir_scores
[
inds
]
return
dict
(
box3d_lidar
=
bboxes
.
cpu
(),
scores
=
scores
.
cpu
(),
label_preds
=
labels
.
cpu
(),
cls_preds
=
cls_scores
.
cpu
(
)
# raw scores with shape [max_num, cls_num]
box3d_lidar
=
bboxes
,
scores
=
scores
,
label_preds
=
labels
,
cls_preds
=
cls_scores
# raw scores [max_num, cls_num]
)
else
:
return
dict
(
box3d_lidar
=
mlvl_bboxes
.
new_zeros
([
0
,
self
.
box_code_size
]).
cpu
(),
scores
=
mlvl_bboxes
.
new_zeros
([
0
]).
cpu
(),
label_preds
=
mlvl_bboxes
.
new_zeros
([
0
]).
cpu
(),
cls_preds
=
mlvl_bboxes
.
new_zeros
([
0
,
mlvl_cls_score
.
shape
[
-
1
]
]).
cpu
())
box3d_lidar
=
mlvl_bboxes
.
new_zeros
([
0
,
self
.
box_code_size
]),
scores
=
mlvl_bboxes
.
new_zeros
([
0
]),
label_preds
=
mlvl_bboxes
.
new_zeros
([
0
]),
cls_preds
=
mlvl_bboxes
.
new_zeros
([
0
,
mlvl_cls_score
.
shape
[
-
1
]]))
mmdet3d/models/anchor_heads/second_head.py
View file @
885a225b
...
...
@@ -258,9 +258,9 @@ class SECONDHead(nn.Module, AnchorTrainMixin):
dir_weights_list
,
num_total_samples
=
num_total_samples
)
return
dict
(
loss_cls
_3d
=
losses_cls
,
loss_bbox
_3d
=
losses_bbox
,
loss_dir
_3d
=
losses_dir
)
loss_
rpn_
cls
=
losses_cls
,
loss_
rpn_
bbox
=
losses_bbox
,
loss_
rpn_
dir
=
losses_dir
)
def
get_bboxes
(
self
,
cls_scores
,
...
...
mmdet3d/models/detectors/parta2.py
View file @
885a225b
...
...
@@ -34,11 +34,13 @@ class PartA2(TwoStageDetector):
self
.
middle_encoder
=
builder
.
build_middle_encoder
(
middle_encoder
)
def
extract_feat
(
self
,
points
,
img_meta
):
voxels
,
num_points
,
coors
=
self
.
voxelize
(
points
)
voxel_dict
=
dict
(
voxels
=
voxels
,
num_points
=
num_points
,
coors
=
coors
)
voxel_features
=
self
.
voxel_encoder
(
voxels
,
num_points
,
coors
)
batch_size
=
coors
[
-
1
,
0
].
item
()
+
1
feats_dict
=
self
.
middle_encoder
(
voxel_features
,
coors
,
batch_size
)
voxel_dict
=
self
.
voxelize
(
points
)
voxel_features
=
self
.
voxel_encoder
(
voxel_dict
[
'voxels'
],
voxel_dict
[
'num_points'
],
voxel_dict
[
'coors'
])
batch_size
=
voxel_dict
[
'coors'
][
-
1
,
0
].
item
()
+
1
feats_dict
=
self
.
middle_encoder
(
voxel_features
,
voxel_dict
[
'coors'
],
batch_size
)
x
=
self
.
backbone
(
feats_dict
[
'spatial_features'
])
if
self
.
with_neck
:
neck_feats
=
self
.
neck
(
x
)
...
...
@@ -47,20 +49,33 @@ class PartA2(TwoStageDetector):
@
torch
.
no_grad
()
def
voxelize
(
self
,
points
):
voxels
,
coors
,
num_points
=
[],
[],
[]
voxels
,
coors
,
num_points
,
voxel_centers
=
[],
[],
[],
[]
for
res
in
points
:
res_voxels
,
res_coors
,
res_num_points
=
self
.
voxel_layer
(
res
)
res_voxel_centers
=
(
res_coors
[:,
[
2
,
1
,
0
]]
+
0.5
)
*
res_voxels
.
new_tensor
(
self
.
voxel_layer
.
voxel_size
)
+
res_voxels
.
new_tensor
(
self
.
voxel_layer
.
point_cloud_range
[
0
:
3
])
voxels
.
append
(
res_voxels
)
coors
.
append
(
res_coors
)
num_points
.
append
(
res_num_points
)
voxel_centers
.
append
(
res_voxel_centers
)
voxels
=
torch
.
cat
(
voxels
,
dim
=
0
)
num_points
=
torch
.
cat
(
num_points
,
dim
=
0
)
voxel_centers
=
torch
.
cat
(
voxel_centers
,
dim
=
0
)
coors_batch
=
[]
for
i
,
coor
in
enumerate
(
coors
):
coor_pad
=
F
.
pad
(
coor
,
(
1
,
0
),
mode
=
'constant'
,
value
=
i
)
coors_batch
.
append
(
coor_pad
)
coors_batch
=
torch
.
cat
(
coors_batch
,
dim
=
0
)
return
voxels
,
num_points
,
coors_batch
voxel_dict
=
dict
(
voxels
=
voxels
,
num_points
=
num_points
,
coors
=
coors_batch
,
voxel_centers
=
voxel_centers
)
return
voxel_dict
def
forward_train
(
self
,
points
,
...
...
@@ -69,7 +84,6 @@ class PartA2(TwoStageDetector):
gt_labels_3d
,
gt_bboxes_ignore
=
None
,
proposals
=
None
):
# TODO: complete it
feats_dict
,
voxels_dict
=
self
.
extract_feat
(
points
,
img_meta
)
losses
=
dict
()
...
...
@@ -86,7 +100,13 @@ class PartA2(TwoStageDetector):
proposal_inputs
=
rpn_outs
+
(
img_meta
,
proposal_cfg
)
proposal_list
=
self
.
rpn_head
.
get_bboxes
(
*
proposal_inputs
)
else
:
proposal_list
=
proposals
# noqa: F841
proposal_list
=
proposals
roi_losses
=
self
.
roi_head
.
forward_train
(
feats_dict
,
voxels_dict
,
img_meta
,
proposal_list
,
gt_bboxes_3d
,
gt_labels_3d
)
losses
.
update
(
roi_losses
)
return
losses
...
...
@@ -102,16 +122,18 @@ class PartA2(TwoStageDetector):
def
simple_test
(
self
,
points
,
img_meta
,
gt_bboxes_3d
=
None
,
gt_bboxes_3d
,
proposals
=
None
,
rescale
=
False
):
feats_dict
,
voxels_dict
=
self
.
extract_feat
(
points
,
img_meta
)
# TODO: complete it
if
proposals
is
None
:
proposal_list
=
self
.
simple_test_rpn
(
feats_dict
[
'neck_feats'
],
img_meta
,
self
.
test_cfg
.
rpn
)
if
self
.
with_rpn
:
rpn_outs
=
self
.
rpn_head
(
feats_dict
[
'neck_feats'
])
proposal_cfg
=
self
.
test_cfg
.
rpn
bbox_inputs
=
rpn_outs
+
(
img_meta
,
proposal_cfg
)
proposal_list
=
self
.
rpn_head
.
get_bboxes
(
*
bbox_inputs
)
else
:
proposal_list
=
proposals
return
self
.
roi_head
.
simple_test
(
feats_dict
,
proposal_list
,
img_meta
,
rescale
=
rescale
)
return
self
.
roi_head
.
simple_test
(
feats_dict
,
voxels_dict
,
img_meta
,
proposal_list
)
mmdet3d/models/roi_heads/__init__.py
View file @
885a225b
from
.base_3droi_head
import
Base3DRoIHead
from
.bbox_heads
import
PartA2BboxHead
from
.mask_heads
import
PointwiseSemanticHead
from
.part_aggregation_roi_head
import
PartAggregationROIHead
from
.roi_extractors
import
Single3DRoIAwareExtractor
__all__
=
[
'PointwiseSemanticHead'
]
__all__
=
[
'Base3DRoIHead'
,
'PartAggregationROIHead'
,
'PointwiseSemanticHead'
,
'Single3DRoIAwareExtractor'
,
'PartA2BboxHead'
]
mmdet3d/models/roi_heads/base_3droi_head.py
0 → 100644
View file @
885a225b
from
abc
import
ABCMeta
,
abstractmethod
import
torch.nn
as
nn
class Base3DRoIHead(nn.Module, metaclass=ABCMeta):
    """Base class for 3D RoI heads.

    Subclasses implement the ``init_*`` hooks and ``forward_train``;
    ``__init__`` wires the optional bbox/mask heads and the
    assigner/sampler through those hooks.

    Fix: the abstract ``init_bbox_head`` / ``init_mask_head`` signatures
    previously took only ``self`` although ``__init__`` calls them with
    ``bbox_head`` and ``(mask_roi_extractor, mask_head)`` respectively.
    The declared signatures now match the call sites so subclass authors
    are not misled (runtime behavior is unchanged — Python does not
    enforce abstract signatures).

    Args:
        bbox_head (dict, optional): Config of the bbox head.
        mask_roi_extractor (dict, optional): Config of the mask RoI
            extractor, forwarded to ``init_mask_head``.
        mask_head (dict, optional): Config of the mask head.
        train_cfg (dict, optional): Training config.
        test_cfg (dict, optional): Testing config.
    """

    def __init__(self,
                 bbox_head=None,
                 mask_roi_extractor=None,
                 mask_head=None,
                 train_cfg=None,
                 test_cfg=None):
        super(Base3DRoIHead, self).__init__()
        self.train_cfg = train_cfg
        self.test_cfg = test_cfg

        # Heads are built lazily through the subclass hooks so each
        # subclass controls how its configs are turned into modules.
        if bbox_head is not None:
            self.init_bbox_head(bbox_head)

        if mask_head is not None:
            self.init_mask_head(mask_roi_extractor, mask_head)

        self.init_assigner_sampler()

    @property
    def with_bbox(self):
        """bool: whether a bbox head was built."""
        return hasattr(self, 'bbox_head') and self.bbox_head is not None

    @property
    def with_mask(self):
        """bool: whether a mask head was built."""
        return hasattr(self, 'mask_head') and self.mask_head is not None

    @abstractmethod
    def init_weights(self, pretrained):
        """Initialize weights, optionally from a pretrained checkpoint."""
        pass

    @abstractmethod
    def init_bbox_head(self, bbox_head):
        """Build ``self.bbox_head`` from its config."""
        pass

    @abstractmethod
    def init_mask_head(self, mask_roi_extractor, mask_head):
        """Build ``self.mask_head`` (and its RoI extractor) from config."""
        pass

    @abstractmethod
    def init_assigner_sampler(self):
        """Build the bbox assigner and sampler from ``self.train_cfg``."""
        pass

    @abstractmethod
    def forward_train(self,
                      x,
                      img_meta,
                      proposal_list,
                      gt_bboxes,
                      gt_labels,
                      gt_bboxes_ignore=None,
                      gt_masks=None,
                      **kwargs):
        """Forward function during training"""
        pass

    def simple_test(self,
                    x,
                    proposal_list,
                    img_meta,
                    proposals=None,
                    rescale=False,
                    **kwargs):
        """Test without augmentation."""
        pass

    def aug_test(self, x, proposal_list, img_metas, rescale=False, **kwargs):
        """Test with augmentations.

        If rescale is False, then returned bboxes and masks will fit the scale
        of imgs[0].
        """
        pass
mmdet3d/models/roi_heads/bbox_heads/__init__.py
View file @
885a225b
...
...
# mmdet3d/models/roi_heads/bbox_heads/__init__.py
#
# Fix: the diff-overlaid ``__all__`` contained duplicated entries
# ('Shared4Conv1FCBBoxHead', 'DoubleConvFCBBoxHead') and a missing comma
# (implicit string concatenation 'DoubleConvFCBBoxHead' 'Shared4Conv1FCBBoxHead'
# would have produced one bogus merged name). Reconstructed as a single,
# de-duplicated export list.
from mmdet.models.roi_heads.bbox_heads import (BBoxHead, ConvFCBBoxHead,
                                               DoubleConvFCBBoxHead,
                                               Shared2FCBBoxHead,
                                               Shared4Conv1FCBBoxHead)

from .parta2_bbox_head import PartA2BboxHead

__all__ = [
    'BBoxHead', 'ConvFCBBoxHead', 'Shared2FCBBoxHead',
    'Shared4Conv1FCBBoxHead', 'DoubleConvFCBBoxHead', 'PartA2BboxHead'
]
mmdet3d/models/roi_heads/bbox_heads/parta2_bbox_head.py
0 → 100644
View file @
885a225b
This diff is collapsed.
Click to expand it.
mmdet3d/models/roi_heads/mask_heads/pointwise_semantic_head.py
View file @
885a225b
...
...
@@ -126,22 +126,21 @@ class PointwiseSemanticHead(nn.Module):
part_targets
=
torch
.
cat
(
part_targets
,
dim
=
0
)
return
dict
(
seg_targets
=
seg_targets
,
part_targets
=
part_targets
)
def
loss
(
self
,
se
g_preds
,
part_preds
,
seg_targets
,
part
_targets
):
def
loss
(
self
,
se
mantic_results
,
semantic
_targets
):
"""Calculate point-wise segmentation and part prediction losses.
Args:
seg_preds (torch.Tensor): prediction of binary
segmentation with shape [voxel_num, 1].
part_preds (torch.Tensor): prediction of part
with shape [voxel_num, 3].
seg_targets (torch.Tensor): target of segmentation
with shape [voxel_num, 1].
part_targets (torch.Tensor): target of part with
shape [voxel_num, 3].
semantic_results (dict): Results from semantic head.
semantic_targets (dict): Targets of semantic results.
Returns:
dict: loss of segmentation and part prediction.
"""
seg_preds
=
semantic_results
[
'seg_preds'
]
part_preds
=
semantic_results
[
'part_preds'
]
seg_targets
=
semantic_targets
[
'seg_targets'
]
part_targets
=
semantic_targets
[
'part_targets'
]
pos_mask
=
(
seg_targets
>
-
1
)
&
(
seg_targets
<
self
.
num_classes
)
binary_seg_target
=
pos_mask
.
long
()
pos
=
pos_mask
.
float
()
...
...
mmdet3d/models/roi_heads/part_aggregation_roi_head.py
0 → 100644
View file @
885a225b
import
torch.nn.functional
as
F
from
mmdet3d.core
import
AssignResult
from
mmdet3d.core.bbox
import
bbox3d2roi
from
mmdet.core
import
build_assigner
,
build_sampler
from
mmdet.models
import
HEADS
from
..builder
import
build_head
,
build_roi_extractor
from
.base_3droi_head
import
Base3DRoIHead
@HEADS.register_module
class PartAggregationROIHead(Base3DRoIHead):
    """Part aggregation roi head for PartA2

    Second stage of PartA2: a point-wise semantic head predicts
    foreground/part features for every voxel, RoI-aware extractors pool
    those features inside each proposal, and a part-aware bbox head
    refines the proposals.
    """

    def __init__(self,
                 semantic_head,
                 num_classes=3,
                 seg_roi_extractor=None,
                 part_roi_extractor=None,
                 bbox_head=None,
                 train_cfg=None,
                 test_cfg=None):
        super(PartAggregationROIHead, self).__init__(
            bbox_head=bbox_head, train_cfg=train_cfg, test_cfg=test_cfg)
        self.num_classes = num_classes
        # The bbox branch consumes part features from the semantic head,
        # so the semantic head is mandatory for this RoI head.
        assert semantic_head is not None
        self.semantic_head = build_head(semantic_head)

        if seg_roi_extractor is not None:
            self.seg_roi_extractor = build_roi_extractor(seg_roi_extractor)
        if part_roi_extractor is not None:
            self.part_roi_extractor = build_roi_extractor(part_roi_extractor)

        self.init_assigner_sampler()

    def init_weights(self, pretrained):
        # Sub-modules handle their own weight initialization.
        pass

    def init_mask_head(self):
        # PartA2 has no mask head.
        pass

    def init_bbox_head(self, bbox_head):
        """Build the part-aware bbox head from config."""
        self.bbox_head = build_head(bbox_head)

    def init_assigner_sampler(self):
        """Build bbox assigner(s) and sampler from ``train_cfg``.

        A dict config yields a single assigner; a list yields one
        assigner per class (used by ``_assign_and_sample`` below).
        """
        self.bbox_assigner = None
        self.bbox_sampler = None
        if self.train_cfg:
            if isinstance(self.train_cfg.assigner, dict):
                self.bbox_assigner = build_assigner(self.train_cfg.assigner)
            elif isinstance(self.train_cfg.assigner, list):
                self.bbox_assigner = [
                    build_assigner(res) for res in self.train_cfg.assigner
                ]
            self.bbox_sampler = build_sampler(self.train_cfg.sampler)

    @property
    def with_semantic(self):
        """bool: whether the head has a point-wise semantic head."""
        return hasattr(self,
                       'semantic_head') and self.semantic_head is not None

    def forward_train(self, feats_dict, voxels_dict, img_meta, proposal_list,
                      gt_bboxes_3d, gt_labels_3d):
        """Training forward function of PartAggregationROIHead

        Args:
            feats_dict (dict): Contains features from the first stage.
            voxels_dict (dict): Contains information of voxels.
            img_metas (list[dict]): Meta info of each image.
            proposal_list (list[dict]): Proposal information from rpn.
            gt_bboxes_3d (list[FloatTensor]): GT bboxes of each batch.
            gt_labels_3d (list[LongTensor]): GT labels of each batch.

        Returns:
            dict: losses from each head.
        """
        losses = dict()
        if self.with_semantic:
            semantic_results = self._semantic_forward_train(
                feats_dict['seg_features'], voxels_dict, gt_bboxes_3d,
                gt_labels_3d)
            losses.update(semantic_results['loss_semantic'])

        sample_results = self._assign_and_sample(proposal_list, gt_bboxes_3d,
                                                 gt_labels_3d)
        if self.with_bbox:
            # NOTE(review): relies on `semantic_results` from the branch
            # above; `with_semantic` is always true because __init__
            # asserts a semantic head exists.
            bbox_results = self._bbox_forward_train(
                feats_dict['seg_features'], semantic_results['part_feats'],
                voxels_dict, sample_results)
            losses.update(bbox_results['loss_bbox'])

        return losses

    def simple_test(self, feats_dict, voxels_dict, img_meta, proposal_list,
                    **kwargs):
        """Simple testing forward function of PartAggregationROIHead

        Args:
            feats_dict (dict): Contains features from the first stage.
            voxels_dict (dict): Contains information of voxels.
            img_metas (list[dict]): Meta info of each image.
            proposal_list (list[dict]): Proposal information from rpn.

        Returns:
            list[dict]: Bbox results of each batch.
        """
        assert self.with_bbox, 'Bbox head must be implemented.'
        assert self.with_semantic

        semantic_results = self.semantic_head(feats_dict['seg_features'])

        # Flatten per-batch proposals into a single RoI tensor
        # (batch index is prepended by bbox3d2roi).
        rois = bbox3d2roi([res['box3d_lidar'] for res in proposal_list])
        label_preds = [res['label_preds'] for res in proposal_list]
        cls_preds = [res['cls_preds'] for res in proposal_list]
        bbox_results = self._bbox_forward(feats_dict['seg_features'],
                                          semantic_results['part_feats'],
                                          voxels_dict, rois)

        bbox_list = self.bbox_head.get_bboxes(
            rois,
            bbox_results['cls_score'],
            bbox_results['bbox_pred'],
            label_preds,
            cls_preds,
            img_meta,
            cfg=self.test_cfg)
        return bbox_list

    def _bbox_forward_train(self, seg_feats, part_feats, voxels_dict,
                            sampling_results):
        """Run the bbox head on sampled proposals and compute its loss."""
        rois = bbox3d2roi([res.bboxes for res in sampling_results])
        bbox_results = self._bbox_forward(seg_feats, part_feats, voxels_dict,
                                          rois)

        bbox_targets = self.bbox_head.get_targets(sampling_results,
                                                  self.train_cfg)
        loss_bbox = self.bbox_head.loss(bbox_results['cls_score'],
                                        bbox_results['bbox_pred'], rois,
                                        *bbox_targets)

        bbox_results.update(loss_bbox=loss_bbox)
        return bbox_results

    def _bbox_forward(self, seg_feats, part_feats, voxels_dict, rois):
        """Pool seg/part features inside each RoI and run the bbox head.

        ``voxels_dict['coors'][..., 0]`` is the per-voxel batch index used
        by the RoI-aware extractors to match voxels to RoIs.
        """
        pooled_seg_feats = self.seg_roi_extractor(seg_feats,
                                                  voxels_dict['voxel_centers'],
                                                  voxels_dict['coors'][..., 0],
                                                  rois)
        pooled_part_feats = self.part_roi_extractor(
            part_feats, voxels_dict['voxel_centers'],
            voxels_dict['coors'][..., 0], rois)
        cls_score, bbox_pred = self.bbox_head(pooled_seg_feats,
                                              pooled_part_feats)

        bbox_results = dict(
            cls_score=cls_score,
            bbox_pred=bbox_pred,
            pooled_seg_feats=pooled_seg_feats,
            pooled_part_feats=pooled_part_feats)
        return bbox_results

    def _assign_and_sample(self, proposal_list, gt_bboxes_3d, gt_labels_3d):
        """Assign GTs to proposals and sample them, per batch element.

        With a per-class assigner list, each class is assigned
        independently and the per-class results are merged back into one
        batch-wide ``AssignResult`` via an index-remapping trick (see the
        padding comments below).
        """
        sampling_results = []
        # bbox assign
        for batch_idx in range(len(proposal_list)):
            cur_proposal_list = proposal_list[batch_idx]
            cur_boxes = cur_proposal_list['box3d_lidar']
            cur_label_preds = cur_proposal_list['label_preds']
            cur_gt_bboxes = gt_bboxes_3d[batch_idx]
            cur_gt_labels = gt_labels_3d[batch_idx]
            batch_num_gts = 0
            batch_gt_indis = cur_gt_labels.new_full((cur_boxes.shape[0], ),
                                                    0)  # 0 is bg
            batch_max_overlaps = cur_boxes.new_zeros(cur_boxes.shape[0])
            batch_gt_labels = cur_gt_labels.new_full((cur_boxes.shape[0], ),
                                                     -1)  # -1 is bg
            if isinstance(self.bbox_assigner, list):
                # for multi classes
                for i, assigner in enumerate(self.bbox_assigner):
                    gt_per_cls = (cur_gt_labels == i)
                    pred_per_cls = (cur_label_preds == i)
                    cur_assign_res = assigner.assign(
                        cur_boxes[pred_per_cls],
                        cur_gt_bboxes[gt_per_cls],
                        gt_labels=cur_gt_labels[gt_per_cls])
                    # gather assign_results in different class into one result
                    batch_num_gts += cur_assign_res.num_gts
                    # gt inds (1-based)
                    gt_inds_arange_pad = gt_per_cls.nonzero().view(-1) + 1
                    # pad 0 for indice unassigned
                    gt_inds_arange_pad = F.pad(
                        gt_inds_arange_pad, (1, 0), mode='constant', value=0)
                    # pad -1 for indice ignore
                    gt_inds_arange_pad = F.pad(
                        gt_inds_arange_pad, (1, 0), mode='constant', value=-1)
                    # convert to 0~gt_num+2 for indices
                    gt_inds_arange_pad += 1
                    # now 0 is bg, >1 is fg in batch_gt_indis
                    batch_gt_indis[pred_per_cls] = gt_inds_arange_pad[
                        cur_assign_res.gt_inds + 1] - 1
                    batch_max_overlaps[
                        pred_per_cls] = cur_assign_res.max_overlaps
                    batch_gt_labels[pred_per_cls] = cur_assign_res.labels

                assign_result = AssignResult(batch_num_gts, batch_gt_indis,
                                             batch_max_overlaps,
                                             batch_gt_labels)
            else:  # for single class
                assign_result = self.bbox_assigner.assign(
                    cur_boxes, cur_gt_bboxes, gt_labels=cur_gt_labels)
            # sample boxes
            sampling_result = self.bbox_sampler.sample(assign_result,
                                                       cur_boxes,
                                                       cur_gt_bboxes,
                                                       cur_gt_labels)
            sampling_results.append(sampling_result)
        return sampling_results

    def _semantic_forward_train(self, x, voxels_dict, gt_bboxes_3d,
                                gt_labels_3d):
        """Run the semantic head, compute its targets and loss."""
        semantic_results = self.semantic_head(x)
        semantic_targets = self.semantic_head.get_targets(
            voxels_dict, gt_bboxes_3d, gt_labels_3d)
        loss_semantic = self.semantic_head.loss(semantic_results,
                                                semantic_targets)
        semantic_results.update(loss_semantic=loss_semantic)
        return semantic_results
tests/test_config.py
View file @
885a225b
...
...
@@ -61,6 +61,9 @@ def test_config_build_detector():
assert
detector
.
roi_head
.
with_mask
==
detector
.
with_mask
head_config
=
config_mod
.
model
[
'roi_head'
]
if
head_config
.
type
==
'PartAggregationROIHead'
:
check_parta2_roi_head
(
head_config
,
detector
.
roi_head
)
else
:
_check_roi_head
(
head_config
,
detector
.
roi_head
)
# else:
# # for single stage detector
...
...
@@ -319,3 +322,44 @@ def _check_bbox_head(bbox_cfg, bbox_head):
out_dim
=
(
4
if
bbox_cfg
.
reg_class_agnostic
else
4
*
bbox_cfg
.
num_classes
)
assert
bbox_head
.
fc_reg
.
out_features
==
out_dim
def check_parta2_roi_head(config, head):
    """Check that a PartAggregationROIHead instance matches its config."""
    assert config['type'] == type(head).__name__

    # both RoI-aware feature extractors share one checker
    _check_parta2_roi_extractor(config.seg_roi_extractor,
                                head.seg_roi_extractor)
    _check_parta2_roi_extractor(config.part_roi_extractor,
                                head.part_roi_extractor)

    # bbox head channel configuration
    _check_parta2_bbox_head(config.bbox_head, head.bbox_head)
def
_check_parta2_roi_extractor
(
config
,
roi_extractor
):
assert
config
[
'type'
]
==
roi_extractor
.
__class__
.
__name__
assert
(
config
.
roi_layer
.
out_size
==
roi_extractor
.
roi_layer
.
out_size
)
assert
(
config
.
roi_layer
.
max_pts_per_voxel
==
roi_extractor
.
roi_layer
.
max_pts_per_voxel
)
def
_check_parta2_bbox_head
(
bbox_cfg
,
bbox_head
):
import
torch.nn
as
nn
if
isinstance
(
bbox_cfg
,
list
):
for
single_bbox_cfg
,
single_bbox_head
in
zip
(
bbox_cfg
,
bbox_head
):
_check_bbox_head
(
single_bbox_cfg
,
single_bbox_head
)
elif
isinstance
(
bbox_head
,
nn
.
ModuleList
):
for
single_bbox_head
in
bbox_head
:
_check_bbox_head
(
bbox_cfg
,
single_bbox_head
)
else
:
assert
bbox_cfg
[
'type'
]
==
bbox_head
.
__class__
.
__name__
assert
bbox_cfg
.
seg_in_channels
==
bbox_head
.
seg_conv
[
0
][
0
].
in_channels
assert
bbox_cfg
.
part_in_channels
==
bbox_head
.
part_conv
[
0
][
0
].
in_channels
tests/test_heads.py
View file @
885a225b
...
...
@@ -103,18 +103,18 @@ def test_second_head_loss():
losses
=
self
.
loss
(
cls_score
,
bbox_pred
,
dir_cls_preds
,
gt_bboxes
,
gt_labels
,
input_metas
)
assert
losses
[
'loss_cls
_3d
'
][
0
]
>
0
assert
losses
[
'loss_bbox
_3d
'
][
0
]
>
0
assert
losses
[
'loss_dir
_3d
'
][
0
]
>
0
assert
losses
[
'loss_
rpn_
cls'
][
0
]
>
0
assert
losses
[
'loss_
rpn_
bbox'
][
0
]
>
0
assert
losses
[
'loss_
rpn_
dir'
][
0
]
>
0
# test empty ground truth case
gt_bboxes
=
list
(
torch
.
empty
((
2
,
0
,
7
)).
cuda
())
gt_labels
=
list
(
torch
.
empty
((
2
,
0
)).
cuda
())
empty_gt_losses
=
self
.
loss
(
cls_score
,
bbox_pred
,
dir_cls_preds
,
gt_bboxes
,
gt_labels
,
input_metas
)
assert
empty_gt_losses
[
'loss_cls
_3d
'
][
0
]
>
0
assert
empty_gt_losses
[
'loss_bbox
_3d
'
][
0
]
==
0
assert
empty_gt_losses
[
'loss_dir
_3d
'
][
0
]
==
0
assert
empty_gt_losses
[
'loss_
rpn_
cls'
][
0
]
>
0
assert
empty_gt_losses
[
'loss_
rpn_
bbox'
][
0
]
==
0
assert
empty_gt_losses
[
'loss_
rpn_
dir'
][
0
]
==
0
def
test_second_head_getboxes
():
...
...
@@ -147,7 +147,7 @@ def test_parta2_rpnhead_getboxes():
if
not
torch
.
cuda
.
is_available
():
pytest
.
skip
(
'test requires GPU and torch+cuda'
)
rpn_head_cfg
,
proposal_cfg
=
_get_rpn_head_cfg
(
'kitti/hv_PartA2_secfpn_4x8_c
osine
_80e_kitti-3d-3class.py'
)
'kitti/hv_PartA2_secfpn_4x8_c
yclic
_80e_kitti-3d-3class.py'
)
from
mmdet3d.models.builder
import
build_head
self
=
build_head
(
rpn_head_cfg
)
...
...
tests/test_semantic_heads.py
View file @
885a225b
...
...
@@ -62,9 +62,7 @@ def test_PointwiseSemanticHead():
[
voxel_features
.
shape
[
0
],
3
])
# test loss
loss_dict
=
self
.
loss
(
feats_dict
[
'seg_preds'
],
feats_dict
[
'part_preds'
],
target_dict
[
'seg_targets'
],
target_dict
[
'part_targets'
])
loss_dict
=
self
.
loss
(
feats_dict
,
target_dict
)
assert
loss_dict
[
'loss_seg'
]
>
0
assert
loss_dict
[
'loss_part'
]
==
0
# no points in gt_boxes
total_loss
=
loss_dict
[
'loss_seg'
]
+
loss_dict
[
'loss_part'
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment