Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
mmdetection3d
Commits
54595292
Commit
54595292
authored
May 15, 2020
by
zhangwenwei
Browse files
Merge branch 'feature_parta2_roi' into 'master'
Feature parta2 roi See merge request open-mmlab/mmdet.3d!31
parents
535344de
885a225b
Changes
20
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1599 additions
and
93 deletions
+1599
-93
configs/kitti/hv_PartA2_secfpn_4x8_cyclic_80e_kitti-3d-3class.py
.../kitti/hv_PartA2_secfpn_4x8_cyclic_80e_kitti-3d-3class.py
+139
-37
configs/kitti/hv_PartA2_secfpn_4x8_cyclic_80e_kitti-3d-car.py
...igs/kitti/hv_PartA2_secfpn_4x8_cyclic_80e_kitti-3d-car.py
+278
-0
mmdet3d/core/bbox/__init__.py
mmdet3d/core/bbox/__init__.py
+2
-2
mmdet3d/core/bbox/box_np_ops.py
mmdet3d/core/bbox/box_np_ops.py
+66
-0
mmdet3d/core/bbox/box_torch_ops.py
mmdet3d/core/bbox/box_torch_ops.py
+67
-0
mmdet3d/core/bbox/iou_calculators/iou3d_calculator.py
mmdet3d/core/bbox/iou_calculators/iou3d_calculator.py
+5
-0
mmdet3d/core/bbox/transforms.py
mmdet3d/core/bbox/transforms.py
+22
-0
mmdet3d/core/evaluation/kitti_utils/eval.py
mmdet3d/core/evaluation/kitti_utils/eval.py
+0
-1
mmdet3d/models/anchor_heads/parta2_rpn_head.py
mmdet3d/models/anchor_heads/parta2_rpn_head.py
+8
-12
mmdet3d/models/anchor_heads/second_head.py
mmdet3d/models/anchor_heads/second_head.py
+3
-3
mmdet3d/models/detectors/parta2.py
mmdet3d/models/detectors/parta2.py
+38
-16
mmdet3d/models/roi_heads/__init__.py
mmdet3d/models/roi_heads/__init__.py
+8
-1
mmdet3d/models/roi_heads/base_3droi_head.py
mmdet3d/models/roi_heads/base_3droi_head.py
+80
-0
mmdet3d/models/roi_heads/bbox_heads/__init__.py
mmdet3d/models/roi_heads/bbox_heads/__init__.py
+2
-1
mmdet3d/models/roi_heads/bbox_heads/parta2_bbox_head.py
mmdet3d/models/roi_heads/bbox_heads/parta2_bbox_head.py
+593
-0
mmdet3d/models/roi_heads/mask_heads/pointwise_semantic_head.py
...3d/models/roi_heads/mask_heads/pointwise_semantic_head.py
+8
-9
mmdet3d/models/roi_heads/part_aggregation_roi_head.py
mmdet3d/models/roi_heads/part_aggregation_roi_head.py
+227
-0
tests/test_config.py
tests/test_config.py
+45
-1
tests/test_heads.py
tests/test_heads.py
+7
-7
tests/test_semantic_heads.py
tests/test_semantic_heads.py
+1
-3
No files found.
configs/kitti/hv_PartA2_secfpn_4x8_c
osine
_80e_kitti-3d-3class.py
→
configs/kitti/hv_PartA2_secfpn_4x8_c
yclic
_80e_kitti-3d-3class.py
View file @
54595292
...
...
@@ -10,13 +10,16 @@ model = dict(
voxel_size
=
voxel_size
,
max_voxels
=
(
16000
,
40000
)
# (training, testing) max_coxels
),
voxel_encoder
=
dict
(
type
=
'VoxelFeatureExtractorV3'
),
voxel_encoder
=
dict
(
type
=
'VoxelFeatureExtractorV3'
,
num_input_features
=
4
,
num_filters
=
[
4
],
with_distance
=
False
),
middle_encoder
=
dict
(
type
=
'SparseUNet'
,
in_channels
=
4
,
output_shape
=
[
41
,
1600
,
1408
],
pre_act
=
False
,
),
pre_act
=
False
),
backbone
=
dict
(
type
=
'SECOND'
,
in_channels
=
256
,
...
...
@@ -56,8 +59,65 @@ model = dict(
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
1.0
/
9.0
,
loss_weight
=
2.0
),
loss_dir
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
False
,
loss_weight
=
0.2
),
))
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
False
,
loss_weight
=
0.2
)),
roi_head
=
dict
(
type
=
'PartAggregationROIHead'
,
num_classes
=
3
,
semantic_head
=
dict
(
type
=
'PointwiseSemanticHead'
,
in_channels
=
16
,
extra_width
=
0.2
,
seg_score_thr
=
0.3
,
num_classes
=
3
,
loss_seg
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
reduction
=
'sum'
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
1.0
),
loss_part
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
True
,
loss_weight
=
1.0
)),
seg_roi_extractor
=
dict
(
type
=
'Single3DRoIAwareExtractor'
,
roi_layer
=
dict
(
type
=
'RoIAwarePool3d'
,
out_size
=
14
,
max_pts_per_voxel
=
128
,
mode
=
'max'
)),
part_roi_extractor
=
dict
(
type
=
'Single3DRoIAwareExtractor'
,
roi_layer
=
dict
(
type
=
'RoIAwarePool3d'
,
out_size
=
14
,
max_pts_per_voxel
=
128
,
mode
=
'avg'
)),
bbox_head
=
dict
(
type
=
'PartA2BboxHead'
,
num_classes
=
3
,
seg_in_channels
=
16
,
part_in_channels
=
4
,
seg_conv_channels
=
[
64
,
64
],
part_conv_channels
=
[
64
,
64
],
merge_conv_channels
=
[
128
,
128
],
down_conv_channels
=
[
128
,
256
],
bbox_coder
=
dict
(
type
=
'DeltaXYZWLHRBBoxCoder'
),
shared_fc_channels
=
[
256
,
512
,
512
,
512
],
cls_channels
=
[
256
,
256
],
reg_channels
=
[
256
,
256
],
dropout_ratio
=
0.1
,
roi_feat_size
=
14
,
with_corner_loss
=
True
,
loss_bbox
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
1.0
/
9.0
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
loss_cls
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
True
,
reduction
=
'sum'
,
loss_weight
=
1.0
))))
# model training and testing settings
train_cfg
=
dict
(
rpn
=
dict
(
...
...
@@ -82,7 +142,7 @@ train_cfg = dict(
pos_iou_thr
=
0.6
,
neg_iou_thr
=
0.45
,
min_pos_iou
=
0.45
,
ignore_iof_thr
=-
1
)
,
ignore_iof_thr
=-
1
)
],
allowed_border
=
0
,
pos_weight
=-
1
,
...
...
@@ -93,24 +153,61 @@ train_cfg = dict(
nms_thr
=
0.8
,
score_thr
=
0
,
use_rotate_nms
=
False
),
)
rcnn
=
dict
(
assigner
=
[
dict
(
# for Pedestrian
type
=
'MaxIoUAssigner'
,
iou_calculator
=
dict
(
type
=
'BboxOverlaps3D'
,
coordinate
=
'lidar'
),
pos_iou_thr
=
0.55
,
neg_iou_thr
=
0.55
,
min_pos_iou
=
0.55
,
ignore_iof_thr
=-
1
),
dict
(
# for Cyclist
type
=
'MaxIoUAssigner'
,
iou_calculator
=
dict
(
type
=
'BboxOverlaps3D'
,
coordinate
=
'lidar'
),
pos_iou_thr
=
0.55
,
neg_iou_thr
=
0.55
,
min_pos_iou
=
0.55
,
ignore_iof_thr
=-
1
),
dict
(
# for Car
type
=
'MaxIoUAssigner'
,
iou_calculator
=
dict
(
type
=
'BboxOverlaps3D'
,
coordinate
=
'lidar'
),
pos_iou_thr
=
0.55
,
neg_iou_thr
=
0.55
,
min_pos_iou
=
0.55
,
ignore_iof_thr
=-
1
)
],
sampler
=
dict
(
type
=
'IoUNegPiecewiseSampler'
,
num
=
128
,
pos_fraction
=
0.55
,
neg_piece_fractions
=
[
0.8
,
0.2
],
neg_iou_piece_thrs
=
[
0.55
,
0.1
],
neg_pos_ub
=-
1
,
add_gt_as_proposals
=
False
,
return_iou
=
True
),
cls_pos_thr
=
0.75
,
cls_neg_thr
=
0.25
))
test_cfg
=
dict
(
rpn
=
dict
(
nms_pre
=
1024
,
max_per_img
=
100
,
use_rotate_nms
=
True
,
nms_across_levels
=
False
,
nms_post
=
100
,
nms_thr
=
0.7
,
score_thr
=
0
))
score_thr
=
0
,
use_rotate_nms
=
True
),
rcnn
=
dict
(
use_rotate_nms
=
True
,
use_raw_score
=
True
,
nms_thr
=
0.01
,
score_thr
=
0.1
))
# dataset settings
dataset_type
=
'KittiDataset'
data_root
=
'data/kitti/'
class_names
=
[
'Pedestrian'
,
'Cyclist'
,
'Car'
]
img_norm_cfg
=
dict
(
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
to_rgb
=
True
)
input_modality
=
dict
(
use_lidar
=
True
,
use_depth
=
False
,
use_lidar_intensity
=
True
,
use_camera
=
True
)
use_lidar
=
False
,
use_lidar_reduced
=
True
,
use_depth
=
False
,
use_lidar_intensity
=
True
,
use_camera
=
False
)
db_sampler
=
dict
(
root_path
=
data_root
,
info_path
=
data_root
+
'kitti_dbinfos_train.pkl'
,
...
...
@@ -119,28 +216,34 @@ db_sampler = dict(
object_rot_range
=
[
0.0
,
0.0
],
prepare
=
dict
(
filter_by_difficulty
=
[
-
1
],
filter_by_min_points
=
dict
(
Car
=
5
,
Pedestrian
=
10
,
Cyclist
=
10
)),
sample_groups
=
dict
(
Car
=
12
,
Pedestrian
=
6
,
Cyclist
=
6
),
)
filter_by_min_points
=
dict
(
Car
=
5
,
Pedestrian
=
10
,
Cyclist
=
10
,
)),
sample_groups
=
dict
(
Car
=
12
,
Pedestrian
=
6
,
Cyclist
=
6
,
))
train_pipeline
=
[
dict
(
type
=
'ObjectSample'
,
db_sampler
=
db_sampler
),
dict
(
type
=
'ObjectNoise'
,
num_try
=
100
,
loc_noise_std
=
[
0
,
0
,
0
],
loc_noise_std
=
[
1.
0
,
1.
0
,
0
.5
],
global_rot_range
=
[
0.0
,
0.0
],
rot_uniform_noise
=
[
-
0.
39269908
,
0.39269908
]),
rot_uniform_noise
=
[
-
0.
78539816
,
0.78539816
]),
dict
(
type
=
'RandomFlip3D'
,
flip_ratio
=
0.5
),
dict
(
type
=
'GlobalRotScale'
,
rot_uniform_noise
=
[
-
0.78539816
,
0.78539816
],
scaling_uniform_noise
=
[
0.95
,
1.05
],
trans_normal_noise
=
[
0.2
,
0.2
,
0.2
]),
scaling_uniform_noise
=
[
0.95
,
1.05
]),
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'ObjectRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'PointShuffle'
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes_3d'
,
'gt_labels_3d'
])
,
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes_3d'
,
'gt_labels_3d'
])
]
test_pipeline
=
[
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
...
...
@@ -148,7 +251,7 @@ test_pipeline = [
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
,
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes_3d'
])
,
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes_3d'
])
]
data
=
dict
(
...
...
@@ -183,21 +286,19 @@ data = dict(
class_names
=
class_names
,
with_label
=
True
))
# optimizer
lr
=
0.003
# max learning rate
optimizer
=
dict
(
type
=
'AdamW'
,
lr
=
lr
,
betas
=
(
0.95
,
0.99
),
# the momentum is change during training
weight_decay
=
0.001
)
lr
=
0.001
# max learning rate
optimizer
=
dict
(
type
=
'AdamW'
,
lr
=
lr
,
betas
=
(
0.95
,
0.99
),
weight_decay
=
0.01
)
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
10
,
norm_type
=
2
))
lr_config
=
dict
(
policy
=
'cosine'
,
warmup
=
'linear'
,
warmup_iters
=
1000
,
warmup_ratio
=
1.0
/
10
,
target_lr
=
1e-5
,
as_ratio
=
True
)
momentum_config
=
None
policy
=
'cyclic'
,
target_ratio
=
(
10
,
1e-4
),
cyclic_times
=
1
,
step_ratio_up
=
0.4
)
momentum_config
=
dict
(
policy
=
'cyclic'
,
target_ratio
=
(
0.85
/
0.95
,
1
),
cyclic_times
=
1
,
step_ratio_up
=
0.4
)
checkpoint_config
=
dict
(
interval
=
1
)
# yapf:disable
log_config
=
dict
(
...
...
@@ -209,8 +310,9 @@ log_config = dict(
# yapf:enable
# runtime settings
total_epochs
=
80
dist_params
=
dict
(
backend
=
'nccl'
,
port
=
29502
)
dist_params
=
dict
(
backend
=
'nccl'
)
log_level
=
'INFO'
find_unused_parameters
=
True
work_dir
=
'./work_dirs/parta2_secfpn_80e'
load_from
=
None
resume_from
=
None
...
...
configs/kitti/hv_PartA2_secfpn_4x8_cyclic_80e_kitti-3d-car.py
0 → 100644
View file @
54595292
# model settings
voxel_size
=
[
0.05
,
0.05
,
0.1
]
point_cloud_range
=
[
0
,
-
40
,
-
3
,
70.4
,
40
,
1
]
# velodyne coordinates, x, y, z
model
=
dict
(
type
=
'PartA2'
,
voxel_layer
=
dict
(
max_num_points
=
5
,
# max_points_per_voxel
point_cloud_range
=
point_cloud_range
,
voxel_size
=
voxel_size
,
max_voxels
=
(
16000
,
40000
)
# (training, testing) max_coxels
),
voxel_encoder
=
dict
(
type
=
'VoxelFeatureExtractorV3'
,
num_input_features
=
4
,
num_filters
=
[
4
],
with_distance
=
False
),
middle_encoder
=
dict
(
type
=
'SparseUNet'
,
in_channels
=
4
,
output_shape
=
[
41
,
1600
,
1408
],
pre_act
=
False
),
backbone
=
dict
(
type
=
'SECOND'
,
in_channels
=
256
,
layer_nums
=
[
5
,
5
],
layer_strides
=
[
1
,
2
],
out_channels
=
[
128
,
256
]),
neck
=
dict
(
type
=
'SECONDFPN'
,
in_channels
=
[
128
,
256
],
upsample_strides
=
[
1
,
2
],
out_channels
=
[
256
,
256
]),
rpn_head
=
dict
(
type
=
'PartA2RPNHead'
,
class_name
=
[
'Car'
],
in_channels
=
512
,
feat_channels
=
512
,
use_direction_classifier
=
True
,
encode_bg_as_zeros
=
True
,
anchor_generator
=
dict
(
type
=
'Anchor3DRangeGenerator'
,
ranges
=
[[
0
,
-
40.0
,
-
1.78
,
70.4
,
40.0
,
-
1.78
]],
strides
=
[
2
],
sizes
=
[[
1.6
,
3.9
,
1.56
]],
rotations
=
[
0
,
1.57
],
reshape_out
=
False
),
diff_rad_by_sin
=
True
,
assigner_per_size
=
True
,
assign_per_class
=
True
,
bbox_coder
=
dict
(
type
=
'DeltaXYZWLHRBBoxCoder'
),
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
1.0
/
9.0
,
loss_weight
=
2.0
),
loss_dir
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
False
,
loss_weight
=
0.2
)),
roi_head
=
dict
(
type
=
'PartAggregationROIHead'
,
num_classes
=
1
,
semantic_head
=
dict
(
type
=
'PointwiseSemanticHead'
,
in_channels
=
16
,
extra_width
=
0.2
,
seg_score_thr
=
0.3
,
num_classes
=
1
,
loss_seg
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
reduction
=
'sum'
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
1.0
),
loss_part
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
True
,
loss_weight
=
1.0
)),
seg_roi_extractor
=
dict
(
type
=
'Single3DRoIAwareExtractor'
,
roi_layer
=
dict
(
type
=
'RoIAwarePool3d'
,
out_size
=
14
,
max_pts_per_voxel
=
128
,
mode
=
'max'
)),
part_roi_extractor
=
dict
(
type
=
'Single3DRoIAwareExtractor'
,
roi_layer
=
dict
(
type
=
'RoIAwarePool3d'
,
out_size
=
14
,
max_pts_per_voxel
=
128
,
mode
=
'avg'
)),
bbox_head
=
dict
(
type
=
'PartA2BboxHead'
,
num_classes
=
1
,
seg_in_channels
=
16
,
part_in_channels
=
4
,
seg_conv_channels
=
[
64
,
64
],
part_conv_channels
=
[
64
,
64
],
merge_conv_channels
=
[
128
,
128
],
down_conv_channels
=
[
128
,
256
],
bbox_coder
=
dict
(
type
=
'DeltaXYZWLHRBBoxCoder'
),
shared_fc_channels
=
[
256
,
512
,
512
,
512
],
cls_channels
=
[
256
,
256
],
reg_channels
=
[
256
,
256
],
dropout_ratio
=
0.1
,
roi_feat_size
=
14
,
with_corner_loss
=
True
,
loss_bbox
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
1.0
/
9.0
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
loss_cls
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
True
,
reduction
=
'sum'
,
loss_weight
=
1.0
))))
# model training and testing settings
train_cfg
=
dict
(
rpn
=
dict
(
assigner
=
dict
(
# for Car
type
=
'MaxIoUAssigner'
,
iou_calculator
=
dict
(
type
=
'BboxOverlapsNearest3D'
),
pos_iou_thr
=
0.6
,
neg_iou_thr
=
0.45
,
min_pos_iou
=
0.45
,
ignore_iof_thr
=-
1
),
allowed_border
=
0
,
pos_weight
=-
1
,
debug
=
False
),
rpn_proposal
=
dict
(
nms_pre
=
9000
,
nms_post
=
512
,
nms_thr
=
0.8
,
score_thr
=
0
,
use_rotate_nms
=
False
),
rcnn
=
dict
(
assigner
=
dict
(
# for Car
type
=
'MaxIoUAssigner'
,
iou_calculator
=
dict
(
type
=
'BboxOverlaps3D'
,
coordinate
=
'lidar'
),
pos_iou_thr
=
0.55
,
neg_iou_thr
=
0.55
,
min_pos_iou
=
0.55
,
ignore_iof_thr
=-
1
),
sampler
=
dict
(
type
=
'IoUNegPiecewiseSampler'
,
num
=
128
,
pos_fraction
=
0.55
,
neg_piece_fractions
=
[
0.8
,
0.2
],
neg_iou_piece_thrs
=
[
0.55
,
0.1
],
neg_pos_ub
=-
1
,
add_gt_as_proposals
=
False
,
return_iou
=
True
),
cls_pos_thr
=
0.75
,
cls_neg_thr
=
0.25
))
test_cfg
=
dict
(
rpn
=
dict
(
nms_pre
=
1024
,
nms_post
=
100
,
nms_thr
=
0.7
,
score_thr
=
0
,
use_rotate_nms
=
True
),
rcnn
=
dict
(
use_rotate_nms
=
True
,
use_raw_score
=
True
,
nms_thr
=
0.01
,
score_thr
=
0.1
))
# dataset settings
dataset_type
=
'KittiDataset'
data_root
=
'data/kitti/'
class_names
=
[
'Car'
]
input_modality
=
dict
(
use_lidar
=
False
,
use_lidar_reduced
=
True
,
use_depth
=
False
,
use_lidar_intensity
=
True
,
use_camera
=
False
)
db_sampler
=
dict
(
root_path
=
data_root
,
info_path
=
data_root
+
'kitti_dbinfos_train.pkl'
,
rate
=
1.0
,
use_road_plane
=
False
,
object_rot_range
=
[
0.0
,
0.0
],
prepare
=
dict
(
filter_by_difficulty
=
[
-
1
],
filter_by_min_points
=
dict
(
Car
=
5
),
),
sample_groups
=
dict
(
Car
=
15
))
train_pipeline
=
[
dict
(
type
=
'ObjectSample'
,
db_sampler
=
db_sampler
),
dict
(
type
=
'ObjectNoise'
,
num_try
=
100
,
loc_noise_std
=
[
1.0
,
1.0
,
0.5
],
global_rot_range
=
[
0.0
,
0.0
],
rot_uniform_noise
=
[
-
0.78539816
,
0.78539816
]),
dict
(
type
=
'RandomFlip3D'
,
flip_ratio
=
0.5
),
dict
(
type
=
'GlobalRotScale'
,
rot_uniform_noise
=
[
-
0.78539816
,
0.78539816
],
scaling_uniform_noise
=
[
0.95
,
1.05
]),
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'ObjectRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'PointShuffle'
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes_3d'
,
'gt_labels_3d'
])
]
test_pipeline
=
[
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
,
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes_3d'
])
]
data
=
dict
(
samples_per_gpu
=
2
,
workers_per_gpu
=
2
,
train
=
dict
(
type
=
dataset_type
,
root_path
=
data_root
,
ann_file
=
data_root
+
'kitti_infos_train.pkl'
,
split
=
'training'
,
training
=
True
,
pipeline
=
train_pipeline
,
modality
=
input_modality
,
class_names
=
class_names
,
with_label
=
True
),
val
=
dict
(
type
=
dataset_type
,
root_path
=
data_root
,
ann_file
=
data_root
+
'kitti_infos_val.pkl'
,
split
=
'training'
,
pipeline
=
test_pipeline
,
modality
=
input_modality
,
class_names
=
class_names
,
with_label
=
True
),
test
=
dict
(
type
=
dataset_type
,
root_path
=
data_root
,
ann_file
=
data_root
+
'kitti_infos_val.pkl'
,
split
=
'testing'
,
pipeline
=
test_pipeline
,
modality
=
input_modality
,
class_names
=
class_names
,
with_label
=
True
))
# optimizer
lr
=
0.001
# max learning rate
optimizer
=
dict
(
type
=
'AdamW'
,
lr
=
lr
,
betas
=
(
0.95
,
0.99
),
weight_decay
=
0.01
)
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
10
,
norm_type
=
2
))
lr_config
=
dict
(
policy
=
'cyclic'
,
target_ratio
=
(
10
,
1e-4
),
cyclic_times
=
1
,
step_ratio_up
=
0.4
)
momentum_config
=
dict
(
policy
=
'cyclic'
,
target_ratio
=
(
0.85
/
0.95
,
1
),
cyclic_times
=
1
,
step_ratio_up
=
0.4
)
checkpoint_config
=
dict
(
interval
=
1
)
# yapf:disable
log_config
=
dict
(
interval
=
50
,
hooks
=
[
dict
(
type
=
'TextLoggerHook'
),
dict
(
type
=
'TensorboardLoggerHook'
)
])
# yapf:enable
# runtime settings
total_epochs
=
80
dist_params
=
dict
(
backend
=
'nccl'
)
log_level
=
'INFO'
find_unused_parameters
=
True
work_dir
=
'./work_dirs/parta2_secfpn_80e'
load_from
=
None
resume_from
=
None
workflow
=
[(
'train'
,
1
)]
mmdet3d/core/bbox/__init__.py
View file @
54595292
...
...
@@ -8,7 +8,7 @@ from .samplers import (BaseSampler, CombinedSampler,
InstanceBalancedPosSampler
,
IoUBalancedNegSampler
,
PseudoSampler
,
RandomSampler
,
SamplingResult
)
from
.structures
import
Box3DMode
,
CameraInstance3DBoxes
,
LiDARInstance3DBoxes
from
.transforms
import
boxes3d_to_bev_torch_lidar
from
.transforms
import
bbox3d2roi
,
boxes3d_to_bev_torch_lidar
from
.assign_sampling
import
(
# isort:skip, avoid recursive imports
build_bbox_coder
,
# temporally settings
...
...
@@ -22,5 +22,5 @@ __all__ = [
'build_bbox_coder'
,
'DeltaXYZWLHRBBoxCoder'
,
'boxes3d_to_bev_torch_lidar'
,
'BboxOverlapsNearest3D'
,
'BboxOverlaps3D'
,
'bbox_overlaps_nearest_3d'
,
'bbox_overlaps_3d'
,
'Box3DMode'
,
'LiDARInstance3DBoxes'
,
'CameraInstance3DBoxes'
'CameraInstance3DBoxes'
,
'bbox3d2roi'
]
mmdet3d/core/bbox/box_np_ops.py
View file @
54595292
...
...
@@ -566,3 +566,69 @@ def points_in_convex_polygon_jit(points, polygon, clockwise=True):
break
ret
[
i
,
j
]
=
success
return
ret
def
boxes3d_to_corners3d_lidar
(
boxes3d
,
bottom_center
=
True
):
"""convert kitti center boxes to corners
7 -------- 4
/| /|
6 -------- 5 .
| | | |
. 3 -------- 0
|/ |/
2 -------- 1
Args:
boxes3d (numpy.array): (N, 7) [x, y, z, w, l, h, ry] in LiDAR coords,
see the definition of ry in KITTI dataset
bottom_center (bool): whether z is on the bottom center of object.
Returns:
numpy.array: box corners with shape (N, 8, 3)
"""
boxes_num
=
boxes3d
.
shape
[
0
]
w
,
l
,
h
=
boxes3d
[:,
3
],
boxes3d
[:,
4
],
boxes3d
[:,
5
]
x_corners
=
np
.
array
(
[
w
/
2.
,
-
w
/
2.
,
-
w
/
2.
,
w
/
2.
,
w
/
2.
,
-
w
/
2.
,
-
w
/
2.
,
w
/
2.
],
dtype
=
np
.
float32
).
T
y_corners
=
np
.
array
(
[
-
l
/
2.
,
-
l
/
2.
,
l
/
2.
,
l
/
2.
,
-
l
/
2.
,
-
l
/
2.
,
l
/
2.
,
l
/
2.
],
dtype
=
np
.
float32
).
T
if
bottom_center
:
z_corners
=
np
.
zeros
((
boxes_num
,
8
),
dtype
=
np
.
float32
)
z_corners
[:,
4
:
8
]
=
h
.
reshape
(
boxes_num
,
1
).
repeat
(
4
,
axis
=
1
)
# (N, 8)
else
:
z_corners
=
np
.
array
([
-
h
/
2.
,
-
h
/
2.
,
-
h
/
2.
,
-
h
/
2.
,
h
/
2.
,
h
/
2.
,
h
/
2.
,
h
/
2.
],
dtype
=
np
.
float32
).
T
ry
=
boxes3d
[:,
6
]
zeros
,
ones
=
np
.
zeros
(
ry
.
size
,
dtype
=
np
.
float32
),
np
.
ones
(
ry
.
size
,
dtype
=
np
.
float32
)
rot_list
=
np
.
array
([[
np
.
cos
(
ry
),
-
np
.
sin
(
ry
),
zeros
],
[
np
.
sin
(
ry
),
np
.
cos
(
ry
),
zeros
],
[
zeros
,
zeros
,
ones
]])
# (3, 3, N)
R_list
=
np
.
transpose
(
rot_list
,
(
2
,
0
,
1
))
# (N, 3, 3)
temp_corners
=
np
.
concatenate
((
x_corners
.
reshape
(
-
1
,
8
,
1
),
y_corners
.
reshape
(
-
1
,
8
,
1
),
z_corners
.
reshape
(
-
1
,
8
,
1
)),
axis
=
2
)
# (N, 8, 3)
rotated_corners
=
np
.
matmul
(
temp_corners
,
R_list
)
# (N, 8, 3)
x_corners
=
rotated_corners
[:,
:,
0
]
y_corners
=
rotated_corners
[:,
:,
1
]
z_corners
=
rotated_corners
[:,
:,
2
]
x_loc
,
y_loc
,
z_loc
=
boxes3d
[:,
0
],
boxes3d
[:,
1
],
boxes3d
[:,
2
]
x
=
x_loc
.
reshape
(
-
1
,
1
)
+
x_corners
.
reshape
(
-
1
,
8
)
y
=
y_loc
.
reshape
(
-
1
,
1
)
+
y_corners
.
reshape
(
-
1
,
8
)
z
=
z_loc
.
reshape
(
-
1
,
1
)
+
z_corners
.
reshape
(
-
1
,
8
)
corners
=
np
.
concatenate
(
(
x
.
reshape
(
-
1
,
8
,
1
),
y
.
reshape
(
-
1
,
8
,
1
),
z
.
reshape
(
-
1
,
8
,
1
)),
axis
=
2
)
return
corners
.
astype
(
np
.
float32
)
mmdet3d/core/bbox/box_torch_ops.py
View file @
54595292
...
...
@@ -210,3 +210,70 @@ def enlarge_box3d_lidar(boxes3d, extra_width):
large_boxes3d
[:,
3
:
6
]
+=
extra_width
*
2
large_boxes3d
[:,
2
]
-=
extra_width
# bottom center z minus extra_width
return
large_boxes3d
def
boxes3d_to_corners3d_lidar_torch
(
boxes3d
,
bottom_center
=
True
):
"""convert kitti center boxes to corners
7 -------- 4
/| /|
6 -------- 5 .
| | | |
. 3 -------- 0
|/ |/
2 -------- 1
Args:
boxes3d (FloatTensor): (N, 7) [x, y, z, w, l, h, ry] in LiDAR coords,
see the definition of ry in KITTI dataset
bottom_center (bool): whether z is on the bottom center of object.
Returns:
FloatTensor: box corners with shape (N, 8, 3)
"""
boxes_num
=
boxes3d
.
shape
[
0
]
w
,
l
,
h
=
boxes3d
[:,
3
:
4
],
boxes3d
[:,
4
:
5
],
boxes3d
[:,
5
:
6
]
ry
=
boxes3d
[:,
6
:
7
]
zeros
=
boxes3d
.
new_zeros
(
boxes_num
,
1
)
ones
=
boxes3d
.
new_ones
(
boxes_num
,
1
)
x_corners
=
torch
.
cat
(
[
w
/
2.
,
-
w
/
2.
,
-
w
/
2.
,
w
/
2.
,
w
/
2.
,
-
w
/
2.
,
-
w
/
2.
,
w
/
2.
],
dim
=
1
)
# (N, 8)
y_corners
=
torch
.
cat
(
[
-
l
/
2.
,
-
l
/
2.
,
l
/
2.
,
l
/
2.
,
-
l
/
2.
,
-
l
/
2.
,
l
/
2.
,
l
/
2.
],
dim
=
1
)
# (N, 8)
if
bottom_center
:
z_corners
=
torch
.
cat
([
zeros
,
zeros
,
zeros
,
zeros
,
h
,
h
,
h
,
h
],
dim
=
1
)
# (N, 8)
else
:
z_corners
=
torch
.
cat
([
-
h
/
2.
,
-
h
/
2.
,
-
h
/
2.
,
-
h
/
2.
,
h
/
2.
,
h
/
2.
,
h
/
2.
,
h
/
2.
],
dim
=
1
)
# (N, 8)
temp_corners
=
torch
.
cat
(
(
x_corners
.
unsqueeze
(
dim
=
2
),
y_corners
.
unsqueeze
(
dim
=
2
),
z_corners
.
unsqueeze
(
dim
=
2
)),
dim
=
2
)
# (N, 8, 3)
cosa
,
sina
=
torch
.
cos
(
ry
),
torch
.
sin
(
ry
)
raw_1
=
torch
.
cat
([
cosa
,
-
sina
,
zeros
],
dim
=
1
)
# (N, 3)
raw_2
=
torch
.
cat
([
sina
,
cosa
,
zeros
],
dim
=
1
)
# (N, 3)
raw_3
=
torch
.
cat
([
zeros
,
zeros
,
ones
],
dim
=
1
)
# (N, 3)
R
=
torch
.
cat
((
raw_1
.
unsqueeze
(
dim
=
1
),
raw_2
.
unsqueeze
(
dim
=
1
),
raw_3
.
unsqueeze
(
dim
=
1
)),
dim
=
1
)
# (N, 3, 3)
rotated_corners
=
torch
.
matmul
(
temp_corners
,
R
)
# (N, 8, 3)
x_corners
=
rotated_corners
[:,
:,
0
]
y_corners
=
rotated_corners
[:,
:,
1
]
z_corners
=
rotated_corners
[:,
:,
2
]
x_loc
,
y_loc
,
z_loc
=
boxes3d
[:,
0
],
boxes3d
[:,
1
],
boxes3d
[:,
2
]
x
=
x_loc
.
view
(
-
1
,
1
)
+
x_corners
.
view
(
-
1
,
8
)
y
=
y_loc
.
view
(
-
1
,
1
)
+
y_corners
.
view
(
-
1
,
8
)
z
=
z_loc
.
view
(
-
1
,
1
)
+
z_corners
.
view
(
-
1
,
8
)
corners
=
torch
.
cat
((
x
.
view
(
-
1
,
8
,
1
),
y
.
view
(
-
1
,
8
,
1
),
z
.
view
(
-
1
,
8
,
1
)),
dim
=
2
)
return
corners
mmdet3d/core/bbox/iou_calculators/iou3d_calculator.py
View file @
54595292
...
...
@@ -88,6 +88,11 @@ def bbox_overlaps_3d(bboxes1, bboxes2, mode='iou', coordinate='camera'):
assert
bboxes1
.
size
(
-
1
)
==
bboxes2
.
size
(
-
1
)
==
7
assert
coordinate
in
[
'camera'
,
'lidar'
]
rows
=
bboxes1
.
size
(
0
)
cols
=
bboxes2
.
size
(
0
)
if
rows
*
cols
==
0
:
return
bboxes1
.
new
(
rows
,
cols
)
if
coordinate
==
'camera'
:
return
boxes_iou3d_gpu_camera
(
bboxes1
,
bboxes2
,
mode
)
elif
coordinate
==
'lidar'
:
...
...
mmdet3d/core/bbox/transforms.py
View file @
54595292
...
...
@@ -47,3 +47,25 @@ def boxes3d_to_bev_torch_lidar(boxes3d):
boxes_bev
[:,
2
],
boxes_bev
[:,
3
]
=
cu
+
half_w
,
cv
+
half_l
boxes_bev
[:,
4
]
=
boxes3d
[:,
6
]
return
boxes_bev
def
bbox3d2roi
(
bbox_list
):
"""Convert a list of bboxes to roi format.
Args:
bbox_list (list[Tensor]): a list of bboxes corresponding to a batch
of images.
Returns:
Tensor: shape (n, c), [batch_ind, x, y ...]
"""
rois_list
=
[]
for
img_id
,
bboxes
in
enumerate
(
bbox_list
):
if
bboxes
.
size
(
0
)
>
0
:
img_inds
=
bboxes
.
new_full
((
bboxes
.
size
(
0
),
1
),
img_id
)
rois
=
torch
.
cat
([
img_inds
,
bboxes
],
dim
=-
1
)
else
:
rois
=
torch
.
zeros_like
(
bboxes
)
rois_list
.
append
(
rois
)
rois
=
torch
.
cat
(
rois_list
,
0
)
return
rois
mmdet3d/core/evaluation/kitti_utils/eval.py
View file @
54595292
...
...
@@ -731,7 +731,6 @@ def kitti_eval(gt_annos,
result
+=
'aos AP:{:.2f}, {:.2f}, {:.2f}
\n
'
.
format
(
*
mAPaos
[:,
0
])
# prepare results for logger
ret_dict
[
'Overall'
]
=
dict
()
for
idx
in
range
(
3
):
postfix
=
f
'
{
difficulty
[
idx
]
}
'
if
mAP3d
is
not
None
:
...
...
mmdet3d/models/anchor_heads/parta2_rpn_head.py
View file @
54595292
...
...
@@ -231,19 +231,15 @@ class PartA2RPNHead(SECONDHead):
labels
=
labels
[
inds
]
scores
=
scores
[
inds
]
cls_scores
=
cls_scores
[
inds
]
dir_scores
=
dir_scores
[
inds
]
return
dict
(
box3d_lidar
=
bboxes
.
cpu
(),
scores
=
scores
.
cpu
(),
label_preds
=
labels
.
cpu
(),
cls_preds
=
cls_scores
.
cpu
(
)
# raw scores with shape [max_num, cls_num]
box3d_lidar
=
bboxes
,
scores
=
scores
,
label_preds
=
labels
,
cls_preds
=
cls_scores
# raw scores [max_num, cls_num]
)
else
:
return
dict
(
box3d_lidar
=
mlvl_bboxes
.
new_zeros
([
0
,
self
.
box_code_size
]).
cpu
(),
scores
=
mlvl_bboxes
.
new_zeros
([
0
]).
cpu
(),
label_preds
=
mlvl_bboxes
.
new_zeros
([
0
]).
cpu
(),
cls_preds
=
mlvl_bboxes
.
new_zeros
([
0
,
mlvl_cls_score
.
shape
[
-
1
]
]).
cpu
())
box3d_lidar
=
mlvl_bboxes
.
new_zeros
([
0
,
self
.
box_code_size
]),
scores
=
mlvl_bboxes
.
new_zeros
([
0
]),
label_preds
=
mlvl_bboxes
.
new_zeros
([
0
]),
cls_preds
=
mlvl_bboxes
.
new_zeros
([
0
,
mlvl_cls_score
.
shape
[
-
1
]]))
mmdet3d/models/anchor_heads/second_head.py
View file @
54595292
...
...
@@ -258,9 +258,9 @@ class SECONDHead(nn.Module, AnchorTrainMixin):
dir_weights_list
,
num_total_samples
=
num_total_samples
)
return
dict
(
loss_cls
_3d
=
losses_cls
,
loss_bbox
_3d
=
losses_bbox
,
loss_dir
_3d
=
losses_dir
)
loss_
rpn_
cls
=
losses_cls
,
loss_
rpn_
bbox
=
losses_bbox
,
loss_
rpn_
dir
=
losses_dir
)
def
get_bboxes
(
self
,
cls_scores
,
...
...
mmdet3d/models/detectors/parta2.py
View file @
54595292
...
...
@@ -34,11 +34,13 @@ class PartA2(TwoStageDetector):
self
.
middle_encoder
=
builder
.
build_middle_encoder
(
middle_encoder
)
def
extract_feat
(
self
,
points
,
img_meta
):
voxels
,
num_points
,
coors
=
self
.
voxelize
(
points
)
voxel_dict
=
dict
(
voxels
=
voxels
,
num_points
=
num_points
,
coors
=
coors
)
voxel_features
=
self
.
voxel_encoder
(
voxels
,
num_points
,
coors
)
batch_size
=
coors
[
-
1
,
0
].
item
()
+
1
feats_dict
=
self
.
middle_encoder
(
voxel_features
,
coors
,
batch_size
)
voxel_dict
=
self
.
voxelize
(
points
)
voxel_features
=
self
.
voxel_encoder
(
voxel_dict
[
'voxels'
],
voxel_dict
[
'num_points'
],
voxel_dict
[
'coors'
])
batch_size
=
voxel_dict
[
'coors'
][
-
1
,
0
].
item
()
+
1
feats_dict
=
self
.
middle_encoder
(
voxel_features
,
voxel_dict
[
'coors'
],
batch_size
)
x
=
self
.
backbone
(
feats_dict
[
'spatial_features'
])
if
self
.
with_neck
:
neck_feats
=
self
.
neck
(
x
)
...
...
@@ -47,20 +49,33 @@ class PartA2(TwoStageDetector):
@
torch
.
no_grad
()
def
voxelize
(
self
,
points
):
voxels
,
coors
,
num_points
=
[],
[],
[]
voxels
,
coors
,
num_points
,
voxel_centers
=
[],
[],
[],
[]
for
res
in
points
:
res_voxels
,
res_coors
,
res_num_points
=
self
.
voxel_layer
(
res
)
res_voxel_centers
=
(
res_coors
[:,
[
2
,
1
,
0
]]
+
0.5
)
*
res_voxels
.
new_tensor
(
self
.
voxel_layer
.
voxel_size
)
+
res_voxels
.
new_tensor
(
self
.
voxel_layer
.
point_cloud_range
[
0
:
3
])
voxels
.
append
(
res_voxels
)
coors
.
append
(
res_coors
)
num_points
.
append
(
res_num_points
)
voxel_centers
.
append
(
res_voxel_centers
)
voxels
=
torch
.
cat
(
voxels
,
dim
=
0
)
num_points
=
torch
.
cat
(
num_points
,
dim
=
0
)
voxel_centers
=
torch
.
cat
(
voxel_centers
,
dim
=
0
)
coors_batch
=
[]
for
i
,
coor
in
enumerate
(
coors
):
coor_pad
=
F
.
pad
(
coor
,
(
1
,
0
),
mode
=
'constant'
,
value
=
i
)
coors_batch
.
append
(
coor_pad
)
coors_batch
=
torch
.
cat
(
coors_batch
,
dim
=
0
)
return
voxels
,
num_points
,
coors_batch
voxel_dict
=
dict
(
voxels
=
voxels
,
num_points
=
num_points
,
coors
=
coors_batch
,
voxel_centers
=
voxel_centers
)
return
voxel_dict
def
forward_train
(
self
,
points
,
...
...
@@ -69,7 +84,6 @@ class PartA2(TwoStageDetector):
gt_labels_3d
,
gt_bboxes_ignore
=
None
,
proposals
=
None
):
# TODO: complete it
feats_dict
,
voxels_dict
=
self
.
extract_feat
(
points
,
img_meta
)
losses
=
dict
()
...
...
@@ -86,7 +100,13 @@ class PartA2(TwoStageDetector):
proposal_inputs
=
rpn_outs
+
(
img_meta
,
proposal_cfg
)
proposal_list
=
self
.
rpn_head
.
get_bboxes
(
*
proposal_inputs
)
else
:
proposal_list
=
proposals
# noqa: F841
proposal_list
=
proposals
roi_losses
=
self
.
roi_head
.
forward_train
(
feats_dict
,
voxels_dict
,
img_meta
,
proposal_list
,
gt_bboxes_3d
,
gt_labels_3d
)
losses
.
update
(
roi_losses
)
return
losses
...
...
@@ -102,16 +122,18 @@ class PartA2(TwoStageDetector):
def
simple_test
(
self
,
points
,
img_meta
,
gt_bboxes_3d
=
None
,
gt_bboxes_3d
,
proposals
=
None
,
rescale
=
False
):
feats_dict
,
voxels_dict
=
self
.
extract_feat
(
points
,
img_meta
)
# TODO: complete it
if
proposals
is
None
:
proposal_list
=
self
.
simple_test_rpn
(
feats_dict
[
'neck_feats'
],
img_meta
,
self
.
test_cfg
.
rpn
)
if
self
.
with_rpn
:
rpn_outs
=
self
.
rpn_head
(
feats_dict
[
'neck_feats'
])
proposal_cfg
=
self
.
test_cfg
.
rpn
bbox_inputs
=
rpn_outs
+
(
img_meta
,
proposal_cfg
)
proposal_list
=
self
.
rpn_head
.
get_bboxes
(
*
bbox_inputs
)
else
:
proposal_list
=
proposals
return
self
.
roi_head
.
simple_test
(
feats_dict
,
proposal_list
,
img_meta
,
rescale
=
rescale
)
return
self
.
roi_head
.
simple_test
(
feats_dict
,
voxels_dict
,
img_meta
,
proposal_list
)
mmdet3d/models/roi_heads/__init__.py
View file @
54595292
from
.base_3droi_head
import
Base3DRoIHead
from
.bbox_heads
import
PartA2BboxHead
from
.mask_heads
import
PointwiseSemanticHead
from
.part_aggregation_roi_head
import
PartAggregationROIHead
from
.roi_extractors
import
Single3DRoIAwareExtractor
__all__
=
[
'PointwiseSemanticHead'
]
__all__
=
[
'Base3DRoIHead'
,
'PartAggregationROIHead'
,
'PointwiseSemanticHead'
,
'Single3DRoIAwareExtractor'
,
'PartA2BboxHead'
]
mmdet3d/models/roi_heads/base_3droi_head.py
0 → 100644
View file @
54595292
from
abc
import
ABCMeta
,
abstractmethod
import
torch.nn
as
nn
class Base3DRoIHead(nn.Module, metaclass=ABCMeta):
    """Base class for 3D RoI heads.

    Subclasses implement the ``init_*`` hooks and ``forward_train``; this base
    only stores the train/test configs and dispatches initialization.
    """

    def __init__(self,
                 bbox_head=None,
                 mask_roi_extractor=None,
                 mask_head=None,
                 train_cfg=None,
                 test_cfg=None):
        """
        Args:
            bbox_head (dict, optional): Config of the bbox head.
            mask_roi_extractor (dict, optional): Config of the mask
                RoI extractor.
            mask_head (dict, optional): Config of the mask head.
            train_cfg (dict, optional): Training config.
            test_cfg (dict, optional): Testing config.
        """
        super(Base3DRoIHead, self).__init__()
        self.train_cfg = train_cfg
        self.test_cfg = test_cfg

        if bbox_head is not None:
            self.init_bbox_head(bbox_head)

        if mask_head is not None:
            self.init_mask_head(mask_roi_extractor, mask_head)

        self.init_assigner_sampler()

    @property
    def with_bbox(self):
        """bool: whether the head has a bbox head."""
        return hasattr(self, 'bbox_head') and self.bbox_head is not None

    @property
    def with_mask(self):
        """bool: whether the head has a mask head."""
        return hasattr(self, 'mask_head') and self.mask_head is not None

    @abstractmethod
    def init_weights(self, pretrained):
        """Initialize the weights, optionally from a pretrained model."""
        pass

    @abstractmethod
    def init_bbox_head(self, bbox_head):
        """Build the bbox head from its config.

        Note: ``__init__`` invokes this as ``self.init_bbox_head(bbox_head)``,
        so implementations must accept the config argument.
        """
        pass

    @abstractmethod
    def init_mask_head(self, mask_roi_extractor, mask_head):
        """Build the mask head (and its RoI extractor) from configs.

        Note: ``__init__`` invokes this with both config arguments when
        ``mask_head`` is not None.
        """
        pass

    @abstractmethod
    def init_assigner_sampler(self):
        """Build the bbox assigner and sampler used during training."""
        pass

    @abstractmethod
    def forward_train(self,
                     x,
                     img_meta,
                     proposal_list,
                     gt_bboxes,
                     gt_labels,
                     gt_bboxes_ignore=None,
                     gt_masks=None,
                     **kwargs):
        """Forward function during training"""
        pass

    def simple_test(self,
                    x,
                    proposal_list,
                    img_meta,
                    proposals=None,
                    rescale=False,
                    **kwargs):
        """Test without augmentation."""
        pass

    def aug_test(self, x, proposal_list, img_metas, rescale=False, **kwargs):
        """Test with augmentations.

        If rescale is False, then returned bboxes and masks will fit the scale
        of imgs[0].
        """
        pass
mmdet3d/models/roi_heads/bbox_heads/__init__.py
View file @
54595292
...
...
@@ -2,8 +2,9 @@ from mmdet.models.roi_heads.bbox_heads import (BBoxHead, ConvFCBBoxHead,
DoubleConvFCBBoxHead
,
Shared2FCBBoxHead
,
Shared4Conv1FCBBoxHead
)
from
.parta2_bbox_head
import
PartA2BboxHead
__all__
=
[
'BBoxHead'
,
'ConvFCBBoxHead'
,
'Shared2FCBBoxHead'
,
'Shared4Conv1FCBBoxHead'
,
'DoubleConvFCBBoxHead'
'Shared4Conv1FCBBoxHead'
,
'DoubleConvFCBBoxHead'
,
'PartA2BboxHead'
]
mmdet3d/models/roi_heads/bbox_heads/parta2_bbox_head.py
0 → 100644
View file @
54595292
import
numpy
as
np
import
torch
import
torch.nn
as
nn
from
mmcv.cnn
import
ConvModule
,
build_norm_layer
,
normal_init
,
xavier_init
import
mmdet3d.ops.spconv
as
spconv
from
mmdet3d.core
import
build_bbox_coder
,
multi_apply
from
mmdet3d.core.bbox
import
box_torch_ops
from
mmdet3d.models.builder
import
build_loss
from
mmdet3d.ops.iou3d.iou3d_utils
import
(
boxes3d_to_bev_torch_lidar
,
nms_gpu
,
nms_normal_gpu
)
from
mmdet.models
import
HEADS
@HEADS.register_module()
class PartA2BboxHead(nn.Module):
    """PartA2 rcnn box head.

    Args:
        num_classes (int): The number of classes to prediction.
        seg_in_channels (int): Input channels of segmentation
            convolution layer.
        part_in_channels (int): Input channels of part convolution layer.
        seg_conv_channels (list(int)): Out channels of each
            segmentation convolution layer.
        part_conv_channels (list(int)): Out channels of each
            part convolution layer.
        merge_conv_channels (list(int)): Out channels of each
            feature merged convolution layer.
        down_conv_channels (list(int)): Out channels of each
            downsampled convolution layer.
        shared_fc_channels (list(int)): Out channels of each shared fc layer.
        cls_channels (list(int)): Out channels of each classification layer.
        reg_channels (list(int)): Out channels of each regression layer.
        dropout_ratio (float): Dropout ratio of classification and
            regression layers.
        roi_feat_size (int): The size of pooled roi features.
        with_corner_loss (bool): Whether to use corner loss or not.
        bbox_coder (BaseBBoxCoder): Bbox coder for box head.
        conv_cfg (dict): Config dict of convolutional layers
        norm_cfg (dict): Config dict of normalization layers
        loss_bbox (dict): Config dict of box regression loss.
        loss_cls (dict): Config dict of classifacation loss.
    """

    def __init__(self,
                 num_classes,
                 seg_in_channels,
                 part_in_channels,
                 seg_conv_channels=None,
                 part_conv_channels=None,
                 merge_conv_channels=None,
                 down_conv_channels=None,
                 shared_fc_channels=None,
                 cls_channels=None,
                 reg_channels=None,
                 dropout_ratio=0.1,
                 roi_feat_size=14,
                 with_corner_loss=True,
                 bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
                 conv_cfg=dict(type='Conv1d'),
                 norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
                 loss_bbox=dict(
                     type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
                 loss_cls=dict(
                     type='CrossEntropyLoss',
                     use_sigmoid=True,
                     reduction='none',
                     loss_weight=1.0)):
        super(PartA2BboxHead, self).__init__()
        self.num_classes = num_classes
        self.with_corner_loss = with_corner_loss
        self.bbox_coder = build_bbox_coder(bbox_coder)
        self.loss_bbox = build_loss(loss_bbox)
        self.loss_cls = build_loss(loss_cls)
        self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)

        # the down conv output is flattened into the first shared fc layer,
        # so their channel counts must agree
        assert down_conv_channels[-1] == shared_fc_channels[0]

        # init layers
        block = self.post_act_block

        # sparse conv stack over the part-location features
        part_channel_last = part_in_channels
        part_conv = []
        for i, channel in enumerate(part_conv_channels):
            part_conv.append(
                block(
                    part_channel_last,
                    channel,
                    3,
                    padding=1,
                    norm_cfg=norm_cfg,
                    indice_key=f'rcnn_part{i}'))
            part_channel_last = channel
        self.part_conv = spconv.SparseSequential(*part_conv)

        # sparse conv stack over the segmentation features
        seg_channel_last = seg_in_channels
        seg_conv = []
        for i, channel in enumerate(seg_conv_channels):
            seg_conv.append(
                block(
                    seg_channel_last,
                    channel,
                    3,
                    padding=1,
                    norm_cfg=norm_cfg,
                    indice_key=f'rcnn_seg{i}'))
            seg_channel_last = channel
        self.seg_conv = spconv.SparseSequential(*seg_conv)

        self.conv_down = spconv.SparseSequential()

        # merge convs run on the concatenation of part + seg features
        merge_conv_channel_last = part_channel_last + seg_channel_last
        merge_conv = []
        for i, channel in enumerate(merge_conv_channels):
            merge_conv.append(
                block(
                    merge_conv_channel_last,
                    channel,
                    3,
                    padding=1,
                    norm_cfg=norm_cfg,
                    indice_key=f'rcnn_down0'))
            merge_conv_channel_last = channel

        down_conv_channel_last = merge_conv_channel_last
        conv_down = []
        for i, channel in enumerate(down_conv_channels):
            conv_down.append(
                block(
                    down_conv_channel_last,
                    channel,
                    3,
                    padding=1,
                    norm_cfg=norm_cfg,
                    indice_key=f'rcnn_down1'))
            down_conv_channel_last = channel

        # merge -> 2x max pool -> down conv
        self.conv_down.add_module('merge_conv',
                                  spconv.SparseSequential(*merge_conv))
        self.conv_down.add_module('max_pool3d',
                                  spconv.SparseMaxPool3d(
                                      kernel_size=2, stride=2))
        self.conv_down.add_module('down_conv',
                                  spconv.SparseSequential(*conv_down))

        # shared fc layers consume the flattened pooled volume
        # (pool halves each spatial dim, hence roi_feat_size // 2 cubed)
        shared_fc_list = []
        pool_size = roi_feat_size // 2
        pre_channel = shared_fc_channels[0] * pool_size**3
        for k in range(1, len(shared_fc_channels)):
            shared_fc_list.append(
                ConvModule(
                    pre_channel,
                    shared_fc_channels[k],
                    1,
                    padding=0,
                    conv_cfg=conv_cfg,
                    norm_cfg=norm_cfg,
                    inplace=True))
            pre_channel = shared_fc_channels[k]

            # dropout between intermediate fc layers only (not after the last)
            if k != len(shared_fc_channels) - 1 and dropout_ratio > 0:
                shared_fc_list.append(nn.Dropout(dropout_ratio))

        self.shared_fc = nn.Sequential(*shared_fc_list)

        # Classification layer
        channel_in = shared_fc_channels[-1]
        cls_channel = 1  # single IoU-style confidence output per RoI
        cls_layers = []
        pre_channel = channel_in
        for k in range(0, len(cls_channels)):
            cls_layers.append(
                ConvModule(
                    pre_channel,
                    cls_channels[k],
                    1,
                    padding=0,
                    conv_cfg=conv_cfg,
                    norm_cfg=norm_cfg,
                    inplace=True))
            pre_channel = cls_channels[k]
        # final prediction layer has no activation
        cls_layers.append(
            ConvModule(
                pre_channel,
                cls_channel,
                1,
                padding=0,
                conv_cfg=conv_cfg,
                act_cfg=None))
        if dropout_ratio >= 0:
            cls_layers.insert(1, nn.Dropout(dropout_ratio))

        self.conv_cls = nn.Sequential(*cls_layers)

        # Regression layer
        reg_layers = []
        pre_channel = channel_in
        for k in range(0, len(reg_channels)):
            reg_layers.append(
                ConvModule(
                    pre_channel,
                    reg_channels[k],
                    1,
                    padding=0,
                    conv_cfg=conv_cfg,
                    norm_cfg=norm_cfg,
                    inplace=True))
            pre_channel = reg_channels[k]
        # final prediction layer outputs the full box code, no activation
        reg_layers.append(
            ConvModule(
                pre_channel,
                self.bbox_coder.code_size,
                1,
                padding=0,
                conv_cfg=conv_cfg,
                act_cfg=None))
        if dropout_ratio >= 0:
            reg_layers.insert(1, nn.Dropout(dropout_ratio))

        self.conv_reg = nn.Sequential(*reg_layers)

        self.init_weights()

    def init_weights(self):
        """Initialize weights.

        Convs get Xavier-uniform init; the final regression conv gets a
        small normal init so initial box deltas stay near zero.
        """
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Conv1d)):
                xavier_init(m, distribution='uniform')
        normal_init(self.conv_reg[-1].conv, mean=0, std=0.001)

    def post_act_block(self,
                       in_channels,
                       out_channels,
                       kernel_size,
                       indice_key,
                       stride=1,
                       padding=0,
                       conv_type='subm',
                       norm_cfg=None):
        """Make post activate sparse convolution block.

        Args:
            in_channels (int): the number of input channels
            out_channels (int): the number of out channels
            kernel_size (int): kernel size of convolution
            indice_key (str): the indice key used for sparse tensor
            stride (int): the stride of convolution
            padding (int or list[int]): the padding number of input
            conv_type (str): conv type in 'subm', 'spconv' or 'inverseconv'
            norm_cfg (dict[str]): config of normalization layer

        Returns:
            spconv.SparseSequential: post activate sparse convolution block.
        """
        # TODO: clean post_act_block by existing bottlenecks.
        assert conv_type in ['subm', 'spconv', 'inverseconv']

        if conv_type == 'subm':
            m = spconv.SparseSequential(
                spconv.SubMConv3d(
                    in_channels,
                    out_channels,
                    kernel_size,
                    bias=False,
                    indice_key=indice_key),
                build_norm_layer(norm_cfg, out_channels)[1],
                nn.ReLU(inplace=True))
        elif conv_type == 'spconv':
            m = spconv.SparseSequential(
                spconv.SparseConv3d(
                    in_channels,
                    out_channels,
                    kernel_size,
                    stride=stride,
                    padding=padding,
                    bias=False,
                    indice_key=indice_key),
                build_norm_layer(norm_cfg, out_channels)[1],
                nn.ReLU(inplace=True))
        elif conv_type == 'inverseconv':
            m = spconv.SparseSequential(
                spconv.SparseInverseConv3d(
                    in_channels,
                    out_channels,
                    kernel_size,
                    bias=False,
                    indice_key=indice_key),
                build_norm_layer(norm_cfg, out_channels)[1],
                nn.ReLU(inplace=True))
        else:
            # unreachable due to the assert above; kept as a safety net
            raise NotImplementedError
        return m

    def forward(self, seg_feats, part_feats):
        """Forward the pooled per-RoI features through the rcnn network.

        Args:
            seg_feats: pooled segmentation features.
            part_feats: pooled part-location features.
                # assumes both are dense (B * N, out_x, out_y, out_z, C)
                # volumes from the RoI-aware extractors -- TODO confirm

        Returns:
            tuple: ``(cls_score, bbox_pred)`` with one row per RoI.
        """
        # (B * N, out_x, out_y, out_z, 4)
        rcnn_batch_size = part_feats.shape[0]

        # transform to sparse tensors
        sparse_shape = part_feats.shape[1:4]
        # (non_empty_num, 4) ==> [bs_idx, x_idx, y_idx, z_idx]
        # voxels whose part features sum to zero are treated as empty
        sparse_idx = part_feats.sum(dim=-1).nonzero()

        part_features = part_feats[sparse_idx[:, 0], sparse_idx[:, 1],
                                   sparse_idx[:, 2], sparse_idx[:, 3]]
        seg_features = seg_feats[sparse_idx[:, 0], sparse_idx[:, 1],
                                 sparse_idx[:, 2], sparse_idx[:, 3]]
        coords = sparse_idx.int()
        part_features = spconv.SparseConvTensor(part_features, coords,
                                                sparse_shape, rcnn_batch_size)
        seg_features = spconv.SparseConvTensor(seg_features, coords,
                                               sparse_shape, rcnn_batch_size)

        # forward rcnn network
        x_part = self.part_conv(part_features)
        x_rpn = self.seg_conv(seg_features)

        # concatenate seg and part features channel-wise
        merged_feature = torch.cat((x_rpn.features, x_part.features),
                                   dim=1)  # (N, C)
        shared_feature = spconv.SparseConvTensor(merged_feature, coords,
                                                 sparse_shape,
                                                 rcnn_batch_size)
        x = self.conv_down(shared_feature)

        # densify and flatten to (B*N, C*pool^3, 1) for the 1d-conv fc stack
        shared_feature = x.dense().view(rcnn_batch_size, -1, 1)

        shared_feature = self.shared_fc(shared_feature)

        cls_score = self.conv_cls(shared_feature).transpose(
            1, 2).contiguous().squeeze(dim=1)  # (B, 1)
        bbox_pred = self.conv_reg(shared_feature).transpose(
            1, 2).contiguous().squeeze(dim=1)  # (B, C)

        return cls_score, bbox_pred

    def loss(self, cls_score, bbox_pred, rois, labels, bbox_targets,
             pos_gt_bboxes, reg_mask, label_weights, bbox_weights):
        """Compute classification, regression and (optionally) corner losses.

        Args:
            cls_score: per-RoI confidence predictions.
            bbox_pred: per-RoI box code predictions.
            rois: input RoIs; column 0 is the batch index, 1: the box.
            labels: soft classification targets from ``_get_target_single``.
            bbox_targets: encoded regression targets for positive RoIs.
            pos_gt_bboxes: matched gt boxes of positive RoIs.
            reg_mask: 1 for RoIs that receive a regression target, else 0.
            label_weights: per-RoI classification loss weights.
            bbox_weights: per-RoI regression loss weights.

        Returns:
            dict: ``loss_cls``, ``loss_bbox`` and, when ``with_corner_loss``,
            ``loss_corner``.
        """
        losses = dict()
        rcnn_batch_size = cls_score.shape[0]

        # calculate class loss
        cls_flat = cls_score.view(-1)
        loss_cls = self.loss_cls(cls_flat, labels, label_weights)
        losses['loss_cls'] = loss_cls

        # calculate regression loss
        code_size = self.bbox_coder.code_size
        pos_inds = (reg_mask > 0)
        if pos_inds.any() == 0:
            # fake a part loss
            # (no positive RoIs: emit zero losses so the keys always exist)
            losses['loss_bbox'] = loss_cls.new_tensor(0)
            if self.with_corner_loss:
                losses['loss_corner'] = loss_cls.new_tensor(0)
        else:
            pos_bbox_pred = bbox_pred.view(rcnn_batch_size, -1)[pos_inds]
            bbox_weights_flat = bbox_weights[pos_inds].view(-1, 1).repeat(
                1, pos_bbox_pred.shape[-1])
            loss_bbox = self.loss_bbox(
                pos_bbox_pred.unsqueeze(dim=0),
                bbox_targets.unsqueeze(dim=0),
                bbox_weights_flat.unsqueeze(dim=0))
            losses['loss_bbox'] = loss_bbox

            if self.with_corner_loss:
                # decode predictions back to lidar boxes to compare corners
                pos_roi_boxes3d = rois[..., 1:].view(-1, code_size)[pos_inds]
                pos_roi_boxes3d = pos_roi_boxes3d.view(-1, code_size)
                batch_anchors = pos_roi_boxes3d.clone().detach()
                pos_rois_rotation = pos_roi_boxes3d[..., 6].view(-1)
                roi_xyz = pos_roi_boxes3d[..., 0:3].view(-1, 3)
                # decode in the RoI-local (canonical) frame: zero the centers
                batch_anchors[..., 0:3] = 0
                # decode boxes
                pred_boxes3d = self.bbox_coder.decode(
                    batch_anchors,
                    pos_bbox_pred.view(-1, code_size)).view(-1, code_size)

                # rotate back by the RoI yaw and restore the RoI center
                pred_boxes3d[..., 0:3] = box_torch_ops.rotation_3d_in_axis(
                    pred_boxes3d[..., 0:3].unsqueeze(1),
                    (pos_rois_rotation + np.pi / 2),
                    axis=2).squeeze(1)

                pred_boxes3d[:, 0:3] += roi_xyz

                # calculate corner loss
                loss_corner = self.get_corner_loss_lidar(
                    pred_boxes3d, pos_gt_bboxes)
                losses['loss_corner'] = loss_corner

        return losses

    def get_targets(self, sampling_results, rcnn_train_cfg, concat=True):
        """Build rcnn training targets from sampling results.

        Args:
            sampling_results (list): per-image sampling results providing
                ``pos_bboxes``, ``pos_gt_bboxes`` and ``iou``.
            rcnn_train_cfg (dict): rcnn training config (thresholds).
            concat (bool): whether to concatenate targets across the batch
                and normalize the weights by their sum.

        Returns:
            tuple: ``(label, bbox_targets, pos_gt_bboxes, reg_mask,
            label_weights, bbox_weights)``.
        """
        pos_bboxes_list = [res.pos_bboxes for res in sampling_results]
        pos_gt_bboxes_list = [res.pos_gt_bboxes for res in sampling_results]
        iou_list = [res.iou for res in sampling_results]
        targets = multi_apply(
            self._get_target_single,
            pos_bboxes_list,
            pos_gt_bboxes_list,
            iou_list,
            cfg=rcnn_train_cfg)

        (label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,
         bbox_weights) = targets

        if concat:
            label = torch.cat(label, 0)
            bbox_targets = torch.cat(bbox_targets, 0)
            pos_gt_bboxes = torch.cat(pos_gt_bboxes, 0)
            reg_mask = torch.cat(reg_mask, 0)

            # normalize weights so each loss averages over the whole batch
            label_weights = torch.cat(label_weights, 0)
            label_weights /= torch.clamp(label_weights.sum(), min=1.0)

            bbox_weights = torch.cat(bbox_weights, 0)
            bbox_weights /= torch.clamp(bbox_weights.sum(), min=1.0)

        return (label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,
                bbox_weights)

    def _get_target_single(self, pos_bboxes, pos_gt_bboxes, ious, cfg):
        """Compute targets for one image.

        The classification target is a soft IoU-based score; regression
        targets are encoded in the canonical (RoI-centered, yaw-aligned)
        frame.
        """
        cls_pos_mask = ious > cfg.cls_pos_thr
        cls_neg_mask = ious < cfg.cls_neg_thr
        # RoIs whose iou falls between the two thresholds get a soft label
        interval_mask = (cls_pos_mask == 0) & (cls_neg_mask == 0)

        # iou regression target
        label = (cls_pos_mask > 0).float()
        # linearly map interval ious into (0, 1): iou * 2 - 0.5
        label[interval_mask] = ious[interval_mask] * 2 - 0.5
        # label weights
        label_weights = (label >= 0).float()

        # box regression target
        # the first pos_gt_bboxes.size(0) sampled boxes are the positives
        reg_mask = pos_bboxes.new_zeros(ious.size(0)).long()
        reg_mask[0:pos_gt_bboxes.size(0)] = 1
        bbox_weights = (reg_mask > 0).float()
        if reg_mask.bool().any():
            pos_gt_bboxes_ct = pos_gt_bboxes.clone().detach()
            roi_center = pos_bboxes[..., 0:3]
            roi_ry = pos_bboxes[..., 6] % (2 * np.pi)

            # canonical transformation
            pos_gt_bboxes_ct[..., 0:3] -= roi_center
            pos_gt_bboxes_ct[..., 6] -= roi_ry
            pos_gt_bboxes_ct[..., 0:3] = box_torch_ops.rotation_3d_in_axis(
                pos_gt_bboxes_ct[..., 0:3].unsqueeze(1),
                -(roi_ry + np.pi / 2),
                axis=2).squeeze(1)

            # flip orientation if rois have opposite orientation
            ry_label = pos_gt_bboxes_ct[..., 6] % (2 * np.pi)  # 0 ~ 2pi
            opposite_flag = (ry_label > np.pi * 0.5) & (ry_label < np.pi * 1.5)
            ry_label[opposite_flag] = (ry_label[opposite_flag] + np.pi) % (
                2 * np.pi)  # (0 ~ pi/2, 3pi/2 ~ 2pi)
            flag = ry_label > np.pi
            ry_label[flag] = ry_label[flag] - np.pi * 2  # (-pi/2, pi/2)
            ry_label = torch.clamp(ry_label, min=-np.pi / 2, max=np.pi / 2)
            pos_gt_bboxes_ct[..., 6] = ry_label

            # encode against a zero-centered, zero-yaw anchor of the same size
            rois_anchor = pos_bboxes.clone().detach()
            rois_anchor[:, 0:3] = 0
            rois_anchor[:, 6] = 0
            bbox_targets = self.bbox_coder.encode(rois_anchor,
                                                  pos_gt_bboxes_ct)
        else:
            # no fg bbox
            bbox_targets = pos_gt_bboxes.new_empty((0, 7))

        return (label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,
                bbox_weights)

    def get_corner_loss_lidar(self, pred_bbox3d, gt_bbox3d, delta=1):
        """Calculate corner loss of given boxes.

        Args:
            pred_bbox3d (FloatTensor): predicted boxes with shape (N, 7).
            gt_bbox3d (FloatTensor): gt boxes with shape (N, 7).

        Returns:
            FloatTensor: Calculated corner loss with shape (N).
        """
        assert pred_bbox3d.shape[0] == gt_bbox3d.shape[0]

        pred_box_corners = box_torch_ops.boxes3d_to_corners3d_lidar_torch(
            pred_bbox3d)
        gt_box_corners = box_torch_ops.boxes3d_to_corners3d_lidar_torch(
            gt_bbox3d)

        # a gt flipped by pi is geometrically identical; take the closer one
        gt_bbox3d_flip = gt_bbox3d.clone()
        gt_bbox3d_flip[:, 6] += np.pi
        gt_box_corners_flip = box_torch_ops.boxes3d_to_corners3d_lidar_torch(
            gt_bbox3d_flip)
        corner_dist = torch.min(
            torch.norm(pred_box_corners - gt_box_corners, dim=2),
            torch.norm(pred_box_corners - gt_box_corners_flip,
                       dim=2))  # (N, 8)
        # huber loss
        abs_error = torch.abs(corner_dist)
        quadratic = torch.clamp(abs_error, max=delta)
        linear = (abs_error - quadratic)
        corner_loss = 0.5 * quadratic**2 + delta * linear

        return corner_loss.mean(dim=1)

    def get_bboxes(self,
                   rois,
                   cls_score,
                   bbox_pred,
                   class_labels,
                   class_pred,
                   img_meta,
                   cfg=None):
        """Decode rcnn predictions into final detections with NMS.

        Args:
            rois: input RoIs; column 0 is the batch index.
            cls_score: per-RoI confidence predictions.
            bbox_pred: per-RoI box code predictions.
            class_labels (list): per-image class labels from the rpn.
            class_pred (list): per-image class probabilities used for NMS.
            img_meta (list[dict]): per-image meta info (``sample_idx``).
            cfg (dict): test config (``score_thr``, ``nms_thr``,
                ``use_rotate_nms``, ``use_raw_score``).

        Returns:
            list[dict]: one result dict per image with ``box3d_lidar``,
            ``scores``, ``label_preds`` and ``sample_idx``.
        """
        roi_batch_id = rois[..., 0]
        roi_boxes = rois[..., 1:]  # boxes without batch id
        batch_size = int(roi_batch_id.max().item() + 1)

        # decode boxes
        roi_ry = roi_boxes[..., 6].view(-1)
        roi_xyz = roi_boxes[..., 0:3].view(-1, 3)
        # decode relative to the RoI-local frame, then rotate/translate back
        local_roi_boxes = roi_boxes.clone().detach()
        local_roi_boxes[..., 0:3] = 0
        rcnn_boxes3d = self.bbox_coder.decode(local_roi_boxes, bbox_pred)
        rcnn_boxes3d[..., 0:3] = box_torch_ops.rotation_3d_in_axis(
            rcnn_boxes3d[..., 0:3].unsqueeze(1), (roi_ry + np.pi / 2),
            axis=2).squeeze(1)
        rcnn_boxes3d[:, 0:3] += roi_xyz

        # post processing
        result_list = []
        for batch_id in range(batch_size):
            cur_class_labels = class_labels[batch_id]
            cur_cls_score = cls_score[roi_batch_id == batch_id].view(-1)

            cur_box_prob = class_pred[batch_id]
            cur_rcnn_boxes3d = rcnn_boxes3d[roi_batch_id == batch_id]
            selected = self.multi_class_nms(cur_box_prob, cur_rcnn_boxes3d,
                                            cfg.score_thr, cfg.nms_thr,
                                            cfg.use_rotate_nms)
            selected_bboxes = cur_rcnn_boxes3d[selected]
            selected_label_preds = cur_class_labels[selected]

            if cfg.use_raw_score:
                selected_scores = cur_cls_score[selected]
            else:
                selected_scores = torch.sigmoid(cur_cls_score)[selected]

            cur_result = dict(
                box3d_lidar=selected_bboxes.cpu(),
                scores=selected_scores.cpu(),
                label_preds=selected_label_preds.cpu(),
                sample_idx=img_meta[batch_id]['sample_idx'])
            result_list.append(cur_result)
        return result_list

    def multi_class_nms(self,
                        box_probs,
                        box_preds,
                        score_thr,
                        nms_thr,
                        use_rotate_nms=True):
        """Class-wise NMS over BEV boxes.

        Args:
            box_probs: (N, num_classes) per-class scores (pre-sigmoid).
            box_preds: (N, 7) lidar boxes.
            score_thr (float or list): score threshold(s), per class if list.
            nms_thr (float or list): NMS iou threshold(s), per class if list.
            use_rotate_nms (bool): rotated NMS if True, axis-aligned else.

        Returns:
            Tensor or list: indices of kept boxes (empty list if none kept).
        """
        normalized_scores = torch.sigmoid(box_probs)
        if use_rotate_nms:
            nms_func = nms_gpu
        else:
            nms_func = nms_normal_gpu

        assert box_probs.shape[
            1] == self.num_classes, f'box_probs shape: {str(box_probs.shape)}'
        selected_list = []
        # NOTE(review): selected_labels is built but never returned/used here
        selected_labels = []
        boxes_for_nms = boxes3d_to_bev_torch_lidar(box_preds)

        # broadcast scalar thresholds to one per class
        score_thresh = score_thr if isinstance(
            score_thr, list) else [score_thr for x in range(self.num_classes)]
        nms_thresh = nms_thr if isinstance(
            nms_thr, list) else [nms_thr for x in range(self.num_classes)]
        for k in range(0, self.num_classes):
            class_scores_keep = normalized_scores[:, k] >= score_thresh[k]

            if class_scores_keep.int().sum() > 0:
                original_idxs = class_scores_keep.nonzero().view(-1)
                cur_boxes_for_nms = boxes_for_nms[class_scores_keep]
                cur_rank_scores = box_probs[class_scores_keep, k]

                cur_selected = nms_func(cur_boxes_for_nms, cur_rank_scores,
                                        nms_thresh[k])

                if cur_selected.shape[0] == 0:
                    continue
                selected_list.append(original_idxs[cur_selected])
                # labels are 1-based here (k + 1)
                selected_labels.append(
                    torch.full([cur_selected.shape[0]],
                               k + 1,
                               dtype=torch.int64,
                               device=box_preds.device))

        selected = torch.cat(
            selected_list, dim=0) if len(selected_list) > 0 else []
        return selected
mmdet3d/models/roi_heads/mask_heads/pointwise_semantic_head.py
View file @
54595292
...
...
@@ -126,22 +126,21 @@ class PointwiseSemanticHead(nn.Module):
part_targets
=
torch
.
cat
(
part_targets
,
dim
=
0
)
return
dict
(
seg_targets
=
seg_targets
,
part_targets
=
part_targets
)
def
loss
(
self
,
se
g_preds
,
part_preds
,
seg_targets
,
part
_targets
):
def
loss
(
self
,
se
mantic_results
,
semantic
_targets
):
"""Calculate point-wise segmentation and part prediction losses.
Args:
seg_preds (torch.Tensor): prediction of binary
segmentation with shape [voxel_num, 1].
part_preds (torch.Tensor): prediction of part
with shape [voxel_num, 3].
seg_targets (torch.Tensor): target of segmentation
with shape [voxel_num, 1].
part_targets (torch.Tensor): target of part with
shape [voxel_num, 3].
semantic_results (dict): Results from semantic head.
semantic_targets (dict): Targets of semantic results.
Returns:
dict: loss of segmentation and part prediction.
"""
seg_preds
=
semantic_results
[
'seg_preds'
]
part_preds
=
semantic_results
[
'part_preds'
]
seg_targets
=
semantic_targets
[
'seg_targets'
]
part_targets
=
semantic_targets
[
'part_targets'
]
pos_mask
=
(
seg_targets
>
-
1
)
&
(
seg_targets
<
self
.
num_classes
)
binary_seg_target
=
pos_mask
.
long
()
pos
=
pos_mask
.
float
()
...
...
mmdet3d/models/roi_heads/part_aggregation_roi_head.py
0 → 100644
View file @
54595292
import
torch.nn.functional
as
F
from
mmdet3d.core
import
AssignResult
from
mmdet3d.core.bbox
import
bbox3d2roi
from
mmdet.core
import
build_assigner
,
build_sampler
from
mmdet.models
import
HEADS
from
..builder
import
build_head
,
build_roi_extractor
from
.base_3droi_head
import
Base3DRoIHead
@HEADS.register_module()
class PartAggregationROIHead(Base3DRoIHead):
    """Part aggregation roi head for PartA2.

    Args:
        semantic_head (dict): Config of the semantic head (required).
        num_classes (int): Number of classes.
        seg_roi_extractor (dict, optional): Config of the RoI extractor
            over segmentation features.
        part_roi_extractor (dict, optional): Config of the RoI extractor
            over part-location features.
        bbox_head (dict, optional): Config of the bbox head.
        train_cfg (dict, optional): Training config.
        test_cfg (dict, optional): Testing config.
    """

    def __init__(self,
                 semantic_head,
                 num_classes=3,
                 seg_roi_extractor=None,
                 part_roi_extractor=None,
                 bbox_head=None,
                 train_cfg=None,
                 test_cfg=None):
        super(PartAggregationROIHead, self).__init__(
            bbox_head=bbox_head, train_cfg=train_cfg, test_cfg=test_cfg)
        self.num_classes = num_classes
        # the semantic head is mandatory: forward_train/simple_test rely on it
        assert semantic_head is not None
        self.semantic_head = build_head(semantic_head)

        if seg_roi_extractor is not None:
            self.seg_roi_extractor = build_roi_extractor(seg_roi_extractor)
        if part_roi_extractor is not None:
            self.part_roi_extractor = build_roi_extractor(part_roi_extractor)

        self.init_assigner_sampler()

    def init_weights(self, pretrained):
        """No extra weight init; submodules initialize themselves."""
        pass

    def init_mask_head(self):
        """This head has no mask branch."""
        pass

    def init_bbox_head(self, bbox_head):
        """Build the bbox head from its config."""
        self.bbox_head = build_head(bbox_head)

    def init_assigner_sampler(self):
        """Build the bbox assigner(s) and sampler from ``train_cfg``.

        A list of assigner configs builds one assigner per class.
        """
        self.bbox_assigner = None
        self.bbox_sampler = None
        if self.train_cfg:
            if isinstance(self.train_cfg.assigner, dict):
                self.bbox_assigner = build_assigner(self.train_cfg.assigner)
            elif isinstance(self.train_cfg.assigner, list):
                self.bbox_assigner = [
                    build_assigner(res) for res in self.train_cfg.assigner
                ]
            self.bbox_sampler = build_sampler(self.train_cfg.sampler)

    @property
    def with_semantic(self):
        """bool: whether the head has a semantic head."""
        return hasattr(self,
                       'semantic_head') and self.semantic_head is not None

    def forward_train(self, feats_dict, voxels_dict, img_meta, proposal_list,
                      gt_bboxes_3d, gt_labels_3d):
        """Training forward function of PartAggregationROIHead

        Args:
            feats_dict (dict): Contains features from the first stage.
            voxels_dict (dict): Contains information of voxels.
            img_metas (list[dict]): Meta info of each image.
            proposal_list (list[dict]): Proposal information from rpn.
            gt_bboxes_3d (list[FloatTensor]): GT bboxes of each batch.
            gt_labels_3d (list[LongTensor]): GT labels of each batch.

        Returns:
            dict: losses from each head.
        """
        losses = dict()
        # with_semantic always holds (asserted in __init__); the bbox branch
        # below relies on semantic_results being defined here
        if self.with_semantic:
            semantic_results = self._semantic_forward_train(
                feats_dict['seg_features'], voxels_dict, gt_bboxes_3d,
                gt_labels_3d)
            losses.update(semantic_results['loss_semantic'])

        sample_results = self._assign_and_sample(proposal_list, gt_bboxes_3d,
                                                 gt_labels_3d)
        if self.with_bbox:
            bbox_results = self._bbox_forward_train(
                feats_dict['seg_features'], semantic_results['part_feats'],
                voxels_dict, sample_results)
            losses.update(bbox_results['loss_bbox'])

        return losses

    def simple_test(self, feats_dict, voxels_dict, img_meta, proposal_list,
                    **kwargs):
        """Simple testing forward function of PartAggregationROIHead

        Args:
            feats_dict (dict): Contains features from the first stage.
            voxels_dict (dict): Contains information of voxels.
            img_metas (list[dict]): Meta info of each image.
            proposal_list (list[dict]): Proposal information from rpn.

        Returns:
            list[dict]: Bbox results of each batch.
        """
        assert self.with_bbox, 'Bbox head must be implemented.'
        assert self.with_semantic

        semantic_results = self.semantic_head(feats_dict['seg_features'])

        rois = bbox3d2roi([res['box3d_lidar'] for res in proposal_list])
        label_preds = [res['label_preds'] for res in proposal_list]
        cls_preds = [res['cls_preds'] for res in proposal_list]
        bbox_results = self._bbox_forward(feats_dict['seg_features'],
                                          semantic_results['part_feats'],
                                          voxels_dict, rois)

        bbox_list = self.bbox_head.get_bboxes(
            rois,
            bbox_results['cls_score'],
            bbox_results['bbox_pred'],
            label_preds,
            cls_preds,
            img_meta,
            cfg=self.test_cfg)
        return bbox_list

    def _bbox_forward_train(self, seg_feats, part_feats, voxels_dict,
                            sampling_results):
        """Run the bbox branch on sampled RoIs and compute its losses."""
        rois = bbox3d2roi([res.bboxes for res in sampling_results])
        bbox_results = self._bbox_forward(seg_feats, part_feats, voxels_dict,
                                          rois)

        bbox_targets = self.bbox_head.get_targets(sampling_results,
                                                  self.train_cfg)
        loss_bbox = self.bbox_head.loss(bbox_results['cls_score'],
                                        bbox_results['bbox_pred'], rois,
                                        *bbox_targets)

        bbox_results.update(loss_bbox=loss_bbox)
        return bbox_results

    def _bbox_forward(self, seg_feats, part_feats, voxels_dict, rois):
        """Pool seg/part features for the given RoIs and run the bbox head."""
        # coors column 0 is used as the per-voxel batch index
        pooled_seg_feats = self.seg_roi_extractor(seg_feats,
                                                  voxels_dict['voxel_centers'],
                                                  voxels_dict['coors'][..., 0],
                                                  rois)
        pooled_part_feats = self.part_roi_extractor(
            part_feats, voxels_dict['voxel_centers'],
            voxels_dict['coors'][..., 0], rois)
        cls_score, bbox_pred = self.bbox_head(pooled_seg_feats,
                                              pooled_part_feats)

        bbox_results = dict(
            cls_score=cls_score,
            bbox_pred=bbox_pred,
            pooled_seg_feats=pooled_seg_feats,
            pooled_part_feats=pooled_part_feats)
        return bbox_results

    def _assign_and_sample(self, proposal_list, gt_bboxes_3d, gt_labels_3d):
        """Assign proposals to gts and sample them per image.

        With a list of assigners, assignment is done per class and the
        per-class results are merged into a single ``AssignResult``.
        """
        sampling_results = []
        # bbox assign
        for batch_idx in range(len(proposal_list)):
            cur_proposal_list = proposal_list[batch_idx]
            cur_boxes = cur_proposal_list['box3d_lidar']
            cur_label_preds = cur_proposal_list['label_preds']
            cur_gt_bboxes = gt_bboxes_3d[batch_idx]
            cur_gt_labels = gt_labels_3d[batch_idx]

            batch_num_gts = 0
            batch_gt_indis = cur_gt_labels.new_full((cur_boxes.shape[0], ),
                                                    0)  # 0 is bg
            batch_max_overlaps = cur_boxes.new_zeros(cur_boxes.shape[0])
            batch_gt_labels = cur_gt_labels.new_full((cur_boxes.shape[0], ),
                                                     -1)  # -1 is bg
            if isinstance(self.bbox_assigner, list):
                # for multi classes
                for i, assigner in enumerate(self.bbox_assigner):
                    gt_per_cls = (cur_gt_labels == i)
                    pred_per_cls = (cur_label_preds == i)
                    cur_assign_res = assigner.assign(
                        cur_boxes[pred_per_cls],
                        cur_gt_bboxes[gt_per_cls],
                        gt_labels=cur_gt_labels[gt_per_cls])
                    # gather assign_results in different class into one result
                    batch_num_gts += cur_assign_res.num_gts
                    # gt inds (1-based)
                    gt_inds_arange_pad = gt_per_cls.nonzero().view(-1) + 1
                    # pad 0 for indice unassigned
                    gt_inds_arange_pad = F.pad(
                        gt_inds_arange_pad, (1, 0), mode='constant', value=0)
                    # pad -1 for indice ignore
                    gt_inds_arange_pad = F.pad(
                        gt_inds_arange_pad, (1, 0), mode='constant', value=-1)
                    # convert to 0~gt_num+2 for indices
                    gt_inds_arange_pad += 1
                    # now 0 is bg, >1 is fg in batch_gt_indis
                    batch_gt_indis[pred_per_cls] = gt_inds_arange_pad[
                        cur_assign_res.gt_inds + 1] - 1
                    batch_max_overlaps[
                        pred_per_cls] = cur_assign_res.max_overlaps
                    batch_gt_labels[pred_per_cls] = cur_assign_res.labels

                assign_result = AssignResult(batch_num_gts, batch_gt_indis,
                                             batch_max_overlaps,
                                             batch_gt_labels)
            else:  # for single class
                assign_result = self.bbox_assigner.assign(
                    cur_boxes, cur_gt_bboxes, gt_labels=cur_gt_labels)
            # sample boxes
            sampling_result = self.bbox_sampler.sample(assign_result,
                                                       cur_boxes,
                                                       cur_gt_bboxes,
                                                       cur_gt_labels)
            sampling_results.append(sampling_result)
        return sampling_results

    def _semantic_forward_train(self, x, voxels_dict, gt_bboxes_3d,
                                gt_labels_3d):
        """Run the semantic head, compute its targets and loss."""
        semantic_results = self.semantic_head(x)
        semantic_targets = self.semantic_head.get_targets(
            voxels_dict, gt_bboxes_3d, gt_labels_3d)
        loss_semantic = self.semantic_head.loss(semantic_results,
                                                semantic_targets)
        semantic_results.update(loss_semantic=loss_semantic)
        return semantic_results
tests/test_config.py
View file @
54595292
...
...
@@ -61,6 +61,9 @@ def test_config_build_detector():
assert
detector
.
roi_head
.
with_mask
==
detector
.
with_mask
head_config
=
config_mod
.
model
[
'roi_head'
]
if
head_config
.
type
==
'PartAggregationROIHead'
:
check_parta2_roi_head
(
head_config
,
detector
.
roi_head
)
else
:
_check_roi_head
(
head_config
,
detector
.
roi_head
)
# else:
# # for single stage detector
...
...
@@ -319,3 +322,44 @@ def _check_bbox_head(bbox_cfg, bbox_head):
out_dim
=
(
4
if
bbox_cfg
.
reg_class_agnostic
else
4
*
bbox_cfg
.
num_classes
)
assert
bbox_head
.
fc_reg
.
out_features
==
out_dim
def check_parta2_roi_head(config, head):
    """Verify a built PartAggregationROIHead against its config.

    Checks the head type, both point-cloud RoI extractors, and the
    bbox head configuration.
    """
    assert config['type'] == head.__class__.__name__

    # both roi extractors share the same layout checks
    _check_parta2_roi_extractor(config.seg_roi_extractor,
                                head.seg_roi_extractor)
    _check_parta2_roi_extractor(config.part_roi_extractor,
                                head.part_roi_extractor)

    # bbox head channels must match the built module
    _check_parta2_bbox_head(config.bbox_head, head.bbox_head)
def
_check_parta2_roi_extractor
(
config
,
roi_extractor
):
assert
config
[
'type'
]
==
roi_extractor
.
__class__
.
__name__
assert
(
config
.
roi_layer
.
out_size
==
roi_extractor
.
roi_layer
.
out_size
)
assert
(
config
.
roi_layer
.
max_pts_per_voxel
==
roi_extractor
.
roi_layer
.
max_pts_per_voxel
)
def _check_parta2_bbox_head(bbox_cfg, bbox_head):
    """Verify a PartA2 bbox head (or list/ModuleList of heads) vs config."""
    import torch.nn as nn
    if isinstance(bbox_cfg, list):
        # one config per head: check pairwise
        for cfg_i, head_i in zip(bbox_cfg, bbox_head):
            _check_bbox_head(cfg_i, head_i)
    elif isinstance(bbox_head, nn.ModuleList):
        # one shared config applied to every head
        for head_i in bbox_head:
            _check_bbox_head(bbox_cfg, head_i)
    else:
        assert bbox_cfg['type'] == type(bbox_head).__name__
        # input channels of the first seg/part sparse conv layers
        assert bbox_cfg.seg_in_channels == bbox_head.seg_conv[0][0].in_channels
        assert (bbox_cfg.part_in_channels ==
                bbox_head.part_conv[0][0].in_channels)
tests/test_heads.py
View file @
54595292
...
...
@@ -103,18 +103,18 @@ def test_second_head_loss():
losses
=
self
.
loss
(
cls_score
,
bbox_pred
,
dir_cls_preds
,
gt_bboxes
,
gt_labels
,
input_metas
)
assert
losses
[
'loss_cls
_3d
'
][
0
]
>
0
assert
losses
[
'loss_bbox
_3d
'
][
0
]
>
0
assert
losses
[
'loss_dir
_3d
'
][
0
]
>
0
assert
losses
[
'loss_
rpn_
cls'
][
0
]
>
0
assert
losses
[
'loss_
rpn_
bbox'
][
0
]
>
0
assert
losses
[
'loss_
rpn_
dir'
][
0
]
>
0
# test empty ground truth case
gt_bboxes
=
list
(
torch
.
empty
((
2
,
0
,
7
)).
cuda
())
gt_labels
=
list
(
torch
.
empty
((
2
,
0
)).
cuda
())
empty_gt_losses
=
self
.
loss
(
cls_score
,
bbox_pred
,
dir_cls_preds
,
gt_bboxes
,
gt_labels
,
input_metas
)
assert
empty_gt_losses
[
'loss_cls
_3d
'
][
0
]
>
0
assert
empty_gt_losses
[
'loss_bbox
_3d
'
][
0
]
==
0
assert
empty_gt_losses
[
'loss_dir
_3d
'
][
0
]
==
0
assert
empty_gt_losses
[
'loss_
rpn_
cls'
][
0
]
>
0
assert
empty_gt_losses
[
'loss_
rpn_
bbox'
][
0
]
==
0
assert
empty_gt_losses
[
'loss_
rpn_
dir'
][
0
]
==
0
def
test_second_head_getboxes
():
...
...
@@ -147,7 +147,7 @@ def test_parta2_rpnhead_getboxes():
if
not
torch
.
cuda
.
is_available
():
pytest
.
skip
(
'test requires GPU and torch+cuda'
)
rpn_head_cfg
,
proposal_cfg
=
_get_rpn_head_cfg
(
'kitti/hv_PartA2_secfpn_4x8_c
osine
_80e_kitti-3d-3class.py'
)
'kitti/hv_PartA2_secfpn_4x8_c
yclic
_80e_kitti-3d-3class.py'
)
from
mmdet3d.models.builder
import
build_head
self
=
build_head
(
rpn_head_cfg
)
...
...
tests/test_semantic_heads.py
View file @
54595292
...
...
@@ -62,9 +62,7 @@ def test_PointwiseSemanticHead():
[
voxel_features
.
shape
[
0
],
3
])
# test loss
loss_dict
=
self
.
loss
(
feats_dict
[
'seg_preds'
],
feats_dict
[
'part_preds'
],
target_dict
[
'seg_targets'
],
target_dict
[
'part_targets'
])
loss_dict
=
self
.
loss
(
feats_dict
,
target_dict
)
assert
loss_dict
[
'loss_seg'
]
>
0
assert
loss_dict
[
'loss_part'
]
==
0
# no points in gt_boxes
total_loss
=
loss_dict
[
'loss_seg'
]
+
loss_dict
[
'loss_part'
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment