Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
mmdetection3d
Commits
885a225b
Commit
885a225b
authored
May 15, 2020
by
wuyuefeng
Committed by
zhangwenwei
May 15, 2020
Browse files
Feature parta2 roi
parent
535344de
Changes
20
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1599 additions
and
93 deletions
+1599
-93
configs/kitti/hv_PartA2_secfpn_4x8_cyclic_80e_kitti-3d-3class.py
.../kitti/hv_PartA2_secfpn_4x8_cyclic_80e_kitti-3d-3class.py
+139
-37
configs/kitti/hv_PartA2_secfpn_4x8_cyclic_80e_kitti-3d-car.py
...igs/kitti/hv_PartA2_secfpn_4x8_cyclic_80e_kitti-3d-car.py
+278
-0
mmdet3d/core/bbox/__init__.py
mmdet3d/core/bbox/__init__.py
+2
-2
mmdet3d/core/bbox/box_np_ops.py
mmdet3d/core/bbox/box_np_ops.py
+66
-0
mmdet3d/core/bbox/box_torch_ops.py
mmdet3d/core/bbox/box_torch_ops.py
+67
-0
mmdet3d/core/bbox/iou_calculators/iou3d_calculator.py
mmdet3d/core/bbox/iou_calculators/iou3d_calculator.py
+5
-0
mmdet3d/core/bbox/transforms.py
mmdet3d/core/bbox/transforms.py
+22
-0
mmdet3d/core/evaluation/kitti_utils/eval.py
mmdet3d/core/evaluation/kitti_utils/eval.py
+0
-1
mmdet3d/models/anchor_heads/parta2_rpn_head.py
mmdet3d/models/anchor_heads/parta2_rpn_head.py
+8
-12
mmdet3d/models/anchor_heads/second_head.py
mmdet3d/models/anchor_heads/second_head.py
+3
-3
mmdet3d/models/detectors/parta2.py
mmdet3d/models/detectors/parta2.py
+38
-16
mmdet3d/models/roi_heads/__init__.py
mmdet3d/models/roi_heads/__init__.py
+8
-1
mmdet3d/models/roi_heads/base_3droi_head.py
mmdet3d/models/roi_heads/base_3droi_head.py
+80
-0
mmdet3d/models/roi_heads/bbox_heads/__init__.py
mmdet3d/models/roi_heads/bbox_heads/__init__.py
+2
-1
mmdet3d/models/roi_heads/bbox_heads/parta2_bbox_head.py
mmdet3d/models/roi_heads/bbox_heads/parta2_bbox_head.py
+593
-0
mmdet3d/models/roi_heads/mask_heads/pointwise_semantic_head.py
...3d/models/roi_heads/mask_heads/pointwise_semantic_head.py
+8
-9
mmdet3d/models/roi_heads/part_aggregation_roi_head.py
mmdet3d/models/roi_heads/part_aggregation_roi_head.py
+227
-0
tests/test_config.py
tests/test_config.py
+45
-1
tests/test_heads.py
tests/test_heads.py
+7
-7
tests/test_semantic_heads.py
tests/test_semantic_heads.py
+1
-3
No files found.
configs/kitti/hv_PartA2_secfpn_4x8_c
osine
_80e_kitti-3d-3class.py
→
configs/kitti/hv_PartA2_secfpn_4x8_c
yclic
_80e_kitti-3d-3class.py
View file @
885a225b
...
@@ -10,13 +10,16 @@ model = dict(
...
@@ -10,13 +10,16 @@ model = dict(
voxel_size
=
voxel_size
,
voxel_size
=
voxel_size
,
max_voxels
=
(
16000
,
40000
)
# (training, testing) max_coxels
max_voxels
=
(
16000
,
40000
)
# (training, testing) max_coxels
),
),
voxel_encoder
=
dict
(
type
=
'VoxelFeatureExtractorV3'
),
voxel_encoder
=
dict
(
type
=
'VoxelFeatureExtractorV3'
,
num_input_features
=
4
,
num_filters
=
[
4
],
with_distance
=
False
),
middle_encoder
=
dict
(
middle_encoder
=
dict
(
type
=
'SparseUNet'
,
type
=
'SparseUNet'
,
in_channels
=
4
,
in_channels
=
4
,
output_shape
=
[
41
,
1600
,
1408
],
output_shape
=
[
41
,
1600
,
1408
],
pre_act
=
False
,
pre_act
=
False
),
),
backbone
=
dict
(
backbone
=
dict
(
type
=
'SECOND'
,
type
=
'SECOND'
,
in_channels
=
256
,
in_channels
=
256
,
...
@@ -56,8 +59,65 @@ model = dict(
...
@@ -56,8 +59,65 @@ model = dict(
loss_weight
=
1.0
),
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
1.0
/
9.0
,
loss_weight
=
2.0
),
loss_bbox
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
1.0
/
9.0
,
loss_weight
=
2.0
),
loss_dir
=
dict
(
loss_dir
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
False
,
loss_weight
=
0.2
),
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
False
,
loss_weight
=
0.2
)),
))
roi_head
=
dict
(
type
=
'PartAggregationROIHead'
,
num_classes
=
3
,
semantic_head
=
dict
(
type
=
'PointwiseSemanticHead'
,
in_channels
=
16
,
extra_width
=
0.2
,
seg_score_thr
=
0.3
,
num_classes
=
3
,
loss_seg
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
reduction
=
'sum'
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
1.0
),
loss_part
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
True
,
loss_weight
=
1.0
)),
seg_roi_extractor
=
dict
(
type
=
'Single3DRoIAwareExtractor'
,
roi_layer
=
dict
(
type
=
'RoIAwarePool3d'
,
out_size
=
14
,
max_pts_per_voxel
=
128
,
mode
=
'max'
)),
part_roi_extractor
=
dict
(
type
=
'Single3DRoIAwareExtractor'
,
roi_layer
=
dict
(
type
=
'RoIAwarePool3d'
,
out_size
=
14
,
max_pts_per_voxel
=
128
,
mode
=
'avg'
)),
bbox_head
=
dict
(
type
=
'PartA2BboxHead'
,
num_classes
=
3
,
seg_in_channels
=
16
,
part_in_channels
=
4
,
seg_conv_channels
=
[
64
,
64
],
part_conv_channels
=
[
64
,
64
],
merge_conv_channels
=
[
128
,
128
],
down_conv_channels
=
[
128
,
256
],
bbox_coder
=
dict
(
type
=
'DeltaXYZWLHRBBoxCoder'
),
shared_fc_channels
=
[
256
,
512
,
512
,
512
],
cls_channels
=
[
256
,
256
],
reg_channels
=
[
256
,
256
],
dropout_ratio
=
0.1
,
roi_feat_size
=
14
,
with_corner_loss
=
True
,
loss_bbox
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
1.0
/
9.0
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
loss_cls
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
True
,
reduction
=
'sum'
,
loss_weight
=
1.0
))))
# model training and testing settings
# model training and testing settings
train_cfg
=
dict
(
train_cfg
=
dict
(
rpn
=
dict
(
rpn
=
dict
(
...
@@ -82,7 +142,7 @@ train_cfg = dict(
...
@@ -82,7 +142,7 @@ train_cfg = dict(
pos_iou_thr
=
0.6
,
pos_iou_thr
=
0.6
,
neg_iou_thr
=
0.45
,
neg_iou_thr
=
0.45
,
min_pos_iou
=
0.45
,
min_pos_iou
=
0.45
,
ignore_iof_thr
=-
1
)
,
ignore_iof_thr
=-
1
)
],
],
allowed_border
=
0
,
allowed_border
=
0
,
pos_weight
=-
1
,
pos_weight
=-
1
,
...
@@ -93,24 +153,61 @@ train_cfg = dict(
...
@@ -93,24 +153,61 @@ train_cfg = dict(
nms_thr
=
0.8
,
nms_thr
=
0.8
,
score_thr
=
0
,
score_thr
=
0
,
use_rotate_nms
=
False
),
use_rotate_nms
=
False
),
)
rcnn
=
dict
(
assigner
=
[
dict
(
# for Pedestrian
type
=
'MaxIoUAssigner'
,
iou_calculator
=
dict
(
type
=
'BboxOverlaps3D'
,
coordinate
=
'lidar'
),
pos_iou_thr
=
0.55
,
neg_iou_thr
=
0.55
,
min_pos_iou
=
0.55
,
ignore_iof_thr
=-
1
),
dict
(
# for Cyclist
type
=
'MaxIoUAssigner'
,
iou_calculator
=
dict
(
type
=
'BboxOverlaps3D'
,
coordinate
=
'lidar'
),
pos_iou_thr
=
0.55
,
neg_iou_thr
=
0.55
,
min_pos_iou
=
0.55
,
ignore_iof_thr
=-
1
),
dict
(
# for Car
type
=
'MaxIoUAssigner'
,
iou_calculator
=
dict
(
type
=
'BboxOverlaps3D'
,
coordinate
=
'lidar'
),
pos_iou_thr
=
0.55
,
neg_iou_thr
=
0.55
,
min_pos_iou
=
0.55
,
ignore_iof_thr
=-
1
)
],
sampler
=
dict
(
type
=
'IoUNegPiecewiseSampler'
,
num
=
128
,
pos_fraction
=
0.55
,
neg_piece_fractions
=
[
0.8
,
0.2
],
neg_iou_piece_thrs
=
[
0.55
,
0.1
],
neg_pos_ub
=-
1
,
add_gt_as_proposals
=
False
,
return_iou
=
True
),
cls_pos_thr
=
0.75
,
cls_neg_thr
=
0.25
))
test_cfg
=
dict
(
test_cfg
=
dict
(
rpn
=
dict
(
rpn
=
dict
(
nms_pre
=
1024
,
nms_pre
=
1024
,
max_per_img
=
100
,
nms_post
=
100
,
use_rotate_nms
=
True
,
nms_across_levels
=
False
,
nms_thr
=
0.7
,
nms_thr
=
0.7
,
score_thr
=
0
))
score_thr
=
0
,
use_rotate_nms
=
True
),
rcnn
=
dict
(
use_rotate_nms
=
True
,
use_raw_score
=
True
,
nms_thr
=
0.01
,
score_thr
=
0.1
))
# dataset settings
# dataset settings
dataset_type
=
'KittiDataset'
dataset_type
=
'KittiDataset'
data_root
=
'data/kitti/'
data_root
=
'data/kitti/'
class_names
=
[
'Pedestrian'
,
'Cyclist'
,
'Car'
]
class_names
=
[
'Pedestrian'
,
'Cyclist'
,
'Car'
]
img_norm_cfg
=
dict
(
mean
=
[
123.675
,
116.28
,
103.53
],
std
=
[
58.395
,
57.12
,
57.375
],
to_rgb
=
True
)
input_modality
=
dict
(
input_modality
=
dict
(
use_lidar
=
True
,
use_depth
=
False
,
use_lidar_intensity
=
True
,
use_camera
=
True
)
use_lidar
=
False
,
use_lidar_reduced
=
True
,
use_depth
=
False
,
use_lidar_intensity
=
True
,
use_camera
=
False
)
db_sampler
=
dict
(
db_sampler
=
dict
(
root_path
=
data_root
,
root_path
=
data_root
,
info_path
=
data_root
+
'kitti_dbinfos_train.pkl'
,
info_path
=
data_root
+
'kitti_dbinfos_train.pkl'
,
...
@@ -119,28 +216,34 @@ db_sampler = dict(
...
@@ -119,28 +216,34 @@ db_sampler = dict(
object_rot_range
=
[
0.0
,
0.0
],
object_rot_range
=
[
0.0
,
0.0
],
prepare
=
dict
(
prepare
=
dict
(
filter_by_difficulty
=
[
-
1
],
filter_by_difficulty
=
[
-
1
],
filter_by_min_points
=
dict
(
Car
=
5
,
Pedestrian
=
10
,
Cyclist
=
10
)),
filter_by_min_points
=
dict
(
sample_groups
=
dict
(
Car
=
12
,
Pedestrian
=
6
,
Cyclist
=
6
),
Car
=
5
,
)
Pedestrian
=
10
,
Cyclist
=
10
,
)),
sample_groups
=
dict
(
Car
=
12
,
Pedestrian
=
6
,
Cyclist
=
6
,
))
train_pipeline
=
[
train_pipeline
=
[
dict
(
type
=
'ObjectSample'
,
db_sampler
=
db_sampler
),
dict
(
type
=
'ObjectSample'
,
db_sampler
=
db_sampler
),
dict
(
dict
(
type
=
'ObjectNoise'
,
type
=
'ObjectNoise'
,
num_try
=
100
,
num_try
=
100
,
loc_noise_std
=
[
0
,
0
,
0
],
loc_noise_std
=
[
1.
0
,
1.
0
,
0
.5
],
global_rot_range
=
[
0.0
,
0.0
],
global_rot_range
=
[
0.0
,
0.0
],
rot_uniform_noise
=
[
-
0.
39269908
,
0.39269908
]),
rot_uniform_noise
=
[
-
0.
78539816
,
0.78539816
]),
dict
(
type
=
'RandomFlip3D'
,
flip_ratio
=
0.5
),
dict
(
type
=
'RandomFlip3D'
,
flip_ratio
=
0.5
),
dict
(
dict
(
type
=
'GlobalRotScale'
,
type
=
'GlobalRotScale'
,
rot_uniform_noise
=
[
-
0.78539816
,
0.78539816
],
rot_uniform_noise
=
[
-
0.78539816
,
0.78539816
],
scaling_uniform_noise
=
[
0.95
,
1.05
],
scaling_uniform_noise
=
[
0.95
,
1.05
]),
trans_normal_noise
=
[
0.2
,
0.2
,
0.2
]),
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'ObjectRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'ObjectRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'PointShuffle'
),
dict
(
type
=
'PointShuffle'
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes_3d'
,
'gt_labels_3d'
])
,
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes_3d'
,
'gt_labels_3d'
])
]
]
test_pipeline
=
[
test_pipeline
=
[
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
...
@@ -148,7 +251,7 @@ test_pipeline = [
...
@@ -148,7 +251,7 @@ test_pipeline = [
type
=
'DefaultFormatBundle3D'
,
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
,
class_names
=
class_names
,
with_label
=
False
),
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes_3d'
])
,
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes_3d'
])
]
]
data
=
dict
(
data
=
dict
(
...
@@ -183,21 +286,19 @@ data = dict(
...
@@ -183,21 +286,19 @@ data = dict(
class_names
=
class_names
,
class_names
=
class_names
,
with_label
=
True
))
with_label
=
True
))
# optimizer
# optimizer
lr
=
0.003
# max learning rate
lr
=
0.001
# max learning rate
optimizer
=
dict
(
optimizer
=
dict
(
type
=
'AdamW'
,
lr
=
lr
,
betas
=
(
0.95
,
0.99
),
weight_decay
=
0.01
)
type
=
'AdamW'
,
lr
=
lr
,
betas
=
(
0.95
,
0.99
),
# the momentum is change during training
weight_decay
=
0.001
)
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
10
,
norm_type
=
2
))
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
10
,
norm_type
=
2
))
lr_config
=
dict
(
lr_config
=
dict
(
policy
=
'cosine'
,
policy
=
'cyclic'
,
warmup
=
'linear'
,
target_ratio
=
(
10
,
1e-4
),
warmup_iters
=
1000
,
cyclic_times
=
1
,
warmup_ratio
=
1.0
/
10
,
step_ratio_up
=
0.4
)
target_lr
=
1e-5
,
momentum_config
=
dict
(
as_ratio
=
True
)
policy
=
'cyclic'
,
momentum_config
=
None
target_ratio
=
(
0.85
/
0.95
,
1
),
cyclic_times
=
1
,
step_ratio_up
=
0.4
)
checkpoint_config
=
dict
(
interval
=
1
)
checkpoint_config
=
dict
(
interval
=
1
)
# yapf:disable
# yapf:disable
log_config
=
dict
(
log_config
=
dict
(
...
@@ -209,8 +310,9 @@ log_config = dict(
...
@@ -209,8 +310,9 @@ log_config = dict(
# yapf:enable
# yapf:enable
# runtime settings
# runtime settings
total_epochs
=
80
total_epochs
=
80
dist_params
=
dict
(
backend
=
'nccl'
,
port
=
29502
)
dist_params
=
dict
(
backend
=
'nccl'
)
log_level
=
'INFO'
log_level
=
'INFO'
find_unused_parameters
=
True
work_dir
=
'./work_dirs/parta2_secfpn_80e'
work_dir
=
'./work_dirs/parta2_secfpn_80e'
load_from
=
None
load_from
=
None
resume_from
=
None
resume_from
=
None
...
...
configs/kitti/hv_PartA2_secfpn_4x8_cyclic_80e_kitti-3d-car.py
0 → 100644
View file @
885a225b
# model settings
voxel_size
=
[
0.05
,
0.05
,
0.1
]
point_cloud_range
=
[
0
,
-
40
,
-
3
,
70.4
,
40
,
1
]
# velodyne coordinates, x, y, z
model
=
dict
(
type
=
'PartA2'
,
voxel_layer
=
dict
(
max_num_points
=
5
,
# max_points_per_voxel
point_cloud_range
=
point_cloud_range
,
voxel_size
=
voxel_size
,
max_voxels
=
(
16000
,
40000
)
# (training, testing) max_coxels
),
voxel_encoder
=
dict
(
type
=
'VoxelFeatureExtractorV3'
,
num_input_features
=
4
,
num_filters
=
[
4
],
with_distance
=
False
),
middle_encoder
=
dict
(
type
=
'SparseUNet'
,
in_channels
=
4
,
output_shape
=
[
41
,
1600
,
1408
],
pre_act
=
False
),
backbone
=
dict
(
type
=
'SECOND'
,
in_channels
=
256
,
layer_nums
=
[
5
,
5
],
layer_strides
=
[
1
,
2
],
out_channels
=
[
128
,
256
]),
neck
=
dict
(
type
=
'SECONDFPN'
,
in_channels
=
[
128
,
256
],
upsample_strides
=
[
1
,
2
],
out_channels
=
[
256
,
256
]),
rpn_head
=
dict
(
type
=
'PartA2RPNHead'
,
class_name
=
[
'Car'
],
in_channels
=
512
,
feat_channels
=
512
,
use_direction_classifier
=
True
,
encode_bg_as_zeros
=
True
,
anchor_generator
=
dict
(
type
=
'Anchor3DRangeGenerator'
,
ranges
=
[[
0
,
-
40.0
,
-
1.78
,
70.4
,
40.0
,
-
1.78
]],
strides
=
[
2
],
sizes
=
[[
1.6
,
3.9
,
1.56
]],
rotations
=
[
0
,
1.57
],
reshape_out
=
False
),
diff_rad_by_sin
=
True
,
assigner_per_size
=
True
,
assign_per_class
=
True
,
bbox_coder
=
dict
(
type
=
'DeltaXYZWLHRBBoxCoder'
),
loss_cls
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
1.0
),
loss_bbox
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
1.0
/
9.0
,
loss_weight
=
2.0
),
loss_dir
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
False
,
loss_weight
=
0.2
)),
roi_head
=
dict
(
type
=
'PartAggregationROIHead'
,
num_classes
=
1
,
semantic_head
=
dict
(
type
=
'PointwiseSemanticHead'
,
in_channels
=
16
,
extra_width
=
0.2
,
seg_score_thr
=
0.3
,
num_classes
=
1
,
loss_seg
=
dict
(
type
=
'FocalLoss'
,
use_sigmoid
=
True
,
reduction
=
'sum'
,
gamma
=
2.0
,
alpha
=
0.25
,
loss_weight
=
1.0
),
loss_part
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
True
,
loss_weight
=
1.0
)),
seg_roi_extractor
=
dict
(
type
=
'Single3DRoIAwareExtractor'
,
roi_layer
=
dict
(
type
=
'RoIAwarePool3d'
,
out_size
=
14
,
max_pts_per_voxel
=
128
,
mode
=
'max'
)),
part_roi_extractor
=
dict
(
type
=
'Single3DRoIAwareExtractor'
,
roi_layer
=
dict
(
type
=
'RoIAwarePool3d'
,
out_size
=
14
,
max_pts_per_voxel
=
128
,
mode
=
'avg'
)),
bbox_head
=
dict
(
type
=
'PartA2BboxHead'
,
num_classes
=
1
,
seg_in_channels
=
16
,
part_in_channels
=
4
,
seg_conv_channels
=
[
64
,
64
],
part_conv_channels
=
[
64
,
64
],
merge_conv_channels
=
[
128
,
128
],
down_conv_channels
=
[
128
,
256
],
bbox_coder
=
dict
(
type
=
'DeltaXYZWLHRBBoxCoder'
),
shared_fc_channels
=
[
256
,
512
,
512
,
512
],
cls_channels
=
[
256
,
256
],
reg_channels
=
[
256
,
256
],
dropout_ratio
=
0.1
,
roi_feat_size
=
14
,
with_corner_loss
=
True
,
loss_bbox
=
dict
(
type
=
'SmoothL1Loss'
,
beta
=
1.0
/
9.0
,
reduction
=
'sum'
,
loss_weight
=
1.0
),
loss_cls
=
dict
(
type
=
'CrossEntropyLoss'
,
use_sigmoid
=
True
,
reduction
=
'sum'
,
loss_weight
=
1.0
))))
# model training and testing settings
train_cfg
=
dict
(
rpn
=
dict
(
assigner
=
dict
(
# for Car
type
=
'MaxIoUAssigner'
,
iou_calculator
=
dict
(
type
=
'BboxOverlapsNearest3D'
),
pos_iou_thr
=
0.6
,
neg_iou_thr
=
0.45
,
min_pos_iou
=
0.45
,
ignore_iof_thr
=-
1
),
allowed_border
=
0
,
pos_weight
=-
1
,
debug
=
False
),
rpn_proposal
=
dict
(
nms_pre
=
9000
,
nms_post
=
512
,
nms_thr
=
0.8
,
score_thr
=
0
,
use_rotate_nms
=
False
),
rcnn
=
dict
(
assigner
=
dict
(
# for Car
type
=
'MaxIoUAssigner'
,
iou_calculator
=
dict
(
type
=
'BboxOverlaps3D'
,
coordinate
=
'lidar'
),
pos_iou_thr
=
0.55
,
neg_iou_thr
=
0.55
,
min_pos_iou
=
0.55
,
ignore_iof_thr
=-
1
),
sampler
=
dict
(
type
=
'IoUNegPiecewiseSampler'
,
num
=
128
,
pos_fraction
=
0.55
,
neg_piece_fractions
=
[
0.8
,
0.2
],
neg_iou_piece_thrs
=
[
0.55
,
0.1
],
neg_pos_ub
=-
1
,
add_gt_as_proposals
=
False
,
return_iou
=
True
),
cls_pos_thr
=
0.75
,
cls_neg_thr
=
0.25
))
test_cfg
=
dict
(
rpn
=
dict
(
nms_pre
=
1024
,
nms_post
=
100
,
nms_thr
=
0.7
,
score_thr
=
0
,
use_rotate_nms
=
True
),
rcnn
=
dict
(
use_rotate_nms
=
True
,
use_raw_score
=
True
,
nms_thr
=
0.01
,
score_thr
=
0.1
))
# dataset settings
dataset_type
=
'KittiDataset'
data_root
=
'data/kitti/'
class_names
=
[
'Car'
]
input_modality
=
dict
(
use_lidar
=
False
,
use_lidar_reduced
=
True
,
use_depth
=
False
,
use_lidar_intensity
=
True
,
use_camera
=
False
)
db_sampler
=
dict
(
root_path
=
data_root
,
info_path
=
data_root
+
'kitti_dbinfos_train.pkl'
,
rate
=
1.0
,
use_road_plane
=
False
,
object_rot_range
=
[
0.0
,
0.0
],
prepare
=
dict
(
filter_by_difficulty
=
[
-
1
],
filter_by_min_points
=
dict
(
Car
=
5
),
),
sample_groups
=
dict
(
Car
=
15
))
train_pipeline
=
[
dict
(
type
=
'ObjectSample'
,
db_sampler
=
db_sampler
),
dict
(
type
=
'ObjectNoise'
,
num_try
=
100
,
loc_noise_std
=
[
1.0
,
1.0
,
0.5
],
global_rot_range
=
[
0.0
,
0.0
],
rot_uniform_noise
=
[
-
0.78539816
,
0.78539816
]),
dict
(
type
=
'RandomFlip3D'
,
flip_ratio
=
0.5
),
dict
(
type
=
'GlobalRotScale'
,
rot_uniform_noise
=
[
-
0.78539816
,
0.78539816
],
scaling_uniform_noise
=
[
0.95
,
1.05
]),
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'ObjectRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'PointShuffle'
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes_3d'
,
'gt_labels_3d'
])
]
test_pipeline
=
[
dict
(
type
=
'PointsRangeFilter'
,
point_cloud_range
=
point_cloud_range
),
dict
(
type
=
'DefaultFormatBundle3D'
,
class_names
=
class_names
,
with_label
=
False
),
dict
(
type
=
'Collect3D'
,
keys
=
[
'points'
,
'gt_bboxes_3d'
])
]
data
=
dict
(
samples_per_gpu
=
2
,
workers_per_gpu
=
2
,
train
=
dict
(
type
=
dataset_type
,
root_path
=
data_root
,
ann_file
=
data_root
+
'kitti_infos_train.pkl'
,
split
=
'training'
,
training
=
True
,
pipeline
=
train_pipeline
,
modality
=
input_modality
,
class_names
=
class_names
,
with_label
=
True
),
val
=
dict
(
type
=
dataset_type
,
root_path
=
data_root
,
ann_file
=
data_root
+
'kitti_infos_val.pkl'
,
split
=
'training'
,
pipeline
=
test_pipeline
,
modality
=
input_modality
,
class_names
=
class_names
,
with_label
=
True
),
test
=
dict
(
type
=
dataset_type
,
root_path
=
data_root
,
ann_file
=
data_root
+
'kitti_infos_val.pkl'
,
split
=
'testing'
,
pipeline
=
test_pipeline
,
modality
=
input_modality
,
class_names
=
class_names
,
with_label
=
True
))
# optimizer
lr
=
0.001
# max learning rate
optimizer
=
dict
(
type
=
'AdamW'
,
lr
=
lr
,
betas
=
(
0.95
,
0.99
),
weight_decay
=
0.01
)
optimizer_config
=
dict
(
grad_clip
=
dict
(
max_norm
=
10
,
norm_type
=
2
))
lr_config
=
dict
(
policy
=
'cyclic'
,
target_ratio
=
(
10
,
1e-4
),
cyclic_times
=
1
,
step_ratio_up
=
0.4
)
momentum_config
=
dict
(
policy
=
'cyclic'
,
target_ratio
=
(
0.85
/
0.95
,
1
),
cyclic_times
=
1
,
step_ratio_up
=
0.4
)
checkpoint_config
=
dict
(
interval
=
1
)
# yapf:disable
log_config
=
dict
(
interval
=
50
,
hooks
=
[
dict
(
type
=
'TextLoggerHook'
),
dict
(
type
=
'TensorboardLoggerHook'
)
])
# yapf:enable
# runtime settings
total_epochs
=
80
dist_params
=
dict
(
backend
=
'nccl'
)
log_level
=
'INFO'
find_unused_parameters
=
True
work_dir
=
'./work_dirs/parta2_secfpn_80e'
load_from
=
None
resume_from
=
None
workflow
=
[(
'train'
,
1
)]
mmdet3d/core/bbox/__init__.py
View file @
885a225b
...
@@ -8,7 +8,7 @@ from .samplers import (BaseSampler, CombinedSampler,
...
@@ -8,7 +8,7 @@ from .samplers import (BaseSampler, CombinedSampler,
InstanceBalancedPosSampler
,
IoUBalancedNegSampler
,
InstanceBalancedPosSampler
,
IoUBalancedNegSampler
,
PseudoSampler
,
RandomSampler
,
SamplingResult
)
PseudoSampler
,
RandomSampler
,
SamplingResult
)
from
.structures
import
Box3DMode
,
CameraInstance3DBoxes
,
LiDARInstance3DBoxes
from
.structures
import
Box3DMode
,
CameraInstance3DBoxes
,
LiDARInstance3DBoxes
from
.transforms
import
boxes3d_to_bev_torch_lidar
from
.transforms
import
bbox3d2roi
,
boxes3d_to_bev_torch_lidar
from
.assign_sampling
import
(
# isort:skip, avoid recursive imports
from
.assign_sampling
import
(
# isort:skip, avoid recursive imports
build_bbox_coder
,
# temporally settings
build_bbox_coder
,
# temporally settings
...
@@ -22,5 +22,5 @@ __all__ = [
...
@@ -22,5 +22,5 @@ __all__ = [
'build_bbox_coder'
,
'DeltaXYZWLHRBBoxCoder'
,
'boxes3d_to_bev_torch_lidar'
,
'build_bbox_coder'
,
'DeltaXYZWLHRBBoxCoder'
,
'boxes3d_to_bev_torch_lidar'
,
'BboxOverlapsNearest3D'
,
'BboxOverlaps3D'
,
'bbox_overlaps_nearest_3d'
,
'BboxOverlapsNearest3D'
,
'BboxOverlaps3D'
,
'bbox_overlaps_nearest_3d'
,
'bbox_overlaps_3d'
,
'Box3DMode'
,
'LiDARInstance3DBoxes'
,
'bbox_overlaps_3d'
,
'Box3DMode'
,
'LiDARInstance3DBoxes'
,
'CameraInstance3DBoxes'
'CameraInstance3DBoxes'
,
'bbox3d2roi'
]
]
mmdet3d/core/bbox/box_np_ops.py
View file @
885a225b
...
@@ -566,3 +566,69 @@ def points_in_convex_polygon_jit(points, polygon, clockwise=True):
...
@@ -566,3 +566,69 @@ def points_in_convex_polygon_jit(points, polygon, clockwise=True):
break
break
ret
[
i
,
j
]
=
success
ret
[
i
,
j
]
=
success
return
ret
return
ret
def
boxes3d_to_corners3d_lidar
(
boxes3d
,
bottom_center
=
True
):
"""convert kitti center boxes to corners
7 -------- 4
/| /|
6 -------- 5 .
| | | |
. 3 -------- 0
|/ |/
2 -------- 1
Args:
boxes3d (numpy.array): (N, 7) [x, y, z, w, l, h, ry] in LiDAR coords,
see the definition of ry in KITTI dataset
bottom_center (bool): whether z is on the bottom center of object.
Returns:
numpy.array: box corners with shape (N, 8, 3)
"""
boxes_num
=
boxes3d
.
shape
[
0
]
w
,
l
,
h
=
boxes3d
[:,
3
],
boxes3d
[:,
4
],
boxes3d
[:,
5
]
x_corners
=
np
.
array
(
[
w
/
2.
,
-
w
/
2.
,
-
w
/
2.
,
w
/
2.
,
w
/
2.
,
-
w
/
2.
,
-
w
/
2.
,
w
/
2.
],
dtype
=
np
.
float32
).
T
y_corners
=
np
.
array
(
[
-
l
/
2.
,
-
l
/
2.
,
l
/
2.
,
l
/
2.
,
-
l
/
2.
,
-
l
/
2.
,
l
/
2.
,
l
/
2.
],
dtype
=
np
.
float32
).
T
if
bottom_center
:
z_corners
=
np
.
zeros
((
boxes_num
,
8
),
dtype
=
np
.
float32
)
z_corners
[:,
4
:
8
]
=
h
.
reshape
(
boxes_num
,
1
).
repeat
(
4
,
axis
=
1
)
# (N, 8)
else
:
z_corners
=
np
.
array
([
-
h
/
2.
,
-
h
/
2.
,
-
h
/
2.
,
-
h
/
2.
,
h
/
2.
,
h
/
2.
,
h
/
2.
,
h
/
2.
],
dtype
=
np
.
float32
).
T
ry
=
boxes3d
[:,
6
]
zeros
,
ones
=
np
.
zeros
(
ry
.
size
,
dtype
=
np
.
float32
),
np
.
ones
(
ry
.
size
,
dtype
=
np
.
float32
)
rot_list
=
np
.
array
([[
np
.
cos
(
ry
),
-
np
.
sin
(
ry
),
zeros
],
[
np
.
sin
(
ry
),
np
.
cos
(
ry
),
zeros
],
[
zeros
,
zeros
,
ones
]])
# (3, 3, N)
R_list
=
np
.
transpose
(
rot_list
,
(
2
,
0
,
1
))
# (N, 3, 3)
temp_corners
=
np
.
concatenate
((
x_corners
.
reshape
(
-
1
,
8
,
1
),
y_corners
.
reshape
(
-
1
,
8
,
1
),
z_corners
.
reshape
(
-
1
,
8
,
1
)),
axis
=
2
)
# (N, 8, 3)
rotated_corners
=
np
.
matmul
(
temp_corners
,
R_list
)
# (N, 8, 3)
x_corners
=
rotated_corners
[:,
:,
0
]
y_corners
=
rotated_corners
[:,
:,
1
]
z_corners
=
rotated_corners
[:,
:,
2
]
x_loc
,
y_loc
,
z_loc
=
boxes3d
[:,
0
],
boxes3d
[:,
1
],
boxes3d
[:,
2
]
x
=
x_loc
.
reshape
(
-
1
,
1
)
+
x_corners
.
reshape
(
-
1
,
8
)
y
=
y_loc
.
reshape
(
-
1
,
1
)
+
y_corners
.
reshape
(
-
1
,
8
)
z
=
z_loc
.
reshape
(
-
1
,
1
)
+
z_corners
.
reshape
(
-
1
,
8
)
corners
=
np
.
concatenate
(
(
x
.
reshape
(
-
1
,
8
,
1
),
y
.
reshape
(
-
1
,
8
,
1
),
z
.
reshape
(
-
1
,
8
,
1
)),
axis
=
2
)
return
corners
.
astype
(
np
.
float32
)
mmdet3d/core/bbox/box_torch_ops.py
View file @
885a225b
...
@@ -210,3 +210,70 @@ def enlarge_box3d_lidar(boxes3d, extra_width):
...
@@ -210,3 +210,70 @@ def enlarge_box3d_lidar(boxes3d, extra_width):
large_boxes3d
[:,
3
:
6
]
+=
extra_width
*
2
large_boxes3d
[:,
3
:
6
]
+=
extra_width
*
2
large_boxes3d
[:,
2
]
-=
extra_width
# bottom center z minus extra_width
large_boxes3d
[:,
2
]
-=
extra_width
# bottom center z minus extra_width
return
large_boxes3d
return
large_boxes3d
def
boxes3d_to_corners3d_lidar_torch
(
boxes3d
,
bottom_center
=
True
):
"""convert kitti center boxes to corners
7 -------- 4
/| /|
6 -------- 5 .
| | | |
. 3 -------- 0
|/ |/
2 -------- 1
Args:
boxes3d (FloatTensor): (N, 7) [x, y, z, w, l, h, ry] in LiDAR coords,
see the definition of ry in KITTI dataset
bottom_center (bool): whether z is on the bottom center of object.
Returns:
FloatTensor: box corners with shape (N, 8, 3)
"""
boxes_num
=
boxes3d
.
shape
[
0
]
w
,
l
,
h
=
boxes3d
[:,
3
:
4
],
boxes3d
[:,
4
:
5
],
boxes3d
[:,
5
:
6
]
ry
=
boxes3d
[:,
6
:
7
]
zeros
=
boxes3d
.
new_zeros
(
boxes_num
,
1
)
ones
=
boxes3d
.
new_ones
(
boxes_num
,
1
)
x_corners
=
torch
.
cat
(
[
w
/
2.
,
-
w
/
2.
,
-
w
/
2.
,
w
/
2.
,
w
/
2.
,
-
w
/
2.
,
-
w
/
2.
,
w
/
2.
],
dim
=
1
)
# (N, 8)
y_corners
=
torch
.
cat
(
[
-
l
/
2.
,
-
l
/
2.
,
l
/
2.
,
l
/
2.
,
-
l
/
2.
,
-
l
/
2.
,
l
/
2.
,
l
/
2.
],
dim
=
1
)
# (N, 8)
if
bottom_center
:
z_corners
=
torch
.
cat
([
zeros
,
zeros
,
zeros
,
zeros
,
h
,
h
,
h
,
h
],
dim
=
1
)
# (N, 8)
else
:
z_corners
=
torch
.
cat
([
-
h
/
2.
,
-
h
/
2.
,
-
h
/
2.
,
-
h
/
2.
,
h
/
2.
,
h
/
2.
,
h
/
2.
,
h
/
2.
],
dim
=
1
)
# (N, 8)
temp_corners
=
torch
.
cat
(
(
x_corners
.
unsqueeze
(
dim
=
2
),
y_corners
.
unsqueeze
(
dim
=
2
),
z_corners
.
unsqueeze
(
dim
=
2
)),
dim
=
2
)
# (N, 8, 3)
cosa
,
sina
=
torch
.
cos
(
ry
),
torch
.
sin
(
ry
)
raw_1
=
torch
.
cat
([
cosa
,
-
sina
,
zeros
],
dim
=
1
)
# (N, 3)
raw_2
=
torch
.
cat
([
sina
,
cosa
,
zeros
],
dim
=
1
)
# (N, 3)
raw_3
=
torch
.
cat
([
zeros
,
zeros
,
ones
],
dim
=
1
)
# (N, 3)
R
=
torch
.
cat
((
raw_1
.
unsqueeze
(
dim
=
1
),
raw_2
.
unsqueeze
(
dim
=
1
),
raw_3
.
unsqueeze
(
dim
=
1
)),
dim
=
1
)
# (N, 3, 3)
rotated_corners
=
torch
.
matmul
(
temp_corners
,
R
)
# (N, 8, 3)
x_corners
=
rotated_corners
[:,
:,
0
]
y_corners
=
rotated_corners
[:,
:,
1
]
z_corners
=
rotated_corners
[:,
:,
2
]
x_loc
,
y_loc
,
z_loc
=
boxes3d
[:,
0
],
boxes3d
[:,
1
],
boxes3d
[:,
2
]
x
=
x_loc
.
view
(
-
1
,
1
)
+
x_corners
.
view
(
-
1
,
8
)
y
=
y_loc
.
view
(
-
1
,
1
)
+
y_corners
.
view
(
-
1
,
8
)
z
=
z_loc
.
view
(
-
1
,
1
)
+
z_corners
.
view
(
-
1
,
8
)
corners
=
torch
.
cat
((
x
.
view
(
-
1
,
8
,
1
),
y
.
view
(
-
1
,
8
,
1
),
z
.
view
(
-
1
,
8
,
1
)),
dim
=
2
)
return
corners
mmdet3d/core/bbox/iou_calculators/iou3d_calculator.py
View file @
885a225b
...
@@ -88,6 +88,11 @@ def bbox_overlaps_3d(bboxes1, bboxes2, mode='iou', coordinate='camera'):
...
@@ -88,6 +88,11 @@ def bbox_overlaps_3d(bboxes1, bboxes2, mode='iou', coordinate='camera'):
assert
bboxes1
.
size
(
-
1
)
==
bboxes2
.
size
(
-
1
)
==
7
assert
bboxes1
.
size
(
-
1
)
==
bboxes2
.
size
(
-
1
)
==
7
assert
coordinate
in
[
'camera'
,
'lidar'
]
assert
coordinate
in
[
'camera'
,
'lidar'
]
rows
=
bboxes1
.
size
(
0
)
cols
=
bboxes2
.
size
(
0
)
if
rows
*
cols
==
0
:
return
bboxes1
.
new
(
rows
,
cols
)
if
coordinate
==
'camera'
:
if
coordinate
==
'camera'
:
return
boxes_iou3d_gpu_camera
(
bboxes1
,
bboxes2
,
mode
)
return
boxes_iou3d_gpu_camera
(
bboxes1
,
bboxes2
,
mode
)
elif
coordinate
==
'lidar'
:
elif
coordinate
==
'lidar'
:
...
...
mmdet3d/core/bbox/transforms.py
View file @
885a225b
...
@@ -47,3 +47,25 @@ def boxes3d_to_bev_torch_lidar(boxes3d):
...
@@ -47,3 +47,25 @@ def boxes3d_to_bev_torch_lidar(boxes3d):
boxes_bev
[:,
2
],
boxes_bev
[:,
3
]
=
cu
+
half_w
,
cv
+
half_l
boxes_bev
[:,
2
],
boxes_bev
[:,
3
]
=
cu
+
half_w
,
cv
+
half_l
boxes_bev
[:,
4
]
=
boxes3d
[:,
6
]
boxes_bev
[:,
4
]
=
boxes3d
[:,
6
]
return
boxes_bev
return
boxes_bev
def
bbox3d2roi
(
bbox_list
):
"""Convert a list of bboxes to roi format.
Args:
bbox_list (list[Tensor]): a list of bboxes corresponding to a batch
of images.
Returns:
Tensor: shape (n, c), [batch_ind, x, y ...]
"""
rois_list
=
[]
for
img_id
,
bboxes
in
enumerate
(
bbox_list
):
if
bboxes
.
size
(
0
)
>
0
:
img_inds
=
bboxes
.
new_full
((
bboxes
.
size
(
0
),
1
),
img_id
)
rois
=
torch
.
cat
([
img_inds
,
bboxes
],
dim
=-
1
)
else
:
rois
=
torch
.
zeros_like
(
bboxes
)
rois_list
.
append
(
rois
)
rois
=
torch
.
cat
(
rois_list
,
0
)
return
rois
mmdet3d/core/evaluation/kitti_utils/eval.py
View file @
885a225b
...
@@ -731,7 +731,6 @@ def kitti_eval(gt_annos,
...
@@ -731,7 +731,6 @@ def kitti_eval(gt_annos,
result
+=
'aos AP:{:.2f}, {:.2f}, {:.2f}
\n
'
.
format
(
*
mAPaos
[:,
0
])
result
+=
'aos AP:{:.2f}, {:.2f}, {:.2f}
\n
'
.
format
(
*
mAPaos
[:,
0
])
# prepare results for logger
# prepare results for logger
ret_dict
[
'Overall'
]
=
dict
()
for
idx
in
range
(
3
):
for
idx
in
range
(
3
):
postfix
=
f
'
{
difficulty
[
idx
]
}
'
postfix
=
f
'
{
difficulty
[
idx
]
}
'
if
mAP3d
is
not
None
:
if
mAP3d
is
not
None
:
...
...
mmdet3d/models/anchor_heads/parta2_rpn_head.py
View file @
885a225b
...
@@ -231,19 +231,15 @@ class PartA2RPNHead(SECONDHead):
...
@@ -231,19 +231,15 @@ class PartA2RPNHead(SECONDHead):
labels
=
labels
[
inds
]
labels
=
labels
[
inds
]
scores
=
scores
[
inds
]
scores
=
scores
[
inds
]
cls_scores
=
cls_scores
[
inds
]
cls_scores
=
cls_scores
[
inds
]
dir_scores
=
dir_scores
[
inds
]
return
dict
(
return
dict
(
box3d_lidar
=
bboxes
.
cpu
(),
box3d_lidar
=
bboxes
,
scores
=
scores
.
cpu
(),
scores
=
scores
,
label_preds
=
labels
.
cpu
(),
label_preds
=
labels
,
cls_preds
=
cls_scores
.
cpu
(
cls_preds
=
cls_scores
# raw scores [max_num, cls_num]
)
# raw scores with shape [max_num, cls_num]
)
)
else
:
else
:
return
dict
(
return
dict
(
box3d_lidar
=
mlvl_bboxes
.
new_zeros
([
0
,
box3d_lidar
=
mlvl_bboxes
.
new_zeros
([
0
,
self
.
box_code_size
]),
self
.
box_code_size
]).
cpu
(),
scores
=
mlvl_bboxes
.
new_zeros
([
0
]),
scores
=
mlvl_bboxes
.
new_zeros
([
0
]).
cpu
(),
label_preds
=
mlvl_bboxes
.
new_zeros
([
0
]),
label_preds
=
mlvl_bboxes
.
new_zeros
([
0
]).
cpu
(),
cls_preds
=
mlvl_bboxes
.
new_zeros
([
0
,
mlvl_cls_score
.
shape
[
-
1
]]))
cls_preds
=
mlvl_bboxes
.
new_zeros
([
0
,
mlvl_cls_score
.
shape
[
-
1
]
]).
cpu
())
mmdet3d/models/anchor_heads/second_head.py
View file @
885a225b
...
@@ -258,9 +258,9 @@ class SECONDHead(nn.Module, AnchorTrainMixin):
...
@@ -258,9 +258,9 @@ class SECONDHead(nn.Module, AnchorTrainMixin):
dir_weights_list
,
dir_weights_list
,
num_total_samples
=
num_total_samples
)
num_total_samples
=
num_total_samples
)
return
dict
(
return
dict
(
loss_cls
_3d
=
losses_cls
,
loss_
rpn_
cls
=
losses_cls
,
loss_bbox
_3d
=
losses_bbox
,
loss_
rpn_
bbox
=
losses_bbox
,
loss_dir
_3d
=
losses_dir
)
loss_
rpn_
dir
=
losses_dir
)
def
get_bboxes
(
self
,
def
get_bboxes
(
self
,
cls_scores
,
cls_scores
,
...
...
mmdet3d/models/detectors/parta2.py
View file @
885a225b
...
@@ -34,11 +34,13 @@ class PartA2(TwoStageDetector):
...
@@ -34,11 +34,13 @@ class PartA2(TwoStageDetector):
self
.
middle_encoder
=
builder
.
build_middle_encoder
(
middle_encoder
)
self
.
middle_encoder
=
builder
.
build_middle_encoder
(
middle_encoder
)
def
extract_feat
(
self
,
points
,
img_meta
):
def
extract_feat
(
self
,
points
,
img_meta
):
voxels
,
num_points
,
coors
=
self
.
voxelize
(
points
)
voxel_dict
=
self
.
voxelize
(
points
)
voxel_dict
=
dict
(
voxels
=
voxels
,
num_points
=
num_points
,
coors
=
coors
)
voxel_features
=
self
.
voxel_encoder
(
voxel_dict
[
'voxels'
],
voxel_features
=
self
.
voxel_encoder
(
voxels
,
num_points
,
coors
)
voxel_dict
[
'num_points'
],
batch_size
=
coors
[
-
1
,
0
].
item
()
+
1
voxel_dict
[
'coors'
])
feats_dict
=
self
.
middle_encoder
(
voxel_features
,
coors
,
batch_size
)
batch_size
=
voxel_dict
[
'coors'
][
-
1
,
0
].
item
()
+
1
feats_dict
=
self
.
middle_encoder
(
voxel_features
,
voxel_dict
[
'coors'
],
batch_size
)
x
=
self
.
backbone
(
feats_dict
[
'spatial_features'
])
x
=
self
.
backbone
(
feats_dict
[
'spatial_features'
])
if
self
.
with_neck
:
if
self
.
with_neck
:
neck_feats
=
self
.
neck
(
x
)
neck_feats
=
self
.
neck
(
x
)
...
@@ -47,20 +49,33 @@ class PartA2(TwoStageDetector):
...
@@ -47,20 +49,33 @@ class PartA2(TwoStageDetector):
@
torch
.
no_grad
()
@
torch
.
no_grad
()
def
voxelize
(
self
,
points
):
def
voxelize
(
self
,
points
):
voxels
,
coors
,
num_points
=
[],
[],
[]
voxels
,
coors
,
num_points
,
voxel_centers
=
[],
[],
[],
[]
for
res
in
points
:
for
res
in
points
:
res_voxels
,
res_coors
,
res_num_points
=
self
.
voxel_layer
(
res
)
res_voxels
,
res_coors
,
res_num_points
=
self
.
voxel_layer
(
res
)
res_voxel_centers
=
(
res_coors
[:,
[
2
,
1
,
0
]]
+
0.5
)
*
res_voxels
.
new_tensor
(
self
.
voxel_layer
.
voxel_size
)
+
res_voxels
.
new_tensor
(
self
.
voxel_layer
.
point_cloud_range
[
0
:
3
])
voxels
.
append
(
res_voxels
)
voxels
.
append
(
res_voxels
)
coors
.
append
(
res_coors
)
coors
.
append
(
res_coors
)
num_points
.
append
(
res_num_points
)
num_points
.
append
(
res_num_points
)
voxel_centers
.
append
(
res_voxel_centers
)
voxels
=
torch
.
cat
(
voxels
,
dim
=
0
)
voxels
=
torch
.
cat
(
voxels
,
dim
=
0
)
num_points
=
torch
.
cat
(
num_points
,
dim
=
0
)
num_points
=
torch
.
cat
(
num_points
,
dim
=
0
)
voxel_centers
=
torch
.
cat
(
voxel_centers
,
dim
=
0
)
coors_batch
=
[]
coors_batch
=
[]
for
i
,
coor
in
enumerate
(
coors
):
for
i
,
coor
in
enumerate
(
coors
):
coor_pad
=
F
.
pad
(
coor
,
(
1
,
0
),
mode
=
'constant'
,
value
=
i
)
coor_pad
=
F
.
pad
(
coor
,
(
1
,
0
),
mode
=
'constant'
,
value
=
i
)
coors_batch
.
append
(
coor_pad
)
coors_batch
.
append
(
coor_pad
)
coors_batch
=
torch
.
cat
(
coors_batch
,
dim
=
0
)
coors_batch
=
torch
.
cat
(
coors_batch
,
dim
=
0
)
return
voxels
,
num_points
,
coors_batch
voxel_dict
=
dict
(
voxels
=
voxels
,
num_points
=
num_points
,
coors
=
coors_batch
,
voxel_centers
=
voxel_centers
)
return
voxel_dict
def
forward_train
(
self
,
def
forward_train
(
self
,
points
,
points
,
...
@@ -69,7 +84,6 @@ class PartA2(TwoStageDetector):
...
@@ -69,7 +84,6 @@ class PartA2(TwoStageDetector):
gt_labels_3d
,
gt_labels_3d
,
gt_bboxes_ignore
=
None
,
gt_bboxes_ignore
=
None
,
proposals
=
None
):
proposals
=
None
):
# TODO: complete it
feats_dict
,
voxels_dict
=
self
.
extract_feat
(
points
,
img_meta
)
feats_dict
,
voxels_dict
=
self
.
extract_feat
(
points
,
img_meta
)
losses
=
dict
()
losses
=
dict
()
...
@@ -86,7 +100,13 @@ class PartA2(TwoStageDetector):
...
@@ -86,7 +100,13 @@ class PartA2(TwoStageDetector):
proposal_inputs
=
rpn_outs
+
(
img_meta
,
proposal_cfg
)
proposal_inputs
=
rpn_outs
+
(
img_meta
,
proposal_cfg
)
proposal_list
=
self
.
rpn_head
.
get_bboxes
(
*
proposal_inputs
)
proposal_list
=
self
.
rpn_head
.
get_bboxes
(
*
proposal_inputs
)
else
:
else
:
proposal_list
=
proposals
# noqa: F841
proposal_list
=
proposals
roi_losses
=
self
.
roi_head
.
forward_train
(
feats_dict
,
voxels_dict
,
img_meta
,
proposal_list
,
gt_bboxes_3d
,
gt_labels_3d
)
losses
.
update
(
roi_losses
)
return
losses
return
losses
...
@@ -102,16 +122,18 @@ class PartA2(TwoStageDetector):
...
@@ -102,16 +122,18 @@ class PartA2(TwoStageDetector):
def
simple_test
(
self
,
def
simple_test
(
self
,
points
,
points
,
img_meta
,
img_meta
,
gt_bboxes_3d
=
None
,
gt_bboxes_3d
,
proposals
=
None
,
proposals
=
None
,
rescale
=
False
):
rescale
=
False
):
feats_dict
,
voxels_dict
=
self
.
extract_feat
(
points
,
img_meta
)
feats_dict
,
voxels_dict
=
self
.
extract_feat
(
points
,
img_meta
)
# TODO: complete it
if
proposals
is
None
:
if
self
.
with_rpn
:
proposal_list
=
self
.
simple_test_rpn
(
feats_dict
[
'neck_feats'
],
rpn_outs
=
self
.
rpn_head
(
feats_dict
[
'neck_feats'
])
img_meta
,
self
.
test_cfg
.
rpn
)
proposal_cfg
=
self
.
test_cfg
.
rpn
bbox_inputs
=
rpn_outs
+
(
img_meta
,
proposal_cfg
)
proposal_list
=
self
.
rpn_head
.
get_bboxes
(
*
bbox_inputs
)
else
:
else
:
proposal_list
=
proposals
proposal_list
=
proposals
return
self
.
roi_head
.
simple_test
(
return
self
.
roi_head
.
simple_test
(
feats_dict
,
voxels_dict
,
img_meta
,
feats_dict
,
proposal_list
,
img_meta
,
rescale
=
rescale
)
proposal_list
)
mmdet3d/models/roi_heads/__init__.py
View file @
885a225b
from
.base_3droi_head
import
Base3DRoIHead
from
.bbox_heads
import
PartA2BboxHead
from
.mask_heads
import
PointwiseSemanticHead
from
.mask_heads
import
PointwiseSemanticHead
from
.part_aggregation_roi_head
import
PartAggregationROIHead
from
.roi_extractors
import
Single3DRoIAwareExtractor
__all__
=
[
'PointwiseSemanticHead'
]
__all__
=
[
'Base3DRoIHead'
,
'PartAggregationROIHead'
,
'PointwiseSemanticHead'
,
'Single3DRoIAwareExtractor'
,
'PartA2BboxHead'
]
mmdet3d/models/roi_heads/base_3droi_head.py
0 → 100644
View file @
885a225b
from
abc
import
ABCMeta
,
abstractmethod
import
torch.nn
as
nn
class Base3DRoIHead(nn.Module, metaclass=ABCMeta):
    """Base class for 3D RoI heads.

    Holds the train/test configs and optionally builds the bbox and mask
    sub-heads through the abstract ``init_*`` hooks that concrete
    subclasses implement.
    """

    def __init__(self,
                 bbox_head=None,
                 mask_roi_extractor=None,
                 mask_head=None,
                 train_cfg=None,
                 test_cfg=None):
        super(Base3DRoIHead, self).__init__()
        # Configs are stored verbatim; subclasses decide how to use them.
        self.train_cfg = train_cfg
        self.test_cfg = test_cfg

        # Sub-heads are only constructed when a config is supplied.
        if bbox_head is not None:
            self.init_bbox_head(bbox_head)

        if mask_head is not None:
            self.init_mask_head(mask_roi_extractor, mask_head)

        self.init_assigner_sampler()

    @property
    def with_bbox(self):
        """bool: True when a bbox head has been attached and is non-None."""
        return getattr(self, 'bbox_head', None) is not None

    @property
    def with_mask(self):
        """bool: True when a mask head has been attached and is non-None."""
        return getattr(self, 'mask_head', None) is not None

    @abstractmethod
    def init_weights(self, pretrained):
        """Initialize the weights, optionally from a pretrained model."""
        pass

    @abstractmethod
    def init_bbox_head(self):
        """Construct the bbox head (called from ``__init__`` with a cfg)."""
        pass

    @abstractmethod
    def init_mask_head(self):
        """Construct the mask head (called from ``__init__`` with cfgs)."""
        pass

    @abstractmethod
    def init_assigner_sampler(self):
        """Construct the target assigner and proposal sampler."""
        pass

    @abstractmethod
    def forward_train(self,
                      x,
                      img_meta,
                      proposal_list,
                      gt_bboxes,
                      gt_labels,
                      gt_bboxes_ignore=None,
                      gt_masks=None,
                      **kwargs):
        """Forward function during training"""
        pass

    def simple_test(self,
                    x,
                    proposal_list,
                    img_meta,
                    proposals=None,
                    rescale=False,
                    **kwargs):
        """Test without augmentation."""
        pass

    def aug_test(self, x, proposal_list, img_metas, rescale=False, **kwargs):
        """Test with augmentations.

        If rescale is False, then returned bboxes and masks will fit the scale
        of imgs[0].
        """
        pass
mmdet3d/models/roi_heads/bbox_heads/__init__.py
View file @
885a225b
...
@@ -2,8 +2,9 @@ from mmdet.models.roi_heads.bbox_heads import (BBoxHead, ConvFCBBoxHead,
...
@@ -2,8 +2,9 @@ from mmdet.models.roi_heads.bbox_heads import (BBoxHead, ConvFCBBoxHead,
DoubleConvFCBBoxHead
,
DoubleConvFCBBoxHead
,
Shared2FCBBoxHead
,
Shared2FCBBoxHead
,
Shared4Conv1FCBBoxHead
)
Shared4Conv1FCBBoxHead
)
from
.parta2_bbox_head
import
PartA2BboxHead
__all__
=
[
__all__
=
[
'BBoxHead'
,
'ConvFCBBoxHead'
,
'Shared2FCBBoxHead'
,
'BBoxHead'
,
'ConvFCBBoxHead'
,
'Shared2FCBBoxHead'
,
'Shared4Conv1FCBBoxHead'
,
'DoubleConvFCBBoxHead'
'Shared4Conv1FCBBoxHead'
,
'DoubleConvFCBBoxHead'
,
'PartA2BboxHead'
]
]
mmdet3d/models/roi_heads/bbox_heads/parta2_bbox_head.py
0 → 100644
View file @
885a225b
import
numpy
as
np
import
torch
import
torch.nn
as
nn
from
mmcv.cnn
import
ConvModule
,
build_norm_layer
,
normal_init
,
xavier_init
import
mmdet3d.ops.spconv
as
spconv
from
mmdet3d.core
import
build_bbox_coder
,
multi_apply
from
mmdet3d.core.bbox
import
box_torch_ops
from
mmdet3d.models.builder
import
build_loss
from
mmdet3d.ops.iou3d.iou3d_utils
import
(
boxes3d_to_bev_torch_lidar
,
nms_gpu
,
nms_normal_gpu
)
from
mmdet.models
import
HEADS
@HEADS.register_module()
class PartA2BboxHead(nn.Module):
    """PartA2 rcnn box head.

    Args:
        num_classes (int): The number of classes to prediction.
        seg_in_channels (int): Input channels of segmentation
            convolution layer.
        part_in_channels (int): Input channels of part convolution layer.
        seg_conv_channels (list(int)): Out channels of each
            segmentation convolution layer.
        part_conv_channels (list(int)): Out channels of each
            part convolution layer.
        merge_conv_channels (list(int)): Out channels of each
            feature merged convolution layer.
        down_conv_channels (list(int)): Out channels of each
            downsampled convolution layer.
        shared_fc_channels (list(int)): Out channels of each shared fc layer.
        cls_channels (list(int)): Out channels of each classification layer.
        reg_channels (list(int)): Out channels of each regression layer.
        dropout_ratio (float): Dropout ratio of classification and
            regression layers.
        roi_feat_size (int): The size of pooled roi features.
        with_corner_loss (bool): Whether to use corner loss or not.
        bbox_coder (BaseBBoxCoder): Bbox coder for box head.
        conv_cfg (dict): Config dict of convolutional layers
        norm_cfg (dict): Config dict of normalization layers
        loss_bbox (dict): Config dict of box regression loss.
        loss_cls (dict): Config dict of classifacation loss.
    """

    def __init__(self,
                 num_classes,
                 seg_in_channels,
                 part_in_channels,
                 seg_conv_channels=None,
                 part_conv_channels=None,
                 merge_conv_channels=None,
                 down_conv_channels=None,
                 shared_fc_channels=None,
                 cls_channels=None,
                 reg_channels=None,
                 dropout_ratio=0.1,
                 roi_feat_size=14,
                 with_corner_loss=True,
                 bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
                 conv_cfg=dict(type='Conv1d'),
                 norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
                 loss_bbox=dict(
                     type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
                 loss_cls=dict(
                     type='CrossEntropyLoss',
                     use_sigmoid=True,
                     reduction='none',
                     loss_weight=1.0)):
        super(PartA2BboxHead, self).__init__()
        self.num_classes = num_classes
        self.with_corner_loss = with_corner_loss
        self.bbox_coder = build_bbox_coder(bbox_coder)
        self.loss_bbox = build_loss(loss_bbox)
        self.loss_cls = build_loss(loss_cls)
        self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)

        # The downsampled conv output feeds the first shared fc layer, so the
        # channel counts must line up.
        assert down_conv_channels[-1] == shared_fc_channels[0]

        # init layers
        block = self.post_act_block

        # Sparse conv stack over intra-object part-location features.
        part_channel_last = part_in_channels
        part_conv = []
        for i, channel in enumerate(part_conv_channels):
            part_conv.append(
                block(
                    part_channel_last,
                    channel,
                    3,
                    padding=1,
                    norm_cfg=norm_cfg,
                    indice_key=f'rcnn_part{i}'))
            part_channel_last = channel
        self.part_conv = spconv.SparseSequential(*part_conv)

        # Sparse conv stack over first-stage segmentation features.
        seg_channel_last = seg_in_channels
        seg_conv = []
        for i, channel in enumerate(seg_conv_channels):
            seg_conv.append(
                block(
                    seg_channel_last,
                    channel,
                    3,
                    padding=1,
                    norm_cfg=norm_cfg,
                    indice_key=f'rcnn_seg{i}'))
            seg_channel_last = channel
        self.seg_conv = spconv.SparseSequential(*seg_conv)

        self.conv_down = spconv.SparseSequential()

        # merge branch: consumes the concatenated seg + part features.
        merge_conv_channel_last = part_channel_last + seg_channel_last
        merge_conv = []
        for i, channel in enumerate(merge_conv_channels):
            merge_conv.append(
                block(
                    merge_conv_channel_last,
                    channel,
                    3,
                    padding=1,
                    norm_cfg=norm_cfg,
                    indice_key=f'rcnn_down0'))
            merge_conv_channel_last = channel

        down_conv_channel_last = merge_conv_channel_last
        conv_down = []
        for i, channel in enumerate(down_conv_channels):
            conv_down.append(
                block(
                    down_conv_channel_last,
                    channel,
                    3,
                    padding=1,
                    norm_cfg=norm_cfg,
                    indice_key=f'rcnn_down1'))
            down_conv_channel_last = channel

        # merge -> 2x max-pool -> downsample conv, applied in this order.
        self.conv_down.add_module('merge_conv',
                                  spconv.SparseSequential(*merge_conv))
        self.conv_down.add_module('max_pool3d',
                                  spconv.SparseMaxPool3d(
                                      kernel_size=2, stride=2))
        self.conv_down.add_module('down_conv',
                                  spconv.SparseSequential(*conv_down))

        # Shared fc layers run on the flattened (pooled) dense feature; the
        # pool halves the roi feature size in each of the three dims.
        shared_fc_list = []
        pool_size = roi_feat_size // 2
        pre_channel = shared_fc_channels[0] * pool_size**3
        for k in range(1, len(shared_fc_channels)):
            shared_fc_list.append(
                ConvModule(
                    pre_channel,
                    shared_fc_channels[k],
                    1,
                    padding=0,
                    conv_cfg=conv_cfg,
                    norm_cfg=norm_cfg,
                    inplace=True))
            pre_channel = shared_fc_channels[k]

            # No dropout after the final shared fc layer.
            if k != len(shared_fc_channels) - 1 and dropout_ratio > 0:
                shared_fc_list.append(nn.Dropout(dropout_ratio))

        self.shared_fc = nn.Sequential(*shared_fc_list)

        # Classification layer
        channel_in = shared_fc_channels[-1]
        cls_channel = 1
        cls_layers = []
        pre_channel = channel_in
        for k in range(0, len(cls_channels)):
            cls_layers.append(
                ConvModule(
                    pre_channel,
                    cls_channels[k],
                    1,
                    padding=0,
                    conv_cfg=conv_cfg,
                    norm_cfg=norm_cfg,
                    inplace=True))
            pre_channel = cls_channels[k]
        # Final 1x1 conv emits the raw score (no activation).
        cls_layers.append(
            ConvModule(
                pre_channel,
                cls_channel,
                1,
                padding=0,
                conv_cfg=conv_cfg,
                act_cfg=None))
        # NOTE(review): `>= 0` means a dropout layer is inserted even when
        # dropout_ratio == 0 — confirm this is intended (harmless: p=0).
        if dropout_ratio >= 0:
            cls_layers.insert(1, nn.Dropout(dropout_ratio))

        self.conv_cls = nn.Sequential(*cls_layers)

        # Regression layer
        reg_layers = []
        pre_channel = channel_in
        for k in range(0, len(reg_channels)):
            reg_layers.append(
                ConvModule(
                    pre_channel,
                    reg_channels[k],
                    1,
                    padding=0,
                    conv_cfg=conv_cfg,
                    norm_cfg=norm_cfg,
                    inplace=True))
            pre_channel = reg_channels[k]
        reg_layers.append(
            ConvModule(
                pre_channel,
                self.bbox_coder.code_size,
                1,
                padding=0,
                conv_cfg=conv_cfg,
                act_cfg=None))
        if dropout_ratio >= 0:
            reg_layers.insert(1, nn.Dropout(dropout_ratio))

        self.conv_reg = nn.Sequential(*reg_layers)

        self.init_weights()

    def init_weights(self):
        """Xavier-init all conv layers; near-zero normal init for the final
        regression conv so initial box deltas are small."""
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Conv1d)):
                xavier_init(m, distribution='uniform')
        normal_init(self.conv_reg[-1].conv, mean=0, std=0.001)

    def post_act_block(self,
                       in_channels,
                       out_channels,
                       kernel_size,
                       indice_key,
                       stride=1,
                       padding=0,
                       conv_type='subm',
                       norm_cfg=None):
        """Make post activate sparse convolution block.

        Args:
            in_channels (int): the number of input channels
            out_channels (int): the number of out channels
            kernel_size (int): kernel size of convolution
            indice_key (str): the indice key used for sparse tensor
            stride (int): the stride of convolution
            padding (int or list[int]): the padding number of input
            conv_type (str): conv type in 'subm', 'spconv' or 'inverseconv'
            norm_cfg (dict[str]): config of normalization layer

        Returns:
            spconv.SparseSequential: post activate sparse convolution block.
        """
        # TODO: clean post_act_block by existing bottlnecks.
        assert conv_type in ['subm', 'spconv', 'inverseconv']

        if conv_type == 'subm':
            m = spconv.SparseSequential(
                spconv.SubMConv3d(
                    in_channels,
                    out_channels,
                    kernel_size,
                    bias=False,
                    indice_key=indice_key),
                build_norm_layer(norm_cfg, out_channels)[1],
                nn.ReLU(inplace=True))
        elif conv_type == 'spconv':
            m = spconv.SparseSequential(
                spconv.SparseConv3d(
                    in_channels,
                    out_channels,
                    kernel_size,
                    stride=stride,
                    padding=padding,
                    bias=False,
                    indice_key=indice_key),
                build_norm_layer(norm_cfg, out_channels)[1],
                nn.ReLU(inplace=True))
        elif conv_type == 'inverseconv':
            m = spconv.SparseSequential(
                spconv.SparseInverseConv3d(
                    in_channels,
                    out_channels,
                    kernel_size,
                    bias=False,
                    indice_key=indice_key),
                build_norm_layer(norm_cfg, out_channels)[1],
                nn.ReLU(inplace=True))
        else:
            raise NotImplementedError
        return m

    def forward(self, seg_feats, part_feats):
        """Forward pass over pooled RoI features.

        Converts the dense pooled grids to sparse tensors (only non-empty
        voxels are kept), runs the two sparse branches, merges them, and
        predicts one score and one box per RoI.
        """
        # (B * N, out_x, out_y, out_z, 4)
        rcnn_batch_size = part_feats.shape[0]

        # transform to sparse tensors
        sparse_shape = part_feats.shape[1:4]
        # (non_empty_num, 4) ==> [bs_idx, x_idx, y_idx, z_idx]
        sparse_idx = part_feats.sum(dim=-1).nonzero()

        part_features = part_feats[sparse_idx[:, 0], sparse_idx[:, 1],
                                   sparse_idx[:, 2], sparse_idx[:, 3]]
        seg_features = seg_feats[sparse_idx[:, 0], sparse_idx[:, 1],
                                 sparse_idx[:, 2], sparse_idx[:, 3]]
        coords = sparse_idx.int()
        part_features = spconv.SparseConvTensor(part_features, coords,
                                                sparse_shape, rcnn_batch_size)
        seg_features = spconv.SparseConvTensor(seg_features, coords,
                                               sparse_shape, rcnn_batch_size)

        # forward rcnn network
        x_part = self.part_conv(part_features)
        x_rpn = self.seg_conv(seg_features)

        merged_feature = torch.cat((x_rpn.features, x_part.features),
                                   dim=1)  # (N, C)
        shared_feature = spconv.SparseConvTensor(merged_feature, coords,
                                                 sparse_shape,
                                                 rcnn_batch_size)

        x = self.conv_down(shared_feature)

        shared_feature = x.dense().view(rcnn_batch_size, -1, 1)

        shared_feature = self.shared_fc(shared_feature)

        cls_score = self.conv_cls(shared_feature).transpose(
            1, 2).contiguous().squeeze(dim=1)  # (B, 1)
        bbox_pred = self.conv_reg(shared_feature).transpose(
            1, 2).contiguous().squeeze(dim=1)  # (B, C)

        return cls_score, bbox_pred

    def loss(self, cls_score, bbox_pred, rois, labels, bbox_targets,
             pos_gt_bboxes, reg_mask, label_weights, bbox_weights):
        """Compute classification, box-regression and (optionally) corner
        losses for the RCNN stage.

        Returns:
            dict: ``loss_cls``, ``loss_bbox`` and, when
                ``self.with_corner_loss``, ``loss_corner``.
        """
        losses = dict()
        rcnn_batch_size = cls_score.shape[0]

        # calculate class loss
        cls_flat = cls_score.view(-1)
        loss_cls = self.loss_cls(cls_flat, labels, label_weights)
        losses['loss_cls'] = loss_cls

        # calculate regression loss
        code_size = self.bbox_coder.code_size
        pos_inds = (reg_mask > 0)
        # NOTE(review): `pos_inds.any() == 0` relies on bool/0 comparison to
        # detect "no positives"; `not pos_inds.any()` would be clearer.
        if pos_inds.any() == 0:
            # fake a part loss
            losses['loss_bbox'] = loss_cls.new_tensor(0)
            if self.with_corner_loss:
                losses['loss_corner'] = loss_cls.new_tensor(0)
        else:
            pos_bbox_pred = bbox_pred.view(rcnn_batch_size, -1)[pos_inds]
            bbox_weights_flat = bbox_weights[pos_inds].view(-1, 1).repeat(
                1, pos_bbox_pred.shape[-1])
            loss_bbox = self.loss_bbox(
                pos_bbox_pred.unsqueeze(dim=0),
                bbox_targets.unsqueeze(dim=0),
                bbox_weights_flat.unsqueeze(dim=0))
            losses['loss_bbox'] = loss_bbox

            if self.with_corner_loss:
                pos_roi_boxes3d = rois[..., 1:].view(-1, code_size)[pos_inds]
                pos_roi_boxes3d = pos_roi_boxes3d.view(-1, code_size)
                batch_anchors = pos_roi_boxes3d.clone().detach()
                pos_rois_rotation = pos_roi_boxes3d[..., 6].view(-1)
                roi_xyz = pos_roi_boxes3d[..., 0:3].view(-1, 3)
                # Decode in the RoI-local frame: zero the anchor centers,
                # then rotate/translate back to the global frame below.
                batch_anchors[..., 0:3] = 0
                # decode boxes
                pred_boxes3d = self.bbox_coder.decode(
                    batch_anchors,
                    pos_bbox_pred.view(-1, code_size)).view(-1, code_size)

                pred_boxes3d[..., 0:3] = box_torch_ops.rotation_3d_in_axis(
                    pred_boxes3d[..., 0:3].unsqueeze(1),
                    (pos_rois_rotation + np.pi / 2),
                    axis=2).squeeze(1)

                pred_boxes3d[:, 0:3] += roi_xyz

                # calculate corner loss
                loss_corner = self.get_corner_loss_lidar(
                    pred_boxes3d, pos_gt_bboxes)
                losses['loss_corner'] = loss_corner

        return losses

    def get_targets(self, sampling_results, rcnn_train_cfg, concat=True):
        """Build per-RoI training targets from sampling results.

        Args:
            sampling_results (list): Sampled RoIs of each batch; each entry
                provides ``pos_bboxes``, ``pos_gt_bboxes`` and ``iou``.
            rcnn_train_cfg (ConfigDict): RCNN training config
                (``cls_pos_thr`` / ``cls_neg_thr`` are read downstream).
            concat (bool): Whether to concatenate targets across the batch
                and normalize the weights by their sums.

        Returns:
            tuple: (label, bbox_targets, pos_gt_bboxes, reg_mask,
                label_weights, bbox_weights), per-batch lists unless
                ``concat`` is True.
        """
        pos_bboxes_list = [res.pos_bboxes for res in sampling_results]
        pos_gt_bboxes_list = [res.pos_gt_bboxes for res in sampling_results]
        iou_list = [res.iou for res in sampling_results]
        targets = multi_apply(
            self._get_target_single,
            pos_bboxes_list,
            pos_gt_bboxes_list,
            iou_list,
            cfg=rcnn_train_cfg)

        (label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,
         bbox_weights) = targets

        if concat:
            label = torch.cat(label, 0)
            bbox_targets = torch.cat(bbox_targets, 0)
            pos_gt_bboxes = torch.cat(pos_gt_bboxes, 0)
            reg_mask = torch.cat(reg_mask, 0)

            # Normalize so the weights sum to (at most) one per batch.
            label_weights = torch.cat(label_weights, 0)
            label_weights /= torch.clamp(label_weights.sum(), min=1.0)

            bbox_weights = torch.cat(bbox_weights, 0)
            bbox_weights /= torch.clamp(bbox_weights.sum(), min=1.0)

        return (label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,
                bbox_weights)

    def _get_target_single(self, pos_bboxes, pos_gt_bboxes, ious, cfg):
        """Targets for one sample: soft IoU-based classification labels plus
        box-regression targets in the canonical (RoI-centric) frame."""
        cls_pos_mask = ious > cfg.cls_pos_thr
        cls_neg_mask = ious < cfg.cls_neg_thr
        interval_mask = (cls_pos_mask == 0) & (cls_neg_mask == 0)

        # iou regression target
        label = (cls_pos_mask > 0).float()
        # RoIs between the thresholds get a soft label interpolated from IoU.
        label[interval_mask] = ious[interval_mask] * 2 - 0.5
        # label weights
        label_weights = (label >= 0).float()

        # box regression target
        reg_mask = pos_bboxes.new_zeros(ious.size(0)).long()
        # The first pos_gt_bboxes.size(0) entries are the positive samples.
        reg_mask[0:pos_gt_bboxes.size(0)] = 1
        bbox_weights = (reg_mask > 0).float()
        if reg_mask.bool().any():
            pos_gt_bboxes_ct = pos_gt_bboxes.clone().detach()
            roi_center = pos_bboxes[..., 0:3]
            roi_ry = pos_bboxes[..., 6] % (2 * np.pi)

            # canonical transformation
            pos_gt_bboxes_ct[..., 0:3] -= roi_center
            pos_gt_bboxes_ct[..., 6] -= roi_ry
            pos_gt_bboxes_ct[..., 0:3] = box_torch_ops.rotation_3d_in_axis(
                pos_gt_bboxes_ct[..., 0:3].unsqueeze(1),
                -(roi_ry + np.pi / 2),
                axis=2).squeeze(1)

            # flip orientation if rois have opposite orientation
            ry_label = pos_gt_bboxes_ct[..., 6] % (2 * np.pi)  # 0 ~ 2pi
            opposite_flag = (ry_label > np.pi * 0.5) & (ry_label < np.pi * 1.5)
            ry_label[opposite_flag] = (ry_label[opposite_flag] + np.pi) % (
                2 * np.pi)  # (0 ~ pi/2, 3pi/2 ~ 2pi)
            flag = ry_label > np.pi
            ry_label[flag] = ry_label[flag] - np.pi * 2  # (-pi/2, pi/2)
            ry_label = torch.clamp(ry_label, min=-np.pi / 2, max=np.pi / 2)
            pos_gt_bboxes_ct[..., 6] = ry_label

            # Encode against a zero-centered, zero-yaw anchor so the target
            # only carries the residual in the canonical frame.
            rois_anchor = pos_bboxes.clone().detach()
            rois_anchor[:, 0:3] = 0
            rois_anchor[:, 6] = 0
            bbox_targets = self.bbox_coder.encode(rois_anchor,
                                                  pos_gt_bboxes_ct)
        else:
            # no fg bbox
            bbox_targets = pos_gt_bboxes.new_empty((0, 7))

        return (label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,
                bbox_weights)

    def get_corner_loss_lidar(self, pred_bbox3d, gt_bbox3d, delta=1):
        """Calculate corner loss of given boxes.

        Args:
            pred_bbox3d (FloatTensor): predicted boxes with shape (N, 7).
            gt_bbox3d (FloatTensor): gt boxes with shape (N, 7).
            delta (float): Huber-loss transition point between the quadratic
                and linear regimes.

        Returns:
            FloatTensor: Calculated corner loss with shape (N).
        """
        assert pred_bbox3d.shape[0] == gt_bbox3d.shape[0]

        pred_box_corners = box_torch_ops.boxes3d_to_corners3d_lidar_torch(
            pred_bbox3d)
        gt_box_corners = box_torch_ops.boxes3d_to_corners3d_lidar_torch(
            gt_bbox3d)

        # The corner distance is taken against both the gt box and its
        # pi-flipped twin so heading ambiguity is not penalized.
        gt_bbox3d_flip = gt_bbox3d.clone()
        gt_bbox3d_flip[:, 6] += np.pi
        gt_box_corners_flip = box_torch_ops.boxes3d_to_corners3d_lidar_torch(
            gt_bbox3d_flip)
        corner_dist = torch.min(
            torch.norm(pred_box_corners - gt_box_corners, dim=2),
            torch.norm(pred_box_corners - gt_box_corners_flip,
                       dim=2))  # (N, 8)
        # huber loss
        abs_error = torch.abs(corner_dist)
        quadratic = torch.clamp(abs_error, max=delta)
        linear = (abs_error - quadratic)
        corner_loss = 0.5 * quadratic**2 + delta * linear

        return corner_loss.mean(dim=1)

    def get_bboxes(self,
                   rois,
                   cls_score,
                   bbox_pred,
                   class_labels,
                   class_pred,
                   img_meta,
                   cfg=None):
        """Decode RCNN predictions into final boxes and run per-batch NMS.

        Returns:
            list[dict]: one result dict per batch element with keys
                ``box3d_lidar``, ``scores``, ``label_preds``, ``sample_idx``
                (all moved to CPU).
        """
        roi_batch_id = rois[..., 0]
        roi_boxes = rois[..., 1:]  # boxes without batch id
        batch_size = int(roi_batch_id.max().item() + 1)

        # decode boxes
        roi_ry = roi_boxes[..., 6].view(-1)
        roi_xyz = roi_boxes[..., 0:3].view(-1, 3)
        local_roi_boxes = roi_boxes.clone().detach()
        # Decode in the RoI-local frame, then rotate/translate back.
        local_roi_boxes[..., 0:3] = 0
        rcnn_boxes3d = self.bbox_coder.decode(local_roi_boxes, bbox_pred)
        rcnn_boxes3d[..., 0:3] = box_torch_ops.rotation_3d_in_axis(
            rcnn_boxes3d[..., 0:3].unsqueeze(1), (roi_ry + np.pi / 2),
            axis=2).squeeze(1)
        rcnn_boxes3d[:, 0:3] += roi_xyz

        # post processing
        result_list = []
        for batch_id in range(batch_size):
            cur_class_labels = class_labels[batch_id]
            cur_cls_score = cls_score[roi_batch_id == batch_id].view(-1)

            cur_box_prob = class_pred[batch_id]
            cur_rcnn_boxes3d = rcnn_boxes3d[roi_batch_id == batch_id]
            selected = self.multi_class_nms(cur_box_prob, cur_rcnn_boxes3d,
                                            cfg.score_thr, cfg.nms_thr,
                                            cfg.use_rotate_nms)
            selected_bboxes = cur_rcnn_boxes3d[selected]
            selected_label_preds = cur_class_labels[selected]

            if cfg.use_raw_score:
                selected_scores = cur_cls_score[selected]
            else:
                selected_scores = torch.sigmoid(cur_cls_score)[selected]
            cur_result = dict(
                box3d_lidar=selected_bboxes.cpu(),
                scores=selected_scores.cpu(),
                label_preds=selected_label_preds.cpu(),
                sample_idx=img_meta[batch_id]['sample_idx'])
            result_list.append(cur_result)
        return result_list

    def multi_class_nms(self,
                        box_probs,
                        box_preds,
                        score_thr,
                        nms_thr,
                        use_rotate_nms=True):
        """Class-wise NMS in bird's-eye view.

        Thresholds are applied on sigmoid-normalized scores; NMS is ranked
        by the raw per-class scores. Returns the kept indices into
        ``box_preds``.

        NOTE(review): when nothing survives the score thresholds this
        returns a python list ``[]`` rather than an empty index tensor —
        callers index with it, which works, but the type is inconsistent.
        """
        normalized_scores = torch.sigmoid(box_probs)
        if use_rotate_nms:
            nms_func = nms_gpu
        else:
            nms_func = nms_normal_gpu

        assert box_probs.shape[
            1] == self.num_classes, f'box_probs shape: {str(box_probs.shape)}'
        selected_list = []
        selected_labels = []
        boxes_for_nms = boxes3d_to_bev_torch_lidar(box_preds)

        # Scalar thresholds are broadcast to one threshold per class.
        score_thresh = score_thr if isinstance(
            score_thr, list) else [score_thr for x in range(self.num_classes)]
        nms_thresh = nms_thr if isinstance(
            nms_thr, list) else [nms_thr for x in range(self.num_classes)]
        for k in range(0, self.num_classes):
            class_scores_keep = normalized_scores[:, k] >= score_thresh[k]

            if class_scores_keep.int().sum() > 0:
                original_idxs = class_scores_keep.nonzero().view(-1)
                cur_boxes_for_nms = boxes_for_nms[class_scores_keep]
                cur_rank_scores = box_probs[class_scores_keep, k]

                cur_selected = nms_func(cur_boxes_for_nms, cur_rank_scores,
                                        nms_thresh[k])

                if cur_selected.shape[0] == 0:
                    continue
                selected_list.append(original_idxs[cur_selected])
                # Labels are 1-based here (0 reserved for background);
                # selected_labels is currently built but not returned.
                selected_labels.append(
                    torch.full([cur_selected.shape[0]],
                               k + 1,
                               dtype=torch.int64,
                               device=box_preds.device))

        selected = torch.cat(
            selected_list, dim=0) if len(selected_list) > 0 else []
        return selected
mmdet3d/models/roi_heads/mask_heads/pointwise_semantic_head.py
View file @
885a225b
...
@@ -126,22 +126,21 @@ class PointwiseSemanticHead(nn.Module):
...
@@ -126,22 +126,21 @@ class PointwiseSemanticHead(nn.Module):
part_targets
=
torch
.
cat
(
part_targets
,
dim
=
0
)
part_targets
=
torch
.
cat
(
part_targets
,
dim
=
0
)
return
dict
(
seg_targets
=
seg_targets
,
part_targets
=
part_targets
)
return
dict
(
seg_targets
=
seg_targets
,
part_targets
=
part_targets
)
def
loss
(
self
,
se
g_preds
,
part_preds
,
seg_targets
,
part
_targets
):
def
loss
(
self
,
se
mantic_results
,
semantic
_targets
):
"""Calculate point-wise segmentation and part prediction losses.
"""Calculate point-wise segmentation and part prediction losses.
Args:
Args:
seg_preds (torch.Tensor): prediction of binary
semantic_results (dict): Results from semantic head.
segmentation with shape [voxel_num, 1].
semantic_targets (dict): Targets of semantic results.
part_preds (torch.Tensor): prediction of part
with shape [voxel_num, 3].
seg_targets (torch.Tensor): target of segmentation
with shape [voxel_num, 1].
part_targets (torch.Tensor): target of part with
shape [voxel_num, 3].
Returns:
Returns:
dict: loss of segmentation and part prediction.
dict: loss of segmentation and part prediction.
"""
"""
seg_preds
=
semantic_results
[
'seg_preds'
]
part_preds
=
semantic_results
[
'part_preds'
]
seg_targets
=
semantic_targets
[
'seg_targets'
]
part_targets
=
semantic_targets
[
'part_targets'
]
pos_mask
=
(
seg_targets
>
-
1
)
&
(
seg_targets
<
self
.
num_classes
)
pos_mask
=
(
seg_targets
>
-
1
)
&
(
seg_targets
<
self
.
num_classes
)
binary_seg_target
=
pos_mask
.
long
()
binary_seg_target
=
pos_mask
.
long
()
pos
=
pos_mask
.
float
()
pos
=
pos_mask
.
float
()
...
...
mmdet3d/models/roi_heads/part_aggregation_roi_head.py
0 → 100644
View file @
885a225b
import
torch.nn.functional
as
F
from
mmdet3d.core
import
AssignResult
from
mmdet3d.core.bbox
import
bbox3d2roi
from
mmdet.core
import
build_assigner
,
build_sampler
from
mmdet.models
import
HEADS
from
..builder
import
build_head
,
build_roi_extractor
from
.base_3droi_head
import
Base3DRoIHead
@HEADS.register_module
class PartAggregationROIHead(Base3DRoIHead):
    """Part aggregation roi head for PartA2"""

    def __init__(self,
                 semantic_head,
                 num_classes=3,
                 seg_roi_extractor=None,
                 part_roi_extractor=None,
                 bbox_head=None,
                 train_cfg=None,
                 test_cfg=None):
        super(PartAggregationROIHead, self).__init__(
            bbox_head=bbox_head, train_cfg=train_cfg, test_cfg=test_cfg)
        self.num_classes = num_classes
        # The semantic (point-wise) head is mandatory for this RoI head.
        assert semantic_head is not None
        self.semantic_head = build_head(semantic_head)

        if seg_roi_extractor is not None:
            self.seg_roi_extractor = build_roi_extractor(seg_roi_extractor)
        if part_roi_extractor is not None:
            self.part_roi_extractor = build_roi_extractor(part_roi_extractor)

        self.init_assigner_sampler()

    def init_weights(self, pretrained):
        """No-op: weight init is handled by the sub-modules themselves."""
        pass

    def init_mask_head(self):
        """No-op: this head has no mask branch."""
        pass

    def init_bbox_head(self, bbox_head):
        """Build the second-stage bbox head from its config."""
        self.bbox_head = build_head(bbox_head)

    def init_assigner_sampler(self):
        """Build the bbox assigner(s) and sampler from ``train_cfg``.

        ``train_cfg.assigner`` may be a single dict or a list of dicts
        (one assigner per class); both remain None at test time.
        """
        self.bbox_assigner = None
        self.bbox_sampler = None
        if self.train_cfg:
            if isinstance(self.train_cfg.assigner, dict):
                self.bbox_assigner = build_assigner(self.train_cfg.assigner)
            elif isinstance(self.train_cfg.assigner, list):
                self.bbox_assigner = [
                    build_assigner(res) for res in self.train_cfg.assigner
                ]
            self.bbox_sampler = build_sampler(self.train_cfg.sampler)

    @property
    def with_semantic(self):
        # bool: whether a semantic head is attached and non-None.
        return hasattr(self,
                       'semantic_head') and self.semantic_head is not None

    def forward_train(self, feats_dict, voxels_dict, img_meta, proposal_list,
                      gt_bboxes_3d, gt_labels_3d):
        """Training forward function of PartAggregationROIHead

        Args:
            feats_dict (dict): Contains features from the first stage.
            voxels_dict (dict): Contains information of voxels.
            img_metas (list[dict]): Meta info of each image.
            proposal_list (list[dict]): Proposal information from rpn.
            gt_bboxes_3d (list[FloatTensor]): GT bboxes of each batch.
            gt_labels_3d (list[LongTensor]): GT labels of each batch.

        Returns:
            dict: losses from each head.
        """
        losses = dict()
        if self.with_semantic:
            semantic_results = self._semantic_forward_train(
                feats_dict['seg_features'], voxels_dict, gt_bboxes_3d,
                gt_labels_3d)
            losses.update(semantic_results['loss_semantic'])

        sample_results = self._assign_and_sample(proposal_list, gt_bboxes_3d,
                                                 gt_labels_3d)
        if self.with_bbox:
            # NOTE(review): `semantic_results` is only bound inside the
            # with_semantic branch above; with_semantic is asserted truthy in
            # __init__, but this dependency is implicit — confirm.
            bbox_results = self._bbox_forward_train(
                feats_dict['seg_features'], semantic_results['part_feats'],
                voxels_dict, sample_results)
            losses.update(bbox_results['loss_bbox'])

        return losses

    def simple_test(self, feats_dict, voxels_dict, img_meta, proposal_list,
                    **kwargs):
        """Simple testing forward function of PartAggregationROIHead

        Args:
            feats_dict (dict): Contains features from the first stage.
            voxels_dict (dict): Contains information of voxels.
            img_metas (list[dict]): Meta info of each image.
            proposal_list (list[dict]): Proposal information from rpn.

        Returns:
            list[dict]: Bbox results of each batch.
        """
        assert self.with_bbox, 'Bbox head must be implemented.'
        assert self.with_semantic

        semantic_results = self.semantic_head(feats_dict['seg_features'])

        rois = bbox3d2roi([res['box3d_lidar'] for res in proposal_list])
        label_preds = [res['label_preds'] for res in proposal_list]
        cls_preds = [res['cls_preds'] for res in proposal_list]
        bbox_results = self._bbox_forward(feats_dict['seg_features'],
                                          semantic_results['part_feats'],
                                          voxels_dict, rois)

        bbox_list = self.bbox_head.get_bboxes(
            rois,
            bbox_results['cls_score'],
            bbox_results['bbox_pred'],
            label_preds,
            cls_preds,
            img_meta,
            cfg=self.test_cfg)
        return bbox_list

    def _bbox_forward_train(self, seg_feats, part_feats, voxels_dict,
                            sampling_results):
        """Run the bbox head on sampled RoIs and compute its losses."""
        rois = bbox3d2roi([res.bboxes for res in sampling_results])
        bbox_results = self._bbox_forward(seg_feats, part_feats, voxels_dict,
                                          rois)

        bbox_targets = self.bbox_head.get_targets(sampling_results,
                                                  self.train_cfg)
        loss_bbox = self.bbox_head.loss(bbox_results['cls_score'],
                                        bbox_results['bbox_pred'], rois,
                                        *bbox_targets)

        bbox_results.update(loss_bbox=loss_bbox)
        return bbox_results

    def _bbox_forward(self, seg_feats, part_feats, voxels_dict, rois):
        """Pool seg/part features inside each RoI and run the bbox head.

        Both extractors receive the voxel centers and the batch index column
        of the voxel coordinates (``coors[..., 0]``) to group voxels by
        sample.
        """
        pooled_seg_feats = self.seg_roi_extractor(seg_feats,
                                                  voxels_dict['voxel_centers'],
                                                  voxels_dict['coors'][..., 0],
                                                  rois)
        pooled_part_feats = self.part_roi_extractor(
            part_feats, voxels_dict['voxel_centers'],
            voxels_dict['coors'][..., 0], rois)
        cls_score, bbox_pred = self.bbox_head(pooled_seg_feats,
                                              pooled_part_feats)

        bbox_results = dict(
            cls_score=cls_score,
            bbox_pred=bbox_pred,
            pooled_seg_feats=pooled_seg_feats,
            pooled_part_feats=pooled_part_feats)
        return bbox_results

    def _assign_and_sample(self, proposal_list, gt_bboxes_3d, gt_labels_3d):
        """Assign proposals to GT boxes and sample them for training.

        Supports a single assigner (one class) or a list of per-class
        assigners whose results are merged into one ``AssignResult``.

        Returns:
            list: one sampling result per batch element.
        """
        sampling_results = []
        # bbox assign
        for batch_idx in range(len(proposal_list)):
            cur_proposal_list = proposal_list[batch_idx]
            cur_boxes = cur_proposal_list['box3d_lidar']
            cur_label_preds = cur_proposal_list['label_preds']
            cur_gt_bboxes = gt_bboxes_3d[batch_idx]
            cur_gt_labels = gt_labels_3d[batch_idx]

            batch_num_gts = 0
            batch_gt_indis = cur_gt_labels.new_full((cur_boxes.shape[0], ),
                                                    0)  # 0 is bg
            batch_max_overlaps = cur_boxes.new_zeros(cur_boxes.shape[0])
            batch_gt_labels = cur_gt_labels.new_full((cur_boxes.shape[0], ),
                                                     -1)  # -1 is bg
            if isinstance(self.bbox_assigner, list):
                # for multi classes
                for i, assigner in enumerate(self.bbox_assigner):
                    gt_per_cls = (cur_gt_labels == i)
                    pred_per_cls = (cur_label_preds == i)
                    cur_assign_res = assigner.assign(
                        cur_boxes[pred_per_cls],
                        cur_gt_bboxes[gt_per_cls],
                        gt_labels=cur_gt_labels[gt_per_cls])
                    # gather assign_results in different class into one result
                    batch_num_gts += cur_assign_res.num_gts
                    # gt inds (1-based)
                    gt_inds_arange_pad = gt_per_cls.nonzero().view(-1) + 1
                    # pad 0 for indice unassigned
                    gt_inds_arange_pad = F.pad(
                        gt_inds_arange_pad, (1, 0), mode='constant', value=0)
                    # pad -1 for indice ignore
                    gt_inds_arange_pad = F.pad(
                        gt_inds_arange_pad, (1, 0), mode='constant', value=-1)
                    # convert to 0~gt_num+2 for indices
                    gt_inds_arange_pad += 1
                    # now 0 is bg, >1 is fg in batch_gt_indis
                    batch_gt_indis[pred_per_cls] = gt_inds_arange_pad[
                        cur_assign_res.gt_inds + 1] - 1
                    batch_max_overlaps[
                        pred_per_cls] = cur_assign_res.max_overlaps
                    batch_gt_labels[pred_per_cls] = cur_assign_res.labels

                assign_result = AssignResult(batch_num_gts, batch_gt_indis,
                                             batch_max_overlaps,
                                             batch_gt_labels)
            else:
                # for single class
                assign_result = self.bbox_assigner.assign(
                    cur_boxes, cur_gt_bboxes, gt_labels=cur_gt_labels)
            # sample boxes
            sampling_result = self.bbox_sampler.sample(assign_result,
                                                       cur_boxes,
                                                       cur_gt_bboxes,
                                                       cur_gt_labels)
            sampling_results.append(sampling_result)
        return sampling_results

    def _semantic_forward_train(self, x, voxels_dict, gt_bboxes_3d,
                                gt_labels_3d):
        """Run the semantic head, compute its targets and loss, and return
        the combined results dict (including ``loss_semantic``)."""
        semantic_results = self.semantic_head(x)
        semantic_targets = self.semantic_head.get_targets(
            voxels_dict, gt_bboxes_3d, gt_labels_3d)
        loss_semantic = self.semantic_head.loss(semantic_results,
                                                semantic_targets)
        semantic_results.update(loss_semantic=loss_semantic)
        return semantic_results
tests/test_config.py
View file @
885a225b
...
@@ -61,7 +61,10 @@ def test_config_build_detector():
...
@@ -61,7 +61,10 @@ def test_config_build_detector():
assert
detector
.
roi_head
.
with_mask
==
detector
.
with_mask
assert
detector
.
roi_head
.
with_mask
==
detector
.
with_mask
head_config
=
config_mod
.
model
[
'roi_head'
]
head_config
=
config_mod
.
model
[
'roi_head'
]
_check_roi_head
(
head_config
,
detector
.
roi_head
)
if
head_config
.
type
==
'PartAggregationROIHead'
:
check_parta2_roi_head
(
head_config
,
detector
.
roi_head
)
else
:
_check_roi_head
(
head_config
,
detector
.
roi_head
)
# else:
# else:
# # for single stage detector
# # for single stage detector
# # detectors must have bbox head
# # detectors must have bbox head
...
@@ -319,3 +322,44 @@ def _check_bbox_head(bbox_cfg, bbox_head):
...
@@ -319,3 +322,44 @@ def _check_bbox_head(bbox_cfg, bbox_head):
out_dim
=
(
4
if
bbox_cfg
.
reg_class_agnostic
else
4
*
out_dim
=
(
4
if
bbox_cfg
.
reg_class_agnostic
else
4
*
bbox_cfg
.
num_classes
)
bbox_cfg
.
num_classes
)
assert
bbox_head
.
fc_reg
.
out_features
==
out_dim
assert
bbox_head
.
fc_reg
.
out_features
==
out_dim
def check_parta2_roi_head(config, head):
    """Verify that a PartAggregationROIHead matches its config.

    Checks the head's class name, its two RoI extractors (seg and part)
    and its bbox head against the corresponding config entries.
    """
    assert head.__class__.__name__ == config['type']

    # Validate each (sub-config, sub-module) RoI-extractor pair with the
    # shared extractor checker.
    extractor_pairs = [
        (config.seg_roi_extractor, head.seg_roi_extractor),
        (config.part_roi_extractor, head.part_roi_extractor),
    ]
    for sub_cfg, sub_module in extractor_pairs:
        _check_parta2_roi_extractor(sub_cfg, sub_module)

    # Finally validate the bbox head against its config.
    _check_parta2_bbox_head(config.bbox_head, head.bbox_head)
def
_check_parta2_roi_extractor
(
config
,
roi_extractor
):
assert
config
[
'type'
]
==
roi_extractor
.
__class__
.
__name__
assert
(
config
.
roi_layer
.
out_size
==
roi_extractor
.
roi_layer
.
out_size
)
assert
(
config
.
roi_layer
.
max_pts_per_voxel
==
roi_extractor
.
roi_layer
.
max_pts_per_voxel
)
def
_check_parta2_bbox_head
(
bbox_cfg
,
bbox_head
):
import
torch.nn
as
nn
if
isinstance
(
bbox_cfg
,
list
):
for
single_bbox_cfg
,
single_bbox_head
in
zip
(
bbox_cfg
,
bbox_head
):
_check_bbox_head
(
single_bbox_cfg
,
single_bbox_head
)
elif
isinstance
(
bbox_head
,
nn
.
ModuleList
):
for
single_bbox_head
in
bbox_head
:
_check_bbox_head
(
bbox_cfg
,
single_bbox_head
)
else
:
assert
bbox_cfg
[
'type'
]
==
bbox_head
.
__class__
.
__name__
assert
bbox_cfg
.
seg_in_channels
==
bbox_head
.
seg_conv
[
0
][
0
].
in_channels
assert
bbox_cfg
.
part_in_channels
==
bbox_head
.
part_conv
[
0
][
0
].
in_channels
tests/test_heads.py
View file @
885a225b
...
@@ -103,18 +103,18 @@ def test_second_head_loss():
...
@@ -103,18 +103,18 @@ def test_second_head_loss():
losses
=
self
.
loss
(
cls_score
,
bbox_pred
,
dir_cls_preds
,
gt_bboxes
,
losses
=
self
.
loss
(
cls_score
,
bbox_pred
,
dir_cls_preds
,
gt_bboxes
,
gt_labels
,
input_metas
)
gt_labels
,
input_metas
)
assert
losses
[
'loss_cls
_3d
'
][
0
]
>
0
assert
losses
[
'loss_
rpn_
cls'
][
0
]
>
0
assert
losses
[
'loss_bbox
_3d
'
][
0
]
>
0
assert
losses
[
'loss_
rpn_
bbox'
][
0
]
>
0
assert
losses
[
'loss_dir
_3d
'
][
0
]
>
0
assert
losses
[
'loss_
rpn_
dir'
][
0
]
>
0
# test empty ground truth case
# test empty ground truth case
gt_bboxes
=
list
(
torch
.
empty
((
2
,
0
,
7
)).
cuda
())
gt_bboxes
=
list
(
torch
.
empty
((
2
,
0
,
7
)).
cuda
())
gt_labels
=
list
(
torch
.
empty
((
2
,
0
)).
cuda
())
gt_labels
=
list
(
torch
.
empty
((
2
,
0
)).
cuda
())
empty_gt_losses
=
self
.
loss
(
cls_score
,
bbox_pred
,
dir_cls_preds
,
gt_bboxes
,
empty_gt_losses
=
self
.
loss
(
cls_score
,
bbox_pred
,
dir_cls_preds
,
gt_bboxes
,
gt_labels
,
input_metas
)
gt_labels
,
input_metas
)
assert
empty_gt_losses
[
'loss_cls
_3d
'
][
0
]
>
0
assert
empty_gt_losses
[
'loss_
rpn_
cls'
][
0
]
>
0
assert
empty_gt_losses
[
'loss_bbox
_3d
'
][
0
]
==
0
assert
empty_gt_losses
[
'loss_
rpn_
bbox'
][
0
]
==
0
assert
empty_gt_losses
[
'loss_dir
_3d
'
][
0
]
==
0
assert
empty_gt_losses
[
'loss_
rpn_
dir'
][
0
]
==
0
def
test_second_head_getboxes
():
def
test_second_head_getboxes
():
...
@@ -147,7 +147,7 @@ def test_parta2_rpnhead_getboxes():
...
@@ -147,7 +147,7 @@ def test_parta2_rpnhead_getboxes():
if
not
torch
.
cuda
.
is_available
():
if
not
torch
.
cuda
.
is_available
():
pytest
.
skip
(
'test requires GPU and torch+cuda'
)
pytest
.
skip
(
'test requires GPU and torch+cuda'
)
rpn_head_cfg
,
proposal_cfg
=
_get_rpn_head_cfg
(
rpn_head_cfg
,
proposal_cfg
=
_get_rpn_head_cfg
(
'kitti/hv_PartA2_secfpn_4x8_c
osine
_80e_kitti-3d-3class.py'
)
'kitti/hv_PartA2_secfpn_4x8_c
yclic
_80e_kitti-3d-3class.py'
)
from
mmdet3d.models.builder
import
build_head
from
mmdet3d.models.builder
import
build_head
self
=
build_head
(
rpn_head_cfg
)
self
=
build_head
(
rpn_head_cfg
)
...
...
tests/test_semantic_heads.py
View file @
885a225b
...
@@ -62,9 +62,7 @@ def test_PointwiseSemanticHead():
...
@@ -62,9 +62,7 @@ def test_PointwiseSemanticHead():
[
voxel_features
.
shape
[
0
],
3
])
[
voxel_features
.
shape
[
0
],
3
])
# test loss
# test loss
loss_dict
=
self
.
loss
(
feats_dict
[
'seg_preds'
],
feats_dict
[
'part_preds'
],
loss_dict
=
self
.
loss
(
feats_dict
,
target_dict
)
target_dict
[
'seg_targets'
],
target_dict
[
'part_targets'
])
assert
loss_dict
[
'loss_seg'
]
>
0
assert
loss_dict
[
'loss_seg'
]
>
0
assert
loss_dict
[
'loss_part'
]
==
0
# no points in gt_boxes
assert
loss_dict
[
'loss_part'
]
==
0
# no points in gt_boxes
total_loss
=
loss_dict
[
'loss_seg'
]
+
loss_dict
[
'loss_part'
]
total_loss
=
loss_dict
[
'loss_seg'
]
+
loss_dict
[
'loss_part'
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment