"...text-generation-inference.git" did not exist on "2b19d671b4d1020e31276477f278ca87cfa37a3c"
Commit 54595292 authored by zhangwenwei

Merge branch 'feature_parta2_roi' into 'master'

Feature parta2 roi

See merge request open-mmlab/mmdet.3d!31
parents 535344de 885a225b
@@ -10,13 +10,16 @@ model = dict(
        voxel_size=voxel_size,
        max_voxels=(16000, 40000)  # (training, testing) max_voxels
    ),
-    voxel_encoder=dict(type='VoxelFeatureExtractorV3'),
+    voxel_encoder=dict(
+        type='VoxelFeatureExtractorV3',
+        num_input_features=4,
+        num_filters=[4],
+        with_distance=False),
    middle_encoder=dict(
        type='SparseUNet',
        in_channels=4,
        output_shape=[41, 1600, 1408],
-        pre_act=False,
-    ),
+        pre_act=False),
    backbone=dict(
        type='SECOND',
        in_channels=256,
@@ -56,8 +59,65 @@ model = dict(
            loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
        loss_dir=dict(
-            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2),
-    ))
+            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
+    roi_head=dict(
type='PartAggregationROIHead',
num_classes=3,
semantic_head=dict(
type='PointwiseSemanticHead',
in_channels=16,
extra_width=0.2,
seg_score_thr=0.3,
num_classes=3,
loss_seg=dict(
type='FocalLoss',
use_sigmoid=True,
reduction='sum',
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_part=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
seg_roi_extractor=dict(
type='Single3DRoIAwareExtractor',
roi_layer=dict(
type='RoIAwarePool3d',
out_size=14,
max_pts_per_voxel=128,
mode='max')),
part_roi_extractor=dict(
type='Single3DRoIAwareExtractor',
roi_layer=dict(
type='RoIAwarePool3d',
out_size=14,
max_pts_per_voxel=128,
mode='avg')),
bbox_head=dict(
type='PartA2BboxHead',
num_classes=3,
seg_in_channels=16,
part_in_channels=4,
seg_conv_channels=[64, 64],
part_conv_channels=[64, 64],
merge_conv_channels=[128, 128],
down_conv_channels=[128, 256],
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
shared_fc_channels=[256, 512, 512, 512],
cls_channels=[256, 256],
reg_channels=[256, 256],
dropout_ratio=0.1,
roi_feat_size=14,
with_corner_loss=True,
loss_bbox=dict(
type='SmoothL1Loss',
beta=1.0 / 9.0,
reduction='sum',
loss_weight=1.0),
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=True,
reduction='sum',
loss_weight=1.0))))
# model training and testing settings
train_cfg = dict(
    rpn=dict(
@@ -82,7 +142,7 @@ train_cfg = dict(
            pos_iou_thr=0.6,
            neg_iou_thr=0.45,
            min_pos_iou=0.45,
-            ignore_iof_thr=-1),
+            ignore_iof_thr=-1)
        ],
        allowed_border=0,
        pos_weight=-1,
@@ -93,24 +153,61 @@ train_cfg = dict(
        nms_thr=0.8,
        score_thr=0,
        use_rotate_nms=False),
-    )
+    rcnn=dict(
assigner=[
dict( # for Pedestrian
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlaps3D', coordinate='lidar'),
pos_iou_thr=0.55,
neg_iou_thr=0.55,
min_pos_iou=0.55,
ignore_iof_thr=-1),
dict( # for Cyclist
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlaps3D', coordinate='lidar'),
pos_iou_thr=0.55,
neg_iou_thr=0.55,
min_pos_iou=0.55,
ignore_iof_thr=-1),
dict( # for Car
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlaps3D', coordinate='lidar'),
pos_iou_thr=0.55,
neg_iou_thr=0.55,
min_pos_iou=0.55,
ignore_iof_thr=-1)
],
sampler=dict(
type='IoUNegPiecewiseSampler',
num=128,
pos_fraction=0.55,
neg_piece_fractions=[0.8, 0.2],
neg_iou_piece_thrs=[0.55, 0.1],
neg_pos_ub=-1,
add_gt_as_proposals=False,
return_iou=True),
cls_pos_thr=0.75,
cls_neg_thr=0.25))
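# cls_pos_thr / cls_neg_thr in rcnn above set the IoU band for the
# second-stage score targets: proposals with IoU > 0.75 train toward 1,
# those below 0.25 toward 0, and the band in between is linearly
# interpolated (IoU-guided scoring as in the PartA2 paper; see
# PartA2BboxHead.get_targets).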
test_cfg = dict(
    rpn=dict(
        nms_pre=1024,
-        max_per_img=100,
-        use_rotate_nms=True,
-        nms_across_levels=False,
+        nms_post=100,
        nms_thr=0.7,
-        score_thr=0))
+        score_thr=0,
+        use_rotate_nms=True),
+    rcnn=dict(
+        use_rotate_nms=True, use_raw_score=True, nms_thr=0.01, score_thr=0.1))
# dataset settings
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Pedestrian', 'Cyclist', 'Car']
-img_norm_cfg = dict(
-    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
input_modality = dict(
-    use_lidar=True, use_depth=False, use_lidar_intensity=True, use_camera=True)
+    use_lidar=False,
+    use_lidar_reduced=True,
+    use_depth=False,
+    use_lidar_intensity=True,
+    use_camera=False)
db_sampler = dict(
    root_path=data_root,
    info_path=data_root + 'kitti_dbinfos_train.pkl',
@@ -119,28 +216,34 @@ db_sampler = dict(
    object_rot_range=[0.0, 0.0],
    prepare=dict(
        filter_by_difficulty=[-1],
-        filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)),
-    sample_groups=dict(Car=12, Pedestrian=6, Cyclist=6),
-)
+        filter_by_min_points=dict(
+            Car=5,
+            Pedestrian=10,
+            Cyclist=10,
+        )),
+    sample_groups=dict(
+        Car=12,
+        Pedestrian=6,
+        Cyclist=6,
+    ))
train_pipeline = [
    dict(type='ObjectSample', db_sampler=db_sampler),
    dict(
        type='ObjectNoise',
        num_try=100,
-        loc_noise_std=[0, 0, 0],
+        loc_noise_std=[1.0, 1.0, 0.5],
        global_rot_range=[0.0, 0.0],
-        rot_uniform_noise=[-0.39269908, 0.39269908]),
+        rot_uniform_noise=[-0.78539816, 0.78539816]),
    dict(type='RandomFlip3D', flip_ratio=0.5),
    dict(
        type='GlobalRotScale',
        rot_uniform_noise=[-0.78539816, 0.78539816],
-        scaling_uniform_noise=[0.95, 1.05],
-        trans_normal_noise=[0.2, 0.2, 0.2]),
+        scaling_uniform_noise=[0.95, 1.05]),
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
    dict(type='PointShuffle'),
    dict(type='DefaultFormatBundle3D', class_names=class_names),
-    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
+    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
    dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
@@ -148,7 +251,7 @@ test_pipeline = [
        type='DefaultFormatBundle3D',
        class_names=class_names,
        with_label=False),
-    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d']),
+    dict(type='Collect3D', keys=['points', 'gt_bboxes_3d'])
]
data = dict(
@@ -183,21 +286,19 @@ data = dict(
        class_names=class_names,
        with_label=True))
# optimizer
-lr = 0.003  # max learning rate
-optimizer = dict(
-    type='AdamW',
-    lr=lr,
-    betas=(0.95, 0.99),  # the momentum is changed during training
-    weight_decay=0.001)
+lr = 0.001  # max learning rate
+optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict(
-    policy='cosine',
-    warmup='linear',
-    warmup_iters=1000,
-    warmup_ratio=1.0 / 10,
-    target_lr=1e-5,
-    as_ratio=True)
-momentum_config = None
+    policy='cyclic',
+    target_ratio=(10, 1e-4),
+    cyclic_times=1,
+    step_ratio_up=0.4)
+momentum_config = dict(
+    policy='cyclic',
+    target_ratio=(0.85 / 0.95, 1),
+    cyclic_times=1,
+    step_ratio_up=0.4)
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
@@ -209,8 +310,9 @@ log_config = dict(
# yapf:enable
# runtime settings
total_epochs = 80
-dist_params = dict(backend='nccl', port=29502)
+dist_params = dict(backend='nccl')
log_level = 'INFO'
+find_unused_parameters = True
work_dir = './work_dirs/parta2_secfpn_80e'
load_from = None
resume_from = None
......
# model settings
voxel_size = [0.05, 0.05, 0.1]
point_cloud_range = [0, -40, -3, 70.4, 40, 1] # velodyne coordinates, x, y, z
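# With this range and voxel size the quantized grid is
# (70.4 - 0) / 0.05 = 1408 (x), (40 - (-40)) / 0.05 = 1600 (y) and
# (1 - (-3)) / 0.1 = 40 (z, plus one slice of padding), which matches the
# middle_encoder output_shape of [41, 1600, 1408] below.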
model = dict(
type='PartA2',
voxel_layer=dict(
max_num_points=5, # max_points_per_voxel
point_cloud_range=point_cloud_range,
voxel_size=voxel_size,
        max_voxels=(16000, 40000)  # (training, testing) max_voxels
),
voxel_encoder=dict(
type='VoxelFeatureExtractorV3',
num_input_features=4,
num_filters=[4],
with_distance=False),
middle_encoder=dict(
type='SparseUNet',
in_channels=4,
output_shape=[41, 1600, 1408],
pre_act=False),
backbone=dict(
type='SECOND',
in_channels=256,
layer_nums=[5, 5],
layer_strides=[1, 2],
out_channels=[128, 256]),
neck=dict(
type='SECONDFPN',
in_channels=[128, 256],
upsample_strides=[1, 2],
out_channels=[256, 256]),
rpn_head=dict(
type='PartA2RPNHead',
class_name=['Car'],
in_channels=512,
feat_channels=512,
use_direction_classifier=True,
encode_bg_as_zeros=True,
anchor_generator=dict(
type='Anchor3DRangeGenerator',
ranges=[[0, -40.0, -1.78, 70.4, 40.0, -1.78]],
strides=[2],
sizes=[[1.6, 3.9, 1.56]],
rotations=[0, 1.57],
reshape_out=False),
diff_rad_by_sin=True,
assigner_per_size=True,
assign_per_class=True,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
roi_head=dict(
type='PartAggregationROIHead',
num_classes=1,
semantic_head=dict(
type='PointwiseSemanticHead',
in_channels=16,
extra_width=0.2,
seg_score_thr=0.3,
num_classes=1,
loss_seg=dict(
type='FocalLoss',
use_sigmoid=True,
reduction='sum',
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_part=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
seg_roi_extractor=dict(
type='Single3DRoIAwareExtractor',
roi_layer=dict(
type='RoIAwarePool3d',
out_size=14,
max_pts_per_voxel=128,
mode='max')),
part_roi_extractor=dict(
type='Single3DRoIAwareExtractor',
roi_layer=dict(
type='RoIAwarePool3d',
out_size=14,
max_pts_per_voxel=128,
mode='avg')),
bbox_head=dict(
type='PartA2BboxHead',
num_classes=1,
seg_in_channels=16,
part_in_channels=4,
seg_conv_channels=[64, 64],
part_conv_channels=[64, 64],
merge_conv_channels=[128, 128],
down_conv_channels=[128, 256],
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
shared_fc_channels=[256, 512, 512, 512],
cls_channels=[256, 256],
reg_channels=[256, 256],
dropout_ratio=0.1,
roi_feat_size=14,
with_corner_loss=True,
loss_bbox=dict(
type='SmoothL1Loss',
beta=1.0 / 9.0,
reduction='sum',
loss_weight=1.0),
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=True,
reduction='sum',
loss_weight=1.0))))
# model training and testing settings
train_cfg = dict(
rpn=dict(
assigner=dict( # for Car
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1),
allowed_border=0,
pos_weight=-1,
debug=False),
rpn_proposal=dict(
nms_pre=9000,
nms_post=512,
nms_thr=0.8,
score_thr=0,
use_rotate_nms=False),
rcnn=dict(
assigner=dict( # for Car
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlaps3D', coordinate='lidar'),
pos_iou_thr=0.55,
neg_iou_thr=0.55,
min_pos_iou=0.55,
ignore_iof_thr=-1),
sampler=dict(
type='IoUNegPiecewiseSampler',
num=128,
pos_fraction=0.55,
neg_piece_fractions=[0.8, 0.2],
neg_iou_piece_thrs=[0.55, 0.1],
neg_pos_ub=-1,
add_gt_as_proposals=False,
return_iou=True),
cls_pos_thr=0.75,
cls_neg_thr=0.25))
test_cfg = dict(
rpn=dict(
nms_pre=1024,
nms_post=100,
nms_thr=0.7,
score_thr=0,
use_rotate_nms=True),
rcnn=dict(
use_rotate_nms=True, use_raw_score=True, nms_thr=0.01, score_thr=0.1))
# dataset settings
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Car']
input_modality = dict(
use_lidar=False,
use_lidar_reduced=True,
use_depth=False,
use_lidar_intensity=True,
use_camera=False)
db_sampler = dict(
root_path=data_root,
info_path=data_root + 'kitti_dbinfos_train.pkl',
rate=1.0,
use_road_plane=False,
object_rot_range=[0.0, 0.0],
prepare=dict(
filter_by_difficulty=[-1],
filter_by_min_points=dict(Car=5),
),
sample_groups=dict(Car=15))
train_pipeline = [
dict(type='ObjectSample', db_sampler=db_sampler),
dict(
type='ObjectNoise',
num_try=100,
loc_noise_std=[1.0, 1.0, 0.5],
global_rot_range=[0.0, 0.0],
rot_uniform_noise=[-0.78539816, 0.78539816]),
dict(type='RandomFlip3D', flip_ratio=0.5),
dict(
type='GlobalRotScale',
rot_uniform_noise=[-0.78539816, 0.78539816],
scaling_uniform_noise=[0.95, 1.05]),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d'])
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_train.pkl',
split='training',
training=True,
pipeline=train_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
val=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
test=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='testing',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True))
# optimizer
lr = 0.001 # max learning rate
optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict(
policy='cyclic',
target_ratio=(10, 1e-4),
cyclic_times=1,
step_ratio_up=0.4)
momentum_config = dict(
policy='cyclic',
target_ratio=(0.85 / 0.95, 1),
cyclic_times=1,
step_ratio_up=0.4)
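# Note on the two cyclic dicts above: with mmcv-style cyclic hooks,
# target_ratio=(10, 1e-4) sweeps the LR from lr up to 10 * lr over the
# first 40% of the schedule (step_ratio_up=0.4) and then down to
# 1e-4 * lr, while momentum moves inversely between 0.85 and 0.95
# (target_ratio=(0.85 / 0.95, 1) relative to the 0.95 base momentum).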
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 80
dist_params = dict(backend='nccl')
log_level = 'INFO'
find_unused_parameters = True
work_dir = './work_dirs/parta2_secfpn_80e'
load_from = None
resume_from = None
workflow = [('train', 1)]
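For orientation, a hedged usage sketch (not part of this MR; the config path is hypothetical, and the builder API follows the mmdet-style registry this tree builds on):

    from mmcv import Config
    from mmdet.models import build_detector
    import mmdet3d.models  # noqa: F401, registers the 3D modules

    cfg = Config.fromfile(
        'configs/kitti/hv_PartA2_secfpn_4x8_cyclic_80e_kitti-3d-car.py')
    model = build_detector(
        cfg.model, train_cfg=cfg.train_cfg, test_cfg=cfg.test_cfg)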
@@ -8,7 +8,7 @@
from .samplers import (BaseSampler, CombinedSampler,
                       InstanceBalancedPosSampler, IoUBalancedNegSampler,
                       PseudoSampler, RandomSampler, SamplingResult)
from .structures import Box3DMode, CameraInstance3DBoxes, LiDARInstance3DBoxes
-from .transforms import boxes3d_to_bev_torch_lidar
+from .transforms import bbox3d2roi, boxes3d_to_bev_torch_lidar
from .assign_sampling import (  # isort:skip, avoid recursive imports
    build_bbox_coder,  # temporally settings
@@ -22,5 +22,5 @@ __all__ = [
    'build_bbox_coder', 'DeltaXYZWLHRBBoxCoder', 'boxes3d_to_bev_torch_lidar',
    'BboxOverlapsNearest3D', 'BboxOverlaps3D', 'bbox_overlaps_nearest_3d',
    'bbox_overlaps_3d', 'Box3DMode', 'LiDARInstance3DBoxes',
-    'CameraInstance3DBoxes'
+    'CameraInstance3DBoxes', 'bbox3d2roi'
]
@@ -566,3 +566,69 @@ def points_in_convex_polygon_jit(points, polygon, clockwise=True):
                    break
            ret[i, j] = success
    return ret

def boxes3d_to_corners3d_lidar(boxes3d, bottom_center=True):
"""convert kitti center boxes to corners
7 -------- 4
/| /|
6 -------- 5 .
| | | |
. 3 -------- 0
|/ |/
2 -------- 1
Args:
boxes3d (numpy.array): (N, 7) [x, y, z, w, l, h, ry] in LiDAR coords,
see the definition of ry in KITTI dataset
bottom_center (bool): whether z is on the bottom center of object.
Returns:
numpy.array: box corners with shape (N, 8, 3)
"""
boxes_num = boxes3d.shape[0]
w, l, h = boxes3d[:, 3], boxes3d[:, 4], boxes3d[:, 5]
x_corners = np.array(
[w / 2., -w / 2., -w / 2., w / 2., w / 2., -w / 2., -w / 2., w / 2.],
dtype=np.float32).T
y_corners = np.array(
[-l / 2., -l / 2., l / 2., l / 2., -l / 2., -l / 2., l / 2., l / 2.],
dtype=np.float32).T
if bottom_center:
z_corners = np.zeros((boxes_num, 8), dtype=np.float32)
z_corners[:, 4:8] = h.reshape(boxes_num, 1).repeat(4, axis=1) # (N, 8)
else:
z_corners = np.array([
-h / 2., -h / 2., -h / 2., -h / 2., h / 2., h / 2., h / 2., h / 2.
],
dtype=np.float32).T
ry = boxes3d[:, 6]
zeros, ones = np.zeros(
ry.size, dtype=np.float32), np.ones(
ry.size, dtype=np.float32)
rot_list = np.array([[np.cos(ry), -np.sin(ry), zeros],
[np.sin(ry), np.cos(ry), zeros], [zeros, zeros,
ones]]) # (3, 3, N)
R_list = np.transpose(rot_list, (2, 0, 1)) # (N, 3, 3)
temp_corners = np.concatenate((x_corners.reshape(
-1, 8, 1), y_corners.reshape(-1, 8, 1), z_corners.reshape(-1, 8, 1)),
axis=2) # (N, 8, 3)
rotated_corners = np.matmul(temp_corners, R_list) # (N, 8, 3)
x_corners = rotated_corners[:, :, 0]
y_corners = rotated_corners[:, :, 1]
z_corners = rotated_corners[:, :, 2]
x_loc, y_loc, z_loc = boxes3d[:, 0], boxes3d[:, 1], boxes3d[:, 2]
x = x_loc.reshape(-1, 1) + x_corners.reshape(-1, 8)
y = y_loc.reshape(-1, 1) + y_corners.reshape(-1, 8)
z = z_loc.reshape(-1, 1) + z_corners.reshape(-1, 8)
corners = np.concatenate(
(x.reshape(-1, 8, 1), y.reshape(-1, 8, 1), z.reshape(-1, 8, 1)),
axis=2)
return corners.astype(np.float32)
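A quick sanity check of the helper above (illustrative values, not part of the diff): an axis-aligned 2 x 4 x 1.5 m box at the origin with ry = 0 keeps its bottom face at z = 0 and its top face at z = h when bottom_center=True.

    import numpy as np

    boxes = np.array([[0., 0., 0., 2., 4., 1.5, 0.]], dtype=np.float32)
    corners = boxes3d_to_corners3d_lidar(boxes)
    assert corners.shape == (1, 8, 3)
    assert np.allclose(np.unique(corners[0, :, 2]), [0., 1.5])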
@@ -210,3 +210,70 @@ def enlarge_box3d_lidar(boxes3d, extra_width):
    large_boxes3d[:, 3:6] += extra_width * 2
    large_boxes3d[:, 2] -= extra_width  # bottom center z minus extra_width
    return large_boxes3d

def boxes3d_to_corners3d_lidar_torch(boxes3d, bottom_center=True):
"""convert kitti center boxes to corners
7 -------- 4
/| /|
6 -------- 5 .
| | | |
. 3 -------- 0
|/ |/
2 -------- 1
Args:
boxes3d (FloatTensor): (N, 7) [x, y, z, w, l, h, ry] in LiDAR coords,
see the definition of ry in KITTI dataset
bottom_center (bool): whether z is on the bottom center of object.
Returns:
FloatTensor: box corners with shape (N, 8, 3)
"""
boxes_num = boxes3d.shape[0]
w, l, h = boxes3d[:, 3:4], boxes3d[:, 4:5], boxes3d[:, 5:6]
ry = boxes3d[:, 6:7]
zeros = boxes3d.new_zeros(boxes_num, 1)
ones = boxes3d.new_ones(boxes_num, 1)
x_corners = torch.cat(
[w / 2., -w / 2., -w / 2., w / 2., w / 2., -w / 2., -w / 2., w / 2.],
dim=1) # (N, 8)
y_corners = torch.cat(
[-l / 2., -l / 2., l / 2., l / 2., -l / 2., -l / 2., l / 2., l / 2.],
dim=1) # (N, 8)
if bottom_center:
z_corners = torch.cat([zeros, zeros, zeros, zeros, h, h, h, h],
dim=1) # (N, 8)
else:
z_corners = torch.cat([
-h / 2., -h / 2., -h / 2., -h / 2., h / 2., h / 2., h / 2., h / 2.
],
dim=1) # (N, 8)
temp_corners = torch.cat(
(x_corners.unsqueeze(dim=2), y_corners.unsqueeze(dim=2),
z_corners.unsqueeze(dim=2)),
dim=2) # (N, 8, 3)
cosa, sina = torch.cos(ry), torch.sin(ry)
raw_1 = torch.cat([cosa, -sina, zeros], dim=1) # (N, 3)
raw_2 = torch.cat([sina, cosa, zeros], dim=1) # (N, 3)
raw_3 = torch.cat([zeros, zeros, ones], dim=1) # (N, 3)
R = torch.cat((raw_1.unsqueeze(dim=1), raw_2.unsqueeze(dim=1),
raw_3.unsqueeze(dim=1)),
dim=1) # (N, 3, 3)
rotated_corners = torch.matmul(temp_corners, R) # (N, 8, 3)
x_corners = rotated_corners[:, :, 0]
y_corners = rotated_corners[:, :, 1]
z_corners = rotated_corners[:, :, 2]
x_loc, y_loc, z_loc = boxes3d[:, 0], boxes3d[:, 1], boxes3d[:, 2]
x = x_loc.view(-1, 1) + x_corners.view(-1, 8)
y = y_loc.view(-1, 1) + y_corners.view(-1, 8)
z = z_loc.view(-1, 1) + z_corners.view(-1, 8)
corners = torch.cat((x.view(-1, 8, 1), y.view(-1, 8, 1), z.view(-1, 8, 1)),
dim=2)
return corners
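And a hedged parity check between the two implementations (assumes both helpers are importable from their modules):

    import numpy as np
    import torch

    boxes = np.random.rand(4, 7).astype(np.float32)
    np_corners = boxes3d_to_corners3d_lidar(boxes)
    torch_corners = boxes3d_to_corners3d_lidar_torch(torch.from_numpy(boxes))
    assert np.allclose(np_corners, torch_corners.numpy(), atol=1e-5)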
@@ -88,6 +88,11 @@ def bbox_overlaps_3d(bboxes1, bboxes2, mode='iou', coordinate='camera'):
    assert bboxes1.size(-1) == bboxes2.size(-1) == 7
    assert coordinate in ['camera', 'lidar']

rows = bboxes1.size(0)
cols = bboxes2.size(0)
if rows * cols == 0:
return bboxes1.new(rows, cols)

    if coordinate == 'camera':
        return boxes_iou3d_gpu_camera(bboxes1, bboxes2, mode)
    elif coordinate == 'lidar':
......
@@ -47,3 +47,25 @@ def boxes3d_to_bev_torch_lidar(boxes3d):
    boxes_bev[:, 2], boxes_bev[:, 3] = cu + half_w, cv + half_l
    boxes_bev[:, 4] = boxes3d[:, 6]
    return boxes_bev

def bbox3d2roi(bbox_list):
"""Convert a list of bboxes to roi format.
Args:
bbox_list (list[Tensor]): a list of bboxes corresponding to a batch
of images.
Returns:
Tensor: shape (n, c), [batch_ind, x, y ...]
"""
rois_list = []
for img_id, bboxes in enumerate(bbox_list):
if bboxes.size(0) > 0:
img_inds = bboxes.new_full((bboxes.size(0), 1), img_id)
rois = torch.cat([img_inds, bboxes], dim=-1)
else:
rois = torch.zeros_like(bboxes)
rois_list.append(rois)
rois = torch.cat(rois_list, 0)
return rois
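A small illustration of the output layout (hypothetical shapes): the batch index lands in column 0 and the box parameters follow.

    import torch

    rois = bbox3d2roi([torch.rand(2, 7), torch.rand(3, 7)])
    assert rois.shape == (5, 8)
    assert rois[:, 0].tolist() == [0., 0., 1., 1., 1.]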
@@ -731,7 +731,6 @@ def kitti_eval(gt_annos,
    result += 'aos AP:{:.2f}, {:.2f}, {:.2f}\n'.format(*mAPaos[:, 0])
    # prepare results for logger
-    ret_dict['Overall'] = dict()
    for idx in range(3):
        postfix = f'{difficulty[idx]}'
        if mAP3d is not None:
......
@@ -231,19 +231,15 @@ class PartA2RPNHead(SECONDHead):
            labels = labels[inds]
            scores = scores[inds]
            cls_scores = cls_scores[inds]
+            dir_scores = dir_scores[inds]
            return dict(
-                box3d_lidar=bboxes.cpu(),
-                scores=scores.cpu(),
-                label_preds=labels.cpu(),
-                cls_preds=cls_scores.cpu(
-                )  # raw scores with shape [max_num, cls_num]
+                box3d_lidar=bboxes,
+                scores=scores,
+                label_preds=labels,
+                cls_preds=cls_scores  # raw scores [max_num, cls_num]
            )
        else:
            return dict(
-                box3d_lidar=mlvl_bboxes.new_zeros([0,
-                                                   self.box_code_size]).cpu(),
-                scores=mlvl_bboxes.new_zeros([0]).cpu(),
-                label_preds=mlvl_bboxes.new_zeros([0]).cpu(),
-                cls_preds=mlvl_bboxes.new_zeros([0, mlvl_cls_score.shape[-1]
-                                                 ]).cpu())
+                box3d_lidar=mlvl_bboxes.new_zeros([0, self.box_code_size]),
+                scores=mlvl_bboxes.new_zeros([0]),
+                label_preds=mlvl_bboxes.new_zeros([0]),
+                cls_preds=mlvl_bboxes.new_zeros([0, mlvl_cls_score.shape[-1]]))
@@ -258,9 +258,9 @@ class SECONDHead(nn.Module, AnchorTrainMixin):
            dir_weights_list,
            num_total_samples=num_total_samples)
        return dict(
-            loss_cls_3d=losses_cls,
-            loss_bbox_3d=losses_bbox,
-            loss_dir_3d=losses_dir)
+            loss_rpn_cls=losses_cls,
+            loss_rpn_bbox=losses_bbox,
+            loss_rpn_dir=losses_dir)

    def get_bboxes(self,
                   cls_scores,
......
@@ -34,11 +34,13 @@ class PartA2(TwoStageDetector):
        self.middle_encoder = builder.build_middle_encoder(middle_encoder)

    def extract_feat(self, points, img_meta):
-        voxels, num_points, coors = self.voxelize(points)
-        voxel_dict = dict(voxels=voxels, num_points=num_points, coors=coors)
-        voxel_features = self.voxel_encoder(voxels, num_points, coors)
-        batch_size = coors[-1, 0].item() + 1
-        feats_dict = self.middle_encoder(voxel_features, coors, batch_size)
+        voxel_dict = self.voxelize(points)
+        voxel_features = self.voxel_encoder(voxel_dict['voxels'],
+                                            voxel_dict['num_points'],
+                                            voxel_dict['coors'])
+        batch_size = voxel_dict['coors'][-1, 0].item() + 1
+        feats_dict = self.middle_encoder(voxel_features, voxel_dict['coors'],
+                                         batch_size)
        x = self.backbone(feats_dict['spatial_features'])
        if self.with_neck:
            neck_feats = self.neck(x)
@@ -47,20 +49,33 @@ class PartA2(TwoStageDetector):
    @torch.no_grad()
    def voxelize(self, points):
-        voxels, coors, num_points = [], [], []
+        voxels, coors, num_points, voxel_centers = [], [], [], []
        for res in points:
            res_voxels, res_coors, res_num_points = self.voxel_layer(res)
+            res_voxel_centers = (
+                res_coors[:, [2, 1, 0]] + 0.5) * res_voxels.new_tensor(
+                    self.voxel_layer.voxel_size) + res_voxels.new_tensor(
+                        self.voxel_layer.point_cloud_range[0:3])
            voxels.append(res_voxels)
            coors.append(res_coors)
            num_points.append(res_num_points)
+            voxel_centers.append(res_voxel_centers)
        voxels = torch.cat(voxels, dim=0)
        num_points = torch.cat(num_points, dim=0)
+        voxel_centers = torch.cat(voxel_centers, dim=0)
        coors_batch = []
        for i, coor in enumerate(coors):
            coor_pad = F.pad(coor, (1, 0), mode='constant', value=i)
            coors_batch.append(coor_pad)
        coors_batch = torch.cat(coors_batch, dim=0)
-        return voxels, num_points, coors_batch
+
+        voxel_dict = dict(
+            voxels=voxels,
+            num_points=num_points,
+            coors=coors_batch,
+            voxel_centers=voxel_centers)
+        return voxel_dict
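A quick arithmetic check of the new voxel-center line (hypothetical values): coors are stored as (z, y, x), so the [2, 1, 0] reindex restores (x, y, z) before scaling by voxel_size and offsetting by the range minimum.

    import torch

    coor = torch.tensor([[5., 800., 700.]])       # one (z, y, x) index
    voxel_size = torch.tensor([0.05, 0.05, 0.1])  # (x, y, z), metres
    pc_min = torch.tensor([0., -40., -3.])        # point_cloud_range[0:3]
    center = (coor[:, [2, 1, 0]] + 0.5) * voxel_size + pc_min
    # -> tensor([[35.0250,  0.0250, -2.4500]])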
    def forward_train(self,
                      points,
@@ -69,7 +84,6 @@ class PartA2(TwoStageDetector):
                      gt_labels_3d,
                      gt_bboxes_ignore=None,
                      proposals=None):
-        # TODO: complete it
        feats_dict, voxels_dict = self.extract_feat(points, img_meta)

        losses = dict()

@@ -86,7 +100,13 @@ class PartA2(TwoStageDetector):
            proposal_inputs = rpn_outs + (img_meta, proposal_cfg)
            proposal_list = self.rpn_head.get_bboxes(*proposal_inputs)
        else:
-            proposal_list = proposals  # noqa: F841
+            proposal_list = proposals
+
+        roi_losses = self.roi_head.forward_train(feats_dict, voxels_dict,
+                                                 img_meta, proposal_list,
+                                                 gt_bboxes_3d, gt_labels_3d)
+        losses.update(roi_losses)

        return losses
@@ -102,16 +122,18 @@ class PartA2(TwoStageDetector):
    def simple_test(self,
                    points,
                    img_meta,
-                    gt_bboxes_3d=None,
+                    gt_bboxes_3d,
                    proposals=None,
                    rescale=False):
        feats_dict, voxels_dict = self.extract_feat(points, img_meta)
-        # TODO: complete it
-        if proposals is None:
-            proposal_list = self.simple_test_rpn(feats_dict['neck_feats'],
-                                                 img_meta, self.test_cfg.rpn)
+
+        if self.with_rpn:
+            rpn_outs = self.rpn_head(feats_dict['neck_feats'])
+            proposal_cfg = self.test_cfg.rpn
+            bbox_inputs = rpn_outs + (img_meta, proposal_cfg)
+            proposal_list = self.rpn_head.get_bboxes(*bbox_inputs)
        else:
            proposal_list = proposals
-        return self.roi_head.simple_test(
-            feats_dict, proposal_list, img_meta, rescale=rescale)
+        return self.roi_head.simple_test(feats_dict, voxels_dict, img_meta,
+                                         proposal_list)
+from .base_3droi_head import Base3DRoIHead
+from .bbox_heads import PartA2BboxHead
from .mask_heads import PointwiseSemanticHead
+from .part_aggregation_roi_head import PartAggregationROIHead
+from .roi_extractors import Single3DRoIAwareExtractor

-__all__ = ['PointwiseSemanticHead']
+__all__ = [
+    'Base3DRoIHead', 'PartAggregationROIHead', 'PointwiseSemanticHead',
+    'Single3DRoIAwareExtractor', 'PartA2BboxHead'
+]
from abc import ABCMeta, abstractmethod
import torch.nn as nn
class Base3DRoIHead(nn.Module, metaclass=ABCMeta):
"""Base class for 3d RoIHeads"""
def __init__(self,
bbox_head=None,
mask_roi_extractor=None,
mask_head=None,
train_cfg=None,
test_cfg=None):
super(Base3DRoIHead, self).__init__()
self.train_cfg = train_cfg
self.test_cfg = test_cfg
if bbox_head is not None:
self.init_bbox_head(bbox_head)
if mask_head is not None:
self.init_mask_head(mask_roi_extractor, mask_head)
self.init_assigner_sampler()
@property
def with_bbox(self):
return hasattr(self, 'bbox_head') and self.bbox_head is not None
@property
def with_mask(self):
return hasattr(self, 'mask_head') and self.mask_head is not None
@abstractmethod
def init_weights(self, pretrained):
pass
@abstractmethod
def init_bbox_head(self):
pass
@abstractmethod
def init_mask_head(self):
pass
@abstractmethod
def init_assigner_sampler(self):
pass
@abstractmethod
def forward_train(self,
x,
img_meta,
proposal_list,
gt_bboxes,
gt_labels,
gt_bboxes_ignore=None,
gt_masks=None,
**kwargs):
"""Forward function during training"""
pass
def simple_test(self,
x,
proposal_list,
img_meta,
proposals=None,
rescale=False,
**kwargs):
"""Test without augmentation."""
pass
def aug_test(self, x, proposal_list, img_metas, rescale=False, **kwargs):
"""Test with augmentations.
If rescale is False, then returned bboxes and masks will fit the scale
of imgs[0].
"""
pass
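A minimal sketch of the contract (hypothetical subclass, not part of the MR): a concrete head must fill in all abstract initializers before it can be instantiated; with the default bbox_head=None only init_assigner_sampler is actually invoked.

    class DummyRoIHead(Base3DRoIHead):
        def init_weights(self, pretrained):
            pass
        def init_bbox_head(self, bbox_head):
            self.bbox_head = None
        def init_mask_head(self):
            pass
        def init_assigner_sampler(self):
            pass
        def forward_train(self, *args, **kwargs):
            return dict()

    head = DummyRoIHead()
    assert not head.with_bbox and not head.with_mask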
@@ -2,8 +2,9 @@
from mmdet.models.roi_heads.bbox_heads import (BBoxHead, ConvFCBBoxHead,
                                               DoubleConvFCBBoxHead,
                                               Shared2FCBBoxHead,
                                               Shared4Conv1FCBBoxHead)
+from .parta2_bbox_head import PartA2BboxHead

__all__ = [
    'BBoxHead', 'ConvFCBBoxHead', 'Shared2FCBBoxHead',
-    'Shared4Conv1FCBBoxHead', 'DoubleConvFCBBoxHead'
+    'Shared4Conv1FCBBoxHead', 'DoubleConvFCBBoxHead', 'PartA2BboxHead'
]
This diff is collapsed.
@@ -126,22 +126,21 @@ class PointwiseSemanticHead(nn.Module):
        part_targets = torch.cat(part_targets, dim=0)
        return dict(seg_targets=seg_targets, part_targets=part_targets)

-    def loss(self, seg_preds, part_preds, seg_targets, part_targets):
+    def loss(self, semantic_results, semantic_targets):
        """Calculate point-wise segmentation and part prediction losses.

        Args:
-            seg_preds (torch.Tensor): prediction of binary
-                segmentation with shape [voxel_num, 1].
-            part_preds (torch.Tensor): prediction of part
-                with shape [voxel_num, 3].
-            seg_targets (torch.Tensor): target of segmentation
-                with shape [voxel_num, 1].
-            part_targets (torch.Tensor): target of part with
-                shape [voxel_num, 3].
+            semantic_results (dict): Results from semantic head.
+            semantic_targets (dict): Targets of semantic results.

        Returns:
            dict: loss of segmentation and part prediction.
        """
+        seg_preds = semantic_results['seg_preds']
+        part_preds = semantic_results['part_preds']
+        seg_targets = semantic_targets['seg_targets']
+        part_targets = semantic_targets['part_targets']
        pos_mask = (seg_targets > -1) & (seg_targets < self.num_classes)
        binary_seg_target = pos_mask.long()
        pos = pos_mask.float()
......
import torch.nn.functional as F
from mmdet3d.core import AssignResult
from mmdet3d.core.bbox import bbox3d2roi
from mmdet.core import build_assigner, build_sampler
from mmdet.models import HEADS
from ..builder import build_head, build_roi_extractor
from .base_3droi_head import Base3DRoIHead
@HEADS.register_module
class PartAggregationROIHead(Base3DRoIHead):
"""Part aggregation roi head for PartA2"""
def __init__(self,
semantic_head,
num_classes=3,
seg_roi_extractor=None,
part_roi_extractor=None,
bbox_head=None,
train_cfg=None,
test_cfg=None):
super(PartAggregationROIHead, self).__init__(
bbox_head=bbox_head, train_cfg=train_cfg, test_cfg=test_cfg)
self.num_classes = num_classes
assert semantic_head is not None
self.semantic_head = build_head(semantic_head)
if seg_roi_extractor is not None:
self.seg_roi_extractor = build_roi_extractor(seg_roi_extractor)
if part_roi_extractor is not None:
self.part_roi_extractor = build_roi_extractor(part_roi_extractor)
self.init_assigner_sampler()
def init_weights(self, pretrained):
pass
def init_mask_head(self):
pass
def init_bbox_head(self, bbox_head):
self.bbox_head = build_head(bbox_head)
def init_assigner_sampler(self):
self.bbox_assigner = None
self.bbox_sampler = None
if self.train_cfg:
if isinstance(self.train_cfg.assigner, dict):
self.bbox_assigner = build_assigner(self.train_cfg.assigner)
elif isinstance(self.train_cfg.assigner, list):
self.bbox_assigner = [
build_assigner(res) for res in self.train_cfg.assigner
]
self.bbox_sampler = build_sampler(self.train_cfg.sampler)
@property
def with_semantic(self):
return hasattr(self,
'semantic_head') and self.semantic_head is not None
def forward_train(self, feats_dict, voxels_dict, img_meta, proposal_list,
gt_bboxes_3d, gt_labels_3d):
"""Training forward function of PartAggregationROIHead
Args:
feats_dict (dict): Contains features from the first stage.
voxels_dict (dict): Contains information of voxels.
            img_meta (list[dict]): Meta info of each sample.
proposal_list (list[dict]): Proposal information from rpn.
gt_bboxes_3d (list[FloatTensor]): GT bboxes of each batch.
gt_labels_3d (list[LongTensor]): GT labels of each batch.
Returns:
dict: losses from each head.
"""
losses = dict()
if self.with_semantic:
semantic_results = self._semantic_forward_train(
feats_dict['seg_features'], voxels_dict, gt_bboxes_3d,
gt_labels_3d)
losses.update(semantic_results['loss_semantic'])
sample_results = self._assign_and_sample(proposal_list, gt_bboxes_3d,
gt_labels_3d)
if self.with_bbox:
bbox_results = self._bbox_forward_train(
feats_dict['seg_features'], semantic_results['part_feats'],
voxels_dict, sample_results)
losses.update(bbox_results['loss_bbox'])
return losses
def simple_test(self, feats_dict, voxels_dict, img_meta, proposal_list,
**kwargs):
"""Simple testing forward function of PartAggregationROIHead
Args:
feats_dict (dict): Contains features from the first stage.
voxels_dict (dict): Contains information of voxels.
            img_meta (list[dict]): Meta info of each sample.
proposal_list (list[dict]): Proposal information from rpn.
Returns:
list[dict]: Bbox results of each batch.
"""
assert self.with_bbox, 'Bbox head must be implemented.'
assert self.with_semantic
semantic_results = self.semantic_head(feats_dict['seg_features'])
rois = bbox3d2roi([res['box3d_lidar'] for res in proposal_list])
label_preds = [res['label_preds'] for res in proposal_list]
cls_preds = [res['cls_preds'] for res in proposal_list]
bbox_results = self._bbox_forward(feats_dict['seg_features'],
semantic_results['part_feats'],
voxels_dict, rois)
bbox_list = self.bbox_head.get_bboxes(
rois,
bbox_results['cls_score'],
bbox_results['bbox_pred'],
label_preds,
cls_preds,
img_meta,
cfg=self.test_cfg)
return bbox_list
def _bbox_forward_train(self, seg_feats, part_feats, voxels_dict,
sampling_results):
rois = bbox3d2roi([res.bboxes for res in sampling_results])
bbox_results = self._bbox_forward(seg_feats, part_feats, voxels_dict,
rois)
bbox_targets = self.bbox_head.get_targets(sampling_results,
self.train_cfg)
loss_bbox = self.bbox_head.loss(bbox_results['cls_score'],
bbox_results['bbox_pred'], rois,
*bbox_targets)
bbox_results.update(loss_bbox=loss_bbox)
return bbox_results
def _bbox_forward(self, seg_feats, part_feats, voxels_dict, rois):
pooled_seg_feats = self.seg_roi_extractor(seg_feats,
voxels_dict['voxel_centers'],
voxels_dict['coors'][..., 0],
rois)
pooled_part_feats = self.part_roi_extractor(
part_feats, voxels_dict['voxel_centers'],
voxels_dict['coors'][..., 0], rois)
cls_score, bbox_pred = self.bbox_head(pooled_seg_feats,
pooled_part_feats)
bbox_results = dict(
cls_score=cls_score,
bbox_pred=bbox_pred,
pooled_seg_feats=pooled_seg_feats,
pooled_part_feats=pooled_part_feats)
return bbox_results
def _assign_and_sample(self, proposal_list, gt_bboxes_3d, gt_labels_3d):
sampling_results = []
# bbox assign
for batch_idx in range(len(proposal_list)):
cur_proposal_list = proposal_list[batch_idx]
cur_boxes = cur_proposal_list['box3d_lidar']
cur_label_preds = cur_proposal_list['label_preds']
cur_gt_bboxes = gt_bboxes_3d[batch_idx]
cur_gt_labels = gt_labels_3d[batch_idx]
batch_num_gts = 0
batch_gt_indis = cur_gt_labels.new_full((cur_boxes.shape[0], ),
0) # 0 is bg
batch_max_overlaps = cur_boxes.new_zeros(cur_boxes.shape[0])
batch_gt_labels = cur_gt_labels.new_full((cur_boxes.shape[0], ),
-1) # -1 is bg
if isinstance(self.bbox_assigner, list): # for multi classes
for i, assigner in enumerate(self.bbox_assigner):
gt_per_cls = (cur_gt_labels == i)
pred_per_cls = (cur_label_preds == i)
cur_assign_res = assigner.assign(
cur_boxes[pred_per_cls],
cur_gt_bboxes[gt_per_cls],
gt_labels=cur_gt_labels[gt_per_cls])
# gather assign_results in different class into one result
batch_num_gts += cur_assign_res.num_gts
# gt inds (1-based)
gt_inds_arange_pad = gt_per_cls.nonzero().view(-1) + 1
# pad 0 for indice unassigned
gt_inds_arange_pad = F.pad(
gt_inds_arange_pad, (1, 0), mode='constant', value=0)
# pad -1 for indice ignore
gt_inds_arange_pad = F.pad(
gt_inds_arange_pad, (1, 0), mode='constant', value=-1)
# convert to 0~gt_num+2 for indices
gt_inds_arange_pad += 1
# now 0 is bg, >1 is fg in batch_gt_indis
batch_gt_indis[pred_per_cls] = gt_inds_arange_pad[
cur_assign_res.gt_inds + 1] - 1
batch_max_overlaps[
pred_per_cls] = cur_assign_res.max_overlaps
batch_gt_labels[pred_per_cls] = cur_assign_res.labels
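                    # worked example (hypothetical): with gt labels
                    # [1, 0, 1] and current class 1, nonzero() + 1 gives
                    # [1, 3]; after the two pads and the +1 shift the
                    # lookup table is [0, 1, 2, 4], so indexing it with
                    # gt_inds + 1 and subtracting 1 maps the per-class
                    # inds {-1, 0, 1, 2} to global inds {-1, 0, 1, 3}.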
assign_result = AssignResult(batch_num_gts, batch_gt_indis,
batch_max_overlaps,
batch_gt_labels)
else: # for single class
assign_result = self.bbox_assigner.assign(
cur_boxes, cur_gt_bboxes, gt_labels=cur_gt_labels)
# sample boxes
sampling_result = self.bbox_sampler.sample(assign_result,
cur_boxes,
cur_gt_bboxes,
cur_gt_labels)
sampling_results.append(sampling_result)
return sampling_results
def _semantic_forward_train(self, x, voxels_dict, gt_bboxes_3d,
gt_labels_3d):
semantic_results = self.semantic_head(x)
semantic_targets = self.semantic_head.get_targets(
voxels_dict, gt_bboxes_3d, gt_labels_3d)
loss_semantic = self.semantic_head.loss(semantic_results,
semantic_targets)
semantic_results.update(loss_semantic=loss_semantic)
return semantic_results
@@ -61,7 +61,10 @@ def test_config_build_detector():
            assert detector.roi_head.with_mask == detector.with_mask

            head_config = config_mod.model['roi_head']
-            _check_roi_head(head_config, detector.roi_head)
+            if head_config.type == 'PartAggregationROIHead':
+                check_parta2_roi_head(head_config, detector.roi_head)
+            else:
+                _check_roi_head(head_config, detector.roi_head)
        # else:
        #     # for single stage detector
        #     # detectors must have bbox head
@@ -319,3 +322,44 @@ def _check_bbox_head(bbox_cfg, bbox_head):
        out_dim = (4 if bbox_cfg.reg_class_agnostic else 4 *
                   bbox_cfg.num_classes)
        assert bbox_head.fc_reg.out_features == out_dim

def check_parta2_roi_head(config, head):
assert config['type'] == head.__class__.__name__
# check seg_roi_extractor
seg_roi_cfg = config.seg_roi_extractor
seg_roi_extractor = head.seg_roi_extractor
_check_parta2_roi_extractor(seg_roi_cfg, seg_roi_extractor)
# check part_roi_extractor
part_roi_cfg = config.part_roi_extractor
part_roi_extractor = head.part_roi_extractor
_check_parta2_roi_extractor(part_roi_cfg, part_roi_extractor)
# check bbox head infos
bbox_cfg = config.bbox_head
bbox_head = head.bbox_head
_check_parta2_bbox_head(bbox_cfg, bbox_head)
def _check_parta2_roi_extractor(config, roi_extractor):
assert config['type'] == roi_extractor.__class__.__name__
assert (config.roi_layer.out_size == roi_extractor.roi_layer.out_size)
assert (config.roi_layer.max_pts_per_voxel ==
roi_extractor.roi_layer.max_pts_per_voxel)
def _check_parta2_bbox_head(bbox_cfg, bbox_head):
import torch.nn as nn
if isinstance(bbox_cfg, list):
for single_bbox_cfg, single_bbox_head in zip(bbox_cfg, bbox_head):
_check_bbox_head(single_bbox_cfg, single_bbox_head)
elif isinstance(bbox_head, nn.ModuleList):
for single_bbox_head in bbox_head:
_check_bbox_head(bbox_cfg, single_bbox_head)
else:
assert bbox_cfg['type'] == bbox_head.__class__.__name__
assert bbox_cfg.seg_in_channels == bbox_head.seg_conv[0][0].in_channels
assert bbox_cfg.part_in_channels == bbox_head.part_conv[0][
0].in_channels
@@ -103,18 +103,18 @@ def test_second_head_loss():
    losses = self.loss(cls_score, bbox_pred, dir_cls_preds, gt_bboxes,
                       gt_labels, input_metas)
-    assert losses['loss_cls_3d'][0] > 0
-    assert losses['loss_bbox_3d'][0] > 0
-    assert losses['loss_dir_3d'][0] > 0
+    assert losses['loss_rpn_cls'][0] > 0
+    assert losses['loss_rpn_bbox'][0] > 0
+    assert losses['loss_rpn_dir'][0] > 0

    # test empty ground truth case
    gt_bboxes = list(torch.empty((2, 0, 7)).cuda())
    gt_labels = list(torch.empty((2, 0)).cuda())
    empty_gt_losses = self.loss(cls_score, bbox_pred, dir_cls_preds, gt_bboxes,
                                gt_labels, input_metas)
-    assert empty_gt_losses['loss_cls_3d'][0] > 0
-    assert empty_gt_losses['loss_bbox_3d'][0] == 0
-    assert empty_gt_losses['loss_dir_3d'][0] == 0
+    assert empty_gt_losses['loss_rpn_cls'][0] > 0
+    assert empty_gt_losses['loss_rpn_bbox'][0] == 0
+    assert empty_gt_losses['loss_rpn_dir'][0] == 0
def test_second_head_getboxes(): def test_second_head_getboxes():
@@ -147,7 +147,7 @@ def test_parta2_rpnhead_getboxes():
    if not torch.cuda.is_available():
        pytest.skip('test requires GPU and torch+cuda')
    rpn_head_cfg, proposal_cfg = _get_rpn_head_cfg(
-        'kitti/hv_PartA2_secfpn_4x8_cosine_80e_kitti-3d-3class.py')
+        'kitti/hv_PartA2_secfpn_4x8_cyclic_80e_kitti-3d-3class.py')
    from mmdet3d.models.builder import build_head
    self = build_head(rpn_head_cfg)
......
@@ -62,9 +62,7 @@ def test_PointwiseSemanticHead():
                                [voxel_features.shape[0], 3])

    # test loss
-    loss_dict = self.loss(feats_dict['seg_preds'], feats_dict['part_preds'],
-                          target_dict['seg_targets'],
-                          target_dict['part_targets'])
+    loss_dict = self.loss(feats_dict, target_dict)
    assert loss_dict['loss_seg'] > 0
    assert loss_dict['loss_part'] == 0  # no points in gt_boxes
    total_loss = loss_dict['loss_seg'] + loss_dict['loss_part']
......