Commit 885a225b authored by wuyuefeng's avatar wuyuefeng Committed by zhangwenwei

Feature: PartA2 RoI head

parent 535344de
......@@ -10,13 +10,16 @@ model = dict(
voxel_size=voxel_size,
max_voxels=(16000, 40000) # (training, testing) max_voxels
),
voxel_encoder=dict(type='VoxelFeatureExtractorV3'),
voxel_encoder=dict(
type='VoxelFeatureExtractorV3',
num_input_features=4,
num_filters=[4],
with_distance=False),
middle_encoder=dict(
type='SparseUNet',
in_channels=4,
output_shape=[41, 1600, 1408],
pre_act=False,
),
pre_act=False),
backbone=dict(
type='SECOND',
in_channels=256,
......@@ -56,8 +59,65 @@ model = dict(
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2),
))
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
roi_head=dict(
type='PartAggregationROIHead',
num_classes=3,
semantic_head=dict(
type='PointwiseSemanticHead',
in_channels=16,
extra_width=0.2,
seg_score_thr=0.3,
num_classes=3,
loss_seg=dict(
type='FocalLoss',
use_sigmoid=True,
reduction='sum',
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_part=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
seg_roi_extractor=dict(
type='Single3DRoIAwareExtractor',
roi_layer=dict(
type='RoIAwarePool3d',
out_size=14,
max_pts_per_voxel=128,
mode='max')),
part_roi_extractor=dict(
type='Single3DRoIAwareExtractor',
roi_layer=dict(
type='RoIAwarePool3d',
out_size=14,
max_pts_per_voxel=128,
mode='avg')),
bbox_head=dict(
type='PartA2BboxHead',
num_classes=3,
seg_in_channels=16,
part_in_channels=4,
seg_conv_channels=[64, 64],
part_conv_channels=[64, 64],
merge_conv_channels=[128, 128],
down_conv_channels=[128, 256],
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
shared_fc_channels=[256, 512, 512, 512],
cls_channels=[256, 256],
reg_channels=[256, 256],
dropout_ratio=0.1,
roi_feat_size=14,
with_corner_loss=True,
loss_bbox=dict(
type='SmoothL1Loss',
beta=1.0 / 9.0,
reduction='sum',
loss_weight=1.0),
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=True,
reduction='sum',
loss_weight=1.0))))
# model training and testing settings
train_cfg = dict(
rpn=dict(
......@@ -82,7 +142,7 @@ train_cfg = dict(
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1),
ignore_iof_thr=-1)
],
allowed_border=0,
pos_weight=-1,
......@@ -93,24 +153,61 @@ train_cfg = dict(
nms_thr=0.8,
score_thr=0,
use_rotate_nms=False),
)
rcnn=dict(
assigner=[
dict( # for Pedestrian
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlaps3D', coordinate='lidar'),
pos_iou_thr=0.55,
neg_iou_thr=0.55,
min_pos_iou=0.55,
ignore_iof_thr=-1),
dict( # for Cyclist
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlaps3D', coordinate='lidar'),
pos_iou_thr=0.55,
neg_iou_thr=0.55,
min_pos_iou=0.55,
ignore_iof_thr=-1),
dict( # for Car
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlaps3D', coordinate='lidar'),
pos_iou_thr=0.55,
neg_iou_thr=0.55,
min_pos_iou=0.55,
ignore_iof_thr=-1)
],
sampler=dict(
type='IoUNegPiecewiseSampler',
num=128,
pos_fraction=0.55,
neg_piece_fractions=[0.8, 0.2],
neg_iou_piece_thrs=[0.55, 0.1],
neg_pos_ub=-1,
add_gt_as_proposals=False,
return_iou=True),
cls_pos_thr=0.75,
cls_neg_thr=0.25))
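# Rough sanity check of the rcnn sampling budget above. This is a sketch of
# one reading of the config, not IoUNegPiecewiseSampler's exact logic
# (rounding and piece-filling order are implementation-defined).
num, pos_fraction = 128, 0.55
neg_piece_fractions = [0.8, 0.2]
max_pos = int(num * pos_fraction)  # up to 70 positive RoIs per sample
neg_pieces = [int((num - max_pos) * f) for f in neg_piece_fractions]
print(max_pos, neg_pieces)  # 70 [46, 11] -> mostly hard negatives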
test_cfg = dict(
rpn=dict(
nms_pre=1024,
max_per_img=100,
use_rotate_nms=True,
nms_across_levels=False,
nms_post=100,
nms_thr=0.7,
score_thr=0))
score_thr=0,
use_rotate_nms=True),
rcnn=dict(
use_rotate_nms=True, use_raw_score=True, nms_thr=0.01, score_thr=0.1))
# dataset settings
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Pedestrian', 'Cyclist', 'Car']
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
input_modality = dict(
use_lidar=True, use_depth=False, use_lidar_intensity=True, use_camera=True)
use_lidar=False,
use_lidar_reduced=True,
use_depth=False,
use_lidar_intensity=True,
use_camera=False)
db_sampler = dict(
root_path=data_root,
info_path=data_root + 'kitti_dbinfos_train.pkl',
......@@ -119,28 +216,34 @@ db_sampler = dict(
object_rot_range=[0.0, 0.0],
prepare=dict(
filter_by_difficulty=[-1],
filter_by_min_points=dict(Car=5, Pedestrian=10, Cyclist=10)),
sample_groups=dict(Car=12, Pedestrian=6, Cyclist=6),
)
filter_by_min_points=dict(
Car=5,
Pedestrian=10,
Cyclist=10,
)),
sample_groups=dict(
Car=12,
Pedestrian=6,
Cyclist=6,
))
train_pipeline = [
dict(type='ObjectSample', db_sampler=db_sampler),
dict(
type='ObjectNoise',
num_try=100,
loc_noise_std=[0, 0, 0],
loc_noise_std=[1.0, 1.0, 0.5],
global_rot_range=[0.0, 0.0],
rot_uniform_noise=[-0.39269908, 0.39269908]),
rot_uniform_noise=[-0.78539816, 0.78539816]),
dict(type='RandomFlip3D', flip_ratio=0.5),
dict(
type='GlobalRotScale',
rot_uniform_noise=[-0.78539816, 0.78539816],
scaling_uniform_noise=[0.95, 1.05],
trans_normal_noise=[0.2, 0.2, 0.2]),
scaling_uniform_noise=[0.95, 1.05]),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d']),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
......@@ -148,7 +251,7 @@ test_pipeline = [
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d']),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d'])
]
data = dict(
......@@ -183,21 +286,19 @@ data = dict(
class_names=class_names,
with_label=True))
# optimizer
lr = 0.003 # max learning rate
optimizer = dict(
type='AdamW',
lr=lr,
betas=(0.95, 0.99), # the momentum is changed during training
weight_decay=0.001)
lr = 0.001 # max learning rate
optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict(
policy='cosine',
warmup='linear',
warmup_iters=1000,
warmup_ratio=1.0 / 10,
target_lr=1e-5,
as_ratio=True)
momentum_config = None
policy='cyclic',
target_ratio=(10, 1e-4),
cyclic_times=1,
step_ratio_up=0.4)
momentum_config = dict(
policy='cyclic',
target_ratio=(0.85 / 0.95, 1),
cyclic_times=1,
step_ratio_up=0.4)
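# For orientation, the cyclic schedule above in concrete numbers (assuming
# mmcv's cyclic hook convention: target_ratio scales the base lr at the
# cycle peak and at the end, with 40% of iterations spent ramping up):
base_lr = 0.001
peak_lr = base_lr * 10  # 0.01, reached after 40% of the single cycle
final_lr = base_lr * 1e-4  # 1e-07, annealed to over the remaining 60%
print(peak_lr, final_lr)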
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
......@@ -209,8 +310,9 @@ log_config = dict(
# yapf:enable
# runtime settings
total_epochs = 80
dist_params = dict(backend='nccl', port=29502)
dist_params = dict(backend='nccl')
log_level = 'INFO'
find_unused_parameters = True
work_dir = './work_dirs/parta2_secfpn_80e'
load_from = None
resume_from = None
......
# model settings
voxel_size = [0.05, 0.05, 0.1]
point_cloud_range = [0, -40, -3, 70.4, 40, 1] # velodyne coordinates, x, y, z
model = dict(
type='PartA2',
voxel_layer=dict(
max_num_points=5, # max_points_per_voxel
point_cloud_range=point_cloud_range,
voxel_size=voxel_size,
max_voxels=(16000, 40000) # (training, testing) max_voxels
),
voxel_encoder=dict(
type='VoxelFeatureExtractorV3',
num_input_features=4,
num_filters=[4],
with_distance=False),
middle_encoder=dict(
type='SparseUNet',
in_channels=4,
output_shape=[41, 1600, 1408],
pre_act=False),
backbone=dict(
type='SECOND',
in_channels=256,
layer_nums=[5, 5],
layer_strides=[1, 2],
out_channels=[128, 256]),
neck=dict(
type='SECONDFPN',
in_channels=[128, 256],
upsample_strides=[1, 2],
out_channels=[256, 256]),
rpn_head=dict(
type='PartA2RPNHead',
class_name=['Car'],
in_channels=512,
feat_channels=512,
use_direction_classifier=True,
encode_bg_as_zeros=True,
anchor_generator=dict(
type='Anchor3DRangeGenerator',
ranges=[[0, -40.0, -1.78, 70.4, 40.0, -1.78]],
strides=[2],
sizes=[[1.6, 3.9, 1.56]],
rotations=[0, 1.57],
reshape_out=False),
diff_rad_by_sin=True,
assigner_per_size=True,
assign_per_class=True,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_bbox=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_dir=dict(
type='CrossEntropyLoss', use_sigmoid=False, loss_weight=0.2)),
roi_head=dict(
type='PartAggregationROIHead',
num_classes=1,
semantic_head=dict(
type='PointwiseSemanticHead',
in_channels=16,
extra_width=0.2,
seg_score_thr=0.3,
num_classes=1,
loss_seg=dict(
type='FocalLoss',
use_sigmoid=True,
reduction='sum',
gamma=2.0,
alpha=0.25,
loss_weight=1.0),
loss_part=dict(
type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)),
seg_roi_extractor=dict(
type='Single3DRoIAwareExtractor',
roi_layer=dict(
type='RoIAwarePool3d',
out_size=14,
max_pts_per_voxel=128,
mode='max')),
part_roi_extractor=dict(
type='Single3DRoIAwareExtractor',
roi_layer=dict(
type='RoIAwarePool3d',
out_size=14,
max_pts_per_voxel=128,
mode='avg')),
bbox_head=dict(
type='PartA2BboxHead',
num_classes=1,
seg_in_channels=16,
part_in_channels=4,
seg_conv_channels=[64, 64],
part_conv_channels=[64, 64],
merge_conv_channels=[128, 128],
down_conv_channels=[128, 256],
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
shared_fc_channels=[256, 512, 512, 512],
cls_channels=[256, 256],
reg_channels=[256, 256],
dropout_ratio=0.1,
roi_feat_size=14,
with_corner_loss=True,
loss_bbox=dict(
type='SmoothL1Loss',
beta=1.0 / 9.0,
reduction='sum',
loss_weight=1.0),
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=True,
reduction='sum',
loss_weight=1.0))))
# model training and testing settings
train_cfg = dict(
rpn=dict(
assigner=dict( # for Car
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlapsNearest3D'),
pos_iou_thr=0.6,
neg_iou_thr=0.45,
min_pos_iou=0.45,
ignore_iof_thr=-1),
allowed_border=0,
pos_weight=-1,
debug=False),
rpn_proposal=dict(
nms_pre=9000,
nms_post=512,
nms_thr=0.8,
score_thr=0,
use_rotate_nms=False),
rcnn=dict(
assigner=dict( # for Car
type='MaxIoUAssigner',
iou_calculator=dict(type='BboxOverlaps3D', coordinate='lidar'),
pos_iou_thr=0.55,
neg_iou_thr=0.55,
min_pos_iou=0.55,
ignore_iof_thr=-1),
sampler=dict(
type='IoUNegPiecewiseSampler',
num=128,
pos_fraction=0.55,
neg_piece_fractions=[0.8, 0.2],
neg_iou_piece_thrs=[0.55, 0.1],
neg_pos_ub=-1,
add_gt_as_proposals=False,
return_iou=True),
cls_pos_thr=0.75,
cls_neg_thr=0.25))
test_cfg = dict(
rpn=dict(
nms_pre=1024,
nms_post=100,
nms_thr=0.7,
score_thr=0,
use_rotate_nms=True),
rcnn=dict(
use_rotate_nms=True, use_raw_score=True, nms_thr=0.01, score_thr=0.1))
# dataset settings
dataset_type = 'KittiDataset'
data_root = 'data/kitti/'
class_names = ['Car']
input_modality = dict(
use_lidar=False,
use_lidar_reduced=True,
use_depth=False,
use_lidar_intensity=True,
use_camera=False)
db_sampler = dict(
root_path=data_root,
info_path=data_root + 'kitti_dbinfos_train.pkl',
rate=1.0,
use_road_plane=False,
object_rot_range=[0.0, 0.0],
prepare=dict(
filter_by_difficulty=[-1],
filter_by_min_points=dict(Car=5),
),
sample_groups=dict(Car=15))
train_pipeline = [
dict(type='ObjectSample', db_sampler=db_sampler),
dict(
type='ObjectNoise',
num_try=100,
loc_noise_std=[1.0, 1.0, 0.5],
global_rot_range=[0.0, 0.0],
rot_uniform_noise=[-0.78539816, 0.78539816]),
dict(type='RandomFlip3D', flip_ratio=0.5),
dict(
type='GlobalRotScale',
rot_uniform_noise=[-0.78539816, 0.78539816],
scaling_uniform_noise=[0.95, 1.05]),
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(type='ObjectRangeFilter', point_cloud_range=point_cloud_range),
dict(type='PointShuffle'),
dict(type='DefaultFormatBundle3D', class_names=class_names),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d', 'gt_labels_3d'])
]
test_pipeline = [
dict(type='PointsRangeFilter', point_cloud_range=point_cloud_range),
dict(
type='DefaultFormatBundle3D',
class_names=class_names,
with_label=False),
dict(type='Collect3D', keys=['points', 'gt_bboxes_3d'])
]
data = dict(
samples_per_gpu=2,
workers_per_gpu=2,
train=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_train.pkl',
split='training',
training=True,
pipeline=train_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
val=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='training',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True),
test=dict(
type=dataset_type,
root_path=data_root,
ann_file=data_root + 'kitti_infos_val.pkl',
split='testing',
pipeline=test_pipeline,
modality=input_modality,
class_names=class_names,
with_label=True))
# optimizer
lr = 0.001 # max learning rate
optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict(
policy='cyclic',
target_ratio=(10, 1e-4),
cyclic_times=1,
step_ratio_up=0.4)
momentum_config = dict(
policy='cyclic',
target_ratio=(0.85 / 0.95, 1),
cyclic_times=1,
step_ratio_up=0.4)
checkpoint_config = dict(interval=1)
# yapf:disable
log_config = dict(
interval=50,
hooks=[
dict(type='TextLoggerHook'),
dict(type='TensorboardLoggerHook')
])
# yapf:enable
# runtime settings
total_epochs = 80
dist_params = dict(backend='nccl')
log_level = 'INFO'
find_unused_parameters = True
work_dir = './work_dirs/parta2_secfpn_80e'
load_from = None
resume_from = None
workflow = [('train', 1)]
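# To sanity-check this config without launching a job, it can be loaded and
# inspected with mmcv (the file path below is an assumption about where this
# config lands in the repo; the tests reference the matching 3-class file):
from mmcv import Config

cfg = Config.fromfile(
    'configs/kitti/hv_PartA2_secfpn_4x8_cyclic_80e_kitti-3d-car.py')
print(cfg.model.roi_head.type)  # PartAggregationROIHead
print(cfg.train_cfg.rcnn.sampler.num)  # 128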
......@@ -8,7 +8,7 @@ from .samplers import (BaseSampler, CombinedSampler,
InstanceBalancedPosSampler, IoUBalancedNegSampler,
PseudoSampler, RandomSampler, SamplingResult)
from .structures import Box3DMode, CameraInstance3DBoxes, LiDARInstance3DBoxes
from .transforms import boxes3d_to_bev_torch_lidar
from .transforms import bbox3d2roi, boxes3d_to_bev_torch_lidar
from .assign_sampling import ( # isort:skip, avoid recursive imports
build_bbox_coder, # temporally settings
......@@ -22,5 +22,5 @@ __all__ = [
'build_bbox_coder', 'DeltaXYZWLHRBBoxCoder', 'boxes3d_to_bev_torch_lidar',
'BboxOverlapsNearest3D', 'BboxOverlaps3D', 'bbox_overlaps_nearest_3d',
'bbox_overlaps_3d', 'Box3DMode', 'LiDARInstance3DBoxes',
'CameraInstance3DBoxes'
'CameraInstance3DBoxes', 'bbox3d2roi'
]
......@@ -566,3 +566,69 @@ def points_in_convex_polygon_jit(points, polygon, clockwise=True):
break
ret[i, j] = success
return ret
def boxes3d_to_corners3d_lidar(boxes3d, bottom_center=True):
"""convert kitti center boxes to corners
7 -------- 4
/| /|
6 -------- 5 .
| | | |
. 3 -------- 0
|/ |/
2 -------- 1
Args:
boxes3d (numpy.array): (N, 7) [x, y, z, w, l, h, ry] in LiDAR coords,
see the definition of ry in KITTI dataset
bottom_center (bool): whether z is on the bottom center of object.
Returns:
numpy.array: box corners with shape (N, 8, 3)
"""
boxes_num = boxes3d.shape[0]
w, l, h = boxes3d[:, 3], boxes3d[:, 4], boxes3d[:, 5]
x_corners = np.array(
[w / 2., -w / 2., -w / 2., w / 2., w / 2., -w / 2., -w / 2., w / 2.],
dtype=np.float32).T
y_corners = np.array(
[-l / 2., -l / 2., l / 2., l / 2., -l / 2., -l / 2., l / 2., l / 2.],
dtype=np.float32).T
if bottom_center:
z_corners = np.zeros((boxes_num, 8), dtype=np.float32)
z_corners[:, 4:8] = h.reshape(boxes_num, 1).repeat(4, axis=1) # (N, 8)
else:
z_corners = np.array([
-h / 2., -h / 2., -h / 2., -h / 2., h / 2., h / 2., h / 2., h / 2.
],
dtype=np.float32).T
ry = boxes3d[:, 6]
zeros, ones = np.zeros(
ry.size, dtype=np.float32), np.ones(
ry.size, dtype=np.float32)
rot_list = np.array([[np.cos(ry), -np.sin(ry), zeros],
[np.sin(ry), np.cos(ry), zeros], [zeros, zeros,
ones]]) # (3, 3, N)
R_list = np.transpose(rot_list, (2, 0, 1)) # (N, 3, 3)
temp_corners = np.concatenate((x_corners.reshape(
-1, 8, 1), y_corners.reshape(-1, 8, 1), z_corners.reshape(-1, 8, 1)),
axis=2) # (N, 8, 3)
rotated_corners = np.matmul(temp_corners, R_list) # (N, 8, 3)
x_corners = rotated_corners[:, :, 0]
y_corners = rotated_corners[:, :, 1]
z_corners = rotated_corners[:, :, 2]
x_loc, y_loc, z_loc = boxes3d[:, 0], boxes3d[:, 1], boxes3d[:, 2]
x = x_loc.reshape(-1, 1) + x_corners.reshape(-1, 8)
y = y_loc.reshape(-1, 1) + y_corners.reshape(-1, 8)
z = z_loc.reshape(-1, 1) + z_corners.reshape(-1, 8)
corners = np.concatenate(
(x.reshape(-1, 8, 1), y.reshape(-1, 8, 1), z.reshape(-1, 8, 1)),
axis=2)
return corners.astype(np.float32)
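# Quick usage example for the conversion above (the import path is an
# assumption; adjust it to wherever this numpy helper lives):
import numpy as np
from mmdet3d.core.bbox.box_np_ops import boxes3d_to_corners3d_lidar

# one axis-aligned box: w=1.6, l=3.9, h=1.56, ry=0, bottom center at origin
boxes = np.array([[0., 0., 0., 1.6, 3.9, 1.56, 0.]], dtype=np.float32)
corners = boxes3d_to_corners3d_lidar(boxes, bottom_center=True)
print(corners.shape)  # (1, 8, 3)
print(corners[0, :4, 2])  # bottom face, all z == 0
print(corners[0, 4:, 2])  # top face, all z == 1.56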
......@@ -210,3 +210,70 @@ def enlarge_box3d_lidar(boxes3d, extra_width):
large_boxes3d[:, 3:6] += extra_width * 2
large_boxes3d[:, 2] -= extra_width # bottom center z minus extra_width
return large_boxes3d
def boxes3d_to_corners3d_lidar_torch(boxes3d, bottom_center=True):
"""convert kitti center boxes to corners
7 -------- 4
/| /|
6 -------- 5 .
| | | |
. 3 -------- 0
|/ |/
2 -------- 1
Args:
boxes3d (FloatTensor): (N, 7) [x, y, z, w, l, h, ry] in LiDAR coords,
see the definition of ry in KITTI dataset
bottom_center (bool): whether z is on the bottom center of object.
Returns:
FloatTensor: box corners with shape (N, 8, 3)
"""
boxes_num = boxes3d.shape[0]
w, l, h = boxes3d[:, 3:4], boxes3d[:, 4:5], boxes3d[:, 5:6]
ry = boxes3d[:, 6:7]
zeros = boxes3d.new_zeros(boxes_num, 1)
ones = boxes3d.new_ones(boxes_num, 1)
x_corners = torch.cat(
[w / 2., -w / 2., -w / 2., w / 2., w / 2., -w / 2., -w / 2., w / 2.],
dim=1) # (N, 8)
y_corners = torch.cat(
[-l / 2., -l / 2., l / 2., l / 2., -l / 2., -l / 2., l / 2., l / 2.],
dim=1) # (N, 8)
if bottom_center:
z_corners = torch.cat([zeros, zeros, zeros, zeros, h, h, h, h],
dim=1) # (N, 8)
else:
z_corners = torch.cat([
-h / 2., -h / 2., -h / 2., -h / 2., h / 2., h / 2., h / 2., h / 2.
],
dim=1) # (N, 8)
temp_corners = torch.cat(
(x_corners.unsqueeze(dim=2), y_corners.unsqueeze(dim=2),
z_corners.unsqueeze(dim=2)),
dim=2) # (N, 8, 3)
cosa, sina = torch.cos(ry), torch.sin(ry)
raw_1 = torch.cat([cosa, -sina, zeros], dim=1) # (N, 3)
raw_2 = torch.cat([sina, cosa, zeros], dim=1) # (N, 3)
raw_3 = torch.cat([zeros, zeros, ones], dim=1) # (N, 3)
R = torch.cat((raw_1.unsqueeze(dim=1), raw_2.unsqueeze(dim=1),
raw_3.unsqueeze(dim=1)),
dim=1) # (N, 3, 3)
rotated_corners = torch.matmul(temp_corners, R) # (N, 8, 3)
x_corners = rotated_corners[:, :, 0]
y_corners = rotated_corners[:, :, 1]
z_corners = rotated_corners[:, :, 2]
x_loc, y_loc, z_loc = boxes3d[:, 0], boxes3d[:, 1], boxes3d[:, 2]
x = x_loc.view(-1, 1) + x_corners.view(-1, 8)
y = y_loc.view(-1, 1) + y_corners.view(-1, 8)
z = z_loc.view(-1, 1) + z_corners.view(-1, 8)
corners = torch.cat((x.view(-1, 8, 1), y.view(-1, 8, 1), z.view(-1, 8, 1)),
dim=2)
return corners
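# Since the numpy and torch implementations should agree, a small parity
# check makes a handy regression guard (box_torch_ops is the module imported
# elsewhere in this commit; the numpy import path is an assumption):
import numpy as np
import torch
from mmdet3d.core.bbox import box_torch_ops
from mmdet3d.core.bbox.box_np_ops import boxes3d_to_corners3d_lidar

boxes_np = np.array([[1., 2., -1., 1.6, 3.9, 1.56, 0.3]], dtype=np.float32)
corners_np = boxes3d_to_corners3d_lidar(boxes_np)
corners_pt = box_torch_ops.boxes3d_to_corners3d_lidar_torch(
    torch.from_numpy(boxes_np))
assert np.allclose(corners_np, corners_pt.numpy(), atol=1e-5)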
......@@ -88,6 +88,11 @@ def bbox_overlaps_3d(bboxes1, bboxes2, mode='iou', coordinate='camera'):
assert bboxes1.size(-1) == bboxes2.size(-1) == 7
assert coordinate in ['camera', 'lidar']
rows = bboxes1.size(0)
cols = bboxes2.size(0)
if rows * cols == 0:
return bboxes1.new(rows, cols)
if coordinate == 'camera':
return boxes_iou3d_gpu_camera(bboxes1, bboxes2, mode)
elif coordinate == 'lidar':
......
......@@ -47,3 +47,25 @@ def boxes3d_to_bev_torch_lidar(boxes3d):
boxes_bev[:, 2], boxes_bev[:, 3] = cu + half_w, cv + half_l
boxes_bev[:, 4] = boxes3d[:, 6]
return boxes_bev
def bbox3d2roi(bbox_list):
"""Convert a list of bboxes to roi format.
Args:
bbox_list (list[Tensor]): a list of bboxes corresponding to a batch
of images.
Returns:
Tensor: shape (n, c), [batch_ind, x, y ...]
"""
rois_list = []
for img_id, bboxes in enumerate(bbox_list):
if bboxes.size(0) > 0:
img_inds = bboxes.new_full((bboxes.size(0), 1), img_id)
rois = torch.cat([img_inds, bboxes], dim=-1)
else:
rois = torch.zeros_like(bboxes)
rois_list.append(rois)
rois = torch.cat(rois_list, 0)
return rois
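# Example: bbox3d2roi simply prefixes each box with its batch index
# (a minimal sketch; random boxes stand in for real proposals):
import torch
from mmdet3d.core.bbox import bbox3d2roi

boxes_sample0 = torch.rand(2, 7)  # two proposals in sample 0
boxes_sample1 = torch.rand(3, 7)  # three proposals in sample 1
rois = bbox3d2roi([boxes_sample0, boxes_sample1])
print(rois.shape)  # torch.Size([5, 8])
print(rois[:, 0])  # tensor([0., 0., 1., 1., 1.])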
......@@ -731,7 +731,6 @@ def kitti_eval(gt_annos,
result += 'aos AP:{:.2f}, {:.2f}, {:.2f}\n'.format(*mAPaos[:, 0])
# prepare results for logger
ret_dict['Overall'] = dict()
for idx in range(3):
postfix = f'{difficulty[idx]}'
if mAP3d is not None:
......
......@@ -231,19 +231,15 @@ class PartA2RPNHead(SECONDHead):
labels = labels[inds]
scores = scores[inds]
cls_scores = cls_scores[inds]
dir_scores = dir_scores[inds]
return dict(
box3d_lidar=bboxes.cpu(),
scores=scores.cpu(),
label_preds=labels.cpu(),
cls_preds=cls_scores.cpu(
) # raw scores with shape [max_num, cls_num]
box3d_lidar=bboxes,
scores=scores,
label_preds=labels,
cls_preds=cls_scores # raw scores [max_num, cls_num]
)
else:
return dict(
box3d_lidar=mlvl_bboxes.new_zeros([0,
self.box_code_size]).cpu(),
scores=mlvl_bboxes.new_zeros([0]).cpu(),
label_preds=mlvl_bboxes.new_zeros([0]).cpu(),
cls_preds=mlvl_bboxes.new_zeros([0, mlvl_cls_score.shape[-1]
]).cpu())
box3d_lidar=mlvl_bboxes.new_zeros([0, self.box_code_size]),
scores=mlvl_bboxes.new_zeros([0]),
label_preds=mlvl_bboxes.new_zeros([0]),
cls_preds=mlvl_bboxes.new_zeros([0, mlvl_cls_score.shape[-1]]))
......@@ -258,9 +258,9 @@ class SECONDHead(nn.Module, AnchorTrainMixin):
dir_weights_list,
num_total_samples=num_total_samples)
return dict(
loss_cls_3d=losses_cls,
loss_bbox_3d=losses_bbox,
loss_dir_3d=losses_dir)
loss_rpn_cls=losses_cls,
loss_rpn_bbox=losses_bbox,
loss_rpn_dir=losses_dir)
def get_bboxes(self,
cls_scores,
......
......@@ -34,11 +34,13 @@ class PartA2(TwoStageDetector):
self.middle_encoder = builder.build_middle_encoder(middle_encoder)
def extract_feat(self, points, img_meta):
voxels, num_points, coors = self.voxelize(points)
voxel_dict = dict(voxels=voxels, num_points=num_points, coors=coors)
voxel_features = self.voxel_encoder(voxels, num_points, coors)
batch_size = coors[-1, 0].item() + 1
feats_dict = self.middle_encoder(voxel_features, coors, batch_size)
voxel_dict = self.voxelize(points)
voxel_features = self.voxel_encoder(voxel_dict['voxels'],
voxel_dict['num_points'],
voxel_dict['coors'])
batch_size = voxel_dict['coors'][-1, 0].item() + 1
feats_dict = self.middle_encoder(voxel_features, voxel_dict['coors'],
batch_size)
x = self.backbone(feats_dict['spatial_features'])
if self.with_neck:
neck_feats = self.neck(x)
......@@ -47,20 +49,33 @@ class PartA2(TwoStageDetector):
@torch.no_grad()
def voxelize(self, points):
voxels, coors, num_points = [], [], []
voxels, coors, num_points, voxel_centers = [], [], [], []
for res in points:
res_voxels, res_coors, res_num_points = self.voxel_layer(res)
res_voxel_centers = (
res_coors[:, [2, 1, 0]] + 0.5) * res_voxels.new_tensor(
self.voxel_layer.voxel_size) + res_voxels.new_tensor(
self.voxel_layer.point_cloud_range[0:3])
voxels.append(res_voxels)
coors.append(res_coors)
num_points.append(res_num_points)
voxel_centers.append(res_voxel_centers)
voxels = torch.cat(voxels, dim=0)
num_points = torch.cat(num_points, dim=0)
voxel_centers = torch.cat(voxel_centers, dim=0)
coors_batch = []
for i, coor in enumerate(coors):
coor_pad = F.pad(coor, (1, 0), mode='constant', value=i)
coors_batch.append(coor_pad)
coors_batch = torch.cat(coors_batch, dim=0)
return voxels, num_points, coors_batch
voxel_dict = dict(
voxels=voxels,
num_points=num_points,
coors=coors_batch,
voxel_centers=voxel_centers)
return voxel_dict
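# The voxel-center computation above is plain index-to-metric arithmetic.
# A standalone sketch with this config's voxel_size and point_cloud_range
# (coors are stored as (z, y, x) indices, hence the [2, 1, 0] reorder):
import torch

voxel_size = torch.tensor([0.05, 0.05, 0.1])
pc_range_min = torch.tensor([0., -40., -3.])
coors = torch.tensor([[0, 0, 0], [40, 800, 704]])  # (z_idx, y_idx, x_idx)
centers = (coors[:, [2, 1, 0]].float() + 0.5) * voxel_size + pc_range_min
print(centers)  # [[0.025, -39.975, -2.95], [35.225, 0.025, 1.05]]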
def forward_train(self,
points,
......@@ -69,7 +84,6 @@ class PartA2(TwoStageDetector):
gt_labels_3d,
gt_bboxes_ignore=None,
proposals=None):
# TODO: complete it
feats_dict, voxels_dict = self.extract_feat(points, img_meta)
losses = dict()
......@@ -86,7 +100,13 @@ class PartA2(TwoStageDetector):
proposal_inputs = rpn_outs + (img_meta, proposal_cfg)
proposal_list = self.rpn_head.get_bboxes(*proposal_inputs)
else:
proposal_list = proposals # noqa: F841
proposal_list = proposals
roi_losses = self.roi_head.forward_train(feats_dict, voxels_dict,
img_meta, proposal_list,
gt_bboxes_3d, gt_labels_3d)
losses.update(roi_losses)
return losses
......@@ -102,16 +122,18 @@ class PartA2(TwoStageDetector):
def simple_test(self,
points,
img_meta,
gt_bboxes_3d=None,
gt_bboxes_3d,
proposals=None,
rescale=False):
feats_dict, voxels_dict = self.extract_feat(points, img_meta)
# TODO: complete it
if proposals is None:
proposal_list = self.simple_test_rpn(feats_dict['neck_feats'],
img_meta, self.test_cfg.rpn)
if self.with_rpn:
rpn_outs = self.rpn_head(feats_dict['neck_feats'])
proposal_cfg = self.test_cfg.rpn
bbox_inputs = rpn_outs + (img_meta, proposal_cfg)
proposal_list = self.rpn_head.get_bboxes(*bbox_inputs)
else:
proposal_list = proposals
return self.roi_head.simple_test(
feats_dict, proposal_list, img_meta, rescale=rescale)
return self.roi_head.simple_test(feats_dict, voxels_dict, img_meta,
proposal_list)
from .base_3droi_head import Base3DRoIHead
from .bbox_heads import PartA2BboxHead
from .mask_heads import PointwiseSemanticHead
from .part_aggregation_roi_head import PartAggregationROIHead
from .roi_extractors import Single3DRoIAwareExtractor
__all__ = ['PointwiseSemanticHead']
__all__ = [
'Base3DRoIHead', 'PartAggregationROIHead', 'PointwiseSemanticHead',
'Single3DRoIAwareExtractor', 'PartA2BboxHead'
]
from abc import ABCMeta, abstractmethod
import torch.nn as nn
class Base3DRoIHead(nn.Module, metaclass=ABCMeta):
"""Base class for 3d RoIHeads"""
def __init__(self,
bbox_head=None,
mask_roi_extractor=None,
mask_head=None,
train_cfg=None,
test_cfg=None):
super(Base3DRoIHead, self).__init__()
self.train_cfg = train_cfg
self.test_cfg = test_cfg
if bbox_head is not None:
self.init_bbox_head(bbox_head)
if mask_head is not None:
self.init_mask_head(mask_roi_extractor, mask_head)
self.init_assigner_sampler()
@property
def with_bbox(self):
return hasattr(self, 'bbox_head') and self.bbox_head is not None
@property
def with_mask(self):
return hasattr(self, 'mask_head') and self.mask_head is not None
@abstractmethod
def init_weights(self, pretrained):
pass
@abstractmethod
def init_bbox_head(self):
pass
@abstractmethod
def init_mask_head(self):
pass
@abstractmethod
def init_assigner_sampler(self):
pass
@abstractmethod
def forward_train(self,
x,
img_meta,
proposal_list,
gt_bboxes,
gt_labels,
gt_bboxes_ignore=None,
gt_masks=None,
**kwargs):
"""Forward function during training"""
pass
def simple_test(self,
x,
proposal_list,
img_meta,
proposals=None,
rescale=False,
**kwargs):
"""Test without augmentation."""
pass
def aug_test(self, x, proposal_list, img_metas, rescale=False, **kwargs):
"""Test with augmentations.
If rescale is False, then returned bboxes and masks will fit the scale
of imgs[0].
"""
pass
......@@ -2,8 +2,9 @@ from mmdet.models.roi_heads.bbox_heads import (BBoxHead, ConvFCBBoxHead,
DoubleConvFCBBoxHead,
Shared2FCBBoxHead,
Shared4Conv1FCBBoxHead)
from .parta2_bbox_head import PartA2BboxHead
__all__ = [
'BBoxHead', 'ConvFCBBoxHead', 'Shared2FCBBoxHead',
'Shared4Conv1FCBBoxHead', 'DoubleConvFCBBoxHead'
'Shared4Conv1FCBBoxHead', 'DoubleConvFCBBoxHead', 'PartA2BboxHead'
]
import numpy as np
import torch
import torch.nn as nn
from mmcv.cnn import ConvModule, build_norm_layer, normal_init, xavier_init
import mmdet3d.ops.spconv as spconv
from mmdet3d.core import build_bbox_coder, multi_apply
from mmdet3d.core.bbox import box_torch_ops
from mmdet3d.models.builder import build_loss
from mmdet3d.ops.iou3d.iou3d_utils import (boxes3d_to_bev_torch_lidar, nms_gpu,
nms_normal_gpu)
from mmdet.models import HEADS
@HEADS.register_module()
class PartA2BboxHead(nn.Module):
"""PartA2 rcnn box head.
Args:
num_classes (int): The number of classes to prediction.
seg_in_channels (int): Input channels of segmentation
convolution layer.
part_in_channels (int): Input channels of part convolution layer.
seg_conv_channels (list(int)): Out channels of each
segmentation convolution layer.
part_conv_channels (list(int)): Out channels of each
part convolution layer.
merge_conv_channels (list(int)): Out channels of each
feature merged convolution layer.
down_conv_channels (list(int)): Out channels of each
downsampled convolution layer.
shared_fc_channels (list(int)): Out channels of each shared fc layer.
cls_channels (list(int)): Out channels of each classification layer.
reg_channels (list(int)): Out channels of each regression layer.
dropout_ratio (float): Dropout ratio of classification and
regression layers.
roi_feat_size (int): The size of pooled roi features.
with_corner_loss (bool): Whether to use corner loss or not.
bbox_coder (BaseBBoxCoder): Bbox coder for box head.
conv_cfg (dict): Config dict of convolutional layers
norm_cfg (dict): Config dict of normalization layers
loss_bbox (dict): Config dict of box regression loss.
loss_cls (dict): Config dict of classification loss.
"""
def __init__(self,
num_classes,
seg_in_channels,
part_in_channels,
seg_conv_channels=None,
part_conv_channels=None,
merge_conv_channels=None,
down_conv_channels=None,
shared_fc_channels=None,
cls_channels=None,
reg_channels=None,
dropout_ratio=0.1,
roi_feat_size=14,
with_corner_loss=True,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
conv_cfg=dict(type='Conv1d'),
norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
loss_bbox=dict(
type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
loss_cls=dict(
type='CrossEntropyLoss',
use_sigmoid=True,
reduction='none',
loss_weight=1.0)):
super(PartA2BboxHead, self).__init__()
self.num_classes = num_classes
self.with_corner_loss = with_corner_loss
self.bbox_coder = build_bbox_coder(bbox_coder)
self.loss_bbox = build_loss(loss_bbox)
self.loss_cls = build_loss(loss_cls)
self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)
assert down_conv_channels[-1] == shared_fc_channels[0]
# init layers
block = self.post_act_block
part_channel_last = part_in_channels
part_conv = []
for i, channel in enumerate(part_conv_channels):
part_conv.append(
block(
part_channel_last,
channel,
3,
padding=1,
norm_cfg=norm_cfg,
indice_key=f'rcnn_part{i}'))
part_channel_last = channel
self.part_conv = spconv.SparseSequential(*part_conv)
seg_channel_last = seg_in_channels
seg_conv = []
for i, channel in enumerate(seg_conv_channels):
seg_conv.append(
block(
seg_channel_last,
channel,
3,
padding=1,
norm_cfg=norm_cfg,
indice_key=f'rcnn_seg{i}'))
seg_channel_last = channel
self.seg_conv = spconv.SparseSequential(*seg_conv)
self.conv_down = spconv.SparseSequential()
merge_conv_channel_last = part_channel_last + seg_channel_last
merge_conv = []
for i, channel in enumerate(merge_conv_channels):
merge_conv.append(
block(
merge_conv_channel_last,
channel,
3,
padding=1,
norm_cfg=norm_cfg,
indice_key='rcnn_down0'))
merge_conv_channel_last = channel
down_conv_channel_last = merge_conv_channel_last
conv_down = []
for i, channel in enumerate(down_conv_channels):
conv_down.append(
block(
down_conv_channel_last,
channel,
3,
padding=1,
norm_cfg=norm_cfg,
indice_key='rcnn_down1'))
down_conv_channel_last = channel
self.conv_down.add_module('merge_conv',
spconv.SparseSequential(*merge_conv))
self.conv_down.add_module(
'max_pool3d', spconv.SparseMaxPool3d(kernel_size=2, stride=2))
self.conv_down.add_module('down_conv',
spconv.SparseSequential(*conv_down))
shared_fc_list = []
pool_size = roi_feat_size // 2
pre_channel = shared_fc_channels[0] * pool_size**3
for k in range(1, len(shared_fc_channels)):
shared_fc_list.append(
ConvModule(
pre_channel,
shared_fc_channels[k],
1,
padding=0,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg,
inplace=True))
pre_channel = shared_fc_channels[k]
if k != len(shared_fc_channels) - 1 and dropout_ratio > 0:
shared_fc_list.append(nn.Dropout(dropout_ratio))
self.shared_fc = nn.Sequential(*shared_fc_list)
# Classification layer
channel_in = shared_fc_channels[-1]
cls_channel = 1
cls_layers = []
pre_channel = channel_in
for k in range(0, len(cls_channels)):
cls_layers.append(
ConvModule(
pre_channel,
cls_channels[k],
1,
padding=0,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg,
inplace=True))
pre_channel = cls_channels[k]
cls_layers.append(
ConvModule(
pre_channel,
cls_channel,
1,
padding=0,
conv_cfg=conv_cfg,
act_cfg=None))
if dropout_ratio >= 0:
cls_layers.insert(1, nn.Dropout(dropout_ratio))
self.conv_cls = nn.Sequential(*cls_layers)
# Regression layer
reg_layers = []
pre_channel = channel_in
for k in range(0, len(reg_channels)):
reg_layers.append(
ConvModule(
pre_channel,
reg_channels[k],
1,
padding=0,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg,
inplace=True))
pre_channel = reg_channels[k]
reg_layers.append(
ConvModule(
pre_channel,
self.bbox_coder.code_size,
1,
padding=0,
conv_cfg=conv_cfg,
act_cfg=None))
if dropout_ratio >= 0:
reg_layers.insert(1, nn.Dropout(dropout_ratio))
self.conv_reg = nn.Sequential(*reg_layers)
self.init_weights()
def init_weights(self):
for m in self.modules():
if isinstance(m, (nn.Conv2d, nn.Conv1d)):
xavier_init(m, distribution='uniform')
normal_init(self.conv_reg[-1].conv, mean=0, std=0.001)
def post_act_block(self,
in_channels,
out_channels,
kernel_size,
indice_key,
stride=1,
padding=0,
conv_type='subm',
norm_cfg=None):
"""Make post activate sparse convolution block.
Args:
in_channels (int): the number of input channels
out_channels (int): the number of out channels
kernel_size (int): kernel size of convolution
indice_key (str): the indice key used for sparse tensor
stride (int): the stride of convolution
padding (int or list[int]): the padding number of input
conv_type (str): conv type in 'subm', 'spconv' or 'inverseconv'
norm_cfg (dict[str]): config of normalization layer
Returns:
spconv.SparseSequential: post-activation sparse convolution block.
"""
# TODO: clean up post_act_block by reusing existing bottleneck blocks.
assert conv_type in ['subm', 'spconv', 'inverseconv']
if conv_type == 'subm':
m = spconv.SparseSequential(
spconv.SubMConv3d(
in_channels,
out_channels,
kernel_size,
bias=False,
indice_key=indice_key),
build_norm_layer(norm_cfg, out_channels)[1],
nn.ReLU(inplace=True))
elif conv_type == 'spconv':
m = spconv.SparseSequential(
spconv.SparseConv3d(
in_channels,
out_channels,
kernel_size,
stride=stride,
padding=padding,
bias=False,
indice_key=indice_key),
build_norm_layer(norm_cfg, out_channels)[1],
nn.ReLU(inplace=True))
elif conv_type == 'inverseconv':
m = spconv.SparseSequential(
spconv.SparseInverseConv3d(
in_channels,
out_channels,
kernel_size,
bias=False,
indice_key=indice_key),
build_norm_layer(norm_cfg, out_channels)[1],
nn.ReLU(inplace=True))
else:
raise NotImplementedError
return m
def forward(self, seg_feats, part_feats):
# (B * N, out_x, out_y, out_z, 4)
rcnn_batch_size = part_feats.shape[0]
# transform to sparse tensors
sparse_shape = part_feats.shape[1:4]
# (non_empty_num, 4) ==> [bs_idx, x_idx, y_idx, z_idx]
sparse_idx = part_feats.sum(dim=-1).nonzero()
part_features = part_feats[sparse_idx[:, 0], sparse_idx[:, 1],
sparse_idx[:, 2], sparse_idx[:, 3]]
seg_features = seg_feats[sparse_idx[:, 0], sparse_idx[:, 1],
sparse_idx[:, 2], sparse_idx[:, 3]]
coords = sparse_idx.int()
part_features = spconv.SparseConvTensor(part_features, coords,
sparse_shape, rcnn_batch_size)
seg_features = spconv.SparseConvTensor(seg_features, coords,
sparse_shape, rcnn_batch_size)
# forward rcnn network
x_part = self.part_conv(part_features)
x_rpn = self.seg_conv(seg_features)
merged_feature = torch.cat((x_rpn.features, x_part.features),
dim=1) # (N, C)
shared_feature = spconv.SparseConvTensor(merged_feature, coords,
sparse_shape, rcnn_batch_size)
x = self.conv_down(shared_feature)
shared_feature = x.dense().view(rcnn_batch_size, -1, 1)
shared_feature = self.shared_fc(shared_feature)
cls_score = self.conv_cls(shared_feature).transpose(
1, 2).contiguous().squeeze(dim=1) # (B, 1)
bbox_pred = self.conv_reg(shared_feature).transpose(
1, 2).contiguous().squeeze(dim=1) # (B, C)
return cls_score, bbox_pred
def loss(self, cls_score, bbox_pred, rois, labels, bbox_targets,
pos_gt_bboxes, reg_mask, label_weights, bbox_weights):
losses = dict()
rcnn_batch_size = cls_score.shape[0]
# calculate class loss
cls_flat = cls_score.view(-1)
loss_cls = self.loss_cls(cls_flat, labels, label_weights)
losses['loss_cls'] = loss_cls
# calculate regression loss
code_size = self.bbox_coder.code_size
pos_inds = (reg_mask > 0)
if not pos_inds.any():
# fake a part loss
losses['loss_bbox'] = loss_cls.new_tensor(0)
if self.with_corner_loss:
losses['loss_corner'] = loss_cls.new_tensor(0)
else:
pos_bbox_pred = bbox_pred.view(rcnn_batch_size, -1)[pos_inds]
bbox_weights_flat = bbox_weights[pos_inds].view(-1, 1).repeat(
1, pos_bbox_pred.shape[-1])
loss_bbox = self.loss_bbox(
pos_bbox_pred.unsqueeze(dim=0), bbox_targets.unsqueeze(dim=0),
bbox_weights_flat.unsqueeze(dim=0))
losses['loss_bbox'] = loss_bbox
if self.with_corner_loss:
pos_roi_boxes3d = rois[..., 1:].view(-1, code_size)[pos_inds]
pos_roi_boxes3d = pos_roi_boxes3d.view(-1, code_size)
batch_anchors = pos_roi_boxes3d.clone().detach()
pos_rois_rotation = pos_roi_boxes3d[..., 6].view(-1)
roi_xyz = pos_roi_boxes3d[..., 0:3].view(-1, 3)
batch_anchors[..., 0:3] = 0
# decode boxes
pred_boxes3d = self.bbox_coder.decode(
batch_anchors,
pos_bbox_pred.view(-1, code_size)).view(-1, code_size)
pred_boxes3d[..., 0:3] = box_torch_ops.rotation_3d_in_axis(
pred_boxes3d[..., 0:3].unsqueeze(1),
(pos_rois_rotation + np.pi / 2),
axis=2).squeeze(1)
pred_boxes3d[:, 0:3] += roi_xyz
# calculate corner loss
loss_corner = self.get_corner_loss_lidar(
pred_boxes3d, pos_gt_bboxes)
losses['loss_corner'] = loss_corner
return losses
def get_targets(self, sampling_results, rcnn_train_cfg, concat=True):
pos_bboxes_list = [res.pos_bboxes for res in sampling_results]
pos_gt_bboxes_list = [res.pos_gt_bboxes for res in sampling_results]
iou_list = [res.iou for res in sampling_results]
targets = multi_apply(
self._get_target_single,
pos_bboxes_list,
pos_gt_bboxes_list,
iou_list,
cfg=rcnn_train_cfg)
(label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,
bbox_weights) = targets
if concat:
label = torch.cat(label, 0)
bbox_targets = torch.cat(bbox_targets, 0)
pos_gt_bboxes = torch.cat(pos_gt_bboxes, 0)
reg_mask = torch.cat(reg_mask, 0)
label_weights = torch.cat(label_weights, 0)
label_weights /= torch.clamp(label_weights.sum(), min=1.0)
bbox_weights = torch.cat(bbox_weights, 0)
bbox_weights /= torch.clamp(bbox_weights.sum(), min=1.0)
return (label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,
bbox_weights)
def _get_target_single(self, pos_bboxes, pos_gt_bboxes, ious, cfg):
cls_pos_mask = ious > cfg.cls_pos_thr
cls_neg_mask = ious < cfg.cls_neg_thr
interval_mask = (cls_pos_mask == 0) & (cls_neg_mask == 0)
# iou regression target
label = (cls_pos_mask > 0).float()
label[interval_mask] = ious[interval_mask] * 2 - 0.5
# label weights
label_weights = (label >= 0).float()
# box regression target
reg_mask = pos_bboxes.new_zeros(ious.size(0)).long()
reg_mask[0:pos_gt_bboxes.size(0)] = 1
bbox_weights = (reg_mask > 0).float()
if reg_mask.bool().any():
pos_gt_bboxes_ct = pos_gt_bboxes.clone().detach()
roi_center = pos_bboxes[..., 0:3]
roi_ry = pos_bboxes[..., 6] % (2 * np.pi)
# canonical transformation
pos_gt_bboxes_ct[..., 0:3] -= roi_center
pos_gt_bboxes_ct[..., 6] -= roi_ry
pos_gt_bboxes_ct[..., 0:3] = box_torch_ops.rotation_3d_in_axis(
pos_gt_bboxes_ct[..., 0:3].unsqueeze(1),
-(roi_ry + np.pi / 2),
axis=2).squeeze(1)
# flip orientation if rois have opposite orientation
ry_label = pos_gt_bboxes_ct[..., 6] % (2 * np.pi) # 0 ~ 2pi
opposite_flag = (ry_label > np.pi * 0.5) & (ry_label < np.pi * 1.5)
ry_label[opposite_flag] = (ry_label[opposite_flag] + np.pi) % (
2 * np.pi) # (0 ~ pi/2, 3pi/2 ~ 2pi)
flag = ry_label > np.pi
ry_label[flag] = ry_label[flag] - np.pi * 2 # (-pi/2, pi/2)
ry_label = torch.clamp(ry_label, min=-np.pi / 2, max=np.pi / 2)
pos_gt_bboxes_ct[..., 6] = ry_label
rois_anchor = pos_bboxes.clone().detach()
rois_anchor[:, 0:3] = 0
rois_anchor[:, 6] = 0
bbox_targets = self.bbox_coder.encode(rois_anchor,
pos_gt_bboxes_ct)
else:
# no fg bbox
bbox_targets = pos_gt_bboxes.new_empty((0, 7))
return (label, bbox_targets, pos_gt_bboxes, reg_mask, label_weights,
bbox_weights)
def get_corner_loss_lidar(self, pred_bbox3d, gt_bbox3d, delta=1):
"""Calculate corner loss of given boxes.
Args:
pred_bbox3d (FloatTensor): predicted boxes with shape (N, 7).
gt_bbox3d (FloatTensor): gt boxes with shape (N, 7).
Returns:
FloatTensor: Calculated corner loss with shape (N).
"""
assert pred_bbox3d.shape[0] == gt_bbox3d.shape[0]
pred_box_corners = box_torch_ops.boxes3d_to_corners3d_lidar_torch(
pred_bbox3d)
gt_box_corners = box_torch_ops.boxes3d_to_corners3d_lidar_torch(
gt_bbox3d)
gt_bbox3d_flip = gt_bbox3d.clone()
gt_bbox3d_flip[:, 6] += np.pi
gt_box_corners_flip = box_torch_ops.boxes3d_to_corners3d_lidar_torch(
gt_bbox3d_flip)
corner_dist = torch.min(
torch.norm(pred_box_corners - gt_box_corners, dim=2),
torch.norm(pred_box_corners - gt_box_corners_flip,
dim=2)) # (N, 8)
# huber loss
abs_error = torch.abs(corner_dist)
quadratic = torch.clamp(abs_error, max=delta)
linear = (abs_error - quadratic)
corner_loss = 0.5 * quadratic**2 + delta * linear
return corner_loss.mean(dim=1)
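# The tail of get_corner_loss_lidar is a standard Huber loss on the
# per-corner distance; a standalone check of the piecewise form with delta=1:
import torch

delta = 1.0
corner_dist = torch.tensor([0.2, 1.0, 3.0])
quadratic = torch.clamp(corner_dist.abs(), max=delta)
linear = corner_dist.abs() - quadratic
print(0.5 * quadratic ** 2 + delta * linear)  # tensor([0.0200, 0.5000, 2.5000])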
def get_bboxes(self,
rois,
cls_score,
bbox_pred,
class_labels,
class_pred,
img_meta,
cfg=None):
roi_batch_id = rois[..., 0]
roi_boxes = rois[..., 1:] # boxes without batch id
batch_size = int(roi_batch_id.max().item() + 1)
# decode boxes
roi_ry = roi_boxes[..., 6].view(-1)
roi_xyz = roi_boxes[..., 0:3].view(-1, 3)
local_roi_boxes = roi_boxes.clone().detach()
local_roi_boxes[..., 0:3] = 0
rcnn_boxes3d = self.bbox_coder.decode(local_roi_boxes, bbox_pred)
rcnn_boxes3d[..., 0:3] = box_torch_ops.rotation_3d_in_axis(
rcnn_boxes3d[..., 0:3].unsqueeze(1), (roi_ry + np.pi / 2),
axis=2).squeeze(1)
rcnn_boxes3d[:, 0:3] += roi_xyz
# post processing
result_list = []
for batch_id in range(batch_size):
cur_class_labels = class_labels[batch_id]
cur_cls_score = cls_score[roi_batch_id == batch_id].view(-1)
cur_box_prob = class_pred[batch_id]
cur_rcnn_boxes3d = rcnn_boxes3d[roi_batch_id == batch_id]
selected = self.multi_class_nms(cur_box_prob, cur_rcnn_boxes3d,
cfg.score_thr, cfg.nms_thr,
cfg.use_rotate_nms)
selected_bboxes = cur_rcnn_boxes3d[selected]
selected_label_preds = cur_class_labels[selected]
if cfg.use_raw_score:
selected_scores = cur_cls_score[selected]
else:
selected_scores = torch.sigmoid(cur_cls_score)[selected]
cur_result = dict(
box3d_lidar=selected_bboxes.cpu(),
scores=selected_scores.cpu(),
label_preds=selected_label_preds.cpu(),
sample_idx=img_meta[batch_id]['sample_idx'])
result_list.append(cur_result)
return result_list
def multi_class_nms(self,
box_probs,
box_preds,
score_thr,
nms_thr,
use_rotate_nms=True):
normalized_scores = torch.sigmoid(box_probs)
if use_rotate_nms:
nms_func = nms_gpu
else:
nms_func = nms_normal_gpu
assert box_probs.shape[
1] == self.num_classes, f'box_probs shape: {str(box_probs.shape)}'
selected_list = []
selected_labels = []
boxes_for_nms = boxes3d_to_bev_torch_lidar(box_preds)
score_thresh = score_thr if isinstance(
score_thr, list) else [score_thr for x in range(self.num_classes)]
nms_thresh = nms_thr if isinstance(
nms_thr, list) else [nms_thr for x in range(self.num_classes)]
for k in range(0, self.num_classes):
class_scores_keep = normalized_scores[:, k] >= score_thresh[k]
if class_scores_keep.int().sum() > 0:
original_idxs = class_scores_keep.nonzero().view(-1)
cur_boxes_for_nms = boxes_for_nms[class_scores_keep]
cur_rank_scores = box_probs[class_scores_keep, k]
cur_selected = nms_func(cur_boxes_for_nms, cur_rank_scores,
nms_thresh[k])
if cur_selected.shape[0] == 0:
continue
selected_list.append(original_idxs[cur_selected])
selected_labels.append(
torch.full([cur_selected.shape[0]],
k + 1,
dtype=torch.int64,
device=box_preds.device))
selected = torch.cat(
selected_list, dim=0) if len(selected_list) > 0 else []
return selected
......@@ -126,22 +126,21 @@ class PointwiseSemanticHead(nn.Module):
part_targets = torch.cat(part_targets, dim=0)
return dict(seg_targets=seg_targets, part_targets=part_targets)
def loss(self, seg_preds, part_preds, seg_targets, part_targets):
def loss(self, semantic_results, semantic_targets):
"""Calculate point-wise segmentation and part prediction losses.
Args:
seg_preds (torch.Tensor): prediction of binary
segmentation with shape [voxel_num, 1].
part_preds (torch.Tensor): prediction of part
with shape [voxel_num, 3].
seg_targets (torch.Tensor): target of segmentation
with shape [voxel_num, 1].
part_targets (torch.Tensor): target of part with
shape [voxel_num, 3].
semantic_results (dict): Results from semantic head.
semantic_targets (dict): Targets of semantic results.
Returns:
dict: loss of segmentation and part prediction.
"""
seg_preds = semantic_results['seg_preds']
part_preds = semantic_results['part_preds']
seg_targets = semantic_targets['seg_targets']
part_targets = semantic_targets['part_targets']
pos_mask = (seg_targets > -1) & (seg_targets < self.num_classes)
binary_seg_target = pos_mask.long()
pos = pos_mask.float()
......
import torch.nn.functional as F
from mmdet3d.core import AssignResult
from mmdet3d.core.bbox import bbox3d2roi
from mmdet.core import build_assigner, build_sampler
from mmdet.models import HEADS
from ..builder import build_head, build_roi_extractor
from .base_3droi_head import Base3DRoIHead
@HEADS.register_module()
class PartAggregationROIHead(Base3DRoIHead):
"""Part aggregation roi head for PartA2"""
def __init__(self,
semantic_head,
num_classes=3,
seg_roi_extractor=None,
part_roi_extractor=None,
bbox_head=None,
train_cfg=None,
test_cfg=None):
super(PartAggregationROIHead, self).__init__(
bbox_head=bbox_head, train_cfg=train_cfg, test_cfg=test_cfg)
self.num_classes = num_classes
assert semantic_head is not None
self.semantic_head = build_head(semantic_head)
if seg_roi_extractor is not None:
self.seg_roi_extractor = build_roi_extractor(seg_roi_extractor)
if part_roi_extractor is not None:
self.part_roi_extractor = build_roi_extractor(part_roi_extractor)
self.init_assigner_sampler()
def init_weights(self, pretrained):
pass
def init_mask_head(self):
pass
def init_bbox_head(self, bbox_head):
self.bbox_head = build_head(bbox_head)
def init_assigner_sampler(self):
self.bbox_assigner = None
self.bbox_sampler = None
if self.train_cfg:
if isinstance(self.train_cfg.assigner, dict):
self.bbox_assigner = build_assigner(self.train_cfg.assigner)
elif isinstance(self.train_cfg.assigner, list):
self.bbox_assigner = [
build_assigner(res) for res in self.train_cfg.assigner
]
self.bbox_sampler = build_sampler(self.train_cfg.sampler)
@property
def with_semantic(self):
return hasattr(self,
'semantic_head') and self.semantic_head is not None
def forward_train(self, feats_dict, voxels_dict, img_meta, proposal_list,
gt_bboxes_3d, gt_labels_3d):
"""Training forward function of PartAggregationROIHead
Args:
feats_dict (dict): Contains features from the first stage.
voxels_dict (dict): Contains information of voxels.
img_meta (list[dict]): Meta info of each sample.
proposal_list (list[dict]): Proposal information from rpn.
gt_bboxes_3d (list[FloatTensor]): GT bboxes of each batch.
gt_labels_3d (list[LongTensor]): GT labels of each batch.
Returns:
dict: losses from each head.
"""
losses = dict()
if self.with_semantic:
semantic_results = self._semantic_forward_train(
feats_dict['seg_features'], voxels_dict, gt_bboxes_3d,
gt_labels_3d)
losses.update(semantic_results['loss_semantic'])
sample_results = self._assign_and_sample(proposal_list, gt_bboxes_3d,
gt_labels_3d)
if self.with_bbox:
bbox_results = self._bbox_forward_train(
feats_dict['seg_features'], semantic_results['part_feats'],
voxels_dict, sample_results)
losses.update(bbox_results['loss_bbox'])
return losses
def simple_test(self, feats_dict, voxels_dict, img_meta, proposal_list,
**kwargs):
"""Simple testing forward function of PartAggregationROIHead
Args:
feats_dict (dict): Contains features from the first stage.
voxels_dict (dict): Contains information of voxels.
img_meta (list[dict]): Meta info of each sample.
proposal_list (list[dict]): Proposal information from rpn.
Returns:
list[dict]: Bbox results of each batch.
"""
assert self.with_bbox, 'Bbox head must be implemented.'
assert self.with_semantic
semantic_results = self.semantic_head(feats_dict['seg_features'])
rois = bbox3d2roi([res['box3d_lidar'] for res in proposal_list])
label_preds = [res['label_preds'] for res in proposal_list]
cls_preds = [res['cls_preds'] for res in proposal_list]
bbox_results = self._bbox_forward(feats_dict['seg_features'],
semantic_results['part_feats'],
voxels_dict, rois)
bbox_list = self.bbox_head.get_bboxes(
rois,
bbox_results['cls_score'],
bbox_results['bbox_pred'],
label_preds,
cls_preds,
img_meta,
cfg=self.test_cfg)
return bbox_list
def _bbox_forward_train(self, seg_feats, part_feats, voxels_dict,
sampling_results):
rois = bbox3d2roi([res.bboxes for res in sampling_results])
bbox_results = self._bbox_forward(seg_feats, part_feats, voxels_dict,
rois)
bbox_targets = self.bbox_head.get_targets(sampling_results,
self.train_cfg)
loss_bbox = self.bbox_head.loss(bbox_results['cls_score'],
bbox_results['bbox_pred'], rois,
*bbox_targets)
bbox_results.update(loss_bbox=loss_bbox)
return bbox_results
def _bbox_forward(self, seg_feats, part_feats, voxels_dict, rois):
pooled_seg_feats = self.seg_roi_extractor(seg_feats,
voxels_dict['voxel_centers'],
voxels_dict['coors'][..., 0],
rois)
pooled_part_feats = self.part_roi_extractor(
part_feats, voxels_dict['voxel_centers'],
voxels_dict['coors'][..., 0], rois)
cls_score, bbox_pred = self.bbox_head(pooled_seg_feats,
pooled_part_feats)
bbox_results = dict(
cls_score=cls_score,
bbox_pred=bbox_pred,
pooled_seg_feats=pooled_seg_feats,
pooled_part_feats=pooled_part_feats)
return bbox_results
def _assign_and_sample(self, proposal_list, gt_bboxes_3d, gt_labels_3d):
sampling_results = []
# bbox assign
for batch_idx in range(len(proposal_list)):
cur_proposal_list = proposal_list[batch_idx]
cur_boxes = cur_proposal_list['box3d_lidar']
cur_label_preds = cur_proposal_list['label_preds']
cur_gt_bboxes = gt_bboxes_3d[batch_idx]
cur_gt_labels = gt_labels_3d[batch_idx]
batch_num_gts = 0
batch_gt_indis = cur_gt_labels.new_full((cur_boxes.shape[0], ),
0) # 0 is bg
batch_max_overlaps = cur_boxes.new_zeros(cur_boxes.shape[0])
batch_gt_labels = cur_gt_labels.new_full((cur_boxes.shape[0], ),
-1) # -1 is bg
if isinstance(self.bbox_assigner, list): # for multi classes
for i, assigner in enumerate(self.bbox_assigner):
gt_per_cls = (cur_gt_labels == i)
pred_per_cls = (cur_label_preds == i)
cur_assign_res = assigner.assign(
cur_boxes[pred_per_cls],
cur_gt_bboxes[gt_per_cls],
gt_labels=cur_gt_labels[gt_per_cls])
# gather assign_results in different class into one result
batch_num_gts += cur_assign_res.num_gts
# gt inds (1-based)
gt_inds_arange_pad = gt_per_cls.nonzero().view(-1) + 1
# pad 0 for indice unassigned
gt_inds_arange_pad = F.pad(
gt_inds_arange_pad, (1, 0), mode='constant', value=0)
# pad -1 for indice ignore
gt_inds_arange_pad = F.pad(
gt_inds_arange_pad, (1, 0), mode='constant', value=-1)
# convert to 0~gt_num+2 for indices
gt_inds_arange_pad += 1
# now 0 is bg, >1 is fg in batch_gt_indis
batch_gt_indis[pred_per_cls] = gt_inds_arange_pad[
cur_assign_res.gt_inds + 1] - 1
batch_max_overlaps[
pred_per_cls] = cur_assign_res.max_overlaps
batch_gt_labels[pred_per_cls] = cur_assign_res.labels
assign_result = AssignResult(batch_num_gts, batch_gt_indis,
batch_max_overlaps,
batch_gt_labels)
else: # for single class
assign_result = self.bbox_assigner.assign(
cur_boxes, cur_gt_bboxes, gt_labels=cur_gt_labels)
# sample boxes
sampling_result = self.bbox_sampler.sample(assign_result,
cur_boxes,
cur_gt_bboxes,
cur_gt_labels)
sampling_results.append(sampling_result)
return sampling_results
def _semantic_forward_train(self, x, voxels_dict, gt_bboxes_3d,
gt_labels_3d):
semantic_results = self.semantic_head(x)
semantic_targets = self.semantic_head.get_targets(
voxels_dict, gt_bboxes_3d, gt_labels_3d)
loss_semantic = self.semantic_head.loss(semantic_results,
semantic_targets)
semantic_results.update(loss_semantic=loss_semantic)
return semantic_results
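# Since the head is registered in HEADS, it can be built straight from the
# config dict, mirroring the updated config-build test below (the config path
# is the 3-class file referenced by the tests; adjust to your checkout):
from mmcv import Config
from mmdet3d.models.builder import build_head

cfg = Config.fromfile(
    'configs/kitti/hv_PartA2_secfpn_4x8_cyclic_80e_kitti-3d-3class.py')
roi_head = build_head(cfg.model.roi_head)
print(type(roi_head).__name__)  # PartAggregationROIHead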
......@@ -61,7 +61,10 @@ def test_config_build_detector():
assert detector.roi_head.with_mask == detector.with_mask
head_config = config_mod.model['roi_head']
_check_roi_head(head_config, detector.roi_head)
if head_config.type == 'PartAggregationROIHead':
check_parta2_roi_head(head_config, detector.roi_head)
else:
_check_roi_head(head_config, detector.roi_head)
# else:
# # for single stage detector
# # detectors must have bbox head
......@@ -319,3 +322,44 @@ def _check_bbox_head(bbox_cfg, bbox_head):
out_dim = (4 if bbox_cfg.reg_class_agnostic else 4 *
bbox_cfg.num_classes)
assert bbox_head.fc_reg.out_features == out_dim
def check_parta2_roi_head(config, head):
assert config['type'] == head.__class__.__name__
# check seg_roi_extractor
seg_roi_cfg = config.seg_roi_extractor
seg_roi_extractor = head.seg_roi_extractor
_check_parta2_roi_extractor(seg_roi_cfg, seg_roi_extractor)
# check part_roi_extractor
part_roi_cfg = config.part_roi_extractor
part_roi_extractor = head.part_roi_extractor
_check_parta2_roi_extractor(part_roi_cfg, part_roi_extractor)
# check bbox head infos
bbox_cfg = config.bbox_head
bbox_head = head.bbox_head
_check_parta2_bbox_head(bbox_cfg, bbox_head)
def _check_parta2_roi_extractor(config, roi_extractor):
assert config['type'] == roi_extractor.__class__.__name__
assert (config.roi_layer.out_size == roi_extractor.roi_layer.out_size)
assert (config.roi_layer.max_pts_per_voxel ==
roi_extractor.roi_layer.max_pts_per_voxel)
def _check_parta2_bbox_head(bbox_cfg, bbox_head):
import torch.nn as nn
if isinstance(bbox_cfg, list):
for single_bbox_cfg, single_bbox_head in zip(bbox_cfg, bbox_head):
_check_bbox_head(single_bbox_cfg, single_bbox_head)
elif isinstance(bbox_head, nn.ModuleList):
for single_bbox_head in bbox_head:
_check_bbox_head(bbox_cfg, single_bbox_head)
else:
assert bbox_cfg['type'] == bbox_head.__class__.__name__
assert bbox_cfg.seg_in_channels == bbox_head.seg_conv[0][0].in_channels
assert bbox_cfg.part_in_channels == bbox_head.part_conv[0][
0].in_channels
......@@ -103,18 +103,18 @@ def test_second_head_loss():
losses = self.loss(cls_score, bbox_pred, dir_cls_preds, gt_bboxes,
gt_labels, input_metas)
assert losses['loss_cls_3d'][0] > 0
assert losses['loss_bbox_3d'][0] > 0
assert losses['loss_dir_3d'][0] > 0
assert losses['loss_rpn_cls'][0] > 0
assert losses['loss_rpn_bbox'][0] > 0
assert losses['loss_rpn_dir'][0] > 0
# test empty ground truth case
gt_bboxes = list(torch.empty((2, 0, 7)).cuda())
gt_labels = list(torch.empty((2, 0)).cuda())
empty_gt_losses = self.loss(cls_score, bbox_pred, dir_cls_preds, gt_bboxes,
gt_labels, input_metas)
assert empty_gt_losses['loss_cls_3d'][0] > 0
assert empty_gt_losses['loss_bbox_3d'][0] == 0
assert empty_gt_losses['loss_dir_3d'][0] == 0
assert empty_gt_losses['loss_rpn_cls'][0] > 0
assert empty_gt_losses['loss_rpn_bbox'][0] == 0
assert empty_gt_losses['loss_rpn_dir'][0] == 0
def test_second_head_getboxes():
......@@ -147,7 +147,7 @@ def test_parta2_rpnhead_getboxes():
if not torch.cuda.is_available():
pytest.skip('test requires GPU and torch+cuda')
rpn_head_cfg, proposal_cfg = _get_rpn_head_cfg(
'kitti/hv_PartA2_secfpn_4x8_cosine_80e_kitti-3d-3class.py')
'kitti/hv_PartA2_secfpn_4x8_cyclic_80e_kitti-3d-3class.py')
from mmdet3d.models.builder import build_head
self = build_head(rpn_head_cfg)
......
......@@ -62,9 +62,7 @@ def test_PointwiseSemanticHead():
[voxel_features.shape[0], 3])
# test loss
loss_dict = self.loss(feats_dict['seg_preds'], feats_dict['part_preds'],
target_dict['seg_targets'],
target_dict['part_targets'])
loss_dict = self.loss(feats_dict, target_dict)
assert loss_dict['loss_seg'] > 0
assert loss_dict['loss_part'] == 0 # no points in gt_boxes
total_loss = loss_dict['loss_seg'] + loss_dict['loss_part']
......