Commit fb2120b9 authored by zhangwenwei's avatar zhangwenwei
Browse files

Merge branch 'dense_heads' into 'master'

Refactor dense heads

See merge request open-mmlab/mmdet.3d!32
parents 8c5dd998 e3cd3c1d
...@@ -65,12 +65,11 @@ model = dict( ...@@ -65,12 +65,11 @@ model = dict(
out_channels=[256, 256], out_channels=[256, 256],
), ),
pts_bbox_head=dict( pts_bbox_head=dict(
type='SECONDHead', type='Anchor3DHead',
class_name=['Pedestrian', 'Cyclist', 'Car'], num_classes=3,
in_channels=512, in_channels=512,
feat_channels=512, feat_channels=512,
use_direction_classifier=True, use_direction_classifier=True,
encode_bg_as_zeros=True,
anchor_generator=dict( anchor_generator=dict(
type='Anchor3DRangeGenerator', type='Anchor3DRangeGenerator',
ranges=[ ranges=[
...@@ -85,7 +84,7 @@ model = dict( ...@@ -85,7 +84,7 @@ model = dict(
assigner_per_size=True, assigner_per_size=True,
diff_rad_by_sin=True, diff_rad_by_sin=True,
assign_per_class=True, assign_per_class=True,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', ), bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
loss_cls=dict( loss_cls=dict(
type='FocalLoss', type='FocalLoss',
use_sigmoid=True, use_sigmoid=True,
...@@ -132,8 +131,8 @@ test_cfg = dict( ...@@ -132,8 +131,8 @@ test_cfg = dict(
nms_thr=0.01, nms_thr=0.01,
score_thr=0.3, score_thr=0.3,
min_bbox_size=0, min_bbox_size=0,
post_center_limit_range=[0, -40, -3, 70.4, 40, 0.0], nms_pre=100,
), ) max_num=50))
# dataset settings # dataset settings
dataset_type = 'KittiDataset' dataset_type = 'KittiDataset'
......
...@@ -37,12 +37,11 @@ model = dict( ...@@ -37,12 +37,11 @@ model = dict(
out_channels=[128, 128, 128], out_channels=[128, 128, 128],
), ),
bbox_head=dict( bbox_head=dict(
type='SECONDHead', type='Anchor3DHead',
class_name=['Car'], num_classes=1,
in_channels=384, in_channels=384,
feat_channels=384, feat_channels=384,
use_direction_classifier=True, use_direction_classifier=True,
encode_bg_as_zeros=True,
anchor_generator=dict( anchor_generator=dict(
type='Anchor3DRangeGenerator', type='Anchor3DRangeGenerator',
ranges=[[0, -39.68, -1.78, 69.12, 39.68, -1.78]], ranges=[[0, -39.68, -1.78, 69.12, 39.68, -1.78]],
...@@ -51,7 +50,7 @@ model = dict( ...@@ -51,7 +50,7 @@ model = dict(
rotations=[0, 1.57], rotations=[0, 1.57],
reshape_out=True), reshape_out=True),
diff_rad_by_sin=True, diff_rad_by_sin=True,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', ), bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
loss_cls=dict( loss_cls=dict(
type='FocalLoss', type='FocalLoss',
use_sigmoid=True, use_sigmoid=True,
...@@ -81,10 +80,8 @@ test_cfg = dict( ...@@ -81,10 +80,8 @@ test_cfg = dict(
nms_thr=0.01, nms_thr=0.01,
score_thr=0.3, score_thr=0.3,
min_bbox_size=0, min_bbox_size=0,
post_center_limit_range=point_cloud_range, nms_pre=100,
# soft-nms is also supported for rcnn testing max_num=50)
# e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
# dataset settings # dataset settings
dataset_type = 'KittiDataset' dataset_type = 'KittiDataset'
......
...@@ -34,12 +34,11 @@ model = dict( ...@@ -34,12 +34,11 @@ model = dict(
out_channels=[256, 256], out_channels=[256, 256],
), ),
bbox_head=dict( bbox_head=dict(
type='SECONDHead', type='Anchor3DHead',
class_name=['Pedestrian', 'Cyclist', 'Car'], num_classes=3,
in_channels=512, in_channels=512,
feat_channels=512, feat_channels=512,
use_direction_classifier=True, use_direction_classifier=True,
encode_bg_as_zeros=True,
anchor_generator=dict( anchor_generator=dict(
type='Anchor3DRangeGenerator', type='Anchor3DRangeGenerator',
ranges=[ ranges=[
...@@ -54,7 +53,7 @@ model = dict( ...@@ -54,7 +53,7 @@ model = dict(
diff_rad_by_sin=True, diff_rad_by_sin=True,
assigner_per_size=True, assigner_per_size=True,
assign_per_class=True, assign_per_class=True,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', ), bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
loss_cls=dict( loss_cls=dict(
type='FocalLoss', type='FocalLoss',
use_sigmoid=True, use_sigmoid=True,
...@@ -100,10 +99,8 @@ test_cfg = dict( ...@@ -100,10 +99,8 @@ test_cfg = dict(
nms_thr=0.01, nms_thr=0.01,
score_thr=0.3, score_thr=0.3,
min_bbox_size=0, min_bbox_size=0,
post_center_limit_range=[0, -40, -3, 70.4, 40, 0.0], nms_pre=100,
# soft-nms is also supported for rcnn testing max_num=50)
# e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
# dataset settings # dataset settings
dataset_type = 'KittiDataset' dataset_type = 'KittiDataset'
......
...@@ -34,12 +34,11 @@ model = dict( ...@@ -34,12 +34,11 @@ model = dict(
out_channels=[256, 256], out_channels=[256, 256],
), ),
bbox_head=dict( bbox_head=dict(
type='SECONDHead', type='Anchor3DHead',
class_name=['Car'], num_classes=1,
in_channels=512, in_channels=512,
feat_channels=512, feat_channels=512,
use_direction_classifier=True, use_direction_classifier=True,
encode_bg_as_zeros=True,
anchor_generator=dict( anchor_generator=dict(
type='Anchor3DRangeGenerator', type='Anchor3DRangeGenerator',
ranges=[[0, -40.0, -1.78, 70.4, 40.0, -1.78]], ranges=[[0, -40.0, -1.78, 70.4, 40.0, -1.78]],
...@@ -78,10 +77,8 @@ test_cfg = dict( ...@@ -78,10 +77,8 @@ test_cfg = dict(
nms_thr=0.01, nms_thr=0.01,
score_thr=0.3, score_thr=0.3,
min_bbox_size=0, min_bbox_size=0,
post_center_limit_range=[0, -40, -3, 70.4, 40, 0.0], nms_pre=100,
# soft-nms is also supported for rcnn testing max_num=50)
# e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
# dataset settings # dataset settings
dataset_type = 'KittiDataset' dataset_type = 'KittiDataset'
......
...@@ -33,11 +33,10 @@ model = dict( ...@@ -33,11 +33,10 @@ model = dict(
out_channels=[256, 256]), out_channels=[256, 256]),
rpn_head=dict( rpn_head=dict(
type='PartA2RPNHead', type='PartA2RPNHead',
class_name=['Pedestrian', 'Cyclist', 'Car'], num_classes=3,
in_channels=512, in_channels=512,
feat_channels=512, feat_channels=512,
use_direction_classifier=True, use_direction_classifier=True,
encode_bg_as_zeros=True,
anchor_generator=dict( anchor_generator=dict(
type='Anchor3DRangeGenerator', type='Anchor3DRangeGenerator',
ranges=[[0, -40.0, -0.6, 70.4, 40.0, -0.6], ranges=[[0, -40.0, -0.6, 70.4, 40.0, -0.6],
......
...@@ -33,11 +33,10 @@ model = dict( ...@@ -33,11 +33,10 @@ model = dict(
out_channels=[256, 256]), out_channels=[256, 256]),
rpn_head=dict( rpn_head=dict(
type='PartA2RPNHead', type='PartA2RPNHead',
class_name=['Car'], num_classes=1,
in_channels=512, in_channels=512,
feat_channels=512, feat_channels=512,
use_direction_classifier=True, use_direction_classifier=True,
encode_bg_as_zeros=True,
anchor_generator=dict( anchor_generator=dict(
type='Anchor3DRangeGenerator', type='Anchor3DRangeGenerator',
ranges=[[0, -40.0, -1.78, 70.4, 40.0, -1.78]], ranges=[[0, -40.0, -1.78, 70.4, 40.0, -1.78]],
...@@ -132,6 +131,7 @@ train_cfg = dict( ...@@ -132,6 +131,7 @@ train_cfg = dict(
rpn_proposal=dict( rpn_proposal=dict(
nms_pre=9000, nms_pre=9000,
nms_post=512, nms_post=512,
max_num=512,
nms_thr=0.8, nms_thr=0.8,
score_thr=0, score_thr=0,
use_rotate_nms=False), use_rotate_nms=False),
...@@ -158,6 +158,7 @@ test_cfg = dict( ...@@ -158,6 +158,7 @@ test_cfg = dict(
rpn=dict( rpn=dict(
nms_pre=1024, nms_pre=1024,
nms_post=100, nms_post=100,
max_num=100,
nms_thr=0.7, nms_thr=0.7,
score_thr=0, score_thr=0,
use_rotate_nms=True), use_rotate_nms=True),
......
...@@ -36,12 +36,11 @@ model = dict( ...@@ -36,12 +36,11 @@ model = dict(
out_channels=[128, 128, 128], out_channels=[128, 128, 128],
), ),
bbox_head=dict( bbox_head=dict(
type='SECONDHead', type='Anchor3DHead',
class_name=['Car'], num_classes=1,
in_channels=384, in_channels=384,
feat_channels=384, feat_channels=384,
use_direction_classifier=True, use_direction_classifier=True,
encode_bg_as_zeros=True,
anchor_generator=dict( anchor_generator=dict(
type='Anchor3DRangeGenerator', type='Anchor3DRangeGenerator',
ranges=[[0, -39.68, -1.78, 69.12, 39.68, -1.78]], ranges=[[0, -39.68, -1.78, 69.12, 39.68, -1.78]],
...@@ -80,8 +79,8 @@ test_cfg = dict( ...@@ -80,8 +79,8 @@ test_cfg = dict(
nms_thr=0.01, nms_thr=0.01,
score_thr=0.3, score_thr=0.3,
min_bbox_size=0, min_bbox_size=0,
post_center_limit_range=point_cloud_range, nms_pre=100,
) max_num=50)
# dataset settings # dataset settings
dataset_type = 'KittiDataset' dataset_type = 'KittiDataset'
......
...@@ -34,12 +34,11 @@ model = dict( ...@@ -34,12 +34,11 @@ model = dict(
out_channels=[256, 256], out_channels=[256, 256],
), ),
bbox_head=dict( bbox_head=dict(
type='SECONDHead', type='Anchor3DHead',
class_name=['Car'], num_classes=1,
in_channels=512, in_channels=512,
feat_channels=512, feat_channels=512,
use_direction_classifier=True, use_direction_classifier=True,
encode_bg_as_zeros=True,
anchor_generator=dict( anchor_generator=dict(
type='Anchor3DRangeGenerator', type='Anchor3DRangeGenerator',
ranges=[[0, -40.0, -1.78, 70.4, 40.0, -1.78]], ranges=[[0, -40.0, -1.78, 70.4, 40.0, -1.78]],
...@@ -78,8 +77,8 @@ test_cfg = dict( ...@@ -78,8 +77,8 @@ test_cfg = dict(
nms_thr=0.01, nms_thr=0.01,
score_thr=0.3, score_thr=0.3,
min_bbox_size=0, min_bbox_size=0,
post_center_limit_range=[0, -40, -3, 70.4, 40, 0.0], nms_pre=100,
) max_num=50)
# dataset settings # dataset settings
dataset_type = 'KittiDataset' dataset_type = 'KittiDataset'
...@@ -135,7 +134,7 @@ test_pipeline = [ ...@@ -135,7 +134,7 @@ test_pipeline = [
] ]
data = dict( data = dict(
samples_per_gpu=4, samples_per_gpu=6,
workers_per_gpu=4, workers_per_gpu=4,
train=dict( train=dict(
type=dataset_type, type=dataset_type,
...@@ -166,7 +165,7 @@ data = dict( ...@@ -166,7 +165,7 @@ data = dict(
class_names=class_names, class_names=class_names,
with_label=True)) with_label=True))
# optimizer # optimizer
lr = 0.001 # max learning rate lr = 0.0018 # max learning rate
optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01) optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2)) optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict( lr_config = dict(
......
...@@ -44,13 +44,11 @@ model = dict( ...@@ -44,13 +44,11 @@ model = dict(
out_channels=[128, 128, 128], out_channels=[128, 128, 128],
), ),
pts_bbox_head=dict( pts_bbox_head=dict(
type='Anchor3DVeloHead', type='Anchor3DHead',
class_names=class_names,
num_classes=10, num_classes=10,
in_channels=384, in_channels=384,
feat_channels=384, feat_channels=384,
use_direction_classifier=True, use_direction_classifier=True,
encode_bg_as_zeros=True,
anchor_generator=dict( anchor_generator=dict(
type='Anchor3DRangeGenerator', type='Anchor3DRangeGenerator',
ranges=[ ranges=[
...@@ -79,7 +77,7 @@ model = dict( ...@@ -79,7 +77,7 @@ model = dict(
diff_rad_by_sin=True, diff_rad_by_sin=True,
dir_offset=0.7854, # pi/4 dir_offset=0.7854, # pi/4
dir_limit_offset=0, dir_limit_offset=0,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', ), bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9),
loss_cls=dict( loss_cls=dict(
type='FocalLoss', type='FocalLoss',
use_sigmoid=True, use_sigmoid=True,
...@@ -113,10 +111,7 @@ test_cfg = dict( ...@@ -113,10 +111,7 @@ test_cfg = dict(
nms_thr=0.2, nms_thr=0.2,
score_thr=0.05, score_thr=0.05,
min_bbox_size=0, min_bbox_size=0,
max_per_img=500, max_num=500
post_center_limit_range=point_cloud_range,
# TODO: check whether need to change this
# post_center_limit_range=[-59.6, -59.6, -6, 59.6, 59.6, 4],
# soft-nms is also supported for rcnn testing # soft-nms is also supported for rcnn testing
# e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05) # e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)) ))
...@@ -209,7 +204,7 @@ lr_config = dict( ...@@ -209,7 +204,7 @@ lr_config = dict(
momentum_config = None momentum_config = None
checkpoint_config = dict(interval=1) checkpoint_config = dict(interval=1)
# yapf:disable # yapf:disable
evaluation = dict(interval=20) evaluation = dict(interval=24)
log_config = dict( log_config = dict(
interval=50, interval=50,
hooks=[ hooks=[
......
...@@ -8,7 +8,7 @@ from .samplers import (BaseSampler, CombinedSampler, ...@@ -8,7 +8,7 @@ from .samplers import (BaseSampler, CombinedSampler,
InstanceBalancedPosSampler, IoUBalancedNegSampler, InstanceBalancedPosSampler, IoUBalancedNegSampler,
PseudoSampler, RandomSampler, SamplingResult) PseudoSampler, RandomSampler, SamplingResult)
from .structures import Box3DMode, CameraInstance3DBoxes, LiDARInstance3DBoxes from .structures import Box3DMode, CameraInstance3DBoxes, LiDARInstance3DBoxes
from .transforms import bbox3d2roi, boxes3d_to_bev_torch_lidar from .transforms import bbox3d2result, bbox3d2roi, boxes3d_to_bev_torch_lidar
from .assign_sampling import ( # isort:skip, avoid recursive imports from .assign_sampling import ( # isort:skip, avoid recursive imports
build_bbox_coder, # temporally settings build_bbox_coder, # temporally settings
...@@ -22,5 +22,5 @@ __all__ = [ ...@@ -22,5 +22,5 @@ __all__ = [
'build_bbox_coder', 'DeltaXYZWLHRBBoxCoder', 'boxes3d_to_bev_torch_lidar', 'build_bbox_coder', 'DeltaXYZWLHRBBoxCoder', 'boxes3d_to_bev_torch_lidar',
'BboxOverlapsNearest3D', 'BboxOverlaps3D', 'bbox_overlaps_nearest_3d', 'BboxOverlapsNearest3D', 'BboxOverlaps3D', 'bbox_overlaps_nearest_3d',
'bbox_overlaps_3d', 'Box3DMode', 'LiDARInstance3DBoxes', 'bbox_overlaps_3d', 'Box3DMode', 'LiDARInstance3DBoxes',
'CameraInstance3DBoxes', 'bbox3d2roi' 'CameraInstance3DBoxes', 'bbox3d2roi', 'bbox3d2result'
] ]
from mmdet.core.bbox import build_bbox_coder from mmdet.core.bbox import build_bbox_coder
from .delta_xywh_bbox_coder import DeltaXYZWLHRBBoxCoder from .delta_xyzwhlr_bbox_coder import DeltaXYZWLHRBBoxCoder
__all__ = ['build_bbox_coder', 'DeltaXYZWLHRBBoxCoder'] __all__ = ['build_bbox_coder', 'DeltaXYZWLHRBBoxCoder']
...@@ -69,3 +69,18 @@ def bbox3d2roi(bbox_list): ...@@ -69,3 +69,18 @@ def bbox3d2roi(bbox_list):
rois_list.append(rois) rois_list.append(rois)
rois = torch.cat(rois_list, 0) rois = torch.cat(rois_list, 0)
return rois return rois
def bbox3d2result(bboxes, scores, labels):
    """Convert 3D detection results to a dict of CPU tensors.

    Args:
        bboxes (Tensor): 3D boxes, shape (n, box_dim).
        scores (Tensor): Detection scores, shape (n, ).
        labels (Tensor): Predicted class labels, shape (n, ).

    Returns:
        dict: Bbox results in cpu mode, with keys ``boxes_3d``,
            ``scores_3d`` and ``labels_3d``.
    """
    # Move everything off the device so results can be pickled / serialized.
    result = {
        'boxes_3d': bboxes.cpu(),
        'scores_3d': scores.cpu(),
        'labels_3d': labels.cpu(),
    }
    return result
from mmdet.core.post_processing import (merge_aug_bboxes, merge_aug_masks, from mmdet.core.post_processing import (merge_aug_bboxes, merge_aug_masks,
merge_aug_proposals, merge_aug_scores, merge_aug_proposals, merge_aug_scores,
multiclass_nms) multiclass_nms)
from .box3d_nms import box3d_multiclass_nms
__all__ = [ __all__ = [
'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes', 'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes',
'merge_aug_scores', 'merge_aug_masks' 'merge_aug_scores', 'merge_aug_masks', 'box3d_multiclass_nms'
] ]
import torch
from mmdet3d.ops.iou3d.iou3d_utils import nms_gpu, nms_normal_gpu
def box3d_multiclass_nms(mlvl_bboxes,
                         mlvl_bboxes_for_nms,
                         mlvl_scores,
                         score_thr,
                         max_num,
                         cfg,
                         mlvl_dir_scores=None):
    """Multi-class NMS for 3D boxes.

    Args:
        mlvl_bboxes (Tensor): Multi-level boxes to keep, shape (N, box_dim).
        mlvl_bboxes_for_nms (Tensor): Boxes used for the overlap test
            (BEV representation), shape (N, 5).
        mlvl_scores (Tensor): Class scores, shape (N, num_classes + 1);
            the last column is the background class and is skipped.
        score_thr (float): Per-class score threshold.
        max_num (int): Maximum number of boxes kept over all classes.
        cfg (dict-like): NMS config; must provide ``use_rotate_nms``
            and ``nms_thr``.
        mlvl_dir_scores (Tensor, optional): Direction scores, shape (N, ).

    Returns:
        tuple[Tensor]: ``(bboxes, scores, labels, dir_scores)`` after NMS.
            ``labels`` is 1-D ``long`` in both the empty and non-empty case.
    """
    # the fg class id range: [0, num_classes - 1]; last column is background
    num_classes = mlvl_scores.shape[1] - 1
    bboxes = []
    scores = []
    labels = []
    dir_scores = []
    for i in range(0, num_classes):
        # get bboxes and scores of this class
        cls_inds = mlvl_scores[:, i] > score_thr
        if not cls_inds.any():
            continue
        _scores = mlvl_scores[cls_inds, i]
        _bboxes_for_nms = mlvl_bboxes_for_nms[cls_inds, :]
        if cfg.use_rotate_nms:
            nms_func = nms_gpu
        else:
            nms_func = nms_normal_gpu
        selected = nms_func(_bboxes_for_nms, _scores, cfg.nms_thr)
        _mlvl_bboxes = mlvl_bboxes[cls_inds, :]
        bboxes.append(_mlvl_bboxes[selected])
        scores.append(_scores[selected])
        cls_label = mlvl_bboxes.new_full((len(selected), ),
                                         i,
                                         dtype=torch.long)
        labels.append(cls_label)
        if mlvl_dir_scores is not None:
            _mlvl_dir_scores = mlvl_dir_scores[cls_inds]
            dir_scores.append(_mlvl_dir_scores[selected])

    if bboxes:
        bboxes = torch.cat(bboxes, dim=0)
        scores = torch.cat(scores, dim=0)
        labels = torch.cat(labels, dim=0)
        if mlvl_dir_scores is not None:
            dir_scores = torch.cat(dir_scores, dim=0)
        if bboxes.shape[0] > max_num:
            # keep only the top-scoring max_num boxes across all classes
            _, inds = scores.sort(descending=True)
            inds = inds[:max_num]
            bboxes = bboxes[inds, :]
            labels = labels[inds]
            scores = scores[inds]
            if mlvl_dir_scores is not None:
                dir_scores = dir_scores[inds]
    else:
        # No box survived the score threshold. Return empty tensors whose
        # shapes/dtypes match the non-empty branch (previously ``bboxes``
        # was allocated from ``mlvl_scores`` and ``labels`` was a 2-D float
        # tensor of shape (0, num_classes + 1), inconsistent with the 1-D
        # ``long`` labels produced above).
        bboxes = mlvl_bboxes.new_zeros((0, mlvl_bboxes.size(-1)))
        scores = mlvl_scores.new_zeros((0, ))
        labels = mlvl_scores.new_zeros((0, ), dtype=torch.long)
        dir_scores = mlvl_scores.new_zeros((0, ))
    return bboxes, scores, labels, dir_scores
...@@ -275,12 +275,9 @@ class KittiDataset(torch_data.Dataset): ...@@ -275,12 +275,9 @@ class KittiDataset(torch_data.Dataset):
else: else:
tmp_dir = None tmp_dir = None
if not isinstance(outputs[0][0], dict): if not isinstance(outputs[0], dict):
sample_idx = [
info['image']['image_idx'] for info in self.kitti_infos
]
result_files = self.bbox2result_kitti2d(outputs, self.class_names, result_files = self.bbox2result_kitti2d(outputs, self.class_names,
sample_idx, pklfile_prefix, pklfile_prefix,
submission_prefix) submission_prefix)
else: else:
result_files = self.bbox2result_kitti(outputs, self.class_names, result_files = self.bbox2result_kitti(outputs, self.class_names,
...@@ -330,6 +327,7 @@ class KittiDataset(torch_data.Dataset): ...@@ -330,6 +327,7 @@ class KittiDataset(torch_data.Dataset):
class_names, class_names,
pklfile_prefix=None, pklfile_prefix=None,
submission_prefix=None): submission_prefix=None):
assert len(net_outputs) == len(self.kitti_infos)
if submission_prefix is not None: if submission_prefix is not None:
mmcv.mkdir_or_exist(submission_prefix) mmcv.mkdir_or_exist(submission_prefix)
...@@ -339,13 +337,11 @@ class KittiDataset(torch_data.Dataset): ...@@ -339,13 +337,11 @@ class KittiDataset(torch_data.Dataset):
mmcv.track_iter_progress(net_outputs)): mmcv.track_iter_progress(net_outputs)):
annos = [] annos = []
info = self.kitti_infos[idx] info = self.kitti_infos[idx]
sample_idx = info['image']['image_idx']
image_shape = info['image']['image_shape'][:2] image_shape = info['image']['image_shape'][:2]
for i, box_dict in enumerate(pred_dicts):
num_example = 0 box_dict = self.convert_valid_bboxes(pred_dicts, info)
sample_idx = box_dict['sample_idx'] if len(box_dict['bbox']) > 0:
box_dict = self.convert_valid_bboxes(box_dict, info)
if box_dict['bbox'] is not None or box_dict['bbox'].size.numel(
) != 0:
box_2d_preds = box_dict['bbox'] box_2d_preds = box_dict['bbox']
box_preds = box_dict['box3d_camera'] box_preds = box_dict['box3d_camera']
scores = box_dict['scores'] scores = box_dict['scores']
...@@ -363,11 +359,10 @@ class KittiDataset(torch_data.Dataset): ...@@ -363,11 +359,10 @@ class KittiDataset(torch_data.Dataset):
'rotation_y': [], 'rotation_y': [],
'score': [] 'score': []
} }
gt_iou = scores * 0
for box, box_lidar, bbox, score, label, cur_gt_iou in zip( for box, box_lidar, bbox, score, label in zip(
box_preds, box_preds_lidar, box_2d_preds, scores, box_preds, box_preds_lidar, box_2d_preds, scores,
label_preds, gt_iou): label_preds):
bbox[2:] = np.minimum(bbox[2:], image_shape[::-1]) bbox[2:] = np.minimum(bbox[2:], image_shape[::-1])
bbox[:2] = np.maximum(bbox[:2], [0, 0]) bbox[:2] = np.maximum(bbox[:2], [0, 0])
anno['name'].append(class_names[int(label)]) anno['name'].append(class_names[int(label)])
...@@ -379,12 +374,8 @@ class KittiDataset(torch_data.Dataset): ...@@ -379,12 +374,8 @@ class KittiDataset(torch_data.Dataset):
anno['dimensions'].append(box[3:6]) anno['dimensions'].append(box[3:6])
anno['location'].append(box[:3]) anno['location'].append(box[:3])
anno['rotation_y'].append(box[6]) anno['rotation_y'].append(box[6])
# anno["gt_iou"].append(cur_gt_iou)
anno['score'].append(score) anno['score'].append(score)
num_example += 1
if num_example != 0:
anno = {k: np.stack(v) for k, v in anno.items()} anno = {k: np.stack(v) for k, v in anno.items()}
annos.append(anno) annos.append(anno)
...@@ -399,19 +390,16 @@ class KittiDataset(torch_data.Dataset): ...@@ -399,19 +390,16 @@ class KittiDataset(torch_data.Dataset):
print( print(
'{} -1 -1 {:.4f} {:.4f} {:.4f} {:.4f} ' '{} -1 -1 {:.4f} {:.4f} {:.4f} {:.4f} '
'{:.4f} {:.4f} {:.4f} ' '{:.4f} {:.4f} {:.4f} '
'{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f}' '{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f}'.
.format(anno['name'][idx], format(anno['name'][idx], anno['alpha'][idx],
anno['alpha'][idx], bbox[idx][0], bbox[idx][0], bbox[idx][1],
bbox[idx][1], bbox[idx][2], bbox[idx][2], bbox[idx][3],
bbox[idx][3], dims[idx][1], dims[idx][1], dims[idx][2],
dims[idx][2], dims[idx][0], dims[idx][0], loc[idx][0], loc[idx][1],
loc[idx][0], loc[idx][1], loc[idx][2], anno['rotation_y'][idx],
loc[idx][2],
anno['rotation_y'][idx],
anno['score'][idx]), anno['score'][idx]),
file=f) file=f)
else:
if num_example == 0:
annos.append({ annos.append({
'name': np.array([]), 'name': np.array([]),
'truncated': np.array([]), 'truncated': np.array([]),
...@@ -424,7 +412,7 @@ class KittiDataset(torch_data.Dataset): ...@@ -424,7 +412,7 @@ class KittiDataset(torch_data.Dataset):
'score': np.array([]), 'score': np.array([]),
}) })
annos[-1]['sample_idx'] = np.array( annos[-1]['sample_idx'] = np.array(
[sample_idx] * num_example, dtype=np.int64) [sample_idx] * len(annos[-1]['score']), dtype=np.int64)
det_annos += annos det_annos += annos
...@@ -439,7 +427,6 @@ class KittiDataset(torch_data.Dataset): ...@@ -439,7 +427,6 @@ class KittiDataset(torch_data.Dataset):
def bbox2result_kitti2d(self, def bbox2result_kitti2d(self,
net_outputs, net_outputs,
class_names, class_names,
sample_ids,
pklfile_prefix=None, pklfile_prefix=None,
submission_prefix=None): submission_prefix=None):
"""Convert results to kitti format for evaluation and test submission """Convert results to kitti format for evaluation and test submission
...@@ -447,18 +434,16 @@ class KittiDataset(torch_data.Dataset): ...@@ -447,18 +434,16 @@ class KittiDataset(torch_data.Dataset):
Args: Args:
net_outputs (List[array]): list of array storing the bbox and score net_outputs (List[array]): list of array storing the bbox and score
class_nanes (List[String]): A list of class names class_nanes (List[String]): A list of class names
sample_idx (List[Int]): A list of samples' index,
should have the same length as net_outputs.
pklfile_prefix (str | None): The prefix of pkl file. pklfile_prefix (str | None): The prefix of pkl file.
submission_prefix (str | None): The prefix of submission file. submission_prefix (str | None): The prefix of submission file.
Return: Return:
List([dict]): A list of dict have the kitti format List([dict]): A list of dict have the kitti format
""" """
assert len(net_outputs) == len(sample_ids) assert len(net_outputs) == len(self.kitti_infos)
det_annos = [] det_annos = []
print('Converting prediction to KITTI format') print('\nConverting prediction to KITTI format')
for i, bboxes_per_sample in enumerate( for i, bboxes_per_sample in enumerate(
mmcv.track_iter_progress(net_outputs)): mmcv.track_iter_progress(net_outputs)):
annos = [] annos = []
...@@ -472,7 +457,7 @@ class KittiDataset(torch_data.Dataset): ...@@ -472,7 +457,7 @@ class KittiDataset(torch_data.Dataset):
location=[], location=[],
rotation_y=[], rotation_y=[],
score=[]) score=[])
sample_idx = sample_ids[i] sample_idx = self.kitti_infos[i]['image']['image_idx']
num_example = 0 num_example = 0
for label in range(len(bboxes_per_sample)): for label in range(len(bboxes_per_sample)):
...@@ -526,7 +511,7 @@ class KittiDataset(torch_data.Dataset): ...@@ -526,7 +511,7 @@ class KittiDataset(torch_data.Dataset):
mmcv.mkdir_or_exist(submission_prefix) mmcv.mkdir_or_exist(submission_prefix)
print(f'Saving KITTI submission to {submission_prefix}') print(f'Saving KITTI submission to {submission_prefix}')
for i, anno in enumerate(det_annos): for i, anno in enumerate(det_annos):
sample_idx = sample_ids[i] sample_idx = self.kitti_infos[i]['image']['image_idx']
cur_det_file = f'{submission_prefix}/{sample_idx:06d}.txt' cur_det_file = f'{submission_prefix}/{sample_idx:06d}.txt'
with open(cur_det_file, 'w') as f: with open(cur_det_file, 'w') as f:
bbox = anno['bbox'] bbox = anno['bbox']
...@@ -551,9 +536,9 @@ class KittiDataset(torch_data.Dataset): ...@@ -551,9 +536,9 @@ class KittiDataset(torch_data.Dataset):
def convert_valid_bboxes(self, box_dict, info): def convert_valid_bboxes(self, box_dict, info):
# TODO: refactor this function # TODO: refactor this function
final_box_preds = box_dict['box3d_lidar'] final_box_preds = box_dict['boxes_3d']
final_scores = box_dict['scores'] final_scores = box_dict['scores_3d']
final_labels = box_dict['label_preds'] final_labels = box_dict['labels_3d']
sample_idx = info['image']['image_idx'] sample_idx = info['image']['image_idx']
final_box_preds[:, -1] = box_np_ops.limit_period( final_box_preds[:, -1] = box_np_ops.limit_period(
final_box_preds[:, -1] - np.pi, offset=0.5, period=np.pi * 2) final_box_preds[:, -1] - np.pi, offset=0.5, period=np.pi * 2)
......
...@@ -275,15 +275,14 @@ class NuScenesDataset(torch_data.Dataset): ...@@ -275,15 +275,14 @@ class NuScenesDataset(torch_data.Dataset):
def _format_bbox(self, results, jsonfile_prefix=None): def _format_bbox(self, results, jsonfile_prefix=None):
nusc_annos = {} nusc_annos = {}
mapped_class_names = self.class_names mapped_class_names = self.class_names
token2info = {}
for info in self.data_infos:
token2info[info['token']] = info
print('Start to convert detection format...') print('Start to convert detection format...')
for det in mmcv.track_iter_progress(results): for sample_id, det in enumerate(mmcv.track_iter_progress(results)):
annos = [] annos = []
boxes = output_to_nusc_box(det[0]) boxes = output_to_nusc_box(det)
boxes = lidar_nusc_box_to_global(token2info[det[0]['sample_idx']], sample_token = self.data_infos[sample_id]['token']
boxes, mapped_class_names, boxes = lidar_nusc_box_to_global(self.data_infos[sample_id], boxes,
mapped_class_names,
self.eval_detection_configs, self.eval_detection_configs,
self.eval_version) self.eval_version)
for i, box in enumerate(boxes): for i, box in enumerate(boxes):
...@@ -310,7 +309,7 @@ class NuScenesDataset(torch_data.Dataset): ...@@ -310,7 +309,7 @@ class NuScenesDataset(torch_data.Dataset):
attr = NuScenesDataset.DefaultAttribute[name] attr = NuScenesDataset.DefaultAttribute[name]
nusc_anno = dict( nusc_anno = dict(
sample_token=det[0]['sample_idx'], sample_token=sample_token,
translation=box.center.tolist(), translation=box.center.tolist(),
size=box.wlh.tolist(), size=box.wlh.tolist(),
rotation=box.orientation.elements.tolist(), rotation=box.orientation.elements.tolist(),
...@@ -319,7 +318,7 @@ class NuScenesDataset(torch_data.Dataset): ...@@ -319,7 +318,7 @@ class NuScenesDataset(torch_data.Dataset):
detection_score=box.score, detection_score=box.score,
attribute_name=attr) attribute_name=attr)
annos.append(nusc_anno) annos.append(nusc_anno)
nusc_annos[det[0]['sample_idx']] = annos nusc_annos[sample_token] = annos
nusc_submissions = { nusc_submissions = {
'meta': self.modality, 'meta': self.modality,
'results': nusc_annos, 'results': nusc_annos,
...@@ -401,7 +400,7 @@ class NuScenesDataset(torch_data.Dataset): ...@@ -401,7 +400,7 @@ class NuScenesDataset(torch_data.Dataset):
else: else:
result_files = dict() result_files = dict()
for name in results[0]: for name in results[0]:
print('Formating bboxes of {}'.format(name)) print(f'\nFormating bboxes of {name}')
results_ = [out[name] for out in results] results_ = [out[name] for out in results]
tmp_file_ = osp.join(jsonfile_prefix, name) tmp_file_ = osp.join(jsonfile_prefix, name)
result_files.update( result_files.update(
...@@ -445,9 +444,9 @@ class NuScenesDataset(torch_data.Dataset): ...@@ -445,9 +444,9 @@ class NuScenesDataset(torch_data.Dataset):
def output_to_nusc_box(detection): def output_to_nusc_box(detection):
box3d = detection['box3d_lidar'].numpy() box3d = detection['boxes_3d'].numpy()
scores = detection['scores'].numpy() scores = detection['scores_3d'].numpy()
labels = detection['label_preds'].numpy() labels = detection['labels_3d'].numpy()
# TODO: check whether this is necessary # TODO: check whether this is necessary
# with dir_offset & dir_limit in the head # with dir_offset & dir_limit in the head
box3d[:, 6] = -box3d[:, 6] - np.pi / 2 box3d[:, 6] = -box3d[:, 6] - np.pi / 2
......
from .anchor_heads import * # noqa: F401,F403
from .backbones import * # noqa: F401,F403 from .backbones import * # noqa: F401,F403
from .builder import (build_backbone, build_detector, build_fusion_layer, from .builder import (build_backbone, build_detector, build_fusion_layer,
build_head, build_loss, build_middle_encoder, build_neck, build_head, build_loss, build_middle_encoder, build_neck,
build_roi_extractor, build_shared_head, build_roi_extractor, build_shared_head,
build_voxel_encoder) build_voxel_encoder)
from .dense_heads import * # noqa: F401,F403
from .detectors import * # noqa: F401,F403 from .detectors import * # noqa: F401,F403
from .fusion_layers import * # noqa: F401,F403 from .fusion_layers import * # noqa: F401,F403
from .losses import * # noqa: F401,F403 from .losses import * # noqa: F401,F403
......
from .boxvelo_head import Anchor3DVeloHead
from .parta2_rpn_head import PartA2RPNHead
from .second_head import SECONDHead
__all__ = ['Anchor3DVeloHead', 'SECONDHead', 'PartA2RPNHead']
import numpy as np
import torch
from mmcv.cnn import bias_init_with_prob, normal_init
from mmdet3d.core import box_torch_ops, boxes3d_to_bev_torch_lidar
from mmdet3d.ops.iou3d.iou3d_utils import nms_gpu, nms_normal_gpu
from mmdet.models import HEADS
from .second_head import SECONDHead
@HEADS.register_module()
class Anchor3DVeloHead(SECONDHead):
    """Anchor-based head for 3D boxes that carry velocity components.

    Extends ``SECONDHead`` with a 9-dim box code (x, y, z, w, l, h, rot
    plus two extra values — presumably velocities, per
    ``anchor_generator.custom_values`` — TODO confirm against the anchor
    generator).

    Args:
        class_names (list[str]): Names of the detected classes.
        num_classes (int): Number of foreground classes.
        in_channels (int): Number of channels in the input feature map.
        feat_channels (int): Number of channels of the feature map.
        loss_cls (dict): Config of classification loss.
        loss_bbox (dict): Config of localization loss.
        loss_dir (dict): Config of direction-classifier loss.
    """  # noqa: W605

    def __init__(self,
                 class_names,
                 num_classes,
                 in_channels,
                 train_cfg,
                 test_cfg,
                 feat_channels=256,
                 use_direction_classifier=True,
                 encode_bg_as_zeros=False,
                 box_code_size=9,
                 anchor_generator=dict(
                     type='Anchor3DRangeGenerator',
                     range=[0, -39.68, -1.78, 69.12, 39.68, -1.78],
                     strides=[2],
                     sizes=[[1.6, 3.9, 1.56]],
                     rotations=[0, 1.57],
                     custom_values=[0, 0],
                     reshape_out=True,
                 ),
                 assigner_per_size=False,
                 assign_per_class=False,
                 diff_rad_by_sin=True,
                 dir_offset=0,
                 dir_limit_offset=1,
                 bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
                 loss_cls=dict(
                     type='CrossEntropyLoss',
                     use_sigmoid=True,
                     loss_weight=1.0),
                 loss_bbox=dict(
                     type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
                 loss_dir=dict(type='CrossEntropyLoss', loss_weight=0.2)):
        super().__init__(class_names, in_channels, train_cfg, test_cfg,
                         feat_channels, use_direction_classifier,
                         encode_bg_as_zeros, box_code_size, anchor_generator,
                         assigner_per_size, assign_per_class, diff_rad_by_sin,
                         dir_offset, dir_limit_offset, bbox_coder, loss_cls,
                         loss_bbox, loss_dir)
        self.num_classes = num_classes
        # build head layers & losses
        # When softmax (non-sigmoid) classification is used, reserve one
        # extra channel for the background class before building the layers.
        if not self.use_sigmoid_cls:
            self.num_classes += 1
        self._init_layers()

    def init_weights(self):
        """Initialize conv weights; cls bias is set for focal-loss-style
        low initial foreground probability (prior prob 0.01)."""
        # pass
        # use the initialization when ready
        bias_cls = bias_init_with_prob(0.01)
        normal_init(self.conv_cls, std=0.01, bias=bias_cls)
        normal_init(self.conv_reg, std=0.01)

    @staticmethod
    def add_sin_difference(boxes1, boxes2):
        """Replace the rotation dims with sin/cos-encoded differences.

        Encodes sin(a - b) = sin(a)cos(b) - cos(a)sin(b) by writing
        sin(a)cos(b) into ``boxes1`` and cos(a)sin(b) into ``boxes2`` so a
        smooth-L1 on the pair behaves like a loss on the angle difference.
        """
        # Caution: the 7th dim is the rotation, (last dim without velo)
        rad_pred_encoding = torch.sin(boxes1[..., 6:7]) * torch.cos(
            boxes2[..., 6:7])
        rad_tg_encoding = torch.cos(boxes1[..., 6:7]) * torch.sin(boxes2[...,
                                                                         6:7])
        # Keep the trailing (velocity) dims untouched.
        boxes1 = torch.cat(
            [boxes1[..., :6], rad_pred_encoding, boxes1[..., 7:]], dim=-1)
        boxes2 = torch.cat([boxes2[..., :6], rad_tg_encoding, boxes2[..., 7:]],
                           dim=-1)
        return boxes1, boxes2

    def get_bboxes_single(self,
                          cls_scores,
                          bbox_preds,
                          dir_cls_preds,
                          mlvl_anchors,
                          input_meta,
                          rescale=False):
        """Decode multi-level predictions of one sample and run NMS.

        Args:
            cls_scores (list[Tensor]): Per-level class score maps.
            bbox_preds (list[Tensor]): Per-level box delta maps.
            dir_cls_preds (list[Tensor]): Per-level direction logits.
            mlvl_anchors (list[Tensor]): Per-level anchors.
            input_meta (dict): Sample meta; ``sample_idx`` is attached to
                the returned result.
            rescale (bool): Unused here — TODO confirm whether rescaling
                is handled by the caller.

        Returns:
            dict: Output of :meth:`multiclass_nms` plus ``sample_idx``.
        """
        assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)
        mlvl_bboxes = []
        mlvl_scores = []
        mlvl_dir_scores = []
        for cls_score, bbox_pred, dir_cls_pred, anchors in zip(
                cls_scores, bbox_preds, dir_cls_preds, mlvl_anchors):
            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
            assert cls_score.size()[-2:] == dir_cls_pred.size()[-2:]
            # (C, H, W) -> (H*W*A, 2): binary direction decision per anchor
            dir_cls_pred = dir_cls_pred.permute(1, 2, 0).reshape(-1, 2)
            dir_cls_score = torch.max(dir_cls_pred, dim=-1)[1]

            cls_score = cls_score.permute(1, 2,
                                          0).reshape(-1, self.num_classes)
            if self.use_sigmoid_cls:
                scores = cls_score.sigmoid()
            else:
                scores = cls_score.softmax(-1)
            bbox_pred = bbox_pred.permute(1, 2,
                                          0).reshape(-1, self.box_code_size)

            # Pre-NMS top-k filtering on the per-anchor max score.
            nms_pre = self.test_cfg.get('nms_pre', -1)
            if nms_pre > 0 and scores.shape[0] > nms_pre:
                if self.use_sigmoid_cls:
                    max_scores, _ = scores.max(dim=1)
                else:
                    # Last softmax column is background; exclude it.
                    max_scores, _ = scores[:, :-1].max(dim=1)
                _, topk_inds = max_scores.topk(nms_pre)
                anchors = anchors[topk_inds, :]
                bbox_pred = bbox_pred[topk_inds, :]
                scores = scores[topk_inds, :]
                dir_cls_score = dir_cls_score[topk_inds]

            bboxes = self.bbox_coder.decode(anchors, bbox_pred)
            mlvl_bboxes.append(bboxes)
            mlvl_scores.append(scores)
            mlvl_dir_scores.append(dir_cls_score)

        mlvl_bboxes = torch.cat(mlvl_bboxes)
        mlvl_bboxes_for_nms = boxes3d_to_bev_torch_lidar(mlvl_bboxes)
        mlvl_scores = torch.cat(mlvl_scores)
        mlvl_dir_scores = torch.cat(mlvl_dir_scores)

        if self.use_sigmoid_cls:
            # Add a dummy background class to the front when using sigmoid
            padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
            mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)

        score_thr = self.test_cfg.get('score_thr', 0)
        result = self.multiclass_nms(mlvl_bboxes, mlvl_bboxes_for_nms,
                                     mlvl_scores, mlvl_dir_scores, score_thr,
                                     self.test_cfg.max_per_img)
        result.update(dict(sample_idx=input_meta['sample_idx']))
        return result

    def multiclass_nms(self, mlvl_bboxes, mlvl_bboxes_for_nms, mlvl_scores,
                       mlvl_dir_scores, score_thr, max_num):
        """Per-class NMS, direction-aware rotation fix-up, top-k capping.

        Returns a dict with CPU tensors ``box3d_lidar`` (n, box_code_size),
        ``scores`` (n, ) and ``label_preds`` (n, ).
        """
        # do multi class nms
        # the fg class id range: [0, num_classes-1]
        num_classes = mlvl_scores.shape[1] - 1
        bboxes = []
        scores = []
        labels = []
        dir_scores = []
        for i in range(0, num_classes):
            # get bboxes and scores of this class
            cls_inds = mlvl_scores[:, i] > score_thr
            if not cls_inds.any():
                continue
            _scores = mlvl_scores[cls_inds, i]
            _bboxes_for_nms = mlvl_bboxes_for_nms[cls_inds, :]
            if self.test_cfg.use_rotate_nms:
                nms_func = nms_gpu
            else:
                nms_func = nms_normal_gpu
            selected = nms_func(_bboxes_for_nms, _scores,
                                self.test_cfg.nms_thr)
            _mlvl_bboxes = mlvl_bboxes[cls_inds, :]
            _mlvl_dir_scores = mlvl_dir_scores[cls_inds]
            if len(selected) > 0:
                bboxes.append(_mlvl_bboxes[selected])
                scores.append(_scores[selected])
                dir_scores.append(_mlvl_dir_scores[selected])
                # Fold the predicted yaw into the canonical period, then add
                # pi when the direction classifier flips the heading
                # (mutates the just-appended boxes in place).
                dir_rot = box_torch_ops.limit_period(
                    bboxes[-1][..., 6] - self.dir_offset,
                    self.dir_limit_offset, np.pi)
                bboxes[-1][..., 6] = (
                    dir_rot + self.dir_offset +
                    np.pi * dir_scores[-1].to(bboxes[-1].dtype))
                cls_label = mlvl_bboxes.new_full((len(selected), ),
                                                 i,
                                                 dtype=torch.long)
                labels.append(cls_label)

        if bboxes:
            bboxes = torch.cat(bboxes, dim=0)
            scores = torch.cat(scores, dim=0)
            labels = torch.cat(labels, dim=0)
            dir_scores = torch.cat(dir_scores, dim=0)
            if bboxes.shape[0] > max_num:
                # keep only the top-scoring max_num detections
                _, inds = scores.sort(descending=True)
                inds = inds[:max_num]
                bboxes = bboxes[inds, :]
                labels = labels[inds]
                scores = scores[inds]
                dir_scores = dir_scores[inds]
            return dict(
                box3d_lidar=bboxes.cpu(),
                scores=scores.cpu(),
                label_preds=labels.cpu(),
            )
        else:
            # NOTE(review): empty-case shapes look inconsistent with the
            # non-empty branch — label_preds is (0, 4) float here but 1-D
            # long above; verify downstream consumers tolerate this.
            return dict(
                box3d_lidar=mlvl_bboxes.new_zeros([0,
                                                   self.box_code_size]).cpu(),
                scores=mlvl_bboxes.new_zeros([0]).cpu(),
                label_preds=mlvl_bboxes.new_zeros([0, 4]).cpu(),
            )
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment