Commit e3cd3c1d authored by zhangwenwei's avatar zhangwenwei
Browse files

Refactor dense heads

parent 8c5dd998
......@@ -65,12 +65,11 @@ model = dict(
out_channels=[256, 256],
),
pts_bbox_head=dict(
type='SECONDHead',
class_name=['Pedestrian', 'Cyclist', 'Car'],
type='Anchor3DHead',
num_classes=3,
in_channels=512,
feat_channels=512,
use_direction_classifier=True,
encode_bg_as_zeros=True,
anchor_generator=dict(
type='Anchor3DRangeGenerator',
ranges=[
......@@ -85,7 +84,7 @@ model = dict(
assigner_per_size=True,
diff_rad_by_sin=True,
assign_per_class=True,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', ),
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
......@@ -132,8 +131,8 @@ test_cfg = dict(
nms_thr=0.01,
score_thr=0.3,
min_bbox_size=0,
post_center_limit_range=[0, -40, -3, 70.4, 40, 0.0],
), )
nms_pre=100,
max_num=50))
# dataset settings
dataset_type = 'KittiDataset'
......
......@@ -37,12 +37,11 @@ model = dict(
out_channels=[128, 128, 128],
),
bbox_head=dict(
type='SECONDHead',
class_name=['Car'],
type='Anchor3DHead',
num_classes=1,
in_channels=384,
feat_channels=384,
use_direction_classifier=True,
encode_bg_as_zeros=True,
anchor_generator=dict(
type='Anchor3DRangeGenerator',
ranges=[[0, -39.68, -1.78, 69.12, 39.68, -1.78]],
......@@ -51,7 +50,7 @@ model = dict(
rotations=[0, 1.57],
reshape_out=True),
diff_rad_by_sin=True,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', ),
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
......@@ -81,10 +80,8 @@ test_cfg = dict(
nms_thr=0.01,
score_thr=0.3,
min_bbox_size=0,
post_center_limit_range=point_cloud_range,
# soft-nms is also supported for rcnn testing
# e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
nms_pre=100,
max_num=50)
# dataset settings
dataset_type = 'KittiDataset'
......
......@@ -34,12 +34,11 @@ model = dict(
out_channels=[256, 256],
),
bbox_head=dict(
type='SECONDHead',
class_name=['Pedestrian', 'Cyclist', 'Car'],
type='Anchor3DHead',
num_classes=3,
in_channels=512,
feat_channels=512,
use_direction_classifier=True,
encode_bg_as_zeros=True,
anchor_generator=dict(
type='Anchor3DRangeGenerator',
ranges=[
......@@ -54,7 +53,7 @@ model = dict(
diff_rad_by_sin=True,
assigner_per_size=True,
assign_per_class=True,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', ),
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
......@@ -100,10 +99,8 @@ test_cfg = dict(
nms_thr=0.01,
score_thr=0.3,
min_bbox_size=0,
post_center_limit_range=[0, -40, -3, 70.4, 40, 0.0],
# soft-nms is also supported for rcnn testing
# e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
nms_pre=100,
max_num=50)
# dataset settings
dataset_type = 'KittiDataset'
......
......@@ -34,12 +34,11 @@ model = dict(
out_channels=[256, 256],
),
bbox_head=dict(
type='SECONDHead',
class_name=['Car'],
type='Anchor3DHead',
num_classes=1,
in_channels=512,
feat_channels=512,
use_direction_classifier=True,
encode_bg_as_zeros=True,
anchor_generator=dict(
type='Anchor3DRangeGenerator',
ranges=[[0, -40.0, -1.78, 70.4, 40.0, -1.78]],
......@@ -78,10 +77,8 @@ test_cfg = dict(
nms_thr=0.01,
score_thr=0.3,
min_bbox_size=0,
post_center_limit_range=[0, -40, -3, 70.4, 40, 0.0],
# soft-nms is also supported for rcnn testing
# e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
)
nms_pre=100,
max_num=50)
# dataset settings
dataset_type = 'KittiDataset'
......
......@@ -33,11 +33,10 @@ model = dict(
out_channels=[256, 256]),
rpn_head=dict(
type='PartA2RPNHead',
class_name=['Pedestrian', 'Cyclist', 'Car'],
num_classes=3,
in_channels=512,
feat_channels=512,
use_direction_classifier=True,
encode_bg_as_zeros=True,
anchor_generator=dict(
type='Anchor3DRangeGenerator',
ranges=[[0, -40.0, -0.6, 70.4, 40.0, -0.6],
......
......@@ -33,11 +33,10 @@ model = dict(
out_channels=[256, 256]),
rpn_head=dict(
type='PartA2RPNHead',
class_name=['Car'],
num_classes=1,
in_channels=512,
feat_channels=512,
use_direction_classifier=True,
encode_bg_as_zeros=True,
anchor_generator=dict(
type='Anchor3DRangeGenerator',
ranges=[[0, -40.0, -1.78, 70.4, 40.0, -1.78]],
......@@ -132,6 +131,7 @@ train_cfg = dict(
rpn_proposal=dict(
nms_pre=9000,
nms_post=512,
max_num=512,
nms_thr=0.8,
score_thr=0,
use_rotate_nms=False),
......@@ -158,6 +158,7 @@ test_cfg = dict(
rpn=dict(
nms_pre=1024,
nms_post=100,
max_num=100,
nms_thr=0.7,
score_thr=0,
use_rotate_nms=True),
......
......@@ -36,12 +36,11 @@ model = dict(
out_channels=[128, 128, 128],
),
bbox_head=dict(
type='SECONDHead',
class_name=['Car'],
type='Anchor3DHead',
num_classes=1,
in_channels=384,
feat_channels=384,
use_direction_classifier=True,
encode_bg_as_zeros=True,
anchor_generator=dict(
type='Anchor3DRangeGenerator',
ranges=[[0, -39.68, -1.78, 69.12, 39.68, -1.78]],
......@@ -80,8 +79,8 @@ test_cfg = dict(
nms_thr=0.01,
score_thr=0.3,
min_bbox_size=0,
post_center_limit_range=point_cloud_range,
)
nms_pre=100,
max_num=50)
# dataset settings
dataset_type = 'KittiDataset'
......
......@@ -34,12 +34,11 @@ model = dict(
out_channels=[256, 256],
),
bbox_head=dict(
type='SECONDHead',
class_name=['Car'],
type='Anchor3DHead',
num_classes=1,
in_channels=512,
feat_channels=512,
use_direction_classifier=True,
encode_bg_as_zeros=True,
anchor_generator=dict(
type='Anchor3DRangeGenerator',
ranges=[[0, -40.0, -1.78, 70.4, 40.0, -1.78]],
......@@ -78,8 +77,8 @@ test_cfg = dict(
nms_thr=0.01,
score_thr=0.3,
min_bbox_size=0,
post_center_limit_range=[0, -40, -3, 70.4, 40, 0.0],
)
nms_pre=100,
max_num=50)
# dataset settings
dataset_type = 'KittiDataset'
......@@ -135,7 +134,7 @@ test_pipeline = [
]
data = dict(
samples_per_gpu=4,
samples_per_gpu=6,
workers_per_gpu=4,
train=dict(
type=dataset_type,
......@@ -166,7 +165,7 @@ data = dict(
class_names=class_names,
with_label=True))
# optimizer
lr = 0.001 # max learning rate
lr = 0.0018 # max learning rate
optimizer = dict(type='AdamW', lr=lr, betas=(0.95, 0.99), weight_decay=0.01)
optimizer_config = dict(grad_clip=dict(max_norm=10, norm_type=2))
lr_config = dict(
......
......@@ -44,13 +44,11 @@ model = dict(
out_channels=[128, 128, 128],
),
pts_bbox_head=dict(
type='Anchor3DVeloHead',
class_names=class_names,
type='Anchor3DHead',
num_classes=10,
in_channels=384,
feat_channels=384,
use_direction_classifier=True,
encode_bg_as_zeros=True,
anchor_generator=dict(
type='Anchor3DRangeGenerator',
ranges=[
......@@ -79,7 +77,7 @@ model = dict(
diff_rad_by_sin=True,
dir_offset=0.7854, # pi/4
dir_limit_offset=0,
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', ),
bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder', code_size=9),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
......@@ -113,10 +111,7 @@ test_cfg = dict(
nms_thr=0.2,
score_thr=0.05,
min_bbox_size=0,
max_per_img=500,
post_center_limit_range=point_cloud_range,
# TODO: check whether need to change this
# post_center_limit_range=[-59.6, -59.6, -6, 59.6, 59.6, 4],
max_num=500
# soft-nms is also supported for rcnn testing
# e.g., nms=dict(type='soft_nms', iou_thr=0.5, min_score=0.05)
))
......@@ -209,7 +204,7 @@ lr_config = dict(
momentum_config = None
checkpoint_config = dict(interval=1)
# yapf:disable
evaluation = dict(interval=20)
evaluation = dict(interval=24)
log_config = dict(
interval=50,
hooks=[
......
......@@ -8,7 +8,7 @@ from .samplers import (BaseSampler, CombinedSampler,
InstanceBalancedPosSampler, IoUBalancedNegSampler,
PseudoSampler, RandomSampler, SamplingResult)
from .structures import Box3DMode, CameraInstance3DBoxes, LiDARInstance3DBoxes
from .transforms import bbox3d2roi, boxes3d_to_bev_torch_lidar
from .transforms import bbox3d2result, bbox3d2roi, boxes3d_to_bev_torch_lidar
from .assign_sampling import ( # isort:skip, avoid recursive imports
build_bbox_coder, # temporally settings
......@@ -22,5 +22,5 @@ __all__ = [
'build_bbox_coder', 'DeltaXYZWLHRBBoxCoder', 'boxes3d_to_bev_torch_lidar',
'BboxOverlapsNearest3D', 'BboxOverlaps3D', 'bbox_overlaps_nearest_3d',
'bbox_overlaps_3d', 'Box3DMode', 'LiDARInstance3DBoxes',
'CameraInstance3DBoxes', 'bbox3d2roi'
'CameraInstance3DBoxes', 'bbox3d2roi', 'bbox3d2result'
]
from mmdet.core.bbox import build_bbox_coder
from .delta_xywh_bbox_coder import DeltaXYZWLHRBBoxCoder
from .delta_xyzwhlr_bbox_coder import DeltaXYZWLHRBBoxCoder
__all__ = ['build_bbox_coder', 'DeltaXYZWLHRBBoxCoder']
......@@ -69,3 +69,18 @@ def bbox3d2roi(bbox_list):
rois_list.append(rois)
rois = torch.cat(rois_list, 0)
return rois
def bbox3d2result(bboxes, scores, labels):
    """Pack 3D detection outputs into a result dict on CPU.

    Args:
        bboxes (Tensor): Detected 3D boxes, shape (n, box_dim)
            (presumably box_dim >= 7 for 3D boxes — confirm with caller).
        scores (Tensor): Detection scores, shape (n, ).
        labels (Tensor): Predicted class labels, shape (n, ).

    Returns:
        dict: Keys ``boxes_3d``, ``scores_3d`` and ``labels_3d``,
            each holding the corresponding tensor moved to CPU.
    """
    result = dict()
    result['boxes_3d'] = bboxes.cpu()
    result['scores_3d'] = scores.cpu()
    result['labels_3d'] = labels.cpu()
    return result
from mmdet.core.post_processing import (merge_aug_bboxes, merge_aug_masks,
merge_aug_proposals, merge_aug_scores,
multiclass_nms)
from .box3d_nms import box3d_multiclass_nms
__all__ = [
'multiclass_nms', 'merge_aug_proposals', 'merge_aug_bboxes',
'merge_aug_scores', 'merge_aug_masks'
'merge_aug_scores', 'merge_aug_masks', 'box3d_multiclass_nms'
]
import torch
from mmdet3d.ops.iou3d.iou3d_utils import nms_gpu, nms_normal_gpu
def box3d_multiclass_nms(mlvl_bboxes,
                         mlvl_bboxes_for_nms,
                         mlvl_scores,
                         score_thr,
                         max_num,
                         cfg,
                         mlvl_dir_scores=None):
    """Multi-class NMS for 3D boxes.

    Args:
        mlvl_bboxes (Tensor): Boxes of all levels concatenated,
            shape (N, box_dim).
        mlvl_bboxes_for_nms (Tensor): BEV boxes fed to the NMS op,
            shape (N, 5) — TODO confirm the exact BEV format expected
            by ``nms_gpu`` / ``nms_normal_gpu``.
        mlvl_scores (Tensor): Class scores, shape (N, C + 1); the last
            column is the background class and is never NMS'ed.
        score_thr (float): Per-class score threshold.
        max_num (int): Maximum number of detections kept overall.
        cfg: Config providing ``use_rotate_nms`` (bool) and ``nms_thr``
            (float); only read when at least one box passes ``score_thr``.
        mlvl_dir_scores (Tensor, optional): Direction scores, shape (N, ).

    Returns:
        tuple(Tensor): ``(bboxes, scores, labels, dir_scores)`` after NMS.
    """
    # fg class ids span [0, num_classes - 1]; last score column is background
    num_classes = mlvl_scores.shape[1] - 1
    bboxes = []
    scores = []
    labels = []
    dir_scores = []
    for i in range(0, num_classes):
        # get bboxes and scores of this class
        cls_inds = mlvl_scores[:, i] > score_thr
        if not cls_inds.any():
            continue

        _scores = mlvl_scores[cls_inds, i]
        _bboxes_for_nms = mlvl_bboxes_for_nms[cls_inds, :]

        if cfg.use_rotate_nms:
            nms_func = nms_gpu
        else:
            nms_func = nms_normal_gpu
        selected = nms_func(_bboxes_for_nms, _scores, cfg.nms_thr)

        _mlvl_bboxes = mlvl_bboxes[cls_inds, :]
        bboxes.append(_mlvl_bboxes[selected])
        scores.append(_scores[selected])
        cls_label = mlvl_bboxes.new_full((len(selected), ),
                                         i,
                                         dtype=torch.long)
        labels.append(cls_label)

        if mlvl_dir_scores is not None:
            _mlvl_dir_scores = mlvl_dir_scores[cls_inds]
            dir_scores.append(_mlvl_dir_scores[selected])

    if bboxes:
        bboxes = torch.cat(bboxes, dim=0)
        scores = torch.cat(scores, dim=0)
        labels = torch.cat(labels, dim=0)
        if mlvl_dir_scores is not None:
            dir_scores = torch.cat(dir_scores, dim=0)
        if bboxes.shape[0] > max_num:
            # keep only the max_num highest-scoring detections
            _, inds = scores.sort(descending=True)
            inds = inds[:max_num]
            bboxes = bboxes[inds, :]
            labels = labels[inds]
            scores = scores[inds]
            if mlvl_dir_scores is not None:
                dir_scores = dir_scores[inds]
    else:
        # No detections: return empty tensors shaped/typed consistently with
        # the non-empty path. Fixes: labels used to be (0, C + 1) float
        # instead of (0, ) long, and empty bboxes were built from the wrong
        # source tensor (mlvl_scores).
        bboxes = mlvl_bboxes.new_zeros((0, mlvl_bboxes.size(-1)))
        scores = mlvl_scores.new_zeros((0, ))
        labels = mlvl_scores.new_zeros((0, ), dtype=torch.long)
        dir_scores = mlvl_scores.new_zeros((0, ))
    return bboxes, scores, labels, dir_scores
......@@ -275,12 +275,9 @@ class KittiDataset(torch_data.Dataset):
else:
tmp_dir = None
if not isinstance(outputs[0][0], dict):
sample_idx = [
info['image']['image_idx'] for info in self.kitti_infos
]
if not isinstance(outputs[0], dict):
result_files = self.bbox2result_kitti2d(outputs, self.class_names,
sample_idx, pklfile_prefix,
pklfile_prefix,
submission_prefix)
else:
result_files = self.bbox2result_kitti(outputs, self.class_names,
......@@ -330,6 +327,7 @@ class KittiDataset(torch_data.Dataset):
class_names,
pklfile_prefix=None,
submission_prefix=None):
assert len(net_outputs) == len(self.kitti_infos)
if submission_prefix is not None:
mmcv.mkdir_or_exist(submission_prefix)
......@@ -339,13 +337,11 @@ class KittiDataset(torch_data.Dataset):
mmcv.track_iter_progress(net_outputs)):
annos = []
info = self.kitti_infos[idx]
sample_idx = info['image']['image_idx']
image_shape = info['image']['image_shape'][:2]
for i, box_dict in enumerate(pred_dicts):
num_example = 0
sample_idx = box_dict['sample_idx']
box_dict = self.convert_valid_bboxes(box_dict, info)
if box_dict['bbox'] is not None or box_dict['bbox'].size.numel(
) != 0:
box_dict = self.convert_valid_bboxes(pred_dicts, info)
if len(box_dict['bbox']) > 0:
box_2d_preds = box_dict['bbox']
box_preds = box_dict['box3d_camera']
scores = box_dict['scores']
......@@ -363,11 +359,10 @@ class KittiDataset(torch_data.Dataset):
'rotation_y': [],
'score': []
}
gt_iou = scores * 0
for box, box_lidar, bbox, score, label, cur_gt_iou in zip(
for box, box_lidar, bbox, score, label in zip(
box_preds, box_preds_lidar, box_2d_preds, scores,
label_preds, gt_iou):
label_preds):
bbox[2:] = np.minimum(bbox[2:], image_shape[::-1])
bbox[:2] = np.maximum(bbox[:2], [0, 0])
anno['name'].append(class_names[int(label)])
......@@ -379,12 +374,8 @@ class KittiDataset(torch_data.Dataset):
anno['dimensions'].append(box[3:6])
anno['location'].append(box[:3])
anno['rotation_y'].append(box[6])
# anno["gt_iou"].append(cur_gt_iou)
anno['score'].append(score)
num_example += 1
if num_example != 0:
anno = {k: np.stack(v) for k, v in anno.items()}
annos.append(anno)
......@@ -399,19 +390,16 @@ class KittiDataset(torch_data.Dataset):
print(
'{} -1 -1 {:.4f} {:.4f} {:.4f} {:.4f} '
'{:.4f} {:.4f} {:.4f} '
'{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f}'
.format(anno['name'][idx],
anno['alpha'][idx], bbox[idx][0],
bbox[idx][1], bbox[idx][2],
bbox[idx][3], dims[idx][1],
dims[idx][2], dims[idx][0],
loc[idx][0], loc[idx][1],
loc[idx][2],
anno['rotation_y'][idx],
'{:.4f} {:.4f} {:.4f} {:.4f} {:.4f} {:.4f}'.
format(anno['name'][idx], anno['alpha'][idx],
bbox[idx][0], bbox[idx][1],
bbox[idx][2], bbox[idx][3],
dims[idx][1], dims[idx][2],
dims[idx][0], loc[idx][0], loc[idx][1],
loc[idx][2], anno['rotation_y'][idx],
anno['score'][idx]),
file=f)
if num_example == 0:
else:
annos.append({
'name': np.array([]),
'truncated': np.array([]),
......@@ -424,7 +412,7 @@ class KittiDataset(torch_data.Dataset):
'score': np.array([]),
})
annos[-1]['sample_idx'] = np.array(
[sample_idx] * num_example, dtype=np.int64)
[sample_idx] * len(annos[-1]['score']), dtype=np.int64)
det_annos += annos
......@@ -439,7 +427,6 @@ class KittiDataset(torch_data.Dataset):
def bbox2result_kitti2d(self,
net_outputs,
class_names,
sample_ids,
pklfile_prefix=None,
submission_prefix=None):
"""Convert results to kitti format for evaluation and test submission
......@@ -447,18 +434,16 @@ class KittiDataset(torch_data.Dataset):
Args:
net_outputs (List[array]): list of array storing the bbox and score
class_nanes (List[String]): A list of class names
sample_idx (List[Int]): A list of samples' index,
should have the same length as net_outputs.
pklfile_prefix (str | None): The prefix of pkl file.
submission_prefix (str | None): The prefix of submission file.
Return:
List([dict]): A list of dict have the kitti format
"""
assert len(net_outputs) == len(sample_ids)
assert len(net_outputs) == len(self.kitti_infos)
det_annos = []
print('Converting prediction to KITTI format')
print('\nConverting prediction to KITTI format')
for i, bboxes_per_sample in enumerate(
mmcv.track_iter_progress(net_outputs)):
annos = []
......@@ -472,7 +457,7 @@ class KittiDataset(torch_data.Dataset):
location=[],
rotation_y=[],
score=[])
sample_idx = sample_ids[i]
sample_idx = self.kitti_infos[i]['image']['image_idx']
num_example = 0
for label in range(len(bboxes_per_sample)):
......@@ -526,7 +511,7 @@ class KittiDataset(torch_data.Dataset):
mmcv.mkdir_or_exist(submission_prefix)
print(f'Saving KITTI submission to {submission_prefix}')
for i, anno in enumerate(det_annos):
sample_idx = sample_ids[i]
sample_idx = self.kitti_infos[i]['image']['image_idx']
cur_det_file = f'{submission_prefix}/{sample_idx:06d}.txt'
with open(cur_det_file, 'w') as f:
bbox = anno['bbox']
......@@ -551,9 +536,9 @@ class KittiDataset(torch_data.Dataset):
def convert_valid_bboxes(self, box_dict, info):
# TODO: refactor this function
final_box_preds = box_dict['box3d_lidar']
final_scores = box_dict['scores']
final_labels = box_dict['label_preds']
final_box_preds = box_dict['boxes_3d']
final_scores = box_dict['scores_3d']
final_labels = box_dict['labels_3d']
sample_idx = info['image']['image_idx']
final_box_preds[:, -1] = box_np_ops.limit_period(
final_box_preds[:, -1] - np.pi, offset=0.5, period=np.pi * 2)
......
......@@ -275,15 +275,14 @@ class NuScenesDataset(torch_data.Dataset):
def _format_bbox(self, results, jsonfile_prefix=None):
nusc_annos = {}
mapped_class_names = self.class_names
token2info = {}
for info in self.data_infos:
token2info[info['token']] = info
print('Start to convert detection format...')
for det in mmcv.track_iter_progress(results):
for sample_id, det in enumerate(mmcv.track_iter_progress(results)):
annos = []
boxes = output_to_nusc_box(det[0])
boxes = lidar_nusc_box_to_global(token2info[det[0]['sample_idx']],
boxes, mapped_class_names,
boxes = output_to_nusc_box(det)
sample_token = self.data_infos[sample_id]['token']
boxes = lidar_nusc_box_to_global(self.data_infos[sample_id], boxes,
mapped_class_names,
self.eval_detection_configs,
self.eval_version)
for i, box in enumerate(boxes):
......@@ -310,7 +309,7 @@ class NuScenesDataset(torch_data.Dataset):
attr = NuScenesDataset.DefaultAttribute[name]
nusc_anno = dict(
sample_token=det[0]['sample_idx'],
sample_token=sample_token,
translation=box.center.tolist(),
size=box.wlh.tolist(),
rotation=box.orientation.elements.tolist(),
......@@ -319,7 +318,7 @@ class NuScenesDataset(torch_data.Dataset):
detection_score=box.score,
attribute_name=attr)
annos.append(nusc_anno)
nusc_annos[det[0]['sample_idx']] = annos
nusc_annos[sample_token] = annos
nusc_submissions = {
'meta': self.modality,
'results': nusc_annos,
......@@ -401,7 +400,7 @@ class NuScenesDataset(torch_data.Dataset):
else:
result_files = dict()
for name in results[0]:
print('Formating bboxes of {}'.format(name))
print(f'\nFormating bboxes of {name}')
results_ = [out[name] for out in results]
tmp_file_ = osp.join(jsonfile_prefix, name)
result_files.update(
......@@ -445,9 +444,9 @@ class NuScenesDataset(torch_data.Dataset):
def output_to_nusc_box(detection):
box3d = detection['box3d_lidar'].numpy()
scores = detection['scores'].numpy()
labels = detection['label_preds'].numpy()
box3d = detection['boxes_3d'].numpy()
scores = detection['scores_3d'].numpy()
labels = detection['labels_3d'].numpy()
# TODO: check whether this is necessary
# with dir_offset & dir_limit in the head
box3d[:, 6] = -box3d[:, 6] - np.pi / 2
......
from .anchor_heads import * # noqa: F401,F403
from .backbones import * # noqa: F401,F403
from .builder import (build_backbone, build_detector, build_fusion_layer,
build_head, build_loss, build_middle_encoder, build_neck,
build_roi_extractor, build_shared_head,
build_voxel_encoder)
from .dense_heads import * # noqa: F401,F403
from .detectors import * # noqa: F401,F403
from .fusion_layers import * # noqa: F401,F403
from .losses import * # noqa: F401,F403
......
from .boxvelo_head import Anchor3DVeloHead
from .parta2_rpn_head import PartA2RPNHead
from .second_head import SECONDHead
__all__ = ['Anchor3DVeloHead', 'SECONDHead', 'PartA2RPNHead']
import numpy as np
import torch
from mmcv.cnn import bias_init_with_prob, normal_init
from mmdet3d.core import box_torch_ops, boxes3d_to_bev_torch_lidar
from mmdet3d.ops.iou3d.iou3d_utils import nms_gpu, nms_normal_gpu
from mmdet.models import HEADS
from .second_head import SECONDHead
@HEADS.register_module()
class Anchor3DVeloHead(SECONDHead):
    """Anchor-based head for 3D boxes that carry velocity values.

    Extends ``SECONDHead`` with a 9-dim box code (7 box dims plus 2 extra
    custom/velocity values — TODO confirm against the anchor generator's
    ``custom_values``), a sine-difference rotation encoding that leaves the
    velocity dims untouched, and per-class rotated NMS that decodes the
    direction classifier into the final yaw.

    Args:
        class_names (list[str]): Names of the detected classes.
        num_classes (int): Number of foreground classes.
        in_channels (int): Number of channels in the input feature map.
        train_cfg (dict): Training config.
        test_cfg (dict): Testing config; reads ``nms_pre``, ``score_thr``,
            ``max_per_img``, ``nms_thr`` and ``use_rotate_nms``.
        feat_channels (int): Number of channels of the feature map.
        use_direction_classifier (bool): Whether to predict direction bins.
        encode_bg_as_zeros (bool): Whether background is implicit (sigmoid)
            rather than an extra softmax class.
        box_code_size (int): Size of the encoded box target (9 here).
        anchor_generator (dict): Config of the anchor generator.
        assigner_per_size (bool): Whether to assign per anchor size.
        assign_per_class (bool): Whether to assign per class.
        diff_rad_by_sin (bool): Whether rotation regression uses
            sin(a - b) = sin(a)cos(b) - cos(a)sin(b).
        dir_offset (float): Offset subtracted before limiting the period of
            the decoded rotation.
        dir_limit_offset (float): Offset passed to ``limit_period``.
        bbox_coder (dict): Config of the box coder.
        loss_cls (dict): Config of classification loss.
        loss_bbox (dict): Config of localization loss.
        loss_dir (dict): Config of direction classification loss.
    """  # noqa: W605

    def __init__(self,
                 class_names,
                 num_classes,
                 in_channels,
                 train_cfg,
                 test_cfg,
                 feat_channels=256,
                 use_direction_classifier=True,
                 encode_bg_as_zeros=False,
                 box_code_size=9,
                 anchor_generator=dict(
                     type='Anchor3DRangeGenerator',
                     range=[0, -39.68, -1.78, 69.12, 39.68, -1.78],
                     strides=[2],
                     sizes=[[1.6, 3.9, 1.56]],
                     rotations=[0, 1.57],
                     custom_values=[0, 0],
                     reshape_out=True,
                 ),
                 assigner_per_size=False,
                 assign_per_class=False,
                 diff_rad_by_sin=True,
                 dir_offset=0,
                 dir_limit_offset=1,
                 bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
                 loss_cls=dict(
                     type='CrossEntropyLoss',
                     use_sigmoid=True,
                     loss_weight=1.0),
                 loss_bbox=dict(
                     type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
                 loss_dir=dict(type='CrossEntropyLoss', loss_weight=0.2)):
        super().__init__(class_names, in_channels, train_cfg, test_cfg,
                         feat_channels, use_direction_classifier,
                         encode_bg_as_zeros, box_code_size, anchor_generator,
                         assigner_per_size, assign_per_class, diff_rad_by_sin,
                         dir_offset, dir_limit_offset, bbox_coder, loss_cls,
                         loss_bbox, loss_dir)
        # Override the parent's class count, then rebuild the prediction
        # layers so the classification branch has the right channel count;
        # softmax classification needs an extra background channel.
        self.num_classes = num_classes
        if not self.use_sigmoid_cls:
            self.num_classes += 1
        self._init_layers()

    def init_weights(self):
        """Initialize the prediction convs (bias of cls conv set for prior
        probability 0.01, as is standard for focal-loss-style heads)."""
        bias_cls = bias_init_with_prob(0.01)
        normal_init(self.conv_cls, std=0.01, bias=bias_cls)
        normal_init(self.conv_reg, std=0.01)

    @staticmethod
    def add_sin_difference(boxes1, boxes2):
        """Encode the rotation dims with the sine-difference trick.

        Caution: the 7th dim (index 6) is the rotation; the trailing dims
        (velocity) are passed through unchanged.
        """
        rad_pred_encoding = torch.sin(boxes1[..., 6:7]) * torch.cos(
            boxes2[..., 6:7])
        rad_tg_encoding = torch.cos(boxes1[..., 6:7]) * torch.sin(
            boxes2[..., 6:7])
        boxes1 = torch.cat(
            [boxes1[..., :6], rad_pred_encoding, boxes1[..., 7:]], dim=-1)
        boxes2 = torch.cat(
            [boxes2[..., :6], rad_tg_encoding, boxes2[..., 7:]], dim=-1)
        return boxes1, boxes2

    def get_bboxes_single(self,
                          cls_scores,
                          bbox_preds,
                          dir_cls_preds,
                          mlvl_anchors,
                          input_meta,
                          rescale=False):
        """Decode one sample's multi-level predictions into final boxes.

        Per level: flatten predictions, optionally keep only the top
        ``nms_pre`` anchors by max class score, decode boxes, then run
        multi-class NMS over the concatenated levels.
        """
        assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)
        mlvl_bboxes = []
        mlvl_scores = []
        mlvl_dir_scores = []
        for cls_score, bbox_pred, dir_cls_pred, anchors in zip(
                cls_scores, bbox_preds, dir_cls_preds, mlvl_anchors):
            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
            assert cls_score.size()[-2:] == dir_cls_pred.size()[-2:]
            dir_cls_pred = dir_cls_pred.permute(1, 2, 0).reshape(-1, 2)
            dir_cls_score = torch.max(dir_cls_pred, dim=-1)[1]

            cls_score = cls_score.permute(1, 2,
                                          0).reshape(-1, self.num_classes)
            if self.use_sigmoid_cls:
                scores = cls_score.sigmoid()
            else:
                scores = cls_score.softmax(-1)
            bbox_pred = bbox_pred.permute(1, 2,
                                          0).reshape(-1, self.box_code_size)

            nms_pre = self.test_cfg.get('nms_pre', -1)
            if nms_pre > 0 and scores.shape[0] > nms_pre:
                if self.use_sigmoid_cls:
                    max_scores, _ = scores.max(dim=1)
                else:
                    # last softmax column is background: exclude it
                    max_scores, _ = scores[:, :-1].max(dim=1)
                _, topk_inds = max_scores.topk(nms_pre)
                anchors = anchors[topk_inds, :]
                bbox_pred = bbox_pred[topk_inds, :]
                scores = scores[topk_inds, :]
                dir_cls_score = dir_cls_score[topk_inds]

            bboxes = self.bbox_coder.decode(anchors, bbox_pred)
            mlvl_bboxes.append(bboxes)
            mlvl_scores.append(scores)
            mlvl_dir_scores.append(dir_cls_score)

        mlvl_bboxes = torch.cat(mlvl_bboxes)
        mlvl_bboxes_for_nms = boxes3d_to_bev_torch_lidar(mlvl_bboxes)
        mlvl_scores = torch.cat(mlvl_scores)
        mlvl_dir_scores = torch.cat(mlvl_dir_scores)

        if self.use_sigmoid_cls:
            # Add a dummy background class to the front when using sigmoid
            padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
            mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)

        score_thr = self.test_cfg.get('score_thr', 0)
        result = self.multiclass_nms(mlvl_bboxes, mlvl_bboxes_for_nms,
                                     mlvl_scores, mlvl_dir_scores, score_thr,
                                     self.test_cfg.max_per_img)
        result.update(dict(sample_idx=input_meta['sample_idx']))
        return result

    def multiclass_nms(self, mlvl_bboxes, mlvl_bboxes_for_nms, mlvl_scores,
                       mlvl_dir_scores, score_thr, max_num):
        """Per-class NMS; also folds the direction bin into the final yaw.

        NOTE(review): returns dict keys ``box3d_lidar`` / ``scores`` /
        ``label_preds`` — newer consumers appear to expect
        ``boxes_3d`` / ``scores_3d`` / ``labels_3d``; confirm callers.
        """
        # fg class ids span [0, num_classes - 1]; last column is background
        num_classes = mlvl_scores.shape[1] - 1
        bboxes = []
        scores = []
        labels = []
        dir_scores = []
        for i in range(0, num_classes):
            # get bboxes and scores of this class
            cls_inds = mlvl_scores[:, i] > score_thr
            if not cls_inds.any():
                continue
            _scores = mlvl_scores[cls_inds, i]
            _bboxes_for_nms = mlvl_bboxes_for_nms[cls_inds, :]
            if self.test_cfg.use_rotate_nms:
                nms_func = nms_gpu
            else:
                nms_func = nms_normal_gpu
            selected = nms_func(_bboxes_for_nms, _scores,
                                self.test_cfg.nms_thr)
            _mlvl_bboxes = mlvl_bboxes[cls_inds, :]
            _mlvl_dir_scores = mlvl_dir_scores[cls_inds]
            if len(selected) > 0:
                bboxes.append(_mlvl_bboxes[selected])
                scores.append(_scores[selected])
                dir_scores.append(_mlvl_dir_scores[selected])
                # Snap yaw to its period, then add pi for boxes whose
                # direction classifier picked the opposite bin.
                dir_rot = box_torch_ops.limit_period(
                    bboxes[-1][..., 6] - self.dir_offset,
                    self.dir_limit_offset, np.pi)
                bboxes[-1][..., 6] = (
                    dir_rot + self.dir_offset +
                    np.pi * dir_scores[-1].to(bboxes[-1].dtype))
                cls_label = mlvl_bboxes.new_full((len(selected), ),
                                                 i,
                                                 dtype=torch.long)
                labels.append(cls_label)

        if bboxes:
            bboxes = torch.cat(bboxes, dim=0)
            scores = torch.cat(scores, dim=0)
            labels = torch.cat(labels, dim=0)
            dir_scores = torch.cat(dir_scores, dim=0)
            if bboxes.shape[0] > max_num:
                # keep only the max_num highest-scoring detections
                _, inds = scores.sort(descending=True)
                inds = inds[:max_num]
                bboxes = bboxes[inds, :]
                labels = labels[inds]
                scores = scores[inds]
                dir_scores = dir_scores[inds]
            return dict(
                box3d_lidar=bboxes.cpu(),
                scores=scores.cpu(),
                label_preds=labels.cpu(),
            )
        else:
            # Empty result: shapes/dtypes must match the non-empty path.
            # Fix: label_preds used to be new_zeros([0, 4]) (2-D float)
            # instead of a 1-D long tensor.
            return dict(
                box3d_lidar=mlvl_bboxes.new_zeros(
                    [0, self.box_code_size]).cpu(),
                scores=mlvl_bboxes.new_zeros([0]).cpu(),
                label_preds=mlvl_bboxes.new_zeros(
                    [0], dtype=torch.long).cpu(),
            )
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment