import numpy as np
import torch
from mmcv.cnn import normal_init

from mmdet3d.core import box_torch_ops, boxes3d_to_bev_torch_lidar
from mmdet3d.ops.iou3d.iou3d_utils import nms_gpu, nms_normal_gpu
from ..registry import HEADS
from ..utils import bias_init_with_prob
from .second_head import SECONDHead


@HEADS.register_module
class Anchor3DVeloHead(SECONDHead):
    """Anchor-based head for 3D anchor with velocity

    Every constructor argument except ``num_classes`` is forwarded
    positionally, in declaration order, to ``SECONDHead.__init__``; refer to
    that class for their exact semantics.

    Args:
        class_names: Forwarded to ``SECONDHead``.
        num_classes (int): Number of classes. Stored on ``self.num_classes``
            and incremented by one when ``self.use_sigmoid_cls`` is false.
        in_channels (int): Number of channels in the input feature map.
        train_cfg: Forwarded to ``SECONDHead``.
        test_cfg: Forwarded to ``SECONDHead``. At test time this head reads
            ``nms_pre``, ``score_thr``, ``max_per_img``, ``use_rotate_nms``
            and ``nms_thr`` from ``self.test_cfg``.
        cache_anchor (bool): Forwarded to ``SECONDHead``.
        feat_channels (int): Number of channels of the feature map.
        use_direction_classifier (bool): Forwarded to ``SECONDHead``.
        encode_bg_as_zeros (bool): Forwarded to ``SECONDHead``.
        box_code_size (int): Forwarded to ``SECONDHead``. This head reshapes
            the regression output to ``self.box_code_size`` values per
            anchor; the default is 9.
        anchor_generator (dict): Forwarded to ``SECONDHead``.
        anchor_range (list): Forwarded to ``SECONDHead``.
        anchor_strides (Iterable): Anchor strides.
        anchor_sizes (list): Forwarded to ``SECONDHead``.
        anchor_rotations (list): Forwarded to ``SECONDHead``.
        anchor_custom_values (list): Forwarded to ``SECONDHead``.
        assigner_per_size (bool): Forwarded to ``SECONDHead``.
        assign_per_class (bool): Forwarded to ``SECONDHead``.
        diff_rad_by_sin (bool): Forwarded to ``SECONDHead``.
        dir_offset: Forwarded to ``SECONDHead``. This head reads
            ``self.dir_offset`` when restoring the box yaw after NMS.
        dir_limit_offset: Forwarded to ``SECONDHead``. This head reads
            ``self.dir_limit_offset`` when restoring the box yaw after NMS.
        target_means (Iterable): Mean values of regression targets.
        target_stds (Iterable): Std values of regression targets.
        bbox_coder (dict): Forwarded to ``SECONDHead``.
        loss_cls (dict): Config of classification loss.
        loss_bbox (dict): Config of localization loss.
        loss_dir (dict): Forwarded to ``SECONDHead``.
    """  # noqa: W605

    def __init__(self,
                 class_names,
                 num_classes,
                 in_channels,
                 train_cfg,
                 test_cfg,
                 cache_anchor=False,
                 feat_channels=256,
                 use_direction_classifier=True,
                 encode_bg_as_zeros=False,
                 box_code_size=9,
                 anchor_generator=dict(type='AnchorGeneratorRange', ),
                 anchor_range=[0, -39.68, -1.78, 69.12, 39.68, -1.78],
                 anchor_strides=[2],
                 anchor_sizes=[[1.6, 3.9, 1.56]],
                 anchor_rotations=[0, 1.57],
                 anchor_custom_values=[0, 0],
                 assigner_per_size=False,
                 assign_per_class=False,
                 diff_rad_by_sin=True,
                 dir_offset=0,
                 dir_limit_offset=1,
                 target_means=(.0, .0, .0, .0),
                 target_stds=(1.0, 1.0, 1.0, 1.0),
                 bbox_coder=dict(type='ResidualCoder', ),
                 loss_cls=dict(
                     type='CrossEntropyLoss',
                     use_sigmoid=True,
                     loss_weight=1.0),
                 loss_bbox=dict(
                     type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
                 loss_dir=dict(type='CrossEntropyLoss', loss_weight=0.2)):
        # NOTE: the list/dict defaults above are shared between calls. This
        # constructor only forwards them and never modifies them itself.
        super().__init__(class_names, in_channels, train_cfg, test_cfg,
                         cache_anchor, feat_channels,
                         use_direction_classifier, encode_bg_as_zeros,
                         box_code_size, anchor_generator, anchor_range,
                         anchor_strides, anchor_sizes, anchor_rotations,
                         anchor_custom_values, assigner_per_size,
                         assign_per_class, diff_rad_by_sin, dir_offset,
                         dir_limit_offset, target_means, target_stds,
                         bbox_coder, loss_cls, loss_bbox, loss_dir)
        self.num_classes = num_classes
        # build head layers & losses
        if not self.use_sigmoid_cls:
            # Reserve one extra output channel for the background class.
            self.num_classes += 1
        # Layers are (re)built after ``num_classes`` has been overridden.
        self._init_layers()

    def init_weights(self):
        """Initialize the classification and regression conv layers.

        Both layers use a normal initialization with ``std=0.01``. The
        classification bias is set to ``bias_init_with_prob(0.01)``.
        """
        bias_cls = bias_init_with_prob(0.01)
        normal_init(self.conv_cls, std=0.01, bias=bias_cls)
        normal_init(self.conv_reg, std=0.01)

    @staticmethod
    def add_sin_difference(boxes1, boxes2):
        """Encode the rotation channel so that its difference is a sine.

        Channel 6 of ``boxes1`` becomes ``sin(r1) * cos(r2)`` and channel 6
        of ``boxes2`` becomes ``cos(r1) * sin(r2)``, so subtracting the two
        encoded channels yields ``sin(r1 - r2)``. All other channels,
        including everything after the rotation, are passed through.

        Args:
            boxes1 (torch.Tensor): Boxes whose last dimension has the
                rotation at index 6.
            boxes2 (torch.Tensor): Boxes with the same layout as ``boxes1``.

        Returns:
            tuple[torch.Tensor, torch.Tensor]: The re-encoded ``boxes1`` and
            ``boxes2``.
        """
        # Caution: the 7th dim is the rotation, (last dim without velo)
        rad_pred_encoding = torch.sin(boxes1[..., 6:7]) * torch.cos(
            boxes2[..., 6:7])
        rad_tg_encoding = torch.cos(boxes1[..., 6:7]) * torch.sin(
            boxes2[..., 6:7])
        boxes1 = torch.cat(
            [boxes1[..., :6], rad_pred_encoding, boxes1[..., 7:]], dim=-1)
        boxes2 = torch.cat(
            [boxes2[..., :6], rad_tg_encoding, boxes2[..., 7:]], dim=-1)
        return boxes1, boxes2

    def get_bboxes_single(self,
                          cls_scores,
                          bbox_preds,
                          dir_cls_preds,
                          mlvl_anchors,
                          input_meta,
                          rescale=False):
        """Decode the multi-level predictions of one sample and run NMS.

        Args:
            cls_scores (list[torch.Tensor]): Per-level classification maps,
                each permuted with ``(1, 2, 0)`` and flattened to
                ``(-1, self.num_classes)``.
            bbox_preds (list[torch.Tensor]): Per-level regression maps, each
                flattened to ``(-1, self.box_code_size)``.
            dir_cls_preds (list[torch.Tensor]): Per-level direction maps,
                each flattened to ``(-1, 2)``.
            mlvl_anchors (list[torch.Tensor]): Per-level anchors, indexed
                row-wise in the same order as the flattened predictions.
            input_meta (dict): Must contain ``'sample_idx'``.
            rescale (bool): Unused by this method.

        Returns:
            dict: The result of :meth:`multiclass_nms` (``box3d_lidar``,
            ``scores``, ``label_preds``) plus ``sample_idx``.
        """
        assert len(cls_scores) == len(bbox_preds) == len(mlvl_anchors)
        nms_pre = self.test_cfg.get('nms_pre', -1)
        mlvl_bboxes = []
        mlvl_scores = []
        mlvl_dir_scores = []
        for cls_score, bbox_pred, dir_cls_pred, anchors in zip(
                cls_scores, bbox_preds, dir_cls_preds, mlvl_anchors):
            assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
            assert cls_score.size()[-2:] == dir_cls_pred.size()[-2:]
            dir_cls_pred = dir_cls_pred.permute(1, 2, 0).reshape(-1, 2)
            # Index (0 or 1) of the more likely direction bin.
            dir_cls_score = torch.max(dir_cls_pred, dim=-1)[1]

            cls_score = cls_score.permute(1, 2,
                                          0).reshape(-1, self.num_classes)
            if self.use_sigmoid_cls:
                scores = cls_score.sigmoid()
            else:
                scores = cls_score.softmax(-1)
            bbox_pred = bbox_pred.permute(1, 2,
                                          0).reshape(-1, self.box_code_size)

            if nms_pre > 0 and scores.shape[0] > nms_pre:
                # Keep only the ``nms_pre`` highest-scoring anchors of this
                # level. With softmax the last column is background and is
                # excluded from the ranking.
                if self.use_sigmoid_cls:
                    max_scores, _ = scores.max(dim=1)
                else:
                    max_scores, _ = scores[:, :-1].max(dim=1)
                _, topk_inds = max_scores.topk(nms_pre)
                anchors = anchors[topk_inds, :]
                bbox_pred = bbox_pred[topk_inds, :]
                scores = scores[topk_inds, :]
                dir_cls_score = dir_cls_score[topk_inds]

            bboxes = self.bbox_coder.decode_torch(anchors, bbox_pred,
                                                  self.target_means,
                                                  self.target_stds)
            mlvl_bboxes.append(bboxes)
            mlvl_scores.append(scores)
            mlvl_dir_scores.append(dir_cls_score)

        mlvl_bboxes = torch.cat(mlvl_bboxes)
        mlvl_bboxes_for_nms = boxes3d_to_bev_torch_lidar(mlvl_bboxes)
        mlvl_scores = torch.cat(mlvl_scores)
        mlvl_dir_scores = torch.cat(mlvl_dir_scores)

        if self.use_sigmoid_cls:
            # Append a dummy background class as the LAST column when using
            # sigmoid, so that ``multiclass_nms`` can treat the final column
            # as background in both the sigmoid and the softmax case.
            padding = mlvl_scores.new_zeros(mlvl_scores.shape[0], 1)
            mlvl_scores = torch.cat([mlvl_scores, padding], dim=1)

        score_thr = self.test_cfg.get('score_thr', 0)
        result = self.multiclass_nms(mlvl_bboxes, mlvl_bboxes_for_nms,
                                     mlvl_scores, mlvl_dir_scores, score_thr,
                                     self.test_cfg.max_per_img)
        result.update(dict(sample_idx=input_meta['sample_idx']))
        return result

    def multiclass_nms(self, mlvl_bboxes, mlvl_bboxes_for_nms, mlvl_scores,
                       mlvl_dir_scores, score_thr, max_num):
        """Run class-wise NMS and restore the yaw of the kept boxes.

        Args:
            mlvl_bboxes (torch.Tensor): Decoded boxes, one row per
                candidate, with the rotation at index 6.
            mlvl_bboxes_for_nms (torch.Tensor): The representation of
                ``mlvl_bboxes`` that is handed to the NMS op.
            mlvl_scores (torch.Tensor): Scores of shape
                ``(num_candidates, num_fg_classes + 1)``; the last column is
                background and is never evaluated.
            mlvl_dir_scores (torch.Tensor): Direction bin index per
                candidate.
            score_thr (float): Candidates must score strictly above this
                value to take part in the NMS of a class.
            max_num (int): Maximum number of boxes to return.

        Returns:
            dict: CPU tensors ``box3d_lidar`` (``(k, box_code_size)``),
            ``scores`` (``(k,)``) and ``label_preds`` (``(k,)``,
            ``torch.long``). ``k`` is 0 when no box survives.
        """
        # do multi class nms
        # the fg class id range: [0, num_classes-1]
        num_classes = mlvl_scores.shape[1] - 1
        bboxes = []
        scores = []
        labels = []
        for i in range(num_classes):
            # get bboxes and scores of this class
            cls_inds = mlvl_scores[:, i] > score_thr
            if not cls_inds.any():
                continue
            _scores = mlvl_scores[cls_inds, i]
            _bboxes_for_nms = mlvl_bboxes_for_nms[cls_inds, :]
            # Looked up lazily so the config is only required when at least
            # one class actually reaches NMS.
            nms_func = (
                nms_gpu if self.test_cfg.use_rotate_nms else nms_normal_gpu)
            selected = nms_func(_bboxes_for_nms, _scores,
                                self.test_cfg.nms_thr)
            if len(selected) == 0:
                continue

            # Indexing with ``selected`` yields a copy, so the in-place yaw
            # update below does not touch ``mlvl_bboxes``.
            cls_bboxes = mlvl_bboxes[cls_inds, :][selected]
            cls_dir_scores = mlvl_dir_scores[cls_inds][selected]
            dir_rot = box_torch_ops.limit_period(
                cls_bboxes[..., 6] - self.dir_offset, self.dir_limit_offset,
                np.pi)
            # Add pi to the yaw of boxes whose direction bin is 1.
            cls_bboxes[..., 6] = (
                dir_rot + self.dir_offset +
                np.pi * cls_dir_scores.to(cls_bboxes.dtype))

            bboxes.append(cls_bboxes)
            scores.append(_scores[selected])
            labels.append(
                mlvl_bboxes.new_full((len(selected), ), i, dtype=torch.long))

        if not bboxes:
            # Same schema as the non-empty case: labels are a 1-D long
            # tensor (previously a float tensor of shape ``[0, 4]``).
            return dict(
                box3d_lidar=mlvl_bboxes.new_zeros([0,
                                                   self.box_code_size]).cpu(),
                scores=mlvl_bboxes.new_zeros([0]).cpu(),
                label_preds=mlvl_bboxes.new_zeros([0],
                                                  dtype=torch.long).cpu(),
            )

        bboxes = torch.cat(bboxes, dim=0)
        scores = torch.cat(scores, dim=0)
        labels = torch.cat(labels, dim=0)
        if bboxes.shape[0] > max_num:
            # Keep the ``max_num`` highest-scoring boxes across all classes.
            _, inds = scores.sort(descending=True)
            inds = inds[:max_num]
            bboxes = bboxes[inds, :]
            labels = labels[inds]
            scores = scores[inds]
        return dict(
            box3d_lidar=bboxes.cpu(),
            scores=scores.cpu(),
            label_preds=labels.cpu(),
        )