import mmcv
import numpy as np
import torch


def bbox2delta(proposals, gt, means=[0, 0, 0, 0], stds=[1, 1, 1, 1]):
    """Encode ground-truth boxes as deltas w.r.t. proposals (inverse of delta2bbox)."""
    assert proposals.size() == gt.size()

    proposals = proposals.float()
    gt = gt.float()
    px = (proposals[..., 0] + proposals[..., 2]) * 0.5
    py = (proposals[..., 1] + proposals[..., 3]) * 0.5
    pw = proposals[..., 2] - proposals[..., 0]
    ph = proposals[..., 3] - proposals[..., 1]

    gx = (gt[..., 0] + gt[..., 2]) * 0.5
    gy = (gt[..., 1] + gt[..., 3]) * 0.5
    gw = gt[..., 2] - gt[..., 0]
    gh = gt[..., 3] - gt[..., 1]

    dx = (gx - px) / pw
    dy = (gy - py) / ph
    dw = torch.log(gw / pw)
    dh = torch.log(gh / ph)
    deltas = torch.stack([dx, dy, dw, dh], dim=-1)

    means = deltas.new_tensor(means).unsqueeze(0)
    stds = deltas.new_tensor(stds).unsqueeze(0)
    deltas = deltas.sub_(means).div_(stds)

    return deltas


def delta2bbox(rois,
               deltas,
               means=[0, 0, 0, 0],
               stds=[1, 1, 1, 1],
               max_shape=None,
               wh_ratio_clip=16 / 1000):
    """Apply deltas to shift/scale base boxes.

    Typically the rois are anchor or proposed bounding boxes and the deltas
    are network outputs used to shift/scale those boxes.

    Args:
        rois (Tensor): Boxes to be transformed. Has shape (N, 4).
        deltas (Tensor): Encoded offsets with respect to each roi.
            Has shape (N, 4). Note N = num_anchors * W * H when rois is a
            grid of anchors. Offset encoding follows [1]_.
        means (list): Denormalizing means for delta coordinates.
        stds (list): Denormalizing standard deviations for delta coordinates.
        max_shape (tuple[int, int]): Maximum bounds for boxes, specified as
            (H, W).
        wh_ratio_clip (float): Maximum aspect ratio for boxes.

    Returns:
        Tensor: Boxes with shape (N, 4), where columns represent
            tl_x, tl_y, br_x, br_y.

    References:
        .. [1] https://arxiv.org/abs/1311.2524

    Example:
        >>> rois = torch.Tensor([[ 0.,  0.,  1.,  1.],
        >>>                      [ 0.,  0.,  1.,  1.],
        >>>                      [ 0.,  0.,  1.,  1.],
        >>>                      [ 5.,  5.,  5.,  5.]])
        >>> deltas = torch.Tensor([[  0.,   0.,   0.,   0.],
        >>>                        [  1.,   1.,   1.,   1.],
        >>>                        [  0.,   0.,   2.,  -1.],
        >>>                        [ 0.7, -1.9, -0.5,  0.3]])
        >>> delta2bbox(rois, deltas, max_shape=(32, 32))
        tensor([[0.0000, 0.0000, 1.0000, 1.0000],
                [0.2817, 0.2817, 4.7183, 4.7183],
                [0.0000, 0.6321, 7.3891, 0.3679],
                [5.8967, 2.9251, 5.5033, 3.2749]])
    """
    means = deltas.new_tensor(means).repeat(1, deltas.size(1) // 4)
    stds = deltas.new_tensor(stds).repeat(1, deltas.size(1) // 4)
    denorm_deltas = deltas * stds + means
    dx = denorm_deltas[:, 0::4]
    dy = denorm_deltas[:, 1::4]
    dw = denorm_deltas[:, 2::4]
    dh = denorm_deltas[:, 3::4]
    max_ratio = np.abs(np.log(wh_ratio_clip))
    dw = dw.clamp(min=-max_ratio, max=max_ratio)
    dh = dh.clamp(min=-max_ratio, max=max_ratio)
    # Compute center of each roi
    px = ((rois[:, 0] + rois[:, 2]) * 0.5).unsqueeze(1).expand_as(dx)
    py = ((rois[:, 1] + rois[:, 3]) * 0.5).unsqueeze(1).expand_as(dy)
    # Compute width/height of each roi
    pw = (rois[:, 2] - rois[:, 0]).unsqueeze(1).expand_as(dw)
    ph = (rois[:, 3] - rois[:, 1]).unsqueeze(1).expand_as(dh)
    # Use exp(network energy) to enlarge/shrink each roi
    gw = pw * dw.exp()
    gh = ph * dh.exp()
    # Use network energy to shift the center of each roi
    gx = px + pw * dx
    gy = py + ph * dy
    # Convert center-xy/width/height to top-left, bottom-right
    x1 = gx - gw * 0.5
    y1 = gy - gh * 0.5
    x2 = gx + gw * 0.5
    y2 = gy + gh * 0.5
    if max_shape is not None:
        x1 = x1.clamp(min=0, max=max_shape[1])
        y1 = y1.clamp(min=0, max=max_shape[0])
        x2 = x2.clamp(min=0, max=max_shape[1])
        y2 = y2.clamp(min=0, max=max_shape[0])
    bboxes = torch.stack([x1, y1, x2, y2], dim=-1).view_as(deltas)
    return bboxes
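
# Illustrative sketch (not part of the original module): bbox2delta and
# delta2bbox are inverse transforms, so encoding ground-truth boxes against
# proposals and decoding the resulting deltas with the same means/stds
# recovers the ground truth (up to wh_ratio_clip and max_shape clipping).
# The helper below is hypothetical and uses made-up tensor values.
def _demo_delta_roundtrip():
    proposals = torch.Tensor([[0., 0., 10., 10.],
                              [5., 5., 20., 30.]])
    gt = torch.Tensor([[1., 1., 11., 12.],
                       [4., 6., 22., 28.]])
    # Encode gt boxes as regression targets relative to the proposals ...
    deltas = bbox2delta(proposals, gt)
    # ... and decode them back onto the same proposals.
    decoded = delta2bbox(proposals, deltas)
    assert torch.allclose(decoded, gt, atol=1e-4)
    return deltas, decoded
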
def bbox_flip(bboxes, img_shape):
    """Flip bboxes horizontally.

    Args:
        bboxes (Tensor or ndarray): Shape (..., 4*k).
        img_shape (tuple): Image shape.

    Returns:
        Same type as `bboxes`: Flipped bboxes.
    """
    if isinstance(bboxes, torch.Tensor):
        assert bboxes.shape[-1] % 4 == 0
        flipped = bboxes.clone()
        flipped[:, 0::4] = img_shape[1] - bboxes[:, 2::4]
        flipped[:, 2::4] = img_shape[1] - bboxes[:, 0::4]
        return flipped
    elif isinstance(bboxes, np.ndarray):
        return mmcv.bbox_flip(bboxes, img_shape)


def bbox_mapping(bboxes, img_shape, scale_factor, flip):
    """Map bboxes from the original image scale to the testing scale."""
    new_bboxes = bboxes * scale_factor
    if flip:
        new_bboxes = bbox_flip(new_bboxes, img_shape)
    return new_bboxes


def bbox_mapping_back(bboxes, img_shape, scale_factor, flip):
    """Map bboxes from the testing scale back to the original image scale."""
    new_bboxes = bbox_flip(bboxes, img_shape) if flip else bboxes
    new_bboxes = new_bboxes / scale_factor
    return new_bboxes


def bbox2roi(bbox_list):
    """Convert a list of bboxes to roi format.

    Args:
        bbox_list (list[Tensor]): a list of bboxes corresponding to a batch
            of images.

    Returns:
        Tensor: shape (n, 5), [batch_ind, x1, y1, x2, y2]
    """
    rois_list = []
    for img_id, bboxes in enumerate(bbox_list):
        if bboxes.size(0) > 0:
            img_inds = bboxes.new_full((bboxes.size(0), 1), img_id)
            rois = torch.cat([img_inds, bboxes[:, :4]], dim=-1)
        else:
            rois = bboxes.new_zeros((0, 5))
        rois_list.append(rois)
    rois = torch.cat(rois_list, 0)
    return rois


def roi2bbox(rois):
    """Split rois back into a list of per-image bboxes (inverse of bbox2roi)."""
    bbox_list = []
    img_ids = torch.unique(rois[:, 0].cpu(), sorted=True)
    for img_id in img_ids:
        inds = (rois[:, 0] == img_id.item())
        bbox = rois[inds, 1:]
        bbox_list.append(bbox)
    return bbox_list


def bbox2result_coco(bboxes, labels, num_classes):
    """Convert detection results to a list of numpy arrays.

    Args:
        bboxes (Tensor): shape (n, 5)
        labels (Tensor): shape (n, )
        num_classes (int): class number, including background class

    Returns:
        list(ndarray): bbox results of each class
    """
    if bboxes.shape[0] == 0:
        return [
            np.zeros((0, 5), dtype=np.float32) for i in range(num_classes)
        ]
    else:
        bboxes = bboxes.cpu().numpy()
        labels = labels.cpu().numpy()
        return [bboxes[labels == i, :] for i in range(num_classes)]
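
# Illustrative sketch (not part of the original module): bbox2roi concatenates
# per-image boxes into a single (n, 5) tensor whose first column is the batch
# index, and roi2bbox undoes that split. The helper below is hypothetical and
# only demonstrates this round trip with made-up boxes for a batch of two images.
def _demo_roi_roundtrip():
    bbox_list = [
        torch.Tensor([[0., 0., 10., 10.],
                      [2., 3., 8., 9.]]),  # image 0: two boxes
        torch.Tensor([[1., 1., 5., 6.]]),  # image 1: one box
    ]
    rois = bbox2roi(bbox_list)     # shape (3, 5): [batch_ind, x1, y1, x2, y2]
    recovered = roi2bbox(rois)     # list of per-image (n_i, 4) tensors
    assert len(recovered) == len(bbox_list)
    assert all(torch.equal(a, b) for a, b in zip(recovered, bbox_list))
    return rois, recovered
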
""" x1 = points[:, 0] - distance[:, 0] y1 = points[:, 1] - distance[:, 1] x2 = points[:, 0] + distance[:, 2] y2 = points[:, 1] + distance[:, 3] if max_shape is not None: x1 = x1.clamp(min=0, max=max_shape[1]) y1 = y1.clamp(min=0, max=max_shape[0]) x2 = x2.clamp(min=0, max=max_shape[1]) y2 = y2.clamp(min=0, max=max_shape[0]) return torch.stack([x1, y1, x2, y2], -1) def transform_lidar_to_cam(boxes_lidar): """ Only transform format, not exactly in camera coords :param boxes_lidar: (N, 3 or 7) [x, y, z, w, l, h, ry] in LiDAR coords :return: boxes_cam: (N, 3 or 7) [x, y, z, h, w, l, ry] in camera coords """ # boxes_cam = boxes_lidar.new_tensor(boxes_lidar.data) boxes_cam = boxes_lidar.clone().detach() boxes_cam[:, 0] = -boxes_lidar[:, 1] boxes_cam[:, 1] = -boxes_lidar[:, 2] boxes_cam[:, 2] = boxes_lidar[:, 0] if boxes_cam.shape[1] > 3: boxes_cam[:, [3, 4, 5]] = boxes_lidar[:, [5, 3, 4]] return boxes_cam def boxes3d_to_bev_torch(boxes3d): """ :param boxes3d: (N, 7) [x, y, z, h, w, l, ry] in camera coords :return: boxes_bev: (N, 5) [x1, y1, x2, y2, ry] """ boxes_bev = boxes3d.new(torch.Size((boxes3d.shape[0], 5))) cu, cv = boxes3d[:, 0], boxes3d[:, 2] half_l, half_w = boxes3d[:, 5] / 2, boxes3d[:, 4] / 2 boxes_bev[:, 0], boxes_bev[:, 1] = cu - half_l, cv - half_w boxes_bev[:, 2], boxes_bev[:, 3] = cu + half_l, cv + half_w boxes_bev[:, 4] = boxes3d[:, 6] return boxes_bev def boxes3d_to_bev_torch_lidar(boxes3d): """ :param boxes3d: (N, 7) [x, y, z, w, l, h, ry] in LiDAR coords :return: boxes_bev: (N, 5) [x1, y1, x2, y2, ry] """ boxes_bev = boxes3d.new(torch.Size((boxes3d.shape[0], 5))) cu, cv = boxes3d[:, 0], boxes3d[:, 1] half_l, half_w = boxes3d[:, 4] / 2, boxes3d[:, 3] / 2 boxes_bev[:, 0], boxes_bev[:, 1] = cu - half_w, cv - half_l boxes_bev[:, 2], boxes_bev[:, 3] = cu + half_w, cv + half_l boxes_bev[:, 4] = boxes3d[:, 6] return boxes_bev