import copy

import numpy as np
import shapely.affinity
import shapely.geometry
import torch


def bbox_camera2lidar(bboxes, tr_velo_to_cam, r0_rect):
    """Map 3D boxes through the inverse of ``r0_rect @ tr_velo_to_cam`` (NumPy).

    bboxes: np.ndarray, shape=(N, 7); columns 0-2 are the position,
        columns 3-5 the sizes and columns 6+ are passed through unchanged
    tr_velo_to_cam: shape=(4, 4)
    r0_rect: shape=(4, 4)
    return: np.float32 array, shape=(N, 7)
    """
    x_size, y_size, z_size = bboxes[:, 3:4], bboxes[:, 4:5], bboxes[:, 5:6]
    # The size columns are re-ordered to (z, x, y) in the output.
    xyz_size = np.concatenate([z_size, x_size, y_size], axis=1)
    # Homogeneous coordinates so the 4x4 transform can be applied directly.
    extended_xyz = np.pad(
        bboxes[:, :3], ((0, 0), (0, 1)), 'constant', constant_values=1.0)
    rt_mat = np.linalg.inv(r0_rect @ tr_velo_to_cam)
    xyz = extended_xyz @ rt_mat.T
    bboxes_lidar = np.concatenate(
        [xyz[:, :3], xyz_size, bboxes[:, 6:]], axis=1)
    return np.array(bboxes_lidar, dtype=np.float32)


def bbox_lidar2camera(bboxes, tr_velo_to_cam, r0_rect):
    """Map 3D boxes through ``r0_rect @ tr_velo_to_cam`` (torch).

    bboxes: torch.Tensor, shape=(N, 7); columns 0-2 are the position,
        columns 3-5 the sizes and columns 6+ are passed through unchanged
    tr_velo_to_cam: shape=(4, 4)
    r0_rect: shape=(4, 4)
    return: shape=(N, 7)
    """
    x_size, y_size, z_size = bboxes[:, 3:4], bboxes[:, 4:5], bboxes[:, 5:6]
    # The size columns are re-ordered to (y, z, x) in the output.
    xyz_size = torch.cat([y_size, z_size, x_size], dim=1)
    # Homogeneous coordinates so the 4x4 transform can be applied directly.
    extended_xyz = torch.nn.functional.pad(
        bboxes[:, :3], (0, 1), 'constant', value=1.0)
    rt_mat = r0_rect @ tr_velo_to_cam
    xyz = extended_xyz @ rt_mat.T
    bboxes_camera = torch.cat([xyz[:, :3], xyz_size, bboxes[:, 6:]], dim=1)
    return bboxes_camera


def bbox3d2corners_camera(bboxes):
    """Compute the 8 corners of each 3D box given in camera coordinates.

    bboxes: shape=(n, 7)
    return: shape=(n, 8, 3)

            z (front)            6 ------ 5
            /                  / |      / |
           /                  2 -|---- 1  |
          /                   |  |     |  |
        |o ------> x(right)   |  7 ----|- 4
        |                     | /   o  | /
        |                     3 ------ 0
        |
        v y(down)

    The box position is the centre of the y = 0 face of the corner template
    (the template spans y in [-1, 0] before scaling by the size).
    """
    centers, dims, angles = bboxes[:, :3], bboxes[:, 3:6], bboxes[:, 6]

    # 1. generate bbox corner coordinates, clockwise from minimal point.
    # The template is created on the same device as the boxes so that GPU
    # inputs do not hit a device mismatch.
    corner_template = torch.tensor(
        [[0.5, 0.0, -0.5], [0.5, -1.0, -0.5],
         [-0.5, -1.0, -0.5], [-0.5, 0.0, -0.5],
         [0.5, 0.0, 0.5], [0.5, -1.0, 0.5],
         [-0.5, -1.0, 0.5], [-0.5, 0.0, 0.5]],
        device=dims.device)
    # (1, 8, 3) * (n, 1, 3) -> (n, 8, 3)
    bboxes_corners = corner_template[None, :, :] * dims[:, None, :]

    # 2. rotate around y axis
    rot_sin, rot_cos = torch.sin(angles), torch.cos(angles)
    zeros = torch.zeros_like(rot_cos)
    ones = torch.ones_like(rot_cos)
    rot_mat = torch.stack([
        torch.stack([rot_cos, zeros, rot_sin]),
        torch.stack([zeros, ones, zeros]),
        torch.stack([-rot_sin, zeros, rot_cos]),
    ])  # (3, 3, n)
    # Moving the batch axis first also transposes each 3x3 matrix.
    rot_mat = torch.permute(rot_mat, (2, 1, 0))  # (n, 3, 3)
    bboxes_corners = bboxes_corners @ rot_mat  # (n, 8, 3)

    # 3. translate to centers
    bboxes_corners += centers[:, None, :]
    return bboxes_corners.clone().detach()


def points_camera2image(points, P2):
    """Project camera-frame points with the matrix ``P2``.

    points: shape=(N, 8, 3)
    P2: shape=(4, 4)
    return: shape=(N, 8, 2)
    """
    extended_points = torch.nn.functional.pad(
        points, (0, 1), 'constant', value=1.0)  # (N, 8, 4)
    image_points = extended_points @ P2.T  # (N, 8, 4)
    # Perspective divide by the third (depth) component.
    image_points = image_points[:, :, :2] / image_points[:, :, 2:3]
    return image_points.clone().detach()


def keep_bbox_from_image_range(
        result, calib_info, num_images, image_info, cam_sync=False):
    """Project lidar boxes into every camera and optionally drop unseen ones.

    result: dict with 'lidar_bboxes' (n, 7), 'labels' and 'scores'
    calib_info: dict with 'R0_rect' and, per camera i,
        'Tr_velo_to_cam_<i>' and 'P<i>'
    num_images: number of cameras to project into (must be >= 1)
    image_info: image_info['camera'][i]['image_shape'] is read as (h, w)
    cam_sync: if True, keep only boxes visible in at least one camera
    return: dict with 'lidar_bboxes', 'labels', 'scores', 'bboxes2d' and
        'camera_bboxes'
    raises: ValueError if num_images < 1
    """
    if num_images < 1:
        # Without any camera there is nothing to project; the outputs
        # 'bboxes2d' and 'camera_bboxes' would be undefined.
        raise ValueError('num_images must be at least 1')

    r0_rect = calib_info['R0_rect']
    lidar_bboxes = result['lidar_bboxes']
    labels = result['labels']
    scores = result['scores']

    # Allocate the mask on the boxes' device so that it can be OR-ed with
    # the per-camera masks computed from them.
    total_keep_flag = torch.zeros(
        lidar_bboxes.size(dim=0), dtype=torch.bool,
        device=lidar_bboxes.device)

    for i in range(num_images):
        h, w = image_info['camera'][i]['image_shape']
        tr_velo_to_cam = calib_info['Tr_velo_to_cam_' + str(i)]
        P = calib_info['P' + str(i)]

        camera_bboxes = bbox_lidar2camera(
            lidar_bboxes, tr_velo_to_cam, r0_rect)  # (n, 7)
        if i == 0:
            main_camera_bboxes = camera_bboxes.clone()

        bboxes_points = bbox3d2corners_camera(camera_bboxes)  # (n, 8, 3)
        image_points = points_camera2image(bboxes_points, P)  # (n, 8, 2)

        # Axis-aligned 2D box around the projected corners, clipped to the
        # image rectangle [0, w] x [0, h].
        image_x1y1 = torch.clamp(
            torch.min(image_points, dim=1)[0], min=0)  # (n, 2)
        image_x2y2 = torch.max(image_points, dim=1)[0]  # (n, 2)
        image_x2y2 = torch.minimum(
            image_x2y2, image_x2y2.new_tensor([w, h]))
        # NOTE(review): 'bboxes2d' ends up holding the boxes of the LAST
        # camera while 'camera_bboxes' comes from camera 0 - confirm that
        # this is intended.
        bboxes2d = torch.cat([image_x1y1, image_x2y2], dim=-1)

        keep_flag = (
            (image_x1y1[:, 0] < w) & (image_x1y1[:, 1] < h)
            & (image_x2y2[:, 0] > 0) & (image_x2y2[:, 1] > 0)
            & (camera_bboxes[:, 2] > 0))
        total_keep_flag = total_keep_flag | keep_flag

    if cam_sync:
        result = {
            'lidar_bboxes': lidar_bboxes[total_keep_flag],
            'labels': labels[total_keep_flag],
            'scores': scores[total_keep_flag],
            'bboxes2d': bboxes2d[total_keep_flag],
            'camera_bboxes': main_camera_bboxes[total_keep_flag]
        }
    else:
        result = {
            'lidar_bboxes': lidar_bboxes,
            'labels': labels,
            'scores': scores,
            'bboxes2d': bboxes2d,
            'camera_bboxes': main_camera_bboxes
        }
    return result


def limit_period(val, offset=0.5, period=np.pi):
    """Wrap ``val`` into one period.

    val: torch.Tensor, array or float
    offset: float
    period: float
    return: Value in the range of [-offset * period, (1-offset) * period]
    """
    # Use the torch implementation for tensors so the computation stays on
    # the tensor's device instead of going through NumPy.
    floor = torch.floor if isinstance(val, torch.Tensor) else np.floor
    limited_val = val - floor(val / period + offset) * period
    return limited_val


def iou2d(bboxes1, bboxes2, metric=0):
    """Pairwise overlap ratio of axis-aligned 2D boxes.

    bboxes1: (n, 4), (x1, y1, x2, y2)
    bboxes2: (m, 4), (x1, y1, x2, y2)
    metric: 0 -> intersection over union,
            1 -> intersection over the area of bboxes1
    return: (n, m)
    raises: ValueError if metric is neither 0 nor 1
    """
    if metric not in (0, 1):
        raise ValueError(f'metric must be 0 or 1, got {metric!r}')

    rows = len(bboxes1)
    cols = len(bboxes2)
    if rows * cols == 0:
        return torch.empty((rows, cols))

    bboxes_x1 = torch.maximum(
        bboxes1[:, 0][:, None], bboxes2[:, 0][None, :])  # (n, m)
    bboxes_y1 = torch.maximum(
        bboxes1[:, 1][:, None], bboxes2[:, 1][None, :])  # (n, m)
    bboxes_x2 = torch.minimum(bboxes1[:, 2][:, None], bboxes2[:, 2][None, :])
    bboxes_y2 = torch.minimum(bboxes1[:, 3][:, None], bboxes2[:, 3][None, :])

    # Negative extents mean the boxes do not overlap along that axis.
    bboxes_w = torch.clamp(bboxes_x2 - bboxes_x1, min=0)
    bboxes_h = torch.clamp(bboxes_y2 - bboxes_y1, min=0)
    iou_area = bboxes_w * bboxes_h  # (n, m)

    bboxes1_wh = bboxes1[:, 2:] - bboxes1[:, :2]
    area1 = bboxes1_wh[:, 0] * bboxes1_wh[:, 1]  # (n, )
    bboxes2_wh = bboxes2[:, 2:] - bboxes2[:, :2]
    area2 = bboxes2_wh[:, 0] * bboxes2_wh[:, 1]  # (m, )

    # The 1e-8 term guards against division by zero for empty boxes.
    if metric == 0:
        iou = iou_area / (area1[:, None] + area2[None, :] - iou_area + 1e-8)
    else:
        iou = iou_area / (area1[:, None] + 1e-8)
    return iou


def nearest_bev(bboxes):
    """Approximate rotated boxes by their nearest axis-aligned BEV boxes.

    bboxes: (n, 7), (x, y, z, w, l, h, theta)
    return: (n, 4), (x1, y1, x2, y2)
    """
    # Advanced indexing already returns a fresh tensor, so no copy is needed.
    bboxes_bev = bboxes[:, [0, 1, 3, 4]]
    bboxes_angle = limit_period(
        bboxes[:, 6], offset=0.5, period=np.pi).to(bboxes_bev)
    # Boxes rotated by more than 45 degrees are closer to the axis-aligned
    # box with w and l swapped.
    swap_wl = torch.abs(bboxes_angle[:, None]) > np.pi / 4
    bboxes_bev = torch.where(swap_wl, bboxes_bev[:, [0, 1, 3, 2]], bboxes_bev)

    bboxes_xy = bboxes_bev[:, :2]
    bboxes_wl = bboxes_bev[:, 2:]
    bboxes_bev_x1y1x2y2 = torch.cat(
        [bboxes_xy - bboxes_wl / 2, bboxes_xy + bboxes_wl / 2], dim=-1)
    return bboxes_bev_x1y1x2y2


def iou2d_nearest(bboxes1, bboxes2):
    """Pairwise IoU of the nearest axis-aligned BEV boxes.

    bboxes1: (n, 7), (x, y, z, w, l, h, theta)
    bboxes2: (m, 7),
    return: (n, m)
    """
    bboxes1_bev = nearest_bev(bboxes1)
    bboxes2_bev = nearest_bev(bboxes2)
    iou = iou2d(bboxes1_bev, bboxes2_bev)
    return iou


def iou3d_camera(bboxes1, bboxes2):
    """Pairwise 3D IoU of boxes given in camera coordinates.

    Columns as used by the code: 0-2 position (x, y, z), 3 and 5 the extents
    along x and z, 4 the extent along y, 6 the rotation angle.

    bboxes1: (n, 7)
    bboxes2: (m, 7)
    return: (n, m)
    """
    rows = len(bboxes1)
    cols = len(bboxes2)
    if rows * cols == 0:
        return torch.empty((rows, cols))

    # 1. height overlap: each box spans [y - size_y, y] along the y axis.
    bboxes1_bottom = bboxes1[:, 1] - bboxes1[:, 4]  # (n, )
    bboxes2_bottom = bboxes2[:, 1] - bboxes2[:, 4]  # (m, )
    bboxes1_top, bboxes2_top = bboxes1[:, 1], bboxes2[:, 1]  # (n, ), (m, )
    bboxes_bottom = torch.maximum(
        bboxes1_bottom[:, None], bboxes2_bottom[None, :])  # (n, m)
    bboxes_top = torch.minimum(bboxes1_top[:, None], bboxes2_top[None, :])
    height_overlap = torch.clamp(bboxes_top - bboxes_bottom, min=0)

    # 2. bev overlap: footprint in the x-z plane as (x1, z1, x2, z2, angle).
    bboxes1_x1y1 = bboxes1[:, [0, 2]] - bboxes1[:, [3, 5]] / 2
    bboxes1_x2y2 = bboxes1[:, [0, 2]] + bboxes1[:, [3, 5]] / 2
    bboxes2_x1y1 = bboxes2[:, [0, 2]] - bboxes2[:, [3, 5]] / 2
    bboxes2_x2y2 = bboxes2[:, [0, 2]] + bboxes2[:, [3, 5]] / 2
    bboxes1_bev = torch.cat(
        [bboxes1_x1y1, bboxes1_x2y2, bboxes1[:, 6:]], dim=-1)
    bboxes2_bev = torch.cat(
        [bboxes2_x1y1, bboxes2_x2y2, bboxes2[:, 6:]], dim=-1)
    # rotated_box_iou returns the intersection AREA (not a ratio).
    bev_overlap = rotated_box_iou(bboxes1_bev, bboxes2_bev).to(
        device=height_overlap.device)  # (n, m)

    # 3. overlap and volume
    overlap = height_overlap * bev_overlap
    volume1 = bboxes1[:, 3] * bboxes1[:, 4] * bboxes1[:, 5]
    volume2 = bboxes2[:, 3] * bboxes2[:, 4] * bboxes2[:, 5]
    volume = volume1[:, None] + volume2[None, :]  # (n, m)

    # 4. iou
    iou = overlap / (volume - overlap + 1e-8)
    return iou


def boxes_overlap_bev(boxes_a, boxes_b):
    """Calculate boxes Overlap in the bird view.

    NOTE(review): relies on ``boxes_overlap_bev_gpu``, which is not defined
    in this part of the file - presumably a compiled extension provided
    elsewhere; verify before calling.

    Args:
        boxes_a (torch.Tensor): Input boxes a with shape (M, 5).
        boxes_b (torch.Tensor): Input boxes b with shape (N, 5).

    Returns:
        ans_overlap (torch.Tensor): Overlap result with shape (M, N).
    """
    ans_overlap = boxes_a.new_zeros(
        torch.Size((boxes_a.shape[0], boxes_b.shape[0])))
    if ans_overlap.size(0) * ans_overlap.size(1) == 0:
        return ans_overlap
    # The helper writes its result into ``ans_overlap`` in place.
    boxes_overlap_bev_gpu(
        boxes_a.contiguous(), boxes_b.contiguous(), ans_overlap)
    return ans_overlap


def rotated_box_iou(boxes1, boxes2):
    """Pairwise intersection area of rotated boxes.

    Despite its name this function returns the intersection AREA of every
    pair, not the IoU ratio; ``iou3d_camera`` multiplies the result by the
    height overlap to obtain an intersection volume.

    Args:
        boxes1 (torch.Tensor): Tensor of shape (N, 5) representing rotated
            boxes in format (x_min, y_min, x_max, y_max, angle).
        boxes2 (torch.Tensor): Tensor of shape (M, 5) representing rotated
            boxes in the same format.

    Returns:
        torch.Tensor: Intersection-area matrix of shape (N, M).
    """
    # Convert boxes to polygons
    polygons1 = boxes_to_polygons(boxes1)
    polygons2 = boxes_to_polygons(boxes2)

    # Only the intersection is needed; the union / ratio previously computed
    # here was never returned and could divide by zero for degenerate boxes.
    overlaps = torch.zeros((len(polygons1), len(polygons2)))
    for i, poly_a in enumerate(polygons1):
        for j, poly_b in enumerate(polygons2):
            overlaps[i, j] = polygon_intersection(poly_a, poly_b)
    return overlaps


def boxes_to_polygons(boxes):
    """Convert (x_min, y_min, x_max, y_max, angle) boxes to shapely polygons.

    Each axis-aligned rectangle is rotated by ``-angle`` radians around its
    centre (the default origin of ``shapely.affinity.rotate``).

    boxes: iterable of rows with at least 5 entries
    return: list of shapely.geometry.Polygon
    """
    polygons = []
    for box in boxes:
        # Plain Python floats keep shapely independent of the tensor's
        # device and dtype.
        x_min, y_min, x_max, y_max, angle = (float(v) for v in box[:5])
        polygon = shapely.geometry.Polygon(
            [(x_min, y_min), (x_max, y_min), (x_max, y_max), (x_min, y_max)])
        polygon = shapely.affinity.rotate(polygon, -angle, use_radians=True)
        polygons.append(polygon)
    return polygons


def polygon_intersection(polygon1, polygon2):
    """Return the area of the intersection of two shapely polygons."""
    # The geometry method is available in both Shapely 1.x and 2.x.
    return polygon1.intersection(polygon2).area


def polygon_union(polygon1, polygon2):
    """Return the area of the union of two shapely polygons."""
    # The geometry method is available in both Shapely 1.x and 2.x.
    return polygon1.union(polygon2).area