process.py 11.6 KB
Newer Older
yangzhong's avatar
yangzhong committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
import shapely.geometry
import numpy as np
import torch
import copy


def bbox_camera2lidar(bboxes, tr_velo_to_cam, r0_rect):
    '''
    Convert 3D boxes from the rectified camera frame to the lidar frame.

    bboxes: np.ndarray, shape=(N, 7)
    tr_velo_to_cam: np.ndarray, shape=(4, 4)
    r0_rect: np.ndarray, shape=(4, 4)
    return: np.ndarray(float32), shape=(N, 7)
    '''
    # Reorder the size columns for the lidar convention: (z, x, y) sizes.
    sizes = np.concatenate(
        [bboxes[:, 5:6], bboxes[:, 3:4], bboxes[:, 4:5]], axis=1)
    # Homogeneous centers: append a column of ones -> (N, 4).
    centers_h = np.pad(bboxes[:, :3], ((0, 0), (0, 1)),
                       'constant', constant_values=1.0)
    # Invert the rectification+extrinsic transform to go camera -> lidar.
    cam2lidar = np.linalg.inv(r0_rect @ tr_velo_to_cam)
    centers = centers_h @ cam2lidar.T
    out = np.concatenate([centers[:, :3], sizes, bboxes[:, 6:]], axis=1)
    return np.array(out, dtype=np.float32)


def bbox_lidar2camera(bboxes, tr_velo_to_cam, r0_rect):
    '''
    Convert 3D boxes from the lidar frame to the rectified camera frame.

    bboxes: torch.Tensor, shape=(N, 7)
    tr_velo_to_cam: torch.Tensor, shape=(4, 4)
    r0_rect: torch.Tensor, shape=(4, 4)
    return: torch.Tensor, shape=(N, 7)
    '''
    # Reorder the size columns for the camera convention: (y, z, x) sizes.
    sizes = torch.cat(
        [bboxes[:, 4:5], bboxes[:, 5:6], bboxes[:, 3:4]], dim=1)
    # Homogeneous centers: (x, y, z, 1).
    centers_h = torch.nn.functional.pad(
        bboxes[:, :3], (0, 1), 'constant', value=1.0)
    lidar2cam = r0_rect @ tr_velo_to_cam
    centers = centers_h @ lidar2cam.T
    return torch.cat([centers[:, :3], sizes, bboxes[:, 6:]], dim=1)


def bbox3d2corners_camera(bboxes):
    '''
    bboxes: shape=(n, 7)
    return: shape=(n, 8, 3)
        z (front)            6 ------ 5
        /                  / |     / |
       /                  2 -|---- 1 |
      /                   |  |     | |
    |o ------> x(right)   | 7 -----| 4
    |                     |/   o   |/
    |                     3 ------ 0
    |
    v y(down)
    '''
    centers, dims, angles = bboxes[:, :3], bboxes[:, 3:6], bboxes[:, 6]

    # Unit-box corner template, clockwise from the minimal point; the box
    # origin sits on the bottom face (y grows downwards in the camera frame).
    template = torch.tensor([
        [0.5, 0.0, -0.5], [0.5, -1.0, -0.5],
        [-0.5, -1.0, -0.5], [-0.5, 0.0, -0.5],
        [0.5, 0.0, 0.5], [0.5, -1.0, 0.5],
        [-0.5, -1.0, 0.5], [-0.5, 0.0, 0.5],
    ])
    # Scale the template by each box's dimensions: (1, 8, 3) * (n, 1, 3).
    corners = template[None, :, :] * dims[:, None, :]

    # Rotate about the y axis.  Corners are row vectors, so we multiply by
    # the transpose of the usual y-rotation matrix, built per box.
    sin, cos = torch.sin(angles), torch.cos(angles)
    zeros, ones = torch.zeros_like(cos), torch.ones_like(cos)
    rot_t = torch.stack([
        torch.stack([cos, zeros, -sin], dim=-1),
        torch.stack([zeros, ones, zeros], dim=-1),
        torch.stack([sin, zeros, cos], dim=-1),
    ], dim=1)  # (n, 3, 3)
    corners = corners @ rot_t  # (n, 8, 3)

    # Shift every corner by its box center.
    corners = corners + centers[:, None, :]
    return corners.clone().detach()


def points_camera2image(points, P2):
    '''
    Project camera-frame corner points onto the image plane.

    points: torch.Tensor, shape=(N, 8, 3)
    P2: torch.Tensor, shape=(4, 4)
    return: torch.Tensor, shape=(N, 8, 2)
    '''
    # Homogeneous coordinates: (x, y, z, 1) per corner -> (N, 8, 4).
    homo = torch.nn.functional.pad(points, (0, 1), 'constant', value=1.0)
    projected = homo @ P2.T  # (N, 8, 4)
    # Perspective divide by depth to get pixel coordinates.
    pixels = projected[:, :, :2] / projected[:, :, 2:3]
    return pixels.clone().detach()


def keep_bbox_from_image_range(
        result, calib_info, num_images, image_info, cam_sync=False):
    """Keep detections that project inside at least one camera image.

    result: dict with 'lidar_bboxes' (tensor (n, 7)), 'labels', 'scores'.
    calib_info: dict providing 'R0_rect', 'Tr_velo_to_cam_<i>' and 'P<i>'
        matrices per camera.  # assumes torch tensors -- TODO confirm
    num_images: number of camera views to project into.
    image_info: image_info['camera'][i]['image_shape'] yields (h, w).
    cam_sync: if True, drop boxes visible in no camera.
    return: dict with the input keys plus 'bboxes2d' and 'camera_bboxes'.

    NOTE(review): 'bboxes2d' is taken from the LAST camera iterated while
    'camera_bboxes' comes from camera 0, and the cam_sync=False branch
    returns everything unfiltered -- confirm both asymmetries are intended.
    """
    r0_rect = calib_info['R0_rect']
    lidar_bboxes = result['lidar_bboxes']
    labels = result['labels']
    scores = result['scores']
    # Accumulates, per box, whether any camera sees it.
    total_keep_flag = torch.zeros(lidar_bboxes.size(dim=0)).bool()
    for i in range(num_images):
        h, w = image_info['camera'][i]['image_shape']
        tr_velo_to_cam = calib_info['Tr_velo_to_cam_' + str(i)]
        P = calib_info['P' + str(i)]
        camera_bboxes = bbox_lidar2camera(
            lidar_bboxes, tr_velo_to_cam, r0_rect)  # (n, 7)
        if i == 0:
            # Camera-0 boxes are the ones reported back to the caller.
            main_camera_bboxes = camera_bboxes.clone()
        bboxes_points = bbox3d2corners_camera(camera_bboxes)  # (n, 8, 3)
        image_points = points_camera2image(bboxes_points, P)  # (n, 8, 2)
        # Tightest axis-aligned 2D box around the projected corners,
        # clipped to the image bounds.
        image_x1y1 = torch.min(image_points, axis=1)[0]  # (n, 2)
        image_x1y1 = torch.maximum(image_x1y1, torch.tensor(0))
        image_x2y2 = torch.max(image_points, axis=1)[0]  # (n, 2)
        image_x2y2 = torch.minimum(image_x2y2, torch.tensor([w, h]))
        bboxes2d = torch.cat([image_x1y1, image_x2y2], axis=-1)

        # Visible iff the clipped 2D box is non-degenerate and the box
        # center lies in front of the camera (z > 0).
        keep_flag = (image_x1y1[:, 0] < w) & (image_x1y1[:, 1] < h) & (
            image_x2y2[:, 0] > 0) & (image_x2y2[:, 1] > 0) & (camera_bboxes[:, 2] > 0)
        total_keep_flag = total_keep_flag | keep_flag
    if cam_sync:
        result = {
            'lidar_bboxes': lidar_bboxes[total_keep_flag],
            'labels': labels[total_keep_flag],
            'scores': scores[total_keep_flag],
            'bboxes2d': bboxes2d[total_keep_flag],
            'camera_bboxes': main_camera_bboxes[total_keep_flag]
        }
    else:
        result = {
            'lidar_bboxes': lidar_bboxes,
            'labels': labels,
            'scores': scores,
            'bboxes2d': bboxes2d,
            'camera_bboxes': main_camera_bboxes
        }
    return result


def limit_period(val, offset=0.5, period=np.pi):
    """Wrap `val` into the range [-offset * period, (1 - offset) * period).

    val: array or float
    offset: float
    period: float
    return: wrapped value(s), same shape as `val`
    """
    # Subtract the integer number of periods (shifted by `offset`).
    return val - np.floor(val / period + offset) * period


def iou2d(bboxes1, bboxes2, metric=0):
    '''
    Pairwise 2D overlap between two sets of axis-aligned boxes.

    bboxes1: (n, 4), (x1, y1, x2, y2)
    bboxes2: (m, 4), (x1, y1, x2, y2)
    metric: 0 -> intersection-over-union; 1 -> intersection over area of
        bboxes1 only.
    return: (n, m)
    raises: ValueError for an unknown metric (previously this path crashed
        with an UnboundLocalError).
    '''
    rows = len(bboxes1)
    cols = len(bboxes2)
    if rows * cols == 0:
        # Degenerate case: preserve the (n, m) shape contract.
        return torch.empty((rows, cols))
    if metric not in (0, 1):
        raise ValueError(f'unknown metric: {metric}')
    # Intersection rectangle for every (i, j) pair, broadcast to (n, m).
    x1 = torch.maximum(bboxes1[:, 0][:, None], bboxes2[:, 0][None, :])
    y1 = torch.maximum(bboxes1[:, 1][:, None], bboxes2[:, 1][None, :])
    x2 = torch.minimum(bboxes1[:, 2][:, None], bboxes2[:, 2][None, :])
    y2 = torch.minimum(bboxes1[:, 3][:, None], bboxes2[:, 3][None, :])

    # Clamp so disjoint pairs contribute zero area.
    inter_w = torch.clamp(x2 - x1, min=0)
    inter_h = torch.clamp(y2 - y1, min=0)
    inter_area = inter_w * inter_h  # (n, m)

    wh1 = bboxes1[:, 2:] - bboxes1[:, :2]
    area1 = wh1[:, 0] * wh1[:, 1]  # (n, )
    wh2 = bboxes2[:, 2:] - bboxes2[:, :2]
    area2 = wh2[:, 0] * wh2[:, 1]  # (m, )
    if metric == 0:
        # Standard IoU; epsilon guards against zero-area unions.
        return inter_area / (
            area1[:, None] + area2[None, :] - inter_area + 1e-8)
    # metric == 1: overlap normalised by the first box's area only.
    return inter_area / (area1[:, None] + 1e-8)


def nearest_bev(bboxes):
    '''
    Axis-aligned BEV footprint of each 3D box, snapped to the nearest axis.

    bboxes: (n, 7), (x, y, z, w, l, h, theta)
    return: (n, 4), (x1, y1, x2, y2)
    '''
    # Advanced indexing already returns a copy; clone keeps it independent.
    bev = bboxes[:, [0, 1, 3, 4]].clone()
    # Wrap theta into [-pi/2, pi/2); computed on CPU, then moved back to
    # bev's device/dtype.
    theta = limit_period(bboxes[:, 6].cpu(), offset=0.5, period=np.pi).to(bev)
    # Boxes closer to a 90-degree orientation swap their w/l extents.
    swapped = bev[:, [0, 1, 3, 2]]
    bev = torch.where(torch.abs(theta[:, None]) > np.pi / 4, swapped, bev)

    centers = bev[:, :2]
    extents = bev[:, 2:]
    return torch.cat([centers - extents / 2, centers + extents / 2], dim=-1)


def iou2d_nearest(bboxes1, bboxes2):
    '''
    BEV IoU between 3D boxes using their nearest axis-aligned footprints.

    bboxes1: (n, 7), (x, y, z, w, l, h, theta)
    bboxes2: (m, 7)
    return: (n, m)
    '''
    # Project both sets to axis-aligned BEV rectangles, then reuse iou2d.
    return iou2d(nearest_bev(bboxes1), nearest_bev(bboxes2))


# NOTE(review): exact duplicate of `limit_period` defined earlier in this
# module; this redefinition silently shadows the first one and should be
# removed.
def limit_period(val, offset=0.5, period=np.pi):
    """
    val: array or float
    offset: float
    period: float
    return: Value in the range of [-offset * period, (1-offset) * period]
    """
    # Subtract the integer number of periods (shifted by `offset`).
    limited_val = val - np.floor(val / period + offset) * period
    return limited_val


def iou3d_camera(bboxes1, bboxes2):
    '''
    Pairwise 3D IoU for boxes in the camera frame.

    bboxes1: (n, 7), (x, y, z, w, l, h, theta)
    bboxes2: (m, 7)
    return: (n, m)
    '''
    n, m = len(bboxes1), len(bboxes2)
    if n * m == 0:
        # Degenerate case: preserve the (n, m) shape contract.
        return torch.empty((n, m))

    # 1. vertical (y-axis) extent overlap; the box origin sits at y and the
    # vertical size is bboxes[:, 4].
    lo1, lo2 = bboxes1[:, 1] - bboxes1[:, 4], bboxes2[:, 1] - bboxes2[:, 4]
    hi1, hi2 = bboxes1[:, 1], bboxes2[:, 1]
    lo = torch.maximum(lo1[:, None], lo2[None, :])  # (n, m)
    hi = torch.minimum(hi1[:, None], hi2[None, :])
    h_overlap = torch.clamp(hi - lo, min=0)

    # 2. ground-plane (x-z) overlap via rotated rectangles.
    def _bev(b):
        # Corner-format BEV box (x1, z1, x2, z2, theta).
        half = b[:, [3, 5]] / 2
        ctr = b[:, [0, 2]]
        return torch.cat([ctr - half, ctr + half, b[:, 6:]], dim=-1)

    # rotated_box_iou returns raw intersection areas, not IoU.
    bev_overlap = rotated_box_iou(_bev(bboxes1), _bev(bboxes2)).to(
        device=h_overlap.device)  # (n, m)

    # 3. intersection volume and per-box volumes.
    inter = h_overlap * bev_overlap
    vol1 = bboxes1[:, 3] * bboxes1[:, 4] * bboxes1[:, 5]
    vol2 = bboxes2[:, 3] * bboxes2[:, 4] * bboxes2[:, 5]

    # 4. IoU; epsilon avoids division by zero for empty intersections.
    return inter / (vol1[:, None] + vol2[None, :] - inter + 1e-8)


def boxes_overlap_bev(boxes_a, boxes_b):
    """Calculate boxes Overlap in the bird view.

    Args:
        boxes_a (torch.Tensor): Input boxes a with shape (M, 5).
        boxes_b (torch.Tensor): Input boxes b with shape (N, 5).

    Returns:
        ans_overlap (torch.Tensor): Overlap result with shape (M, N).

    NOTE(review): `boxes_overlap_bev_gpu` is not defined or imported
    anywhere in this file -- presumably a compiled CUDA extension op; as
    written, calling this function raises NameError. Verify the op is
    registered before use.
    """
    # Pre-allocate the (M, N) buffer the GPU op fills in place; new_zeros
    # keeps the result on boxes_a's device/dtype.
    ans_overlap = boxes_a.new_zeros(
        torch.Size((boxes_a.shape[0], boxes_b.shape[0])))
    # Nothing to compute when either side is empty.
    if ans_overlap.size(0) * ans_overlap.size(1) == 0:
        return ans_overlap
    boxes_overlap_bev_gpu(
        boxes_a.contiguous(),
        boxes_b.contiguous(),
        ans_overlap)

    return ans_overlap


def rotated_box_iou(boxes1, boxes2):
    """
    Pairwise intersection areas for rotated bounding boxes.

    Args:
        boxes1 (torch.Tensor): shape (N, 5); each row is
            (x1, y1, x2, y2, angle) -- corner format, rotated about the
            rectangle's centre (see `boxes_to_polygons`).
        boxes2 (torch.Tensor): shape (M, 5), same format.

    Returns:
        torch.Tensor: (N, M) matrix of raw intersection areas.

    NOTE: despite the name this returns overlap areas, not IoU -- the
    caller (`iou3d_camera`) combines them with a height overlap before
    normalising.  The previous version additionally computed a full IoU
    matrix that was never used (and could divide by a zero union); that
    dead work is removed, the returned value is unchanged.
    """
    polygons1 = boxes_to_polygons(boxes1)
    polygons2 = boxes_to_polygons(boxes2)

    # Intersection area for each pair of polygons.
    overlaps = torch.zeros((boxes1.shape[0], boxes2.shape[0]))
    for i, poly1 in enumerate(polygons1):
        for j, poly2 in enumerate(polygons2):
            overlaps[i, j] = polygon_intersection(poly1, poly2)

    return overlaps


def boxes_to_polygons(boxes):
    """Convert corner-format rotated boxes to shapely polygons.

    Args:
        boxes: iterable of (x1, y1, x2, y2, angle) rows; `angle` is in
            radians and is applied clockwise about the rectangle centre.

    Returns:
        list of shapely.geometry.Polygon
    """
    # BUGFIX: the module-level `import shapely.geometry` does not pull in
    # the `affinity` submodule, so `shapely.affinity.rotate` below could
    # fail with AttributeError; import it explicitly here.
    import shapely.affinity

    polygons = []
    for box in boxes:
        x_min, y_min, x_max, y_max = box[0], box[1], box[2], box[3]
        rect = shapely.geometry.Polygon(
            [(x_min, y_min), (x_max, y_min), (x_max, y_max), (x_min, y_max)])
        # Negative angle: rotate clockwise, in radians, about the centre.
        polygons.append(
            shapely.affinity.rotate(rect, -1 * box[4], use_radians=True))
    return polygons


def polygon_intersection(polygon1, polygon2):
    # Area shared by the two polygons (method form of shapely.intersection).
    return polygon1.intersection(polygon2).area


def polygon_union(polygon1, polygon2):
    # Combined area covered by either polygon (method form of shapely.union).
    return polygon1.union(polygon2).area