# Copyright (c) OpenMMLab. All rights reserved.
import warnings
from typing import Tuple, Union

import numpy as np
import torch
from torch import Tensor

from mmdet3d.utils import array_converter


@array_converter(apply_to=('val', ))
def limit_period(val: Union[np.ndarray, Tensor],
                 offset: float = 0.5,
                 period: float = np.pi) -> Union[np.ndarray, Tensor]:
    """Limit the value into a period for periodic function.

    Args:
        val (np.ndarray or Tensor): The value to be converted.
        offset (float): Offset to set the value range. Defaults to 0.5.
        period (float): Period of the value. Defaults to np.pi.

    Returns:
        np.ndarray or Tensor: Value in the range of
        [-offset * period, (1-offset) * period].
    """
    limited_val = val - torch.floor(val / period + offset) * period
    return limited_val
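
# Illustrative usage sketch (values assumed, not part of the original file):
# with the default offset=0.5 and period=np.pi, outputs fall in [-pi/2, pi/2).
#   out = limit_period(torch.tensor([1.25 * np.pi]))
#   # out -> [0.7854], i.e. 0.25 * pi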


@array_converter(apply_to=('points', 'angles'))
def rotation_3d_in_axis(
    points: Union[np.ndarray, Tensor],
    angles: Union[np.ndarray, Tensor, float],
    axis: int = 0,
    return_mat: bool = False,
    clockwise: bool = False
) -> Union[Tuple[np.ndarray, np.ndarray], Tuple[Tensor, Tensor], np.ndarray,
           Tensor]:
    """Rotate points by angles according to axis.

    Args:
        points (np.ndarray or Tensor): Points with shape (N, M, 3).
        angles (np.ndarray or Tensor or float): Vector of angles with shape
            (N, ).
        axis (int): The axis around which to rotate the points. Defaults to 0.
        return_mat (bool): Whether or not to return the rotation matrix
            (transposed). Defaults to False.
        clockwise (bool): Whether the rotation is clockwise. Defaults to False.

    Raises:
        ValueError: If ``axis`` is not in [-3, -2, -1, 0, 1, 2].

    Returns:
        Tuple[np.ndarray, np.ndarray] or Tuple[Tensor, Tensor] or np.ndarray or
        Tensor: Rotated points with shape (N, M, 3). If ``return_mat=True``,
        the transposed rotation matrix with shape (N, 3, 3) is also returned.
    """
    batch_free = len(points.shape) == 2
    if batch_free:
        points = points[None]

    if isinstance(angles, float) or len(angles.shape) == 0:
        angles = torch.full(points.shape[:1], angles)

    assert len(points.shape) == 3 and len(angles.shape) == 1 and \
        points.shape[0] == angles.shape[0], 'Incorrect shape of points ' \
        f'or angles: {points.shape}, {angles.shape}'

    assert points.shape[-1] in [2, 3], \
        f'Points size should be 2 or 3 instead of {points.shape[-1]}'

    rot_sin = torch.sin(angles)
    rot_cos = torch.cos(angles)
    ones = torch.ones_like(rot_cos)
    zeros = torch.zeros_like(rot_cos)

    if points.shape[-1] == 3:
        if axis == 1 or axis == -2:
            rot_mat_T = torch.stack([
                torch.stack([rot_cos, zeros, -rot_sin]),
                torch.stack([zeros, ones, zeros]),
                torch.stack([rot_sin, zeros, rot_cos])
            ])
        elif axis == 2 or axis == -1:
            rot_mat_T = torch.stack([
                torch.stack([rot_cos, rot_sin, zeros]),
                torch.stack([-rot_sin, rot_cos, zeros]),
                torch.stack([zeros, zeros, ones])
            ])
        elif axis == 0 or axis == -3:
            rot_mat_T = torch.stack([
                torch.stack([ones, zeros, zeros]),
                torch.stack([zeros, rot_cos, rot_sin]),
                torch.stack([zeros, -rot_sin, rot_cos])
            ])
        else:
            raise ValueError(
                f'axis should be in [-3, -2, -1, 0, 1, 2], got {axis}')
    else:
        rot_mat_T = torch.stack([
            torch.stack([rot_cos, rot_sin]),
            torch.stack([-rot_sin, rot_cos])
        ])

    if clockwise:
        rot_mat_T = rot_mat_T.transpose(0, 1)

    if points.shape[0] == 0:
        points_new = points
    else:
        points_new = torch.einsum('aij,jka->aik', points, rot_mat_T)

    if batch_free:
        points_new = points_new.squeeze(0)

    if return_mat:
        rot_mat_T = torch.einsum('jka->ajk', rot_mat_T)
        if batch_free:
            rot_mat_T = rot_mat_T.squeeze(0)
        return points_new, rot_mat_T
    else:
        return points_new
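
# Illustrative usage sketch (shapes and values assumed): rotating the x-axis
# unit vector by pi / 2 around the z-axis (axis=2) maps it onto the y-axis,
# up to floating-point error.
#   pts = torch.tensor([[[1., 0., 0.]]])  # (N=1, M=1, 3)
#   out = rotation_3d_in_axis(pts, torch.tensor([np.pi / 2]), axis=2)
#   # out -> [[[0., 1., 0.]]]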


@array_converter(apply_to=('boxes_xywhr', ))
def xywhr2xyxyr(
        boxes_xywhr: Union[Tensor, np.ndarray]) -> Union[Tensor, np.ndarray]:
    """Convert a rotated boxes in XYWHR format to XYXYR format.

    Args:
        boxes_xywhr (Tensor or np.ndarray): Rotated boxes in XYWHR format.

    Returns:
        Tensor or np.ndarray: Converted boxes in XYXYR format.
    """
    boxes = torch.zeros_like(boxes_xywhr)
    half_w = boxes_xywhr[..., 2] / 2
    half_h = boxes_xywhr[..., 3] / 2

    boxes[..., 0] = boxes_xywhr[..., 0] - half_w
    boxes[..., 1] = boxes_xywhr[..., 1] - half_h
    boxes[..., 2] = boxes_xywhr[..., 0] + half_w
    boxes[..., 3] = boxes_xywhr[..., 1] + half_h
    boxes[..., 4] = boxes_xywhr[..., 4]
    return boxes
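
# Illustrative usage sketch (values assumed): a box centered at (1, 1) with
# w=2, h=4 and yaw 0.5 becomes its corner representation.
#   out = xywhr2xyxyr(torch.tensor([[1., 1., 2., 4., 0.5]]))
#   # out -> [[0., -1., 2., 3., 0.5]]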


def get_box_type(box_type: str) -> Tuple[type, int]:
    """Get the type and mode of box structure.

    Args:
        box_type (str): The type of box structure. The valid values are
            "LiDAR", "Camera" and "Depth".

    Raises:
        ValueError: A ValueError is raised when ``box_type`` does not belong to
            the three valid types.

    Returns:
        tuple: Box type and box mode.
    """
    from .box_3d_mode import (Box3DMode, CameraInstance3DBoxes,
                              DepthInstance3DBoxes, LiDARInstance3DBoxes)
    box_type_lower = box_type.lower()
    if box_type_lower == 'lidar':
        box_type_3d = LiDARInstance3DBoxes
        box_mode_3d = Box3DMode.LIDAR
    elif box_type_lower == 'camera':
        box_type_3d = CameraInstance3DBoxes
        box_mode_3d = Box3DMode.CAM
    elif box_type_lower == 'depth':
        box_type_3d = DepthInstance3DBoxes
        box_mode_3d = Box3DMode.DEPTH
    else:
        raise ValueError('Only "box_type" of "camera", "lidar", "depth" are '
                         f'supported, got {box_type}')

    return box_type_3d, box_mode_3d
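
# Illustrative usage sketch: the lookup is case-insensitive.
#   box_type_3d, box_mode_3d = get_box_type('LiDAR')
#   # box_type_3d -> LiDARInstance3DBoxes, box_mode_3d -> Box3DMode.LIDAR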


@array_converter(apply_to=('points_3d', 'proj_mat'))
def points_cam2img(points_3d: Union[Tensor, np.ndarray],
                   proj_mat: Union[Tensor, np.ndarray],
                   with_depth: bool = False) -> Union[Tensor, np.ndarray]:
    """Project points in camera coordinates to image coordinates.

    Args:
        points_3d (Tensor or np.ndarray): Points in shape (N, 3).
        proj_mat (Tensor or np.ndarray): Transformation matrix between
            coordinates.
        with_depth (bool): Whether to keep depth in the output.
            Defaults to False.

    Returns:
        Tensor or np.ndarray: Points in image coordinates with shape [N, 2] if
        ``with_depth=False``, else [N, 3].
    """
    points_shape = list(points_3d.shape)
    points_shape[-1] = 1

    assert len(proj_mat.shape) == 2, \
        'The dimension of the projection matrix should be 2 ' \
        f'instead of {len(proj_mat.shape)}.'
    d1, d2 = proj_mat.shape[:2]
    assert (d1 == 3 and d2 == 3) or (d1 == 3 and d2 == 4) or \
        (d1 == 4 and d2 == 4), 'The shape of the projection matrix ' \
        f'({d1}*{d2}) is not supported.'
    if d1 == 3:
        proj_mat_expanded = torch.eye(
            4, device=proj_mat.device, dtype=proj_mat.dtype)
        proj_mat_expanded[:d1, :d2] = proj_mat
        proj_mat = proj_mat_expanded

    # Previous implementation used new_zeros; new_ones yields better results.
    points_4 = torch.cat([points_3d, points_3d.new_ones(points_shape)], dim=-1)

    point_2d = points_4 @ proj_mat.T
    point_2d_res = point_2d[..., :2] / point_2d[..., 2:3]

    if with_depth:
        point_2d_res = torch.cat([point_2d_res, point_2d[..., 2:3]], dim=-1)

    return point_2d_res
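
# Illustrative usage sketch (intrinsics assumed): a pinhole matrix with focal
# length 1000 and principal point (500, 500) projects a camera-space point by
# u = (fx * x + cx * z) / z, v = (fy * y + cy * z) / z.
#   K = torch.tensor([[1000., 0., 500.],
#                     [0., 1000., 500.],
#                     [0., 0., 1.]])
#   out = points_cam2img(torch.tensor([[0.2, -0.1, 10.]]), K, with_depth=True)
#   # out -> [[520., 490., 10.]]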


@array_converter(apply_to=('points', 'cam2img'))
def points_img2cam(
        points: Union[Tensor, np.ndarray],
        cam2img: Union[Tensor, np.ndarray]) -> Union[Tensor, np.ndarray]:
    """Project points in image coordinates to camera coordinates.

    Args:
        points (Tensor or np.ndarray): 2.5D points in 2D images with shape
            [N, 3]; the three channels are x, y in the image and depth.
        cam2img (Tensor or np.ndarray): Camera intrinsic matrix. The shape can
            be [3, 3], [3, 4] or [4, 4].

    Returns:
        Tensor or np.ndarray: Points in 3D space with shape [N, 3]; the three
        channels are x, y and z in 3D space.
    """
    assert cam2img.shape[0] <= 4
    assert cam2img.shape[1] <= 4
    assert points.shape[1] == 3

    xys = points[:, :2]
    depths = points[:, 2].view(-1, 1)
    unnormed_xys = torch.cat([xys * depths, depths], dim=1)

    pad_cam2img = torch.eye(4, dtype=xys.dtype, device=xys.device)
    pad_cam2img[:cam2img.shape[0], :cam2img.shape[1]] = cam2img
    inv_pad_cam2img = torch.inverse(pad_cam2img).transpose(0, 1)

    # Do operation in homogeneous coordinates.
    num_points = unnormed_xys.shape[0]
    homo_xys = torch.cat([unnormed_xys, xys.new_ones((num_points, 1))], dim=1)
    points3D = torch.mm(homo_xys, inv_pad_cam2img)[:, :3]

    return points3D
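
# Illustrative usage sketch (values assumed): with the same pinhole matrix K
# as in the points_cam2img sketch above, points_img2cam inverts the
# projection for points in front of the camera.
#   K = torch.tensor([[1000., 0., 500.],
#                     [0., 1000., 500.],
#                     [0., 0., 1.]])
#   out = points_img2cam(torch.tensor([[520., 490., 10.]]), K)
#   # out -> [[0.2, -0.1, 10.]]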


def mono_cam_box2vis(cam_box):
    """This is a post-processing function on the bboxes from Mono-3D task. If
    we want to perform projection visualization, we need to:

        1. rotate the box along x-axis for np.pi / 2 (roll)
        2. change orientation from local yaw to global yaw
        3. convert yaw by (np.pi / 2 - yaw)

    After applying this function, we can project and draw it on 2D images.

    Args:
        cam_box (:obj:`CameraInstance3DBoxes`): 3D bbox in camera coordinate
            system before conversion. Could be gt bbox loaded from dataset or
            network prediction output.

    Returns:
        :obj:`CameraInstance3DBoxes`: Box after conversion.
    """
    warnings.warn(
        'The hack of yaw and dimension in the monocular 3D detection on '
        'nuScenes has been removed. The function mono_cam_box2vis will be '
        'deprecated.', DeprecationWarning)
    from .cam_box3d import CameraInstance3DBoxes
    assert isinstance(cam_box, CameraInstance3DBoxes), \
        'input bbox should be CameraInstance3DBoxes!'

    loc = cam_box.gravity_center
    dim = cam_box.dims
    yaw = cam_box.yaw
    feats = cam_box.tensor[:, 7:]
    # rotate along the x-axis by np.pi / 2
    # see also here: https://github.com/open-mmlab/mmdetection3d/blob/master/mmdet3d/datasets/nuscenes_mono_dataset.py#L557  # noqa
    dim[:, [1, 2]] = dim[:, [2, 1]]
    # change local yaw to global yaw for visualization
    # refer to https://github.com/open-mmlab/mmdetection3d/blob/master/mmdet3d/datasets/nuscenes_mono_dataset.py#L164-L166  # noqa
    yaw += torch.atan2(loc[:, 0], loc[:, 2])
    # convert yaw by (-yaw - np.pi / 2)
    # this is because mono 3D box class such as `NuScenesBox` has different
    # definition of rotation with our `CameraInstance3DBoxes`
    yaw = -yaw - np.pi / 2
    cam_box = torch.cat([loc, dim, yaw[:, None], feats], dim=1)
    cam_box = CameraInstance3DBoxes(
        cam_box, box_dim=cam_box.shape[-1], origin=(0.5, 0.5, 0.5))

    return cam_box
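
# Illustrative usage sketch (box values hypothetical; import path assumes the
# mmdet3d.structures layout): feed predicted camera boxes and get boxes that
# match the NuScenesBox visualization convention. A DeprecationWarning is
# emitted on each call.
#   from mmdet3d.structures import CameraInstance3DBoxes
#   boxes = CameraInstance3DBoxes(
#       torch.tensor([[0., 0., 10., 1., 2., 3., 0.5]]))
#   vis_boxes = mono_cam_box2vis(boxes)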


def get_proj_mat_by_coord_type(img_meta: dict, coord_type: str) -> Tensor:
    """Obtain image features using points.

    Args:
        img_meta (dict): Meta information.
        coord_type (str): 'DEPTH', 'CAMERA' or 'LIDAR' (case-insensitive).

    Returns:
        Tensor: Transformation matrix.
    """
    coord_type = coord_type.upper()
    mapping = {'LIDAR': 'lidar2img', 'DEPTH': 'depth2img', 'CAMERA': 'cam2img'}
    assert coord_type in mapping.keys()
    return img_meta[mapping[coord_type]]
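
# Illustrative usage sketch (meta dict hypothetical): for LiDAR points the
# 'lidar2img' matrix is looked up from the image meta information.
#   img_meta = {'lidar2img': torch.eye(4)}
#   proj_mat = get_proj_mat_by_coord_type(img_meta, 'lidar')  # 4x4 identity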


def yaw2local(yaw: Tensor, loc: Tensor) -> Tensor:
    """Transform global yaw to local yaw (alpha in kitti) in camera
    coordinates, ranges from -pi to pi.

    Args:
        yaw (Tensor): A vector with global yaw of each box in shape (N, ).
        loc (Tensor): Gravity center of each box in shape (N, 3).

    Returns:
        Tensor: Local yaw (alpha in KITTI).
    """
    local_yaw = yaw - torch.atan2(loc[:, 0], loc[:, 2])
    larger_idx = (local_yaw > np.pi).nonzero(as_tuple=False)
    small_idx = (local_yaw < -np.pi).nonzero(as_tuple=False)
    if len(larger_idx) != 0:
        local_yaw[larger_idx] -= 2 * np.pi
    if len(small_idx) != 0:
        local_yaw[small_idx] += 2 * np.pi

    return local_yaw
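
# Illustrative usage sketch (values assumed): a box at x=1, z=1 with global
# yaw 0 has local yaw -atan2(1, 1) = -pi / 4.
#   out = yaw2local(torch.tensor([0.]), torch.tensor([[1., 0., 1.]]))
#   # out -> [-0.7854]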


def get_lidar2img(cam2img: Tensor, lidar2cam: Tensor) -> Tensor:
    """Get the projection matrix of lidar2img.

    Args:
        cam2img (torch.Tensor): A 3x3 or 4x4 projection matrix.
        lidar2cam (torch.Tensor): A 3x3 or 4x4 transformation matrix.

    Returns:
        Tensor: Transformation matrix with shape 4x4.
    """
    if cam2img.shape == (3, 3):
        temp = cam2img.new_zeros(4, 4)
        temp[:3, :3] = cam2img
        temp[3, 3] = 1  # keep the homogeneous coordinate valid
        cam2img = temp

    if lidar2cam.shape == (3, 3):
        temp = lidar2cam.new_zeros(4, 4)
        temp[:3, :3] = lidar2cam
        temp[3, 3] = 1  # keep the homogeneous coordinate valid
        lidar2cam = temp
    return torch.matmul(cam2img, lidar2cam)
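
# Illustrative usage sketch (matrices hypothetical): 3x3 inputs are padded to
# 4x4 homogeneous form before composing lidar2img = cam2img @ lidar2cam.
#   lidar2img = get_lidar2img(torch.eye(3), torch.eye(4))
#   # lidar2img -> a 4x4 matrix; here the identity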