cam_box3d.py 15.3 KB
Newer Older
dingchang's avatar
dingchang committed
1
# Copyright (c) OpenMMLab. All rights reserved.
2
3
from typing import Optional, Sequence, Tuple, Union

zhangwenwei's avatar
zhangwenwei committed
4
5
import numpy as np
import torch
6
from torch import Tensor
zhangwenwei's avatar
zhangwenwei committed
7

zhangshilong's avatar
zhangshilong committed
8
from mmdet3d.structures.points import BasePoints
zhangwenwei's avatar
zhangwenwei committed
9
from .base_box3d import BaseInstance3DBoxes
10
from .utils import rotation_3d_in_axis, yaw2local
zhangwenwei's avatar
zhangwenwei committed
11
12


zhangwenwei's avatar
zhangwenwei committed
13
class CameraInstance3DBoxes(BaseInstance3DBoxes):
    """3D boxes of instances in CAM coordinates.

    Coordinates in Camera:

    .. code-block:: none

                z front (yaw=-0.5*pi)
               /
              /
             0 ------> x right (yaw=0)
             |
             |
             v
        down y

    The relative coordinate of bottom center in a CAM box is (0.5, 1.0, 0.5),
    and the yaw is around the y axis, thus the rotation axis=1. The yaw is 0 at
    the positive direction of x axis, and decreases from the positive direction
    of x to the positive direction of z.

    Args:
        tensor (Tensor or np.ndarray or Sequence[Sequence[float]]): The boxes
            data with shape (N, box_dim).
        box_dim (int): Number of the dimension of a box. Each row is
            (x, y, z, x_size, y_size, z_size, yaw). Defaults to 7.
        with_yaw (bool): Whether the box is with yaw rotation. If False, the
            value of yaw will be set to 0 as minmax boxes. Defaults to True.
        origin (Tuple[float]): Relative position of the box origin.
            Defaults to (0.5, 1.0, 0.5). This will guide the box be converted
            to (0.5, 1.0, 0.5) mode.

    Attributes:
        tensor (Tensor): Float matrix with shape (N, box_dim).
        box_dim (int): Integer indicating the dimension of a box. Each row is
            (x, y, z, x_size, y_size, z_size, yaw, ...).
        with_yaw (bool): Whether the boxes carry a yaw rotation. If False, the
            value of yaw will be set to 0 as minmax boxes.
    """
    # Index of the axis the yaw rotates around (the y axis, see above).
    YAW_AXIS = 1
zhangwenwei's avatar
zhangwenwei committed
53

54
55
56
57
58
59
60
61
    def __init__(
        self,
        tensor: Union[Tensor, np.ndarray, Sequence[Sequence[float]]],
        box_dim: int = 7,
        with_yaw: bool = True,
        origin: Tuple[float, float, float] = (0.5, 1.0, 0.5)
    ) -> None:
        """Build the camera boxes from raw box data.

        The input is converted to a float32 tensor (on the input's device if
        it already is a tensor, on CPU otherwise) and stored as a copy. Rows
        with only 6 values get a zero yaw appended and ``with_yaw`` is forced
        to False. Finally the box centers are shifted so that they follow the
        (0.5, 1.0, 0.5) origin convention.

        Args:
            tensor (Tensor or np.ndarray or Sequence[Sequence[float]]): The
                boxes data with shape (N, box_dim).
            box_dim (int): Number of values per box. Defaults to 7.
            with_yaw (bool): Whether the boxes carry a yaw rotation. Ignored
                (treated as False) when ``box_dim`` is 6. Defaults to True.
            origin (Tuple[float]): Relative position of the box origin used
                by the input data. Defaults to (0.5, 1.0, 0.5).
        """
        device = (tensor.device
                  if isinstance(tensor, Tensor) else torch.device('cpu'))
        tensor = torch.as_tensor(tensor, dtype=torch.float32, device=device)

        # Reshape the empty input instead of creating a fresh tensor, so the
        # result still depends on the input (otherwise jit gets confused).
        if tensor.numel() == 0:
            tensor = tensor.reshape((-1, box_dim))
        assert tensor.dim() == 2 and tensor.size(-1) == box_dim, \
            ('The box dimension must be 2 and the length of the last '
             f'dimension must be {box_dim}, but got boxes with shape '
             f'{tensor.shape}.')

        yaw_missing = tensor.shape[-1] == 6
        if yaw_missing:
            # 6-value boxes have no yaw: pad a zero column as a fake yaw and
            # record below that the boxes are not rotated.
            assert box_dim == 6
            zero_yaw = tensor.new_zeros(tensor.shape[0], 1)
            tensor = torch.cat((tensor, zero_yaw), dim=-1)
        self.box_dim = box_dim + 1 if yaw_missing else box_dim
        self.with_yaw = False if yaw_missing else with_yaw
        self.tensor = tensor.clone()

        canonical_origin = (0.5, 1.0, 0.5)
        if origin != canonical_origin:
            # Move the stored centers from the given origin to the canonical
            # one; the offset is expressed as a fraction of the box size.
            offset = (self.tensor.new_tensor(canonical_origin) -
                      self.tensor.new_tensor(origin))
            self.tensor[:, :3] += self.tensor[:, 3:6] * offset

93
    @property
    def height(self) -> Tensor:
        """Tensor: Height (the y_size column) of each box in shape (N, )."""
        y_size = self.tensor[:, 4]
        return y_size

98
    @property
    def top_height(self) -> Tensor:
        """Tensor: y coordinate of the top of each box in shape (N, )."""
        bottom, y_size = self.bottom_height, self.height
        # y points down in CAM coordinates, so the top lies at a smaller y
        # than the bottom: subtract the height instead of adding it.
        return bottom - y_size

    @property
    def bottom_height(self) -> Tensor:
        """Tensor: y coordinate of the bottom of each box in shape (N, )."""
        bottom_y = self.tensor[:, 1]
        return bottom_y

109
    @property
    def local_yaw(self) -> Tensor:
        """Tensor: Local yaw of each box in shape (N, ).

        The local yaw equals ``alpha`` in KITTI, which is commonly used in
        monocular 3D object detection, so only :obj:`CameraInstance3DBoxes`
        provides this property. It is derived from the yaw and the gravity
        center of the boxes via ``yaw2local``.
        """
        return yaw2local(self.yaw, self.gravity_center)

zhangwenwei's avatar
zhangwenwei committed
121
    @property
    def gravity_center(self) -> Tensor:
        """Tensor: Gravity center of each box in shape (N, 3)."""
        # Start from a copy of the bottom centers: x and z are already right.
        center = self.bottom_center.clone()
        # y points down, so the gravity center sits half a height "above"
        # the bottom, i.e. at a smaller y value.
        center[:, 1] -= self.tensor[:, 4] * 0.5
        return center

    @property
    def corners(self) -> Tensor:
        """Convert boxes to corners in clockwise order, in the form of (x0y0z0,
        x0y0z1, x0y1z1, x0y1z0, x1y0z0, x1y0z1, x1y1z1, x1y1z0).

        .. code-block:: none

                         front z
                              /
                             /
               (x0, y0, z1) + -----------  + (x1, y0, z1)
                           /|            / |
                          / |           /  |
            (x0, y0, z0) + ----------- +   + (x1, y1, z1)
                         |  /      .   |  /
                         | / origin    | /
            (x0, y1, z0) + ----------- + -------> right x
                         |             (x1, y1, z0)
                         |
                         v
                    down y

        Returns:
            Tensor: A tensor with 8 corners of each box in shape (N, 8, 3).
        """
        if self.tensor.numel() == 0:
            return torch.empty([0, 8, 3], device=self.tensor.device)

        dims = self.dims
        # Corners of the unit cube as (x, y, z), listed directly in the
        # order documented above.
        unit_corners = dims.new_tensor([
            [0, 0, 0],
            [0, 0, 1],
            [0, 1, 1],
            [0, 1, 0],
            [1, 0, 0],
            [1, 0, 1],
            [1, 1, 1],
            [1, 1, 0],
        ])
        # Express the unit corners relative to the origin (0.5, 1, 0.5)
        unit_corners = unit_corners - dims.new_tensor([0.5, 1, 0.5])

        # Scale by the box sizes, rotate around the yaw axis, then translate
        # to the stored box positions.
        scaled = dims.view([-1, 1, 3]) * unit_corners.reshape([1, 8, 3])
        corners = rotation_3d_in_axis(
            scaled, self.tensor[:, 6], axis=self.YAW_AXIS)
        corners += self.tensor[:, :3].view(-1, 1, 3)
        return corners

173
    @property
    def bev(self) -> Tensor:
        """Tensor: 2D BEV box of each box with rotation in XYWHR format, in
        shape (N, 5)."""
        # Columns: x, z, x_size, z_size, yaw
        xzwhr = self.tensor[:, [0, 2, 3, 5, 6]].clone()
        # The gravity axis (y) of the camera frame points towards the ground,
        # so the yaw has to change sign when seen in bird's eye view.
        xzwhr[:, -1] = -xzwhr[:, -1]
        return xzwhr
183

184
185
186
187
188
189
    def rotate(
        self,
        angle: Union[Tensor, np.ndarray, float],
        points: Optional[Union[Tensor, np.ndarray, BasePoints]] = None
    ) -> Union[Tuple[Tensor, Tensor], Tuple[np.ndarray, np.ndarray], Tuple[
            BasePoints, Tensor], None]:
        """Rotate boxes with points (optional) with the given angle or rotation
        matrix.

        The boxes are modified in place: their centers are rotated and the
        rotation angle is added to their yaw. ``points``, when given, are
        rotated in place as well.

        Args:
            angle (Tensor or np.ndarray or float): Rotation angle (a single
                value) or a 3x3 rotation matrix.
            points (Tensor or np.ndarray or :obj:`BasePoints`, optional):
                Points to rotate. Defaults to None.

        Returns:
            tuple or None: When ``points`` is None, the function returns None,
            otherwise it returns the rotated points and the rotation matrix
            ``rot_mat_T``. For ``np.ndarray`` points the matrix is returned as
            a ``np.ndarray`` too.

        Raises:
            ValueError: If ``points`` is of an unsupported type.
        """
        if not isinstance(angle, Tensor):
            angle = self.tensor.new_tensor(angle)

        assert angle.shape == torch.Size([3, 3]) or angle.numel() == 1, \
            f'invalid rotation angle shape {angle.shape}'

        if angle.numel() == 1:
            self.tensor[:, 0:3], rot_mat_T = rotation_3d_in_axis(
                self.tensor[:, 0:3],
                angle,
                axis=self.YAW_AXIS,
                return_mat=True)
        else:
            rot_mat_T = angle
            rot_sin = rot_mat_T[2, 0]
            rot_cos = rot_mat_T[0, 0]
            # Recover the yaw increment from the matrix entries. torch.atan2
            # is used (rather than np.arctan2) so that the matrix never has
            # to be converted to numpy, which fails for CUDA tensors and for
            # tensors that require grad.
            angle = torch.atan2(rot_sin, rot_cos)
            self.tensor[:, 0:3] = self.tensor[:, 0:3] @ rot_mat_T

        self.tensor[:, 6] += angle

        if points is not None:
            if isinstance(points, Tensor):
                points[:, :3] = points[:, :3] @ rot_mat_T
            elif isinstance(points, np.ndarray):
                rot_mat_T = rot_mat_T.cpu().numpy()
                points[:, :3] = np.dot(points[:, :3], rot_mat_T)
            elif isinstance(points, BasePoints):
                points.rotate(rot_mat_T)
            else:
                raise ValueError(
                    'points must be a Tensor, np.ndarray or BasePoints, '
                    f'but got {type(points)}')
            return points, rot_mat_T

237
238
239
240
241
    def flip(
        self,
        bev_direction: str = 'horizontal',
        points: Optional[Union[Tensor, np.ndarray, BasePoints]] = None
    ) -> Union[Tensor, np.ndarray, BasePoints, None]:
        """Flip the boxes in BEV along given BEV direction.

        In CAM coordinates, it flips the x (horizontal) or z (vertical) axis.
        Boxes (and ``points``, when given) are modified in place.

        Args:
            bev_direction (str): Direction by which to flip. Can be chosen from
                'horizontal' and 'vertical'. Defaults to 'horizontal'.
            points (Tensor or np.ndarray or :obj:`BasePoints`, optional):
                Points to flip. Defaults to None.

        Returns:
            Tensor or np.ndarray or :obj:`BasePoints` or None: When ``points``
            is None, the function returns None, otherwise it returns the
            flipped points.
        """
        assert bev_direction in ('horizontal', 'vertical')
        horizontal = bev_direction == 'horizontal'

        # The stride-7 slices pick the flipped coordinate plus, for boxes with
        # more than 7 values, the column 7 positions further on (presumably
        # the matching extra component such as velocity; TODO confirm).
        if horizontal:
            self.tensor[:, 0::7] = -self.tensor[:, 0::7]
            if self.with_yaw:
                self.tensor[:, 6] = -self.tensor[:, 6] + np.pi
        else:
            self.tensor[:, 2::7] = -self.tensor[:, 2::7]
            if self.with_yaw:
                self.tensor[:, 6] = -self.tensor[:, 6]

        if points is None:
            return None

        assert isinstance(points, (Tensor, np.ndarray, BasePoints))
        if isinstance(points, (Tensor, np.ndarray)):
            # x is column 0, z is column 2
            flipped_column = 0 if horizontal else 2
            points[:, flipped_column] = -points[:, flipped_column]
        elif isinstance(points, BasePoints):
            points.flip(bev_direction)
        return points

278
    @classmethod
    def height_overlaps(cls, boxes1: 'CameraInstance3DBoxes',
                        boxes2: 'CameraInstance3DBoxes') -> Tensor:
        """Calculate height overlaps of two boxes.

        Note:
            This function calculates the height overlaps between ``boxes1`` and
            ``boxes2``, ``boxes1`` and ``boxes2`` should be in the same type.

        Args:
            boxes1 (:obj:`CameraInstance3DBoxes`): Boxes 1 contain N boxes.
            boxes2 (:obj:`CameraInstance3DBoxes`): Boxes 2 contain M boxes.

        Returns:
            Tensor: Calculated height overlap of every pair of boxes, obtained
            by broadcasting an (N, 1) column against a (1, M) row.
        """
        assert isinstance(boxes1, CameraInstance3DBoxes)
        assert isinstance(boxes2, CameraInstance3DBoxes)

        # boxes1 values as a column, boxes2 values as a row
        top1 = boxes1.top_height.view(-1, 1)
        bottom1 = boxes1.bottom_height.view(-1, 1)
        top2 = boxes2.top_height.view(1, -1)
        bottom2 = boxes2.bottom_height.view(1, -1)

        # The gravity axis of the camera frame points to the ground, so a
        # bottom has a larger y than a top. The shared extent therefore runs
        # from the larger of the two tops to the smaller of the two bottoms.
        shared_top = torch.max(top1, top2)
        shared_bottom = torch.min(bottom1, bottom2)
        return torch.clamp(shared_bottom - shared_top, min=0)
309

310
311
312
313
    def convert_to(self,
                   dst: int,
                   rt_mat: Optional[Union[Tensor, np.ndarray]] = None,
                   correct_yaw: bool = False) -> 'BaseInstance3DBoxes':
        """Convert self to ``dst`` mode.

        The work is delegated to ``Box3DMode.convert`` with the camera mode as
        the source mode.

        Args:
            dst (int): The target Box mode.
            rt_mat (Tensor or np.ndarray, optional): The rotation and
                translation matrix between different coordinates.
                Defaults to None. The conversion from ``src`` coordinates to
                ``dst`` coordinates usually comes along the change of sensors,
                e.g., from camera to LiDAR. This requires a transformation
                matrix.
            correct_yaw (bool): Whether to convert the yaw angle to the target
                coordinate. Defaults to False.

        Returns:
            :obj:`BaseInstance3DBoxes`: The converted box of the same type in
            the ``dst`` mode.
        """
        from .box_3d_mode import Box3DMode

        # TODO: always set correct_yaw=True
        conversion_kwargs = dict(
            box=self,
            src=Box3DMode.CAM,
            dst=dst,
            rt_mat=rt_mat,
            correct_yaw=correct_yaw)
        return Box3DMode.convert(**conversion_kwargs)
340

341
342
343
344
    def points_in_boxes_part(
            self,
            points: Tensor,
            boxes_override: Optional[Tensor] = None) -> Tensor:
        """Find the box in which each point is.

        Points are converted from camera to LiDAR coordinates and the lookup
        itself is done by the parent class.

        Args:
            points (Tensor): Points in shape (1, M, 3) or (M, 3), 3 dimensions
                are (x, y, z). They are treated as camera-frame points, since
                they are converted from CAM to LIDAR mode here.
            boxes_override (Tensor, optional): Boxes to override `self.tensor`.
                They are passed on as given, without any coordinate
                conversion. Defaults to None.

        Returns:
            Tensor: The index of the first box that each point is in with shape
            (M, ). Default value is -1 (if the point is not enclosed by any
            box).
        """
        from .coord_3d_mode import Coord3DMode

        cam_mode, lidar_mode = Coord3DMode.CAM, Coord3DMode.LIDAR
        points_lidar = Coord3DMode.convert(points, cam_mode, lidar_mode)

        boxes_lidar = boxes_override
        if boxes_lidar is None:
            # No override given: use our own boxes, moved to the LiDAR frame
            boxes_lidar = Coord3DMode.convert(
                self.tensor, cam_mode, lidar_mode, is_point=False)

        return super().points_in_boxes_part(points_lidar, boxes_lidar)

374
375
376
    def points_in_boxes_all(self,
                            points: Tensor,
                            boxes_override: Optional[Tensor] = None) -> Tensor:
        """Find all boxes in which each point is.

        Points are converted from camera to LiDAR coordinates and the lookup
        itself is done by the parent class.

        Args:
            points (Tensor): Points in shape (1, M, 3) or (M, 3), 3 dimensions
                are (x, y, z). They are treated as camera-frame points, since
                they are converted from CAM to LIDAR mode here.
            boxes_override (Tensor, optional): Boxes to override `self.tensor`.
                They are passed on as given, without any coordinate
                conversion. Defaults to None.

        Returns:
            Tensor: The index of all boxes in which each point is with shape
            (M, T).
        """
        from .coord_3d_mode import Coord3DMode

        cam_mode, lidar_mode = Coord3DMode.CAM, Coord3DMode.LIDAR
        points_lidar = Coord3DMode.convert(points, cam_mode, lidar_mode)

        boxes_lidar = boxes_override
        if boxes_lidar is None:
            # No override given: use our own boxes, moved to the LiDAR frame
            boxes_lidar = Coord3DMode.convert(
                self.tensor, cam_mode, lidar_mode, is_point=False)

        return super().points_in_boxes_all(points_lidar, boxes_lidar)