box_3d_mode.py 9.82 KB
Newer Older
dingchang's avatar
dingchang committed
1
# Copyright (c) OpenMMLab. All rights reserved.
2
from enum import IntEnum, unique
3
from typing import Optional, Sequence, Union
4

5
6
import numpy as np
import torch
7
from torch import Tensor
8

zhangwenwei's avatar
zhangwenwei committed
9
10
from .base_box3d import BaseInstance3DBoxes
from .cam_box3d import CameraInstance3DBoxes
wuyuefeng's avatar
wuyuefeng committed
11
from .depth_box3d import DepthInstance3DBoxes
zhangwenwei's avatar
zhangwenwei committed
12
from .lidar_box3d import LiDARInstance3DBoxes
13
from .utils import limit_period
zhangwenwei's avatar
zhangwenwei committed
14

15
16
17

@unique
class Box3DMode(IntEnum):
18
    """Enum of different ways to represent a box.
zhangwenwei's avatar
zhangwenwei committed
19

zhangwenwei's avatar
zhangwenwei committed
20
    Coordinates in LiDAR:
21

zhangwenwei's avatar
zhangwenwei committed
22
    .. code-block:: none
zhangwenwei's avatar
zhangwenwei committed
23

zhangwenwei's avatar
zhangwenwei committed
24
25
                    up z
                       ^   x front
zhangwenwei's avatar
zhangwenwei committed
26
27
28
                       |  /
                       | /
        left y <------ 0
zhangwenwei's avatar
zhangwenwei committed
29

wuyuefeng's avatar
wuyuefeng committed
30
    The relative coordinate of bottom center in a LiDAR box is (0.5, 0.5, 0),
zhangwenwei's avatar
zhangwenwei committed
31
    and the yaw is around the z axis, thus the rotation axis=2.
zhangwenwei's avatar
zhangwenwei committed
32

33
    Coordinates in Camera:
34

zhangwenwei's avatar
zhangwenwei committed
35
    .. code-block:: none
zhangwenwei's avatar
zhangwenwei committed
36

zhangwenwei's avatar
zhangwenwei committed
37
38
39
40
41
42
43
44
                z front
               /
              /
             0 ------> x right
             |
             |
             v
        down y
zhangwenwei's avatar
zhangwenwei committed
45

Xiangxu-0103's avatar
Xiangxu-0103 committed
46
    The relative coordinate of bottom center in a CAM box is (0.5, 1.0, 0.5),
zhangwenwei's avatar
zhangwenwei committed
47
    and the yaw is around the y axis, thus the rotation axis=1.
zhangwenwei's avatar
zhangwenwei committed
48

49
    Coordinates in Depth:
50

zhangwenwei's avatar
zhangwenwei committed
51
    .. code-block:: none
zhangwenwei's avatar
zhangwenwei committed
52

zhangwenwei's avatar
zhangwenwei committed
53
54
55
56
57
        up z
           ^   y front
           |  /
           | /
           0 ------> x right
zhangwenwei's avatar
zhangwenwei committed
58

wuyuefeng's avatar
wuyuefeng committed
59
    The relative coordinate of bottom center in a DEPTH box is (0.5, 0.5, 0),
zhangwenwei's avatar
zhangwenwei committed
60
    and the yaw is around the z axis, thus the rotation axis=2.
61
62
63
64
65
66
67
    """

    LIDAR = 0
    CAM = 1
    DEPTH = 2

    @staticmethod
68
69
70
71
72
73
74
75
76
    def convert(
        box: Union[Sequence[float], np.ndarray, Tensor, BaseInstance3DBoxes],
        src: 'Box3DMode',
        dst: 'Box3DMode',
        rt_mat: Optional[Union[np.ndarray, Tensor]] = None,
        with_yaw: bool = True,
        correct_yaw: bool = False
    ) -> Union[Sequence[float], np.ndarray, Tensor, BaseInstance3DBoxes]:
        """Convert boxes from ``src`` mode to ``dst`` mode.
zhangwenwei's avatar
zhangwenwei committed
77

78
        Args:
79
80
81
82
83
84
            box (Sequence[float] or np.ndarray or Tensor or
                :obj:`BaseInstance3DBoxes`): Can be a k-tuple, k-list or an Nxk
                array/tensor.
            src (:obj:`Box3DMode`): The source box mode.
            dst (:obj:`Box3DMode`): The target box mode.
            rt_mat (np.ndarray or Tensor, optional): The rotation and
85
                translation matrix between different coordinates.
86
87
88
89
90
                Defaults to None. The conversion from ``src`` coordinates to
                ``dst`` coordinates usually comes along the change of sensors,
                e.g., from camera to LiDAR. This requires a transformation
                matrix.
            with_yaw (bool): If ``box`` is an instance of
91
92
                :obj:`BaseInstance3DBoxes`, whether or not it has a yaw angle.
                Defaults to True.
93
            correct_yaw (bool): If the yaw is rotated by rt_mat.
94
                Defaults to False.
95
96

        Returns:
97
98
            Sequence[float] or np.ndarray or Tensor or
            :obj:`BaseInstance3DBoxes`: The converted box of the same type.
99
        """
zhangwenwei's avatar
zhangwenwei committed
100
        if src == dst:
101
102
103
            return box

        is_numpy = isinstance(box, np.ndarray)
zhangwenwei's avatar
zhangwenwei committed
104
        is_Instance3DBoxes = isinstance(box, BaseInstance3DBoxes)
105
106
107
        single_box = isinstance(box, (list, tuple))
        if single_box:
            assert len(box) >= 7, (
108
                'Box3DMode.convert takes either a k-tuple/list or '
109
110
111
112
113
114
                'an Nxk array/tensor, where k >= 7')
            arr = torch.tensor(box)[None, :]
        else:
            # avoid modifying the input box
            if is_numpy:
                arr = torch.from_numpy(np.asarray(box)).clone()
zhangwenwei's avatar
zhangwenwei committed
115
116
            elif is_Instance3DBoxes:
                arr = box.tensor.clone()
117
118
119
            else:
                arr = box.clone()

120
121
122
        if is_Instance3DBoxes:
            with_yaw = box.with_yaw

zhangwenwei's avatar
zhangwenwei committed
123
124
        # convert box from `src` mode to `dst` mode.
        x_size, y_size, z_size = arr[..., 3:4], arr[..., 4:5], arr[..., 5:6]
125
126
        if with_yaw:
            yaw = arr[..., 6:7]
zhangwenwei's avatar
zhangwenwei committed
127
128
129
        if src == Box3DMode.LIDAR and dst == Box3DMode.CAM:
            if rt_mat is None:
                rt_mat = arr.new_tensor([[0, -1, 0], [0, 0, -1], [1, 0, 0]])
130
131
            xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
            if with_yaw:
132
133
134
135
136
137
138
139
140
141
                if correct_yaw:
                    yaw_vector = torch.cat([
                        torch.cos(yaw),
                        torch.sin(yaw),
                        torch.zeros_like(yaw)
                    ],
                                           dim=1)
                else:
                    yaw = -yaw - np.pi / 2
                    yaw = limit_period(yaw, period=np.pi * 2)
zhangwenwei's avatar
zhangwenwei committed
142
143
144
        elif src == Box3DMode.CAM and dst == Box3DMode.LIDAR:
            if rt_mat is None:
                rt_mat = arr.new_tensor([[0, 0, 1], [-1, 0, 0], [0, -1, 0]])
145
146
            xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
            if with_yaw:
147
148
149
150
151
152
153
154
155
156
                if correct_yaw:
                    yaw_vector = torch.cat([
                        torch.cos(-yaw),
                        torch.zeros_like(yaw),
                        torch.sin(-yaw)
                    ],
                                           dim=1)
                else:
                    yaw = -yaw - np.pi / 2
                    yaw = limit_period(yaw, period=np.pi * 2)
zhangwenwei's avatar
zhangwenwei committed
157
158
        elif src == Box3DMode.DEPTH and dst == Box3DMode.CAM:
            if rt_mat is None:
159
                rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, -1], [0, 1, 0]])
zhangwenwei's avatar
zhangwenwei committed
160
            xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
161
            if with_yaw:
162
163
164
165
166
167
168
169
170
                if correct_yaw:
                    yaw_vector = torch.cat([
                        torch.cos(yaw),
                        torch.sin(yaw),
                        torch.zeros_like(yaw)
                    ],
                                           dim=1)
                else:
                    yaw = -yaw
zhangwenwei's avatar
zhangwenwei committed
171
172
        elif src == Box3DMode.CAM and dst == Box3DMode.DEPTH:
            if rt_mat is None:
173
                rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, 1], [0, -1, 0]])
zhangwenwei's avatar
zhangwenwei committed
174
            xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
175
            if with_yaw:
176
177
178
179
180
181
182
183
184
                if correct_yaw:
                    yaw_vector = torch.cat([
                        torch.cos(-yaw),
                        torch.zeros_like(yaw),
                        torch.sin(-yaw)
                    ],
                                           dim=1)
                else:
                    yaw = -yaw
wuyuefeng's avatar
wuyuefeng committed
185
186
187
        elif src == Box3DMode.LIDAR and dst == Box3DMode.DEPTH:
            if rt_mat is None:
                rt_mat = arr.new_tensor([[0, -1, 0], [1, 0, 0], [0, 0, 1]])
188
189
            xyz_size = torch.cat([x_size, y_size, z_size], dim=-1)
            if with_yaw:
190
191
192
193
194
195
196
197
198
199
                if correct_yaw:
                    yaw_vector = torch.cat([
                        torch.cos(yaw),
                        torch.sin(yaw),
                        torch.zeros_like(yaw)
                    ],
                                           dim=1)
                else:
                    yaw = yaw + np.pi / 2
                    yaw = limit_period(yaw, period=np.pi * 2)
wuyuefeng's avatar
wuyuefeng committed
200
201
202
        elif src == Box3DMode.DEPTH and dst == Box3DMode.LIDAR:
            if rt_mat is None:
                rt_mat = arr.new_tensor([[0, 1, 0], [-1, 0, 0], [0, 0, 1]])
203
204
            xyz_size = torch.cat([x_size, y_size, z_size], dim=-1)
            if with_yaw:
205
206
207
208
209
210
211
212
213
214
                if correct_yaw:
                    yaw_vector = torch.cat([
                        torch.cos(yaw),
                        torch.sin(yaw),
                        torch.zeros_like(yaw)
                    ],
                                           dim=1)
                else:
                    yaw = yaw - np.pi / 2
                    yaw = limit_period(yaw, period=np.pi * 2)
zhangwenwei's avatar
zhangwenwei committed
215
216
217
218
219
        else:
            raise NotImplementedError(
                f'Conversion from Box3DMode {src} to {dst} '
                'is not supported yet')

220
        if not isinstance(rt_mat, Tensor):
zhangwenwei's avatar
zhangwenwei committed
221
222
223
            rt_mat = arr.new_tensor(rt_mat)
        if rt_mat.size(1) == 4:
            extended_xyz = torch.cat(
224
                [arr[..., :3], arr.new_ones(arr.size(0), 1)], dim=-1)
zhangwenwei's avatar
zhangwenwei committed
225
226
            xyz = extended_xyz @ rt_mat.t()
        else:
227
            xyz = arr[..., :3] @ rt_mat.t()
zhangwenwei's avatar
zhangwenwei committed
228

229
230
231
232
233
234
235
236
237
238
239
240
        # Note: we only use rotation in rt_mat
        # so don't need to extend yaw_vector
        if with_yaw and correct_yaw:
            rot_yaw_vector = yaw_vector @ rt_mat[:3, :3].t()
            if dst == Box3DMode.CAM:
                yaw = torch.atan2(-rot_yaw_vector[:, [2]], rot_yaw_vector[:,
                                                                          [0]])
            elif dst in [Box3DMode.LIDAR, Box3DMode.DEPTH]:
                yaw = torch.atan2(rot_yaw_vector[:, [1]], rot_yaw_vector[:,
                                                                         [0]])
            yaw = limit_period(yaw, period=np.pi * 2)

241
242
243
244
245
246
        if with_yaw:
            remains = arr[..., 7:]
            arr = torch.cat([xyz[..., :3], xyz_size, yaw, remains], dim=-1)
        else:
            remains = arr[..., 6:]
            arr = torch.cat([xyz[..., :3], xyz_size, remains], dim=-1)
zhangwenwei's avatar
zhangwenwei committed
247
248

        # convert arr to the original type
zhangwenwei's avatar
zhangwenwei committed
249
        original_type = type(box)
250
251
252
253
        if single_box:
            return original_type(arr.flatten().tolist())
        if is_numpy:
            return arr.numpy()
zhangwenwei's avatar
zhangwenwei committed
254
255
256
257
258
        elif is_Instance3DBoxes:
            if dst == Box3DMode.CAM:
                target_type = CameraInstance3DBoxes
            elif dst == Box3DMode.LIDAR:
                target_type = LiDARInstance3DBoxes
wuyuefeng's avatar
wuyuefeng committed
259
260
            elif dst == Box3DMode.DEPTH:
                target_type = DepthInstance3DBoxes
zhangwenwei's avatar
zhangwenwei committed
261
262
            else:
                raise NotImplementedError(
263
264
                    f'Conversion to {dst} through {original_type} '
                    'is not supported yet')
265
            return target_type(arr, box_dim=arr.size(-1), with_yaw=with_yaw)
266
267
        else:
            return arr