coord_3d_mode.py 10.3 KB
Newer Older
dingchang's avatar
dingchang committed
1
# Copyright (c) OpenMMLab. All rights reserved.
2
from enum import IntEnum, unique
3
from typing import Optional, Sequence, Union
4

5
6
import numpy as np
import torch
7
from torch import Tensor
8

zhangshilong's avatar
zhangshilong committed
9
10
from mmdet3d.structures.points import (BasePoints, CameraPoints, DepthPoints,
                                       LiDARPoints)
11
from .base_box3d import BaseInstance3DBoxes
12
from .box_3d_mode import Box3DMode
13
14
15
16


@unique
class Coord3DMode(IntEnum):
    """Enum of different ways to represent a box and point cloud.

    Coordinates in LiDAR:

    .. code-block:: none

                    up z
                       ^   x front
                       |  /
                       | /
        left y <------ 0

    The relative coordinate of bottom center in a LiDAR box is (0.5, 0.5, 0),
    and the yaw is around the z axis, thus the rotation axis=2.

    Coordinates in Camera:

    .. code-block:: none

                z front
               /
              /
             0 ------> x right
             |
             |
             v
        down y

    The relative coordinate of bottom center in a CAM box is (0.5, 1.0, 0.5),
    and the yaw is around the y axis, thus the rotation axis=1.

    Coordinates in Depth:

    .. code-block:: none

        up z
           ^   y front
           |  /
           | /
           0 ------> x right

    The relative coordinate of bottom center in a DEPTH box is (0.5, 0.5, 0),
    and the yaw is around the z axis, thus the rotation axis=2.
    """

    LIDAR = 0
    CAM = 1
    DEPTH = 2

    @staticmethod
    def convert(input: Union[Sequence[float], np.ndarray, Tensor,
                             BaseInstance3DBoxes, BasePoints],
                src: Union[Box3DMode, 'Coord3DMode'],
                dst: Union[Box3DMode, 'Coord3DMode'],
                rt_mat: Optional[Union[np.ndarray, Tensor]] = None,
                with_yaw: bool = True,
                correct_yaw: bool = False,
                is_point: bool = True):
        """Convert boxes or points from ``src`` mode to ``dst`` mode.

        Args:
            input (Sequence[float] or np.ndarray or Tensor or
                :obj:`BaseInstance3DBoxes` or :obj:`BasePoints`): Can be a
                k-tuple, k-list or an Nxk array/tensor.
            src (:obj:`Box3DMode` or :obj:`Coord3DMode`): The source mode.
            dst (:obj:`Box3DMode` or :obj:`Coord3DMode`): The target mode.
            rt_mat (np.ndarray or Tensor, optional): The rotation and
                translation matrix between different coordinates.
                Defaults to None. The conversion from ``src`` coordinates to
                ``dst`` coordinates usually comes along the change of sensors,
                e.g., from camera to LiDAR. This requires a transformation
                matrix.
            with_yaw (bool): If ``input`` is an instance of
                :obj:`BaseInstance3DBoxes`, whether or not it has a yaw angle.
                Defaults to True.
            correct_yaw (bool): If the yaw is rotated by rt_mat.
                Defaults to False.
            is_point (bool): If ``input`` is neither an instance of
                :obj:`BaseInstance3DBoxes` nor an instance of
                :obj:`BasePoints`, whether or not it is point data.
                Defaults to True.

        Returns:
            Sequence[float] or np.ndarray or Tensor or
            :obj:`BaseInstance3DBoxes` or :obj:`BasePoints`: The converted box
            or points of the same type.
        """
        if isinstance(input, BaseInstance3DBoxes):
            return Coord3DMode.convert_box(
                input,
                src,
                dst,
                rt_mat=rt_mat,
                with_yaw=with_yaw,
                correct_yaw=correct_yaw)
        elif isinstance(input, BasePoints):
            return Coord3DMode.convert_point(input, src, dst, rt_mat=rt_mat)
        elif isinstance(input, (tuple, list, np.ndarray, Tensor)):
            # Raw arrays are ambiguous between boxes and points; the caller
            # disambiguates via `is_point`.
            if is_point:
                return Coord3DMode.convert_point(
                    input, src, dst, rt_mat=rt_mat)
            else:
                return Coord3DMode.convert_box(
                    input,
                    src,
                    dst,
                    rt_mat=rt_mat,
                    with_yaw=with_yaw,
                    correct_yaw=correct_yaw)
        else:
            # Fix: include the offending type in the message, consistent with
            # the descriptive errors raised elsewhere in this class.
            raise NotImplementedError(
                f'Conversion of {type(input)} is not supported yet')

    @staticmethod
    def convert_box(
        box: Union[Sequence[float], np.ndarray, Tensor, BaseInstance3DBoxes],
        src: Box3DMode,
        dst: Box3DMode,
        rt_mat: Optional[Union[np.ndarray, Tensor]] = None,
        with_yaw: bool = True,
        correct_yaw: bool = False
    ) -> Union[Sequence[float], np.ndarray, Tensor, BaseInstance3DBoxes]:
        """Convert boxes from ``src`` mode to ``dst`` mode.

        Args:
            box (Sequence[float] or np.ndarray or Tensor or
                :obj:`BaseInstance3DBoxes`): Can be a k-tuple, k-list or an Nxk
                array/tensor.
            src (:obj:`Box3DMode`): The source box mode.
            dst (:obj:`Box3DMode`): The target box mode.
            rt_mat (np.ndarray or Tensor, optional): The rotation and
                translation matrix between different coordinates.
                Defaults to None. The conversion from ``src`` coordinates to
                ``dst`` coordinates usually comes along the change of sensors,
                e.g., from camera to LiDAR. This requires a transformation
                matrix.
            with_yaw (bool): If ``box`` is an instance of
                :obj:`BaseInstance3DBoxes`, whether or not it has a yaw angle.
                Defaults to True.
            correct_yaw (bool): If the yaw is rotated by rt_mat.
                Defaults to False.

        Returns:
            Sequence[float] or np.ndarray or Tensor or
            :obj:`BaseInstance3DBoxes`: The converted box of the same type.
        """
        # Box conversion is fully delegated to Box3DMode, which shares the
        # same LIDAR/CAM/DEPTH integer values as this enum.
        return Box3DMode.convert(
            box,
            src,
            dst,
            rt_mat=rt_mat,
            with_yaw=with_yaw,
            correct_yaw=correct_yaw)

    @staticmethod
    def convert_point(
        point: Union[Sequence[float], np.ndarray, Tensor, BasePoints],
        src: 'Coord3DMode',
        dst: 'Coord3DMode',
        rt_mat: Optional[Union[np.ndarray, Tensor]] = None,
    ) -> Union[Sequence[float], np.ndarray, Tensor, BasePoints]:
        """Convert points from ``src`` mode to ``dst`` mode.

        Args:
            point (Sequence[float] or np.ndarray or Tensor or
                :obj:`BasePoints`): Can be a k-tuple, k-list or an Nxk
                array/tensor.
            src (:obj:`Coord3DMode`): The source point mode.
            dst (:obj:`Coord3DMode`): The target point mode.
            rt_mat (np.ndarray or Tensor, optional): The rotation and
                translation matrix between different coordinates.
                Defaults to None. The conversion from ``src`` coordinates to
                ``dst`` coordinates usually comes along the change of sensors,
                e.g., from camera to LiDAR. This requires a transformation
                matrix.

        Returns:
            Sequence[float] or np.ndarray or Tensor or :obj:`BasePoints`: The
            converted point of the same type.
        """
        if src == dst:
            return point

        is_numpy = isinstance(point, np.ndarray)
        # Fix: snake_case rename of local `is_InstancePoints` (PEP 8).
        is_instance_points = isinstance(point, BasePoints)
        single_point = isinstance(point, (list, tuple))
        if single_point:
            assert len(point) >= 3, (
                'Coord3DMode.convert takes either a k-tuple/list or '
                'an Nxk array/tensor, where k >= 3')
            arr = torch.tensor(point)[None, :]
        else:
            # avoid modifying the input point
            if is_numpy:
                arr = torch.from_numpy(np.asarray(point)).clone()
            elif is_instance_points:
                arr = point.tensor.clone()
            else:
                arr = point.clone()

        # Convert point from `src` mode to `dst` mode. When no `rt_mat` is
        # given, fall back to the fixed axis permutation between the two
        # canonical coordinate systems (pure rotation, no translation).
        if src == Coord3DMode.LIDAR and dst == Coord3DMode.CAM:
            if rt_mat is None:
                rt_mat = arr.new_tensor([[0, -1, 0], [0, 0, -1], [1, 0, 0]])
        elif src == Coord3DMode.CAM and dst == Coord3DMode.LIDAR:
            if rt_mat is None:
                rt_mat = arr.new_tensor([[0, 0, 1], [-1, 0, 0], [0, -1, 0]])
        elif src == Coord3DMode.DEPTH and dst == Coord3DMode.CAM:
            if rt_mat is None:
                rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, -1], [0, 1, 0]])
        elif src == Coord3DMode.CAM and dst == Coord3DMode.DEPTH:
            if rt_mat is None:
                rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, 1], [0, -1, 0]])
        elif src == Coord3DMode.LIDAR and dst == Coord3DMode.DEPTH:
            if rt_mat is None:
                rt_mat = arr.new_tensor([[0, -1, 0], [1, 0, 0], [0, 0, 1]])
        elif src == Coord3DMode.DEPTH and dst == Coord3DMode.LIDAR:
            if rt_mat is None:
                rt_mat = arr.new_tensor([[0, 1, 0], [-1, 0, 0], [0, 0, 1]])
        else:
            raise NotImplementedError(
                f'Conversion from Coord3DMode {src} to {dst} '
                'is not supported yet')

        if not isinstance(rt_mat, Tensor):
            rt_mat = arr.new_tensor(rt_mat)
        if rt_mat.size(1) == 4:
            # rt_mat carries a translation column: lift xyz to homogeneous
            # coordinates before applying it.
            extended_xyz = torch.cat(
                [arr[..., :3], arr.new_ones(arr.size(0), 1)], dim=-1)
            xyz = extended_xyz @ rt_mat.t()
        else:
            xyz = arr[..., :3] @ rt_mat.t()

        # Keep any extra per-point attributes (intensity, etc.) untouched.
        remains = arr[..., 3:]
        arr = torch.cat([xyz[..., :3], remains], dim=-1)

        # convert arr to the original type
        original_type = type(point)
        if single_point:
            return original_type(arr.flatten().tolist())
        if is_numpy:
            return arr.numpy()
        elif is_instance_points:
            if dst == Coord3DMode.CAM:
                target_type = CameraPoints
            elif dst == Coord3DMode.LIDAR:
                target_type = LiDARPoints
            elif dst == Coord3DMode.DEPTH:
                target_type = DepthPoints
            else:
                raise NotImplementedError(
                    f'Conversion to {dst} through {original_type} '
                    'is not supported yet')
            return target_type(
                arr,
                points_dim=arr.size(-1),
                attribute_dims=point.attribute_dims)
        else:
            return arr