coord_3d_mode.py 8.94 KB
Newer Older
dingchang's avatar
dingchang committed
1
# Copyright (c) OpenMMLab. All rights reserved.
2
3
from enum import IntEnum, unique

4
5
6
import numpy as np
import torch

zhangshilong's avatar
zhangshilong committed
7
8
from mmdet3d.structures.points import (BasePoints, CameraPoints, DepthPoints,
                                       LiDARPoints)
9
from .base_box3d import BaseInstance3DBoxes
10
from .box_3d_mode import Box3DMode
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65


@unique
class Coord3DMode(IntEnum):
    r"""Enum of different ways to represent a box
        and point cloud.

    Coordinates in LiDAR:

    .. code-block:: none

                    up z
                       ^   x front
                       |  /
                       | /
        left y <------ 0

    The relative coordinate of bottom center in a LiDAR box is (0.5, 0.5, 0),
    and the yaw is around the z axis, thus the rotation axis=2.

    Coordinates in camera:

    .. code-block:: none

                z front
               /
              /
             0 ------> x right
             |
             |
             v
        down y

    The relative coordinate of bottom center in a CAM box is [0.5, 1.0, 0.5],
    and the yaw is around the y axis, thus the rotation axis=1.

    Coordinates in Depth mode:

    .. code-block:: none

        up z
           ^   y front
           |  /
           | /
           0 ------> x right

    The relative coordinate of bottom center in a DEPTH box is (0.5, 0.5, 0),
    and the yaw is around the z axis, thus the rotation axis=2.
    """

    LIDAR = 0
    CAM = 1
    DEPTH = 2

    @staticmethod
66
67
68
69
70
71
72
73
74
    def convert(input, src, dst, rt_mat=None, with_yaw=True, is_point=True):
        """Convert boxes or points from `src` mode to `dst` mode.

        Args:
            input (tuple | list | np.ndarray | torch.Tensor |
                :obj:`BaseInstance3DBoxes` | :obj:`BasePoints`):
                Can be a k-tuple, k-list or an Nxk array/tensor, where k = 7.
            src (:obj:`Box3DMode` | :obj:`Coord3DMode`): The source mode.
            dst (:obj:`Box3DMode` | :obj:`Coord3DMode`): The target mode.
75
76
77
            rt_mat (np.ndarray | torch.Tensor, optional): The rotation and
                translation matrix between different coordinates.
                Defaults to None.
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
                The conversion from `src` coordinates to `dst` coordinates
                usually comes along the change of sensors, e.g., from camera
                to LiDAR. This requires a transformation matrix.
            with_yaw (bool): If `box` is an instance of
                :obj:`BaseInstance3DBoxes`, whether or not it has a yaw angle.
                Defaults to True.
            is_point (bool): If `input` is neither an instance of
                :obj:`BaseInstance3DBoxes` nor an instance of
                :obj:`BasePoints`, whether or not it is point data.
                Defaults to True.

        Returns:
            (tuple | list | np.ndarray | torch.Tensor |
                :obj:`BaseInstance3DBoxes` | :obj:`BasePoints`):
                The converted box of the same type.
        """
94
        if isinstance(input, BaseInstance3DBoxes):
95
96
            return Coord3DMode.convert_box(
                input, src, dst, rt_mat=rt_mat, with_yaw=with_yaw)
97
98
        elif isinstance(input, BasePoints):
            return Coord3DMode.convert_point(input, src, dst, rt_mat=rt_mat)
99
100
101
102
103
104
105
        elif isinstance(input, (tuple, list, np.ndarray, torch.Tensor)):
            if is_point:
                return Coord3DMode.convert_point(
                    input, src, dst, rt_mat=rt_mat)
            else:
                return Coord3DMode.convert_box(
                    input, src, dst, rt_mat=rt_mat, with_yaw=with_yaw)
106
107
108
109
        else:
            raise NotImplementedError

    @staticmethod
110
    def convert_box(box, src, dst, rt_mat=None, with_yaw=True):
111
112
113
        """Convert boxes from `src` mode to `dst` mode.

        Args:
114
            box (tuple | list | np.ndarray |
115
                torch.Tensor | :obj:`BaseInstance3DBoxes`):
116
                Can be a k-tuple, k-list or an Nxk array/tensor, where k = 7.
117
118
            src (:obj:`Box3DMode`): The src Box mode.
            dst (:obj:`Box3DMode`): The target Box mode.
119
120
121
            rt_mat (np.ndarray | torch.Tensor, optional): The rotation and
                translation matrix between different coordinates.
                Defaults to None.
122
123
124
                The conversion from `src` coordinates to `dst` coordinates
                usually comes along the change of sensors, e.g., from camera
                to LiDAR. This requires a transformation matrix.
125
126
127
            with_yaw (bool): If `box` is an instance of
                :obj:`BaseInstance3DBoxes`, whether or not it has a yaw angle.
                Defaults to True.
128
129

        Returns:
130
131
            (tuple | list | np.ndarray | torch.Tensor |
                :obj:`BaseInstance3DBoxes`):
132
133
                The converted box of the same type.
        """
134
        return Box3DMode.convert(box, src, dst, rt_mat=rt_mat)
135
136
137
138
139
140

    @staticmethod
    def convert_point(point, src, dst, rt_mat=None):
        """Convert points from `src` mode to `dst` mode.

        Args:
141
            point (tuple | list | np.ndarray |
142
                torch.Tensor | :obj:`BasePoints`):
143
144
145
                Can be a k-tuple, k-list or an Nxk array/tensor.
            src (:obj:`CoordMode`): The src Point mode.
            dst (:obj:`CoordMode`): The target Point mode.
146
147
148
            rt_mat (np.ndarray | torch.Tensor, optional): The rotation and
                translation matrix between different coordinates.
                Defaults to None.
149
150
151
152
153
                The conversion from `src` coordinates to `dst` coordinates
                usually comes along the change of sensors, e.g., from camera
                to LiDAR. This requires a transformation matrix.

        Returns:
154
            (tuple | list | np.ndarray | torch.Tensor | :obj:`BasePoints`):
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
                The converted point of the same type.
        """
        if src == dst:
            return point

        is_numpy = isinstance(point, np.ndarray)
        is_InstancePoints = isinstance(point, BasePoints)
        single_point = isinstance(point, (list, tuple))
        if single_point:
            assert len(point) >= 3, (
                'CoordMode.convert takes either a k-tuple/list or '
                'an Nxk array/tensor, where k >= 3')
            arr = torch.tensor(point)[None, :]
        else:
            # avoid modifying the input point
            if is_numpy:
                arr = torch.from_numpy(np.asarray(point)).clone()
            elif is_InstancePoints:
                arr = point.tensor.clone()
            else:
                arr = point.clone()

        # convert point from `src` mode to `dst` mode.
        if src == Coord3DMode.LIDAR and dst == Coord3DMode.CAM:
179
180
            if rt_mat is None:
                rt_mat = arr.new_tensor([[0, -1, 0], [0, 0, -1], [1, 0, 0]])
181
        elif src == Coord3DMode.CAM and dst == Coord3DMode.LIDAR:
182
183
            if rt_mat is None:
                rt_mat = arr.new_tensor([[0, 0, 1], [-1, 0, 0], [0, -1, 0]])
184
        elif src == Coord3DMode.DEPTH and dst == Coord3DMode.CAM:
185
186
            if rt_mat is None:
                rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, -1], [0, 1, 0]])
187
        elif src == Coord3DMode.CAM and dst == Coord3DMode.DEPTH:
188
189
            if rt_mat is None:
                rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, 1], [0, -1, 0]])
190
        elif src == Coord3DMode.LIDAR and dst == Coord3DMode.DEPTH:
191
192
            if rt_mat is None:
                rt_mat = arr.new_tensor([[0, -1, 0], [1, 0, 0], [0, 0, 1]])
193
        elif src == Coord3DMode.DEPTH and dst == Coord3DMode.LIDAR:
194
195
            if rt_mat is None:
                rt_mat = arr.new_tensor([[0, 1, 0], [-1, 0, 0], [0, 0, 1]])
196
197
198
199
200
        else:
            raise NotImplementedError(
                f'Conversion from Coord3DMode {src} to {dst} '
                'is not supported yet')

201
202
        if not isinstance(rt_mat, torch.Tensor):
            rt_mat = arr.new_tensor(rt_mat)
203
204
        if rt_mat.size(1) == 4:
            extended_xyz = torch.cat(
205
                [arr[..., :3], arr.new_ones(arr.size(0), 1)], dim=-1)
206
207
            xyz = extended_xyz @ rt_mat.t()
        else:
208
            xyz = arr[..., :3] @ rt_mat.t()
209

210
211
        remains = arr[..., 3:]
        arr = torch.cat([xyz[..., :3], remains], dim=-1)
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235

        # convert arr to the original type
        original_type = type(point)
        if single_point:
            return original_type(arr.flatten().tolist())
        if is_numpy:
            return arr.numpy()
        elif is_InstancePoints:
            if dst == Coord3DMode.CAM:
                target_type = CameraPoints
            elif dst == Coord3DMode.LIDAR:
                target_type = LiDARPoints
            elif dst == Coord3DMode.DEPTH:
                target_type = DepthPoints
            else:
                raise NotImplementedError(
                    f'Conversion to {dst} through {original_type}'
                    ' is not supported yet')
            return target_type(
                arr,
                points_dim=arr.size(-1),
                attribute_dims=point.attribute_dims)
        else:
            return arr