"git@developer.sourcefind.cn:wangsen/mineru.git" did not exist on "e9aea35ff7ac6eec79c73b720db63000c48600ed"
Commit 6d37a3d0 authored by zhangwenwei's avatar zhangwenwei
Browse files

Add conversion unittests

parent 1f3164f8
...@@ -7,7 +7,7 @@ from .iou_calculators import (BboxOverlaps3D, BboxOverlapsNearest3D,
 from .samplers import (BaseSampler, CombinedSampler,
                        InstanceBalancedPosSampler, IoUBalancedNegSampler,
                        PseudoSampler, RandomSampler, SamplingResult)
-from .structures import Box3DMode, LiDARInstance3DBoxes
+from .structures import Box3DMode, CameraInstance3DBoxes, LiDARInstance3DBoxes
 from .transforms import boxes3d_to_bev_torch_lidar
 from .assign_sampling import (  # isort:skip, avoid recursive imports
...@@ -21,5 +21,6 @@ __all__ = [
     'build_assigner', 'build_sampler', 'assign_and_sample', 'box_torch_ops',
     'build_bbox_coder', 'DeltaXYZWLHRBBoxCoder', 'boxes3d_to_bev_torch_lidar',
     'BboxOverlapsNearest3D', 'BboxOverlaps3D', 'bbox_overlaps_nearest_3d',
-    'bbox_overlaps_3d', 'Box3DMode', 'LiDARInstance3DBoxes'
+    'bbox_overlaps_3d', 'Box3DMode', 'LiDARInstance3DBoxes',
+    'CameraInstance3DBoxes'
 ]
 from .box_3d_mode import Box3DMode
+from .cam_box3d import CameraInstance3DBoxes
 from .lidar_box3d import LiDARInstance3DBoxes

-__all__ = ['Box3DMode', 'LiDARInstance3DBoxes']
+__all__ = ['Box3DMode', 'LiDARInstance3DBoxes', 'CameraInstance3DBoxes']
...@@ -11,8 +11,8 @@ class Box3DMode(IntEnum):
     Coordinates in LiDAR:

     .. code-block:: none

-                    up z    x front
-                       ^   ^
+                    up z
+                       ^   x front
                        |  /
                        | /
        left y <------ 0
...@@ -22,10 +22,10 @@ class Box3DMode(IntEnum):
     Coordinates in camera:

     .. code-block:: none

-                x right
-               /
-              /
-        front z <------ 0
-                        |
-                        |
-                        v
+            z front
+           /
+          /
+         0 ------> x right
+         |
+         |
+         v
...@@ -36,11 +36,11 @@ class Box3DMode(IntEnum):
     Coordinates in Depth mode:

     .. code-block:: none

-        up z    x right
-           ^   ^
+        up z
+           ^   y front
            |  /
            | /
-    front y <------ 0
+           0 ------> x right

     The relative coordinate of bottom center in a DEPTH box is [0.5, 0.5, 0],
     and the yaw is around the z axis, thus the rotation axis=2.
     """
...@@ -50,7 +50,7 @@ class Box3DMode(IntEnum):
     DEPTH = 2

     @staticmethod
-    def convert(box, src, dst):
+    def convert(box, src, dst, rt_mat=None):
         """Convert boxes from `src` mode to `dst` mode.

         Args:
...@@ -58,6 +58,11 @@ class Box3DMode(IntEnum):
                 can be a k-tuple, k-list or an Nxk array/tensor, where k = 7
             src (BoxMode): the src Box mode
             dst (BoxMode): the target Box mode
+            rt_mat (np.ndarray | torch.Tensor): The rotation and translation
+                matrix between different coordinates. Defaults to None.
+                The conversion from `src` coordinates to `dst` coordinates
+                usually comes along with a change of sensors, e.g., from
+                camera to LiDAR, which requires a transformation matrix.

         Returns:
             (tuple | list | np.ndarray | torch.Tensor):
...@@ -80,8 +85,42 @@ class Box3DMode(IntEnum):
         else:
             arr = box.clone()

-        # converting logic
-        # TODO: add converting logic to support box conversion
+        # convert box from `src` mode to `dst` mode
+        x_size, y_size, z_size = arr[..., 3:4], arr[..., 4:5], arr[..., 5:6]
+        if src == Box3DMode.LIDAR and dst == Box3DMode.CAM:
+            if rt_mat is None:
+                rt_mat = arr.new_tensor([[0, -1, 0], [0, 0, -1], [1, 0, 0]])
+            xyz_size = torch.cat([y_size, z_size, x_size], dim=-1)
+        elif src == Box3DMode.CAM and dst == Box3DMode.LIDAR:
+            if rt_mat is None:
+                rt_mat = arr.new_tensor([[0, 0, 1], [-1, 0, 0], [0, -1, 0]])
+            xyz_size = torch.cat([z_size, x_size, y_size], dim=-1)
+        elif src == Box3DMode.DEPTH and dst == Box3DMode.CAM:
+            if rt_mat is None:
+                rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, 1], [0, -1, 0]])
+            xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
+        elif src == Box3DMode.CAM and dst == Box3DMode.DEPTH:
+            if rt_mat is None:
+                rt_mat = arr.new_tensor([[1, 0, 0], [0, 0, -1], [0, 1, 0]])
+            xyz_size = torch.cat([x_size, z_size, y_size], dim=-1)
+        else:
+            raise NotImplementedError(
+                f'Conversion from Box3DMode {src} to {dst} '
+                'is not supported yet')
+
+        if not isinstance(rt_mat, torch.Tensor):
+            rt_mat = arr.new_tensor(rt_mat)
+        if rt_mat.size(1) == 4:
+            extended_xyz = torch.cat(
+                [arr[:, :3], arr.new_ones(arr.size(0), 1)], dim=-1)
+            xyz = extended_xyz @ rt_mat.t()
+        else:
+            xyz = arr[:, :3] @ rt_mat.t()
+
+        remains = arr[..., 6:]
+        arr = torch.cat([xyz[:, :3], xyz_size, remains], dim=-1)
+
+        # convert arr to the original type
         original_type = type(box)
         if single_box:
             return original_type(arr.flatten().tolist())
...
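With this hunk, `convert` supports LIDAR<->CAM and CAM<->DEPTH conversions. A minimal usage sketch of the new API (illustrative values; the round trip mirrors the unit test added below):

```python
import torch
from mmdet3d.core.bbox import Box3DMode

# one box as (x, y, z, x_size, y_size, z_size, yaw)
lidar_box = torch.tensor([[1.78, 2.52, -1.75, 1.75, 3.39, 1.65, 1.48]])

# with rt_mat=None, convert falls back to the fixed axis permutation
cam_box = Box3DMode.convert(lidar_box, Box3DMode.LIDAR, Box3DMode.CAM)
roundtrip = Box3DMode.convert(cam_box, Box3DMode.CAM, Box3DMode.LIDAR)
assert torch.allclose(roundtrip, lidar_box)
```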
...@@ -5,7 +5,7 @@ from .base_box3d import BaseInstance3DBoxes
 from .utils import limit_period, rotation_3d_in_axis


-class CAMInstance3DBoxes(BaseInstance3DBoxes):
+class CameraInstance3DBoxes(BaseInstance3DBoxes):
     """3D boxes of instances in CAM coordinates

     Coordinates in camera:
...
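The rename makes the class name consistent with `LiDARInstance3DBoxes`. A construction sketch (mirroring the new unit test below):

```python
from mmdet3d.core.bbox import CameraInstance3DBoxes

# Nx7 boxes as (x, y, z, x_size, y_size, z_size, yaw) in camera coordinates
boxes = CameraInstance3DBoxes(
    [[0.06, 1.77, 21.4, 3.2, 1.61, 1.66, -1.54]])
print(boxes.tensor.shape)  # torch.Size([1, 7])
```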
...@@ -238,6 +238,8 @@ class KittiDataset(torch_data.Dataset):
                              axis=1).astype(np.float32)
         difficulty = annos['difficulty']
         # this changes gt_bboxes_3d to velodyne coordinates
         gt_bboxes_3d = box_np_ops.box_camera_to_lidar(gt_bboxes_3d, rect,
                                                       Trv2c)
         # only center format is allowed. so we need to convert
...
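The hunk above moves KITTI ground-truth boxes from the rectified camera frame into velodyne (LiDAR) coordinates via the calibration matrices. With the new API, the same transform can be expressed by passing a 4x4 `rt_mat` to `convert`; a sketch with placeholder calibration (real `rect`/`Trv2c` come from the KITTI calib files, as in the test below):

```python
import torch
from mmdet3d.core.bbox import Box3DMode

rect = torch.eye(4)  # placeholder rectification matrix
Trv2c = torch.tensor([[0., -1., 0., 0.],  # placeholder velodyne->camera
                      [0., 0., -1., 0.],  # extrinsic (pure rotation here)
                      [1., 0., 0., 0.],
                      [0., 0., 0., 1.]])

# camera-frame boxes as (x, y, z, x_size, y_size, z_size, yaw)
cam_boxes = torch.tensor([[0.06, 1.77, 21.4, 3.2, 1.61, 1.66, -1.54]])
rt_mat = rect @ Trv2c  # maps velodyne points into the rectified camera frame
lidar_boxes = Box3DMode.convert(cam_boxes, Box3DMode.CAM, Box3DMode.LIDAR,
                                rt_mat.inverse())
```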
 import numpy as np
+import pytest
 import torch

-from mmdet3d.core.bbox import Box3DMode, LiDARInstance3DBoxes
+from mmdet3d.core.bbox import (Box3DMode, CameraInstance3DBoxes,
+                               LiDARInstance3DBoxes)


 def test_lidar_boxes3d():
...@@ -250,3 +252,95 @@ def test_lidar_boxes3d():
               [3.0032e+01, -1.5474e+01, -1.7879e+00]]])
     # the pytorch print loses some precision
     assert torch.allclose(boxes.corners, expected_tensor, rtol=1e-4, atol=1e-7)
+
+
+def test_boxes_conversion():
+    """Test the conversion of boxes between different modes.
+
+    CommandLine:
+        xdoctest tests/test_box3d.py::test_boxes_conversion zero
+    """
+    lidar_boxes = LiDARInstance3DBoxes(
+        [[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
+         [8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62],
+         [28.2967, -0.5557558, -1.303325, 1.47, 2.23, 1.48, -1.57],
+         [26.66902, 21.82302, -1.736057, 1.56, 3.48, 1.4, -1.69],
+         [31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]])
+    cam_box_tensor = Box3DMode.convert(lidar_boxes.tensor, Box3DMode.LIDAR,
+                                       Box3DMode.CAM)
+    lidar_box_tensor = Box3DMode.convert(cam_box_tensor, Box3DMode.CAM,
+                                         Box3DMode.LIDAR)
+    expected_tensor = torch.tensor(
+        [[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
+         [8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62],
+         [28.2967, -0.5557558, -1.303325, 1.47, 2.23, 1.48, -1.57],
+         [26.66902, 21.82302, -1.736057, 1.56, 3.48, 1.4, -1.69],
+         [31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]])
+
+    assert torch.allclose(expected_tensor, lidar_box_tensor)
+    assert torch.allclose(lidar_boxes.tensor, lidar_box_tensor)
+
+    depth_box_tensor = Box3DMode.convert(cam_box_tensor, Box3DMode.CAM,
+                                         Box3DMode.DEPTH)
+    depth_to_cam_box_tensor = Box3DMode.convert(depth_box_tensor,
+                                                Box3DMode.DEPTH,
+                                                Box3DMode.CAM)
+    assert torch.allclose(cam_box_tensor, depth_to_cam_box_tensor)
+
+    # test that an unsupported conversion raises an error
+    with pytest.raises(NotImplementedError):
+        Box3DMode.convert(lidar_box_tensor, Box3DMode.LIDAR, Box3DMode.DEPTH)
+    with pytest.raises(NotImplementedError):
+        Box3DMode.convert(depth_box_tensor, Box3DMode.DEPTH, Box3DMode.LIDAR)
+
+    # test conversion with a given rt_mat
+    camera_boxes = CameraInstance3DBoxes(
+        [[0.06, 1.77, 21.4, 3.2, 1.61, 1.66, -1.54],
+         [6.59, 1.53, 6.76, 12.78, 3.66, 2.28, 1.55],
+         [6.71, 1.59, 22.18, 14.73, 3.64, 2.32, 1.59],
+         [7.11, 1.58, 34.54, 10.04, 3.61, 2.32, 1.61],
+         [7.78, 1.65, 45.95, 12.83, 3.63, 2.34, 1.64]])
+
+    rect = torch.tensor(
+        [[0.9999239, 0.00983776, -0.00744505, 0.],
+         [-0.0098698, 0.9999421, -0.00427846, 0.],
+         [0.00740253, 0.00435161, 0.9999631, 0.], [0., 0., 0., 1.]],
+        dtype=torch.float32)
+
+    Trv2c = torch.tensor(
+        [[7.533745e-03, -9.999714e-01, -6.166020e-04, -4.069766e-03],
+         [1.480249e-02, 7.280733e-04, -9.998902e-01, -7.631618e-02],
+         [9.998621e-01, 7.523790e-03, 1.480755e-02, -2.717806e-01],
+         [0.000000e+00, 0.000000e+00, 0.000000e+00, 1.000000e+00]],
+        dtype=torch.float32)
+
+    expected_tensor = torch.tensor(
+        [[
+            2.16902434e+01, -4.06038554e-02, -1.61906639e+00, 1.65999997e+00,
+            3.20000005e+00, 1.61000001e+00, -1.53999996e+00
+        ],
+         [
+             7.05006905e+00, -6.57459601e+00, -1.60107949e+00, 2.27999997e+00,
+             1.27799997e+01, 3.66000009e+00, 1.54999995e+00
+         ],
+         [
+             2.24698818e+01, -6.69203759e+00, -1.50118145e+00, 2.31999993e+00,
+             1.47299995e+01, 3.64000010e+00, 1.59000003e+00
+         ],
+         [
+             3.48291965e+01, -7.09058388e+00, -1.36622983e+00, 2.31999993e+00,
+             1.00400000e+01, 3.60999990e+00, 1.61000001e+00
+         ],
+         [
+             4.62394617e+01, -7.75838800e+00, -1.32405020e+00, 2.33999991e+00,
+             1.28299999e+01, 3.63000011e+00, 1.63999999e+00
+         ]],
+        dtype=torch.float32)
+
+    rt_mat = rect @ Trv2c
+    cam_to_lidar_box = Box3DMode.convert(camera_boxes.tensor, Box3DMode.CAM,
+                                         Box3DMode.LIDAR, rt_mat.inverse())
+    assert torch.allclose(cam_to_lidar_box, expected_tensor)
+
+    lidar_to_cam_box = Box3DMode.convert(cam_to_lidar_box, Box3DMode.LIDAR,
+                                         Box3DMode.CAM, rt_mat)
+    assert torch.allclose(lidar_to_cam_box, camera_boxes.tensor)