Commit 0f85fae5 authored by zhangwenwei

Full unittest for cam box

parent 6d37a3d0
@@ -117,7 +117,6 @@ class BaseInstance3DBoxes(object):
         Args:
             trans_vector (torch.Tensor): translation vector of size 1x3
         """
         if not isinstance(trans_vector, torch.Tensor):
             trans_vector = self.tensor.new_tensor(trans_vector)
...
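For context, the hunk above is why `translate` accepts any array-like offset: a non-tensor input is promoted with `new_tensor` onto the boxes' own dtype and device. A minimal sketch of the behavior, assuming the classes are exposed via `mmdet3d.core.bbox` (import path assumed):

    import torch
    from mmdet3d.core.bbox import LiDARInstance3DBoxes  # import path assumed

    boxes = LiDARInstance3DBoxes(torch.zeros(1, 7))
    boxes.translate([1.0, 2.0, 0.5])  # plain list is promoted via new_tensor
    boxes.translate(torch.tensor([0.0, 0.0, -0.5]))  # tensors pass through
    print(boxes.tensor)  # only x, y, z move: [[1., 2., 0., 0., 0., 0., 0.]]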
@@ -3,6 +3,10 @@ from enum import IntEnum, unique
 import numpy as np
 import torch
 
+from .base_box3d import BaseInstance3DBoxes
+from .cam_box3d import CameraInstance3DBoxes
+from .lidar_box3d import LiDARInstance3DBoxes
+
 
 @unique
 class Box3DMode(IntEnum):
@@ -16,6 +20,7 @@ class Box3DMode(IntEnum):
                   | /
                   | /
      left y <------ 0
+
     The relative coordinate of bottom center in a LiDAR box is [0.5, 0.5, 0],
     and the yaw is around the z axis, thus the rotation axis=2.
@@ -30,6 +35,7 @@ class Box3DMode(IntEnum):
            |
            v
       down y
+
     The relative coordinate of bottom center in a CAM box is [0.5, 1.0, 0.5],
     and the yaw is around the y axis, thus the rotation axis=1.
@@ -41,6 +47,7 @@ class Box3DMode(IntEnum):
         | /
         | /
         0 ------> x right
+
     The relative coordinate of bottom center in a DEPTH box is [0.5, 0.5, 0],
     and the yaw is around the z axis, thus the rotation axis=2.
     """
@@ -72,6 +79,7 @@ class Box3DMode(IntEnum):
             return box
         is_numpy = isinstance(box, np.ndarray)
+        is_Instance3DBoxes = isinstance(box, BaseInstance3DBoxes)
         single_box = isinstance(box, (list, tuple))
         if single_box:
             assert len(box) >= 7, (
@@ -82,6 +90,8 @@ class Box3DMode(IntEnum):
         # avoid modifying the input box
         if is_numpy:
             arr = torch.from_numpy(np.asarray(box)).clone()
+        elif is_Instance3DBoxes:
+            arr = box.tensor.clone()
         else:
             arr = box.clone()
@@ -126,5 +136,15 @@ class Box3DMode(IntEnum):
             return original_type(arr.flatten().tolist())
         if is_numpy:
             return arr.numpy()
+        elif is_Instance3DBoxes:
+            if dst == Box3DMode.CAM:
+                target_type = CameraInstance3DBoxes
+            elif dst == Box3DMode.LIDAR:
+                target_type = LiDARInstance3DBoxes
+            else:
+                raise NotImplementedError(
+                    f'Conversion to {dst} through {original_type}'
+                    ' is not supported yet')
+            return target_type(arr, box_dim=arr.size(-1))
         else:
             return arr
@@ -11,10 +11,10 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
     Coordinates in camera:
 
     .. code-block:: none
 
-                 x right
-                /
-               /
-     front z <------ 0
+         z front
+        /
+       /
+      0 ------> x right
       |
       |
       v
@@ -39,7 +39,7 @@ class CameraInstance3DBoxes(BaseInstance3DBoxes):
         bottom_center = self.bottom_center
         gravity_center = torch.zeros_like(bottom_center)
         gravity_center[:, [0, 2]] = bottom_center[:, [0, 2]]
-        gravity_center[:, 1] = bottom_center[:, 1] - self.tensor[:, 5] * 0.5
+        gravity_center[:, 1] = bottom_center[:, 1] - self.tensor[:, 4] * 0.5
         return gravity_center
 
     @property
...
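The one-line fix above swaps the height index: a camera box is stored as (x, y, z, x_size, y_size, z_size, yaw), where y_size at index 4 is the height along the downward y axis, so the gravity center sits half a height above the bottom center, i.e. at smaller y; index 5 (z_size) was wrong. A worked check with illustrative values (import path assumed):

    import torch
    from mmdet3d.core.bbox import CameraInstance3DBoxes  # path assumed

    # (x, y, z, x_size, y_size, z_size, yaw); y_size = 1.8 is the height
    box = CameraInstance3DBoxes(
        torch.tensor([[0.0, 1.0, 5.0, 1.6, 1.8, 3.9, 0.0]]))
    print(box.bottom_center)   # y = 1.0: the stored center is the bottom
    print(box.gravity_center)  # y = 1.0 - 1.8 / 2 = 0.1, uses tensor[:, 4]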
@@ -16,6 +16,7 @@ class LiDARInstance3DBoxes(BaseInstance3DBoxes):
                   | /
                   | /
      left y <------ 0
+
     The relative coordinate of bottom center in a LiDAR box is [0.5, 0.5, 0],
     and the yaw is around the z axis, thus the rotation axis=2.
...
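By contrast, a LiDAR box's z axis points up and its height is z_size at index 5, so the gravity center lies half a height above the bottom center in +z; a quick check (import path assumed):

    import torch
    from mmdet3d.core.bbox import LiDARInstance3DBoxes  # path assumed

    box = LiDARInstance3DBoxes(
        torch.tensor([[0.0, 0.0, -1.5, 1.6, 3.9, 1.5, 0.0]]))
    print(box.bottom_center)   # z = -1.5
    print(box.gravity_center)  # z = -1.5 + 1.5 / 2 = -0.75 (z up, so + h / 2)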
@@ -292,6 +292,11 @@ def test_boxes_conversion():
     with pytest.raises(NotImplementedError):
         Box3DMode.convert(depth_box_tensor, Box3DMode.DEPTH, Box3DMode.LIDAR)
 
+    # test same-mode conversion
+    same_results = Box3DMode.convert(depth_box_tensor, Box3DMode.DEPTH,
+                                     Box3DMode.DEPTH)
+    assert (same_results == depth_box_tensor).all()
+
     # test conversion with a given rt_mat
     camera_boxes = CameraInstance3DBoxes(
         [[0.06, 1.77, 21.4, 3.2, 1.61, 1.66, -1.54],
@@ -337,10 +342,243 @@ def test_boxes_conversion():
         dtype=torch.float32)
     rt_mat = rect @ Trv2c
-    cam_to_lidar_box = Box3DMode.convert(camera_boxes.tensor, Box3DMode.CAM,
-                                         Box3DMode.LIDAR, rt_mat.inverse())
-    assert torch.allclose(cam_to_lidar_box, expected_tensor)
-    lidar_to_cam_box = Box3DMode.convert(cam_to_lidar_box, Box3DMode.LIDAR,
-                                         Box3DMode.CAM, rt_mat)
+    # test conversion with Box type
+    cam_to_lidar_box = Box3DMode.convert(camera_boxes, Box3DMode.CAM,
+                                         Box3DMode.LIDAR, rt_mat.inverse())
+    assert torch.allclose(cam_to_lidar_box.tensor, expected_tensor)
+    lidar_to_cam_box = Box3DMode.convert(cam_to_lidar_box.tensor,
+                                         Box3DMode.LIDAR, Box3DMode.CAM,
+                                         rt_mat)
     assert torch.allclose(lidar_to_cam_box, camera_boxes.tensor)
+
+    # test numpy convert
+    cam_to_lidar_box = Box3DMode.convert(camera_boxes.tensor.numpy(),
+                                         Box3DMode.CAM, Box3DMode.LIDAR,
+                                         rt_mat.inverse().numpy())
+    assert np.allclose(cam_to_lidar_box, expected_tensor.numpy())
+
+    # test list convert
+    cam_to_lidar_box = Box3DMode.convert(
+        camera_boxes.tensor[0].numpy().tolist(), Box3DMode.CAM,
+        Box3DMode.LIDAR,
+        rt_mat.inverse().numpy())
+    assert np.allclose(np.array(cam_to_lidar_box), expected_tensor[0].numpy())
+
+
+def test_camera_boxes3d():
+    # test init with numpy array
+    np_boxes = np.array(
+        [[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
+         [8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62]],
+        dtype=np.float32)
+    boxes_1 = Box3DMode.convert(
+        LiDARInstance3DBoxes(np_boxes), Box3DMode.LIDAR, Box3DMode.CAM)
+    assert isinstance(boxes_1, CameraInstance3DBoxes)
+
+    cam_np_boxes = Box3DMode.convert(np_boxes, Box3DMode.LIDAR, Box3DMode.CAM)
+    assert torch.allclose(boxes_1.tensor,
+                          boxes_1.tensor.new_tensor(cam_np_boxes))
+
+    # test init with torch.Tensor
+    th_boxes = torch.tensor(
+        [[
+            28.29669987, -0.5557558, -1.30332506, 1.47000003, 2.23000002,
+            1.48000002, -1.57000005
+        ],
+         [
+             26.66901946, 21.82302134, -1.73605708, 1.55999994, 3.48000002,
+             1.39999998, -1.69000006
+         ],
+         [
+             31.31977974, 8.16214412, -1.62177875, 1.74000001, 3.76999998,
+             1.48000002, 2.78999996
+         ]],
+        dtype=torch.float32)
+    cam_th_boxes = Box3DMode.convert(th_boxes, Box3DMode.LIDAR, Box3DMode.CAM)
+    boxes_2 = CameraInstance3DBoxes(cam_th_boxes)
+    assert torch.allclose(boxes_2.tensor, cam_th_boxes)
+
+    # test clone/to/device
+    boxes_2 = boxes_2.clone()
+    boxes_1 = boxes_1.to(boxes_2.device)
+
+    # test box concatenation
+    expected_tensor = Box3DMode.convert(
+        torch.tensor(
+            [[1.7802081, 2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.48],
+             [8.959413, 2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.62],
+             [28.2967, -0.5557558, -1.303325, 1.47, 2.23, 1.48, -1.57],
+             [26.66902, 21.82302, -1.736057, 1.56, 3.48, 1.4, -1.69],
+             [31.31978, 8.162144, -1.6217787, 1.74, 3.77, 1.48, 2.79]]),
+        Box3DMode.LIDAR, Box3DMode.CAM)
+    boxes = CameraInstance3DBoxes.cat([boxes_1, boxes_2])
+    assert torch.allclose(boxes.tensor, expected_tensor)
+
+    # test box flip
+    expected_tensor = Box3DMode.convert(
+        torch.tensor(
+            [[1.7802081, -2.516249, -1.7501148, 1.75, 3.39, 1.65, 1.6615927],
+             [8.959413, -2.4567227, -1.6357126, 1.54, 4.01, 1.57, 1.5215927],
+             [28.2967, 0.5557558, -1.303325, 1.47, 2.23, 1.48, 4.7115927],
+             [26.66902, -21.82302, -1.736057, 1.56, 3.48, 1.4, 4.8315926],
+             [31.31978, -8.162144, -1.6217787, 1.74, 3.77, 1.48, 0.35159278]]),
+        Box3DMode.LIDAR, Box3DMode.CAM)
+    boxes.flip()
+    assert torch.allclose(boxes.tensor, expected_tensor)
+
+    # test box rotation
+    expected_tensor = Box3DMode.convert(
+        torch.tensor(
+            [[1.0385344, -2.9020846, -1.7501148, 1.75, 3.39, 1.65, 1.9336663],
+             [7.969653, -4.774011, -1.6357126, 1.54, 4.01, 1.57, 1.7936664],
+             [27.405172, -7.0688415, -1.303325, 1.47, 2.23, 1.48, 4.9836664],
+             [19.823532, -28.187025, -1.736057, 1.56, 3.48, 1.4, 5.1036663],
+             [27.974297, -16.27845, -1.6217787, 1.74, 3.77, 1.48, 0.6236664]]),
+        Box3DMode.LIDAR, Box3DMode.CAM)
+    boxes.rotate(0.27207362796436096)
+    assert torch.allclose(boxes.tensor, expected_tensor)
+
+    # test box scaling
+    expected_tensor = Box3DMode.convert(
+        torch.tensor([[
+            1.0443488, -2.9183323, -1.7599131, 1.7597977, 3.4089797,
+            1.6592377, 1.9336663
+        ],
+                      [
+                          8.014273, -4.8007393, -1.6448704, 1.5486219,
+                          4.0324507, 1.57879, 1.7936664
+                      ],
+                      [
+                          27.558605, -7.1084175, -1.310622, 1.4782301,
+                          2.242485, 1.488286, 4.9836664
+                      ],
+                      [
+                          19.934517, -28.344835, -1.7457767, 1.5687338,
+                          3.4994833, 1.4078381, 5.1036663
+                      ],
+                      [
+                          28.130915, -16.369587, -1.6308585, 1.7497417,
+                          3.791107, 1.488286, 0.6236664
+                      ]]), Box3DMode.LIDAR, Box3DMode.CAM)
+    boxes.scale(1.00559866335275)
+    assert torch.allclose(boxes.tensor, expected_tensor)
+
+    # test box translation
+    expected_tensor = Box3DMode.convert(
+        torch.tensor([[
+            1.1281544, -3.0507944, -1.9169292, 1.7597977, 3.4089797,
+            1.6592377, 1.9336663
+        ],
+                      [
+                          8.098079, -4.9332013, -1.8018866, 1.5486219,
+                          4.0324507, 1.57879, 1.7936664
+                      ],
+                      [
+                          27.64241, -7.2408795, -1.4676381, 1.4782301,
+                          2.242485, 1.488286, 4.9836664
+                      ],
+                      [
+                          20.018322, -28.477297, -1.9027928, 1.5687338,
+                          3.4994833, 1.4078381, 5.1036663
+                      ],
+                      [
+                          28.21472, -16.502048, -1.7878747, 1.7497417,
+                          3.791107, 1.488286, 0.6236664
+                      ]]), Box3DMode.LIDAR, Box3DMode.CAM)
+    boxes.translate([0.13246193, 0.15701613, 0.0838056])
+    assert torch.allclose(boxes.tensor, expected_tensor)
+
+    # test bbox in_range_bev
+    expected_tensor = torch.tensor([1, 1, 1, 1, 1], dtype=torch.bool)
+    mask = boxes.in_range_bev([0., -40., 70.4, 40.])
+    assert (mask == expected_tensor).all()
+    mask = boxes.nonempty()
+    assert (mask == expected_tensor).all()
+
+    # test bbox in_range
+    expected_tensor = torch.tensor([1, 1, 0, 0, 0], dtype=torch.bool)
+    mask = boxes.in_range_3d([-2, -5, 0, 20, 2, 22])
+    assert (mask == expected_tensor).all()
+
+    # test properties
+    assert torch.allclose(boxes.bottom_center, boxes.tensor[:, :3])
+    expected_tensor = (
+        boxes.tensor[:, :3] - boxes.tensor[:, 3:6] *
+        (torch.tensor([0.5, 1.0, 0.5]) - torch.tensor([0.5, 0.5, 0.5])))
+    assert torch.allclose(boxes.gravity_center, expected_tensor)
+
+    boxes.limit_yaw()
+    assert (boxes.tensor[:, 6] <= np.pi / 2).all()
+    assert (boxes.tensor[:, 6] >= -np.pi / 2).all()
+
+    # same-mode conversion should leave the boxes unchanged
+    Box3DMode.convert(boxes, Box3DMode.LIDAR, Box3DMode.LIDAR)
+    expected_tensor = boxes.tensor.clone()
+    assert torch.allclose(expected_tensor, boxes.tensor)
+
+    boxes.flip()
+    boxes.flip()
+    boxes.limit_yaw()
+    assert torch.allclose(expected_tensor, boxes.tensor)
+
+    # test nearest_bev
+    # BEV box in lidar coordinates (x, y)
+    lidar_expected_tensor = torch.tensor(
+        [[-0.5763, -3.9307, 2.8326, -2.1709],
+         [6.0819, -5.7075, 10.1143, -4.1589],
+         [26.5212, -7.9800, 28.7637, -6.5018],
+         [18.2686, -29.2617, 21.7681, -27.6929],
+         [27.3398, -18.3976, 29.0896, -14.6065]])
+    # BEV box in camera coordinates (-y, x)
+    expected_tensor = lidar_expected_tensor.clone()
+    expected_tensor[:, 0::2] = -lidar_expected_tensor[:, [3, 1]]
+    expected_tensor[:, 1::2] = lidar_expected_tensor[:, 0::2]
+    # the pytorch print loses some precision
+    assert torch.allclose(
+        boxes.nearset_bev, expected_tensor, rtol=1e-4, atol=1e-7)
+
+    # obtained from the print of the original implementation
+    expected_tensor = torch.tensor([[[3.2684e+00, 2.5769e-01, -7.7767e-01],
+                                     [1.6232e+00, 2.5769e-01, -1.5301e-01],
+                                     [1.6232e+00, 1.9169e+00, -1.5301e-01],
+                                     [3.2684e+00, 1.9169e+00, -7.7767e-01],
+                                     [4.4784e+00, 2.5769e-01, 2.4093e+00],
+                                     [2.8332e+00, 2.5769e-01, 3.0340e+00],
+                                     [2.8332e+00, 1.9169e+00, 3.0340e+00],
+                                     [4.4784e+00, 1.9169e+00, 2.4093e+00]],
+                                    [[5.2427e+00, 2.2310e-01, 5.9606e+00],
+                                     [3.7324e+00, 2.2310e-01, 6.3029e+00],
+                                     [3.7324e+00, 1.8019e+00, 6.3029e+00],
+                                     [5.2427e+00, 1.8019e+00, 5.9606e+00],
+                                     [6.1340e+00, 2.2310e-01, 9.8933e+00],
+                                     [4.6237e+00, 2.2310e-01, 1.0236e+01],
+                                     [4.6237e+00, 1.8019e+00, 1.0236e+01],
+                                     [6.1340e+00, 1.8019e+00, 9.8933e+00]],
+                                    [[7.6525e+00, -2.0648e-02, 2.6364e+01],
+                                     [6.2283e+00, -2.0648e-02, 2.6760e+01],
+                                     [6.2283e+00, 1.4676e+00, 2.6760e+01],
+                                     [7.6525e+00, 1.4676e+00, 2.6364e+01],
+                                     [8.2534e+00, -2.0648e-02, 2.8525e+01],
+                                     [6.8292e+00, -2.0648e-02, 2.8921e+01],
+                                     [6.8292e+00, 1.4676e+00, 2.8921e+01],
+                                     [8.2534e+00, 1.4676e+00, 2.8525e+01]],
+                                    [[2.8535e+01, 4.9495e-01, 1.8102e+01],
+                                     [2.7085e+01, 4.9495e-01, 1.8700e+01],
+                                     [2.7085e+01, 1.9028e+00, 1.8700e+01],
+                                     [2.8535e+01, 1.9028e+00, 1.8102e+01],
+                                     [2.9870e+01, 4.9495e-01, 2.1337e+01],
+                                     [2.8420e+01, 4.9495e-01, 2.1935e+01],
+                                     [2.8420e+01, 1.9028e+00, 2.1935e+01],
+                                     [2.9870e+01, 1.9028e+00, 2.1337e+01]],
+                                    [[1.4452e+01, 2.9959e-01, 2.8612e+01],
+                                     [1.5474e+01, 2.9959e-01, 3.0032e+01],
+                                     [1.5474e+01, 1.7879e+00, 3.0032e+01],
+                                     [1.4452e+01, 1.7879e+00, 2.8612e+01],
+                                     [1.7530e+01, 2.9959e-01, 2.6398e+01],
+                                     [1.8552e+01, 2.9959e-01, 2.7818e+01],
+                                     [1.8552e+01, 1.7879e+00, 2.7818e+01],
+                                     [1.7530e+01, 1.7879e+00, 2.6398e+01]]])
+    # the pytorch print loses some precision
+    assert torch.allclose(boxes.corners, expected_tensor, rtol=1e-4, atol=1e-7)
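The new tests can be run on their own; one way to invoke them, with the test file path assumed from the functions touched in this commit:

    # 'tests/test_box3d.py' is an assumed path; adjust to the repo layout
    import pytest
    pytest.main(['tests/test_box3d.py', '-k',
                 'boxes_conversion or camera_boxes3d'])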