"...models/git@developer.sourcefind.cn:OpenDAS/vision.git" did not exist on "c7c2085ec686ccc55e1df85736b240b2405d1179"
Commit 82a0c215 authored by Tai-Wang's avatar Tai-Wang
Browse files

[Refactor] Refactor the transformation from image to camera coordinates (#938)

* Refactor points_img2cam

* Refine docstring

* Support array converter and add unit tests
parent 48ce34da
...@@ -12,7 +12,8 @@ from .samplers import (BaseSampler, CombinedSampler, ...@@ -12,7 +12,8 @@ from .samplers import (BaseSampler, CombinedSampler,
from .structures import (BaseInstance3DBoxes, Box3DMode, CameraInstance3DBoxes, from .structures import (BaseInstance3DBoxes, Box3DMode, CameraInstance3DBoxes,
Coord3DMode, DepthInstance3DBoxes, Coord3DMode, DepthInstance3DBoxes,
LiDARInstance3DBoxes, get_box_type, limit_period, LiDARInstance3DBoxes, get_box_type, limit_period,
mono_cam_box2vis, points_cam2img, xywhr2xyxyr) mono_cam_box2vis, points_cam2img, points_img2cam,
xywhr2xyxyr)
from .transforms import bbox3d2result, bbox3d2roi, bbox3d_mapping_back from .transforms import bbox3d2result, bbox3d2roi, bbox3d_mapping_back
__all__ = [ __all__ = [
...@@ -25,5 +26,5 @@ __all__ = [ ...@@ -25,5 +26,5 @@ __all__ = [
'LiDARInstance3DBoxes', 'CameraInstance3DBoxes', 'bbox3d2roi', 'LiDARInstance3DBoxes', 'CameraInstance3DBoxes', 'bbox3d2roi',
'bbox3d2result', 'DepthInstance3DBoxes', 'BaseInstance3DBoxes', 'bbox3d2result', 'DepthInstance3DBoxes', 'BaseInstance3DBoxes',
'bbox3d_mapping_back', 'xywhr2xyxyr', 'limit_period', 'points_cam2img', 'bbox3d_mapping_back', 'xywhr2xyxyr', 'limit_period', 'points_cam2img',
'get_box_type', 'Coord3DMode', 'mono_cam_box2vis' 'points_img2cam', 'get_box_type', 'Coord3DMode', 'mono_cam_box2vis'
] ]
...@@ -6,12 +6,13 @@ from .coord_3d_mode import Coord3DMode ...@@ -6,12 +6,13 @@ from .coord_3d_mode import Coord3DMode
from .depth_box3d import DepthInstance3DBoxes from .depth_box3d import DepthInstance3DBoxes
from .lidar_box3d import LiDARInstance3DBoxes from .lidar_box3d import LiDARInstance3DBoxes
from .utils import (get_box_type, get_proj_mat_by_coord_type, limit_period, from .utils import (get_box_type, get_proj_mat_by_coord_type, limit_period,
mono_cam_box2vis, points_cam2img, rotation_3d_in_axis, mono_cam_box2vis, points_cam2img, points_img2cam,
xywhr2xyxyr) rotation_3d_in_axis, xywhr2xyxyr)
__all__ = [ __all__ = [
'Box3DMode', 'BaseInstance3DBoxes', 'LiDARInstance3DBoxes', 'Box3DMode', 'BaseInstance3DBoxes', 'LiDARInstance3DBoxes',
'CameraInstance3DBoxes', 'DepthInstance3DBoxes', 'xywhr2xyxyr', 'CameraInstance3DBoxes', 'DepthInstance3DBoxes', 'xywhr2xyxyr',
'get_box_type', 'rotation_3d_in_axis', 'limit_period', 'points_cam2img', 'get_box_type', 'rotation_3d_in_axis', 'limit_period', 'points_cam2img',
'Coord3DMode', 'mono_cam_box2vis', 'get_proj_mat_by_coord_type' 'points_img2cam', 'Coord3DMode', 'mono_cam_box2vis',
'get_proj_mat_by_coord_type'
] ]
...@@ -213,6 +213,40 @@ def points_cam2img(points_3d, proj_mat, with_depth=False): ...@@ -213,6 +213,40 @@ def points_cam2img(points_3d, proj_mat, with_depth=False):
return point_2d_res return point_2d_res
@array_converter(apply_to=('points', 'cam2img'))
def points_img2cam(points, cam2img):
    """Project points in image coordinates to camera coordinates.

    Args:
        points (torch.Tensor): 2.5D points in 2D images, [N, 3],
            3 corresponds with x, y in the image and depth.
        cam2img (torch.Tensor): Camera intrinsic matrix. The shape can be
            [3, 3], [3, 4] or [4, 4].

    Returns:
        torch.Tensor: Points in 3D space, [N, 3],
            3 corresponds with x, y, z in 3D space.
    """
    assert cam2img.shape[0] <= 4
    assert cam2img.shape[1] <= 4
    assert points.shape[1] == 3

    depths = points[:, 2].view(-1, 1)
    # Undo the perspective division: [x * d, y * d, d].
    unnormed = torch.cat([points[:, :2] * depths, depths], dim=1)

    # Embed cam2img into a 4x4 identity so the matrix is always square
    # and invertible regardless of the original [3, 3]/[3, 4]/[4, 4] shape.
    padded = torch.eye(4, dtype=points.dtype, device=points.device)
    padded[:cam2img.shape[0], :cam2img.shape[1]] = cam2img
    inv_padded_t = torch.inverse(padded).transpose(0, 1)

    # Work in homogeneous coordinates, then drop the trailing component.
    ones = points.new_ones((unnormed.shape[0], 1))
    homogeneous = torch.cat([unnormed, ones], dim=1)
    return torch.mm(homogeneous, inv_padded_t)[:, :3]
def mono_cam_box2vis(cam_box): def mono_cam_box2vis(cam_box):
"""This is a post-processing function on the bboxes from Mono-3D task. If """This is a post-processing function on the bboxes from Mono-3D task. If
we want to perform projection visualization, we need to: we want to perform projection visualization, we need to:
......
# Copyright (c) OpenMMLab. All rights reserved. # Copyright (c) OpenMMLab. All rights reserved.
import numpy as np import numpy as np
import torch import torch
from logging import warning
from mmcv.cnn import Scale from mmcv.cnn import Scale
from mmcv.runner import force_fp32 from mmcv.runner import force_fp32
from torch import nn as nn from torch import nn as nn
from mmdet3d.core import box3d_multiclass_nms, limit_period, xywhr2xyxyr from mmdet3d.core import (box3d_multiclass_nms, limit_period, points_img2cam,
xywhr2xyxyr)
from mmdet.core import multi_apply from mmdet.core import multi_apply
from mmdet.models.builder import HEADS, build_loss from mmdet.models.builder import HEADS, build_loss
from .anchor_free_mono3d_head import AnchorFreeMono3DHead from .anchor_free_mono3d_head import AnchorFreeMono3DHead
...@@ -639,7 +641,7 @@ class FCOSMono3DHead(AnchorFreeMono3DHead): ...@@ -639,7 +641,7 @@ class FCOSMono3DHead(AnchorFreeMono3DHead):
if rescale: if rescale:
bbox_pred[:, :2] /= bbox_pred[:, :2].new_tensor(scale_factor) bbox_pred[:, :2] /= bbox_pred[:, :2].new_tensor(scale_factor)
pred_center2d = bbox_pred[:, :3].clone() pred_center2d = bbox_pred[:, :3].clone()
bbox_pred[:, :3] = self.pts2Dto3D(bbox_pred[:, :3], view) bbox_pred[:, :3] = points_img2cam(bbox_pred[:, :3], view)
mlvl_centers2d.append(pred_center2d) mlvl_centers2d.append(pred_center2d)
mlvl_bboxes.append(bbox_pred) mlvl_bboxes.append(bbox_pred)
mlvl_scores.append(scores) mlvl_scores.append(scores)
...@@ -708,6 +710,10 @@ class FCOSMono3DHead(AnchorFreeMono3DHead): ...@@ -708,6 +710,10 @@ class FCOSMono3DHead(AnchorFreeMono3DHead):
torch.Tensor: points in 3D space. [N, 3], torch.Tensor: points in 3D space. [N, 3],
3 corresponds with x, y, z in 3D space. 3 corresponds with x, y, z in 3D space.
""" """
warning.warn('DeprecationWarning: This static method has been moved '
'out of this class to mmdet3d/core. The function '
'pts2Dto3D will be deprecated.')
assert view.shape[0] <= 4 assert view.shape[0] <= 4
assert view.shape[1] <= 4 assert view.shape[1] <= 4
assert points.shape[1] == 3 assert points.shape[1] == 3
......
...@@ -3,7 +3,7 @@ import numpy as np ...@@ -3,7 +3,7 @@ import numpy as np
import pytest import pytest
import torch import torch
from mmdet3d.core import array_converter, draw_heatmap_gaussian from mmdet3d.core import array_converter, draw_heatmap_gaussian, points_img2cam
def test_gaussian(): def test_gaussian():
...@@ -178,3 +178,13 @@ def test_array_converter(): ...@@ -178,3 +178,13 @@ def test_array_converter():
with pytest.raises(TypeError): with pytest.raises(TypeError):
new_array_a, new_array_b = test_func_9(container, new_array_a, new_array_b = test_func_9(container,
[True, np.array([3.0])]) [True, np.array([3.0])])
def test_points_img2cam():
    """Verify points_img2cam against precomputed camera-frame coordinates."""
    points = torch.tensor([[0.5764, 0.9109, 0.7576],
                           [0.6656, 0.5498, 0.9813]])
    cam2img = torch.tensor([[700., 0., 450., 0.],
                            [0., 700., 200., 0.],
                            [0., 0., 1., 0.]])
    expected_xyzs = torch.tensor([[-0.4864, -0.2155, 0.7576],
                                  [-0.6299, -0.2796, 0.9813]])
    result = points_img2cam(points, cam2img)
    assert torch.allclose(result, expected_xyzs, atol=1e-3)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment