Unverified commit eead419a, authored by Yezhen Cong, committed by GitHub

[Refactor]: Moved projection2image-type viz functions to a new file (#480)

parent 43f5f427
import copy
import cv2
import numpy as np
import torch
from matplotlib import pyplot as plt
def project_pts_on_img(points,
                       raw_img,
                       lidar2img_rt,
                       max_distance=70,
                       thickness=-1):
    """Project the 3D point cloud on a 2D image.

    Args:
        points (numpy.array): 3D point cloud (x, y, z) to visualize.
        raw_img (numpy.array): The numpy array of the image.
        lidar2img_rt (numpy.array, shape=[4, 4]): The projection matrix
            from lidar to image coordinates, composed of the camera
            intrinsic and extrinsic parameters.
        max_distance (float): The max distance of the point cloud.
            Default: 70.
        thickness (int, optional): The thickness of 2D points. Default: -1.
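
    Example:
        A minimal sketch with placeholder inputs; the random points and
        the identity matrix below are stand-ins, not real lidar data or
        a calibrated lidar-to-image matrix::

            points = np.random.rand(1000, 4) * 50  # x, y, z (+ extras)
            img = np.zeros((900, 1600, 3), dtype=np.uint8)
            project_pts_on_img(points, img, np.eye(4))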
"""
img = raw_img.copy()
num_points = points.shape[0]
pts_4d = np.concatenate([points[:, :3], np.ones((num_points, 1))], axis=-1)
pts_2d = pts_4d @ lidar2img_rt.T
# cam_points is Tensor of Nx4 whose last column is 1
# transform camera coordinate to image coordinate
pts_2d[:, 2] = np.clip(pts_2d[:, 2], a_min=1e-5, a_max=99999)
pts_2d[:, 0] /= pts_2d[:, 2]
pts_2d[:, 1] /= pts_2d[:, 2]
fov_inds = ((pts_2d[:, 0] < img.shape[1])
& (pts_2d[:, 0] >= 0)
& (pts_2d[:, 1] < img.shape[0])
& (pts_2d[:, 1] >= 0))
imgfov_pts_2d = pts_2d[fov_inds, :3] # u, v, d
cmap = plt.cm.get_cmap('hsv', 256)
cmap = np.array([cmap(i) for i in range(256)])[:, :3] * 255
for i in range(imgfov_pts_2d.shape[0]):
depth = imgfov_pts_2d[i, 2]
color = cmap[np.clip(int(max_distance * 10 / depth), 0, 255), :]
cv2.circle(
img,
center=(int(np.round(imgfov_pts_2d[i, 0])),
int(np.round(imgfov_pts_2d[i, 1]))),
radius=1,
color=tuple(color),
thickness=thickness,
)
cv2.imshow('project_pts_img', img.astype(np.uint8))
cv2.waitKey(100)
def project_bbox3d_on_img(bboxes3d,
                          raw_img,
                          lidar2img_rt,
                          color=(0, 255, 0),
                          thickness=1):
    """Project the 3D bbox on a 2D image.

    Args:
        bboxes3d (:obj:`BaseInstance3DBoxes`): M 3D bboxes
            (x, y, z, dx, dy, dz, yaw) to visualize; the ``corners``
            property provides the eight corners of each box.
        raw_img (numpy.array): The numpy array of the image.
        lidar2img_rt (numpy.array, shape=[4, 4]): The projection matrix
            from lidar to image coordinates, composed of the camera
            intrinsic and extrinsic parameters.
        color (tuple[int]): The color to draw bboxes. Default: (0, 255, 0).
        thickness (int, optional): The thickness of bboxes. Default: 1.
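
    Example:
        An illustrative call; ``boxes`` is assumed to be a box instance
        exposing a ``corners`` property (e.g. ``LiDARInstance3DBoxes``)
        and the identity matrix is only a placeholder projection::

            project_bbox3d_on_img(boxes, img, np.eye(4))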
"""
img = raw_img.copy()
corners_3d = bboxes3d.corners
num_bbox = corners_3d.shape[0]
pts_4d = np.concatenate(
[corners_3d.reshape(-1, 3),
np.ones((num_bbox * 8, 1))], axis=-1)
pts_2d = pts_4d @ lidar2img_rt.T
pts_2d[:, 2] = np.clip(pts_2d[:, 2], a_min=1e-5, a_max=1e5)
pts_2d[:, 0] /= pts_2d[:, 2]
pts_2d[:, 1] /= pts_2d[:, 2]
imgfov_pts_2d = pts_2d[..., :2].reshape(num_bbox, 8, 2)
line_indices = ((0, 1), (0, 3), (0, 4), (1, 2), (1, 5), (3, 2), (3, 7),
(4, 5), (4, 7), (2, 6), (5, 6), (6, 7))
for i in range(num_bbox):
corners = imgfov_pts_2d[i].astype(np.int)
for start, end in line_indices:
cv2.line(img, (corners[start, 0], corners[start, 1]),
(corners[end, 0], corners[end, 1]), color, thickness,
cv2.LINE_AA)
cv2.imshow('project_bbox3d_img', img.astype(np.uint8))
cv2.waitKey(0)
def draw_lidar_bbox3d_on_img(bboxes3d,
                             raw_img,
                             lidar2img_rt,
                             img_metas,
                             color=(0, 255, 0),
                             thickness=1):
    """Project the 3D bbox on the 2D plane and draw on the input image.

    Args:
        bboxes3d (:obj:`LiDARInstance3DBoxes`): 3D bboxes in lidar
            coordinates (x, y, z, dx, dy, dz, yaw) to visualize; the
            ``corners`` property provides the eight corners of each box.
        raw_img (numpy.array): The numpy array of the image.
        lidar2img_rt (numpy.array, shape=[4, 4]): The projection matrix
            from lidar to image coordinates, composed of the camera
            intrinsic and extrinsic parameters.
        img_metas (dict): Unused here; kept so the signature matches
            ``draw_depth_bbox3d_on_img``.
        color (tuple[int]): The color to draw bboxes. Default: (0, 255, 0).
        thickness (int, optional): The thickness of bboxes. Default: 1.
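
    Example:
        An illustrative call with assumed inputs; ``boxes`` stands for a
        ``LiDARInstance3DBoxes`` instance and ``lidar2img`` for a real
        calibrated 4x4 matrix taken from the dataset info::

            vis_img = draw_lidar_bbox3d_on_img(boxes, img, lidar2img, None)
            cv2.imwrite('boxes_on_img.png', vis_img)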
"""
img = raw_img.copy()
corners_3d = bboxes3d.corners
num_bbox = corners_3d.shape[0]
pts_4d = np.concatenate(
[corners_3d.reshape(-1, 3),
np.ones((num_bbox * 8, 1))], axis=-1)
lidar2img_rt = copy.deepcopy(lidar2img_rt).reshape(4, 4)
if isinstance(lidar2img_rt, torch.Tensor):
lidar2img_rt = lidar2img_rt.cpu().numpy()
pts_2d = pts_4d @ lidar2img_rt.T
pts_2d[:, 2] = np.clip(pts_2d[:, 2], a_min=1e-5, a_max=1e5)
pts_2d[:, 0] /= pts_2d[:, 2]
pts_2d[:, 1] /= pts_2d[:, 2]
imgfov_pts_2d = pts_2d[..., :2].reshape(num_bbox, 8, 2)
line_indices = ((0, 1), (0, 3), (0, 4), (1, 2), (1, 5), (3, 2), (3, 7),
(4, 5), (4, 7), (2, 6), (5, 6), (6, 7))
for i in range(num_bbox):
corners = imgfov_pts_2d[i].astype(np.int)
for start, end in line_indices:
cv2.line(img, (corners[start, 0], corners[start, 1]),
(corners[end, 0], corners[end, 1]), color, thickness,
cv2.LINE_AA)
return img.astype(np.uint8)
def draw_depth_bbox3d_on_img(bboxes3d,
                             raw_img,
                             calibs,
                             img_metas,
                             color=(0, 255, 0),
                             thickness=1):
    """Project the 3D bbox on the 2D plane and draw on the input image.

    Args:
        bboxes3d (:obj:`DepthInstance3DBoxes`): 3D bboxes in depth
            coordinates (x, y, z, dx, dy, dz, yaw) to visualize; the
            ``corners`` property provides the eight corners of each box.
        raw_img (numpy.array): The numpy array of the image.
        calibs (dict): Camera calibration information containing the
            ``Rt`` (rotation) and ``K`` (intrinsic) matrices.
        img_metas (dict): Used to reverse the coordinate transformations
            applied by the data pipeline.
        color (tuple[int]): The color to draw bboxes. Default: (0, 255, 0).
        thickness (int, optional): The thickness of bboxes. Default: 1.
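
    Example:
        An illustrative call with placeholder calibration; a real call
        passes the actual ``Rt`` and ``K`` matrices and the ``img_metas``
        produced by the data pipeline, not these stand-ins::

            calibs = dict(Rt=np.eye(3), K=np.eye(3))
            vis_img = draw_depth_bbox3d_on_img(boxes, img, calibs,
                                               img_metas)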
"""
from mmdet3d.core import Coord3DMode
from mmdet3d.core.bbox import points_cam2img
from mmdet3d.models import apply_3d_transformation
img = raw_img.copy()
calibs = copy.deepcopy(calibs)
img_metas = copy.deepcopy(img_metas)
corners_3d = bboxes3d.corners
num_bbox = corners_3d.shape[0]
points_3d = corners_3d.reshape(-1, 3)
assert ('Rt' in calibs.keys() and 'K' in calibs.keys()), \
'Rt and K matrix should be provided as camera caliberation information'
if not isinstance(calibs['Rt'], torch.Tensor):
calibs['Rt'] = torch.from_numpy(np.array(calibs['Rt']))
if not isinstance(calibs['K'], torch.Tensor):
calibs['K'] = torch.from_numpy(np.array(calibs['K']))
calibs['Rt'] = calibs['Rt'].reshape(3, 3).float().cpu()
calibs['K'] = calibs['K'].reshape(3, 3).float().cpu()
# first reverse the data transformations
xyz_depth = apply_3d_transformation(
points_3d, 'DEPTH', img_metas, reverse=True)
# then convert from depth coords to camera coords
xyz_cam = Coord3DMode.convert_point(
xyz_depth, Coord3DMode.DEPTH, Coord3DMode.CAM, rt_mat=calibs['Rt'])
# project to 2d to get image coords (uv)
uv_origin = points_cam2img(xyz_cam, calibs['K'])
uv_origin = (uv_origin - 1).round()
imgfov_pts_2d = uv_origin[..., :2].reshape(num_bbox, 8, 2).numpy()
line_indices = ((0, 1), (0, 3), (0, 4), (1, 2), (1, 5), (3, 2), (3, 7),
(4, 5), (4, 7), (2, 6), (5, 6), (6, 7))
for i in range(num_bbox):
corners = imgfov_pts_2d[i].astype(np.int)
for start, end in line_indices:
cv2.line(img, (corners[start, 0], corners[start, 1]),
(corners[end, 0], corners[end, 1]), color, thickness,
cv2.LINE_AA)
return img.astype(np.uint8)
import copy
import cv2
import numpy as np
import torch
from matplotlib import pyplot as plt
try:
    import open3d as o3d
......
@@ -318,206 +316,6 @@ def show_pts_index_boxes(points,
vis.destroy_window()
[removed: project_pts_on_img, project_bbox3d_on_img, draw_lidar_bbox3d_on_img
and draw_depth_bbox3d_on_img are deleted from open3d_vis.py; their bodies are
identical to the new image_vis.py contents shown above.]
class Visualizer(object):
r"""Online visualizer implemented with Open3d.
......
......
@@ -3,6 +3,8 @@ import numpy as np
import trimesh
from os import path as osp
from .image_vis import draw_depth_bbox3d_on_img, draw_lidar_bbox3d_on_img
def _write_obj(points, out_filename):
"""Write points into ``obj`` format for meshlab visualization.
......
@@ -212,9 +214,9 @@ def show_multi_modality_result(img,
The tuple of color should be in BGR order. Default: (72, 101, 241)
"""
     if depth_bbox:
-        from .open3d_vis import draw_depth_bbox3d_on_img as draw_bbox
+        draw_bbox = draw_depth_bbox3d_on_img
     else:
-        from .open3d_vis import draw_lidar_bbox3d_on_img as draw_bbox
+        draw_bbox = draw_lidar_bbox3d_on_img
     result_path = osp.join(out_dir, filename)
     mmcv.mkdir_or_exist(result_path)
......
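Both branches bind ``draw_bbox`` to a function with the same calling shape
(bboxes3d, raw_img, projection matrix or calib dict, img_metas, color,
thickness), so the drawing code that follows (collapsed here) is unchanged.
A hypothetical downstream call, with argument names assumed rather than
taken from this diff:

    img_with_boxes = draw_bbox(gt_bboxes, img, proj_mat, img_metas)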