Commit 27a546e9 authored by ZCMax, committed by ChaimZhu

[Refactor] Visualization Refactor

parent 0e17beab
@@ -14,6 +14,11 @@ env_cfg = dict(
dist_cfg=dict(backend='nccl'),
)
vis_backends = [dict(type='LocalVisBackend')]
visualizer = dict(
type='Det3DLocalVisualizer', vis_backends=vis_backends, name='visualizer')
log_processor = dict(type='LogProcessor', window_size=50, by_epoch=True)
log_level = 'INFO'
load_from = None
resume = False
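# Note (illustrative, not part of this commit): LocalVisBackend writes
# visualization results to local disk. Other mmengine backends can be
# appended to ``vis_backends`` in the same way; which backends are
# available depends on the installed mmengine version, e.g.:
#
# vis_backends = [
#     dict(type='LocalVisBackend'),
#     dict(type='TensorboardVisBackend'),
# ]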
# Copyright (c) OpenMMLab. All rights reserved.
from argparse import ArgumentParser
from mmdet3d.apis import (inference_mono_3d_detector, init_model,
show_result_meshlab)
import mmcv
from mmdet3d.apis import inference_mono_3d_detector, init_model
from mmdet3d.registry import VISUALIZERS
from mmdet3d.utils import register_all_modules
def main():
def parse_args():
parser = ArgumentParser()
parser.add_argument('image', help='image file')
parser.add_argument('ann', help='ann file')
@@ -26,21 +29,42 @@ def main():
action='store_true',
help='whether to save online visualization results')
args = parser.parse_args()
return args
def main(args):
# register all modules in mmdet3d into the registries
register_all_modules()
# build the model from a config file and a checkpoint file
model = init_model(args.config, args.checkpoint, device=args.device)
# init visualizer
visualizer = VISUALIZERS.build(model.cfg.visualizer)
visualizer.dataset_meta = {
'CLASSES': model.CLASSES,
'PALETTE': model.PALETTE
}
# test a single image
result, data = inference_mono_3d_detector(model, args.image, args.ann)
img = mmcv.imread(args.image)
img = mmcv.imconvert(img, 'bgr', 'rgb')
data_input = dict(img=img)
# show the results
show_result_meshlab(
data,
result,
args.out_dir,
args.score_thr,
show=args.show,
snapshot=args.snapshot,
task='mono-det')
visualizer.add_datasample(
'result',
data_input,
pred_sample=result,
show=True,
wait_time=0,
out_file=args.out_file,
pred_score_thr=args.score_thr,
vis_task='mono-det')
if __name__ == '__main__':
main()
args = parse_args()
main(args)
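# Usage sketch (illustrative, not part of this commit): the refactored
# demos all follow the same pattern -- build a Det3DLocalVisualizer from
# a plain config dict via the VISUALIZERS registry, then attach dataset
# metadata. The class names and palette below are placeholder values.
from mmdet3d.registry import VISUALIZERS
from mmdet3d.utils import register_all_modules

register_all_modules()
visualizer = VISUALIZERS.build(
    dict(type='Det3DLocalVisualizer', name='demo_visualizer'))
visualizer.dataset_meta = {
    'CLASSES': ('Pedestrian', 'Cyclist', 'Car'),  # placeholder classes
    'PALETTE': [(220, 20, 60), (119, 11, 32), (0, 0, 142)]  # placeholder
}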
# Copyright (c) OpenMMLab. All rights reserved.
from argparse import ArgumentParser
from mmdet3d.apis import (inference_multi_modality_detector, init_model,
show_result_meshlab)
import mmcv
import numpy as np
from mmdet3d.apis import inference_multi_modality_detector, init_model
from mmdet3d.registry import VISUALIZERS
from mmdet3d.utils import register_all_modules
def main():
def parse_args():
parser = ArgumentParser()
parser.add_argument('pcd', help='Point cloud file')
parser.add_argument('image', help='image file')
@@ -27,22 +31,44 @@ def main():
action='store_true',
help='whether to save online visualization results')
args = parser.parse_args()
return args
def main(args):
# register all modules in mmdet3d into the registries
register_all_modules()
# build the model from a config file and a checkpoint file
model = init_model(args.config, args.checkpoint, device=args.device)
# test a single image
# init visualizer
visualizer = VISUALIZERS.build(model.cfg.visualizer)
visualizer.dataset_meta = {
'CLASSES': model.CLASSES,
'PALETTE': model.PALETTE
}
# test a single image and point cloud sample
result, data = inference_multi_modality_detector(model, args.pcd,
args.image, args.ann)
points = np.fromfile(args.pcd, dtype=np.float32)
img = mmcv.imread(args.image)
img = mmcv.imconvert(img, 'bgr', 'rgb')
data_input = dict(points=points, img=img)
# show the results
show_result_meshlab(
data,
result,
args.out_dir,
args.score_thr,
show=args.show,
snapshot=args.snapshot,
task='multi_modality-det')
visualizer.add_datasample(
'result',
data_input,
pred_sample=result,
show=True,
wait_time=0,
out_file=args.out_file,
pred_score_thr=args.score_thr,
vis_task='multi_modality-det')
if __name__ == '__main__':
main()
args = parse_args()
main(args)
# Copyright (c) OpenMMLab. All rights reserved.
from argparse import ArgumentParser
from mmdet3d.apis import inference_segmentor, init_model, show_result_meshlab
import numpy as np
from mmdet3d.apis import inference_segmentor, init_model
from mmdet3d.registry import VISUALIZERS
from mmdet3d.utils import register_all_modules
def main():
def parse_args():
parser = ArgumentParser()
parser.add_argument('pcd', help='Point cloud file')
parser.add_argument('config', help='Config file')
@@ -22,21 +26,40 @@ def main():
action='store_true',
help='whether to save online visualization results')
args = parser.parse_args()
return args
def main(args):
# register all modules in mmdet3d into the registries
register_all_modules()
# build the model from a config file and a checkpoint file
model = init_model(args.config, args.checkpoint, device=args.device)
# test a single image
# init visualizer
visualizer = VISUALIZERS.build(model.cfg.visualizer)
visualizer.dataset_meta = {
'CLASSES': model.CLASSES,
'PALETTE': model.PALETTE
}
# test a single point cloud sample
result, data = inference_segmentor(model, args.pcd)
points = np.fromfile(args.pcd, dtype=np.float32)
data_input = dict(points=points)
# show the results
show_result_meshlab(
data,
result,
args.out_dir,
show=args.show,
snapshot=args.snapshot,
task='seg',
palette=model.PALETTE)
visualizer.add_datasample(
'result',
data_input,
pred_sample=result,
show=True,
wait_time=0,
out_file=args.out_file,
vis_task='seg')
if __name__ == '__main__':
main()
args = parse_args()
main(args)
# Copyright (c) OpenMMLab. All rights reserved.
from argparse import ArgumentParser
from mmdet3d.apis import inference_detector, init_model, show_result_meshlab
import numpy as np
from mmdet3d.apis import inference_detector, init_model
from mmdet3d.registry import VISUALIZERS
from mmdet3d.utils import register_all_modules
def main():
def parse_args():
parser = ArgumentParser()
parser.add_argument('pcd', help='Point cloud file')
parser.add_argument('config', help='Config file')
@@ -24,21 +28,41 @@ def main():
action='store_true',
help='whether to save online visualization results')
args = parser.parse_args()
return args
def main(args):
# register all modules in mmdet3d into the registries
register_all_modules()
# TODO: Support inference of point cloud numpy file.
# build the model from a config file and a checkpoint file
model = init_model(args.config, args.checkpoint, device=args.device)
# test a single image
# init visualizer
visualizer = VISUALIZERS.build(model.cfg.visualizer)
visualizer.dataset_meta = {
'CLASSES': model.CLASSES,
'PALETTE': model.PALETTE
}
# test a single point cloud sample
result, data = inference_detector(model, args.pcd)
points = np.fromfile(args.pcd, dtype=np.float32)
data_input = dict(points=points)
# show the results
show_result_meshlab(
data,
result,
args.out_dir,
args.score_thr,
show=args.show,
snapshot=args.snapshot,
task='det')
visualizer.add_datasample(
'result',
data_input,
pred_sample=result,
show=True,
wait_time=0,
out_file=args.out_file,
pred_score_thr=args.score_thr,
vis_task='det')
if __name__ == '__main__':
main()
args = parse_args()
main(args)
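# Note (illustrative, not part of this commit): KITTI-style ``.bin``
# point cloud files are flat float32 buffers, so ``np.fromfile`` as used
# in the demos above returns a 1-D array. Downstream drawing code
# generally expects an (N, dim) array; the load dimension of 4
# (x, y, z, intensity) below is an assumption about the input file, and
# the path is hypothetical.
import numpy as np

points = np.fromfile('demo/data/kitti/kitti_000008.bin',
                     dtype=np.float32).reshape(-1, 4)
print(points.shape)  # (N, 4)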
@@ -2,13 +2,12 @@
from .inference import (convert_SyncBN, inference_detector,
inference_mono_3d_detector,
inference_multi_modality_detector, inference_segmentor,
init_model, show_result_meshlab)
init_model)
__all__ = [
'inference_detector',
'init_model',
'inference_mono_3d_detector',
'show_result_meshlab',
'convert_SyncBN',
'inference_multi_modality_detector',
'inference_segmentor',
@@ -9,10 +9,7 @@ import torch
from mmcv.parallel import collate, scatter
from mmcv.runner import load_checkpoint
from mmdet3d.core import (Box3DMode, CameraInstance3DBoxes, Coord3DMode,
DepthInstance3DBoxes, LiDARInstance3DBoxes,
show_multi_modality_result, show_result,
show_seg_result)
from mmdet3d.core import Box3DMode
from mmdet3d.core.bbox import get_box_type
from mmdet3d.datasets.pipelines import Compose
from mmdet3d.models import build_model
@@ -323,204 +320,3 @@ def inference_segmentor(model, pcd):
with torch.no_grad():
result = model(return_loss=False, rescale=True, **data)
return result, data
def show_det_result_meshlab(data,
result,
out_dir,
score_thr=0.0,
show=False,
snapshot=False):
"""Show 3D detection result by meshlab."""
points = data['points'][0][0].cpu().numpy()
pts_filename = data['img_metas'][0][0]['pts_filename']
file_name = osp.split(pts_filename)[-1].split('.')[0]
if 'pts_bbox' in result[0].keys():
pred_bboxes = result[0]['pts_bbox']['boxes_3d'].tensor.numpy()
pred_scores = result[0]['pts_bbox']['scores_3d'].numpy()
else:
pred_bboxes = result[0]['boxes_3d'].tensor.numpy()
pred_scores = result[0]['scores_3d'].numpy()
# filter out low score bboxes for visualization
if score_thr > 0:
inds = pred_scores > score_thr
pred_bboxes = pred_bboxes[inds]
# for now we convert points into depth mode
box_mode = data['img_metas'][0][0]['box_mode_3d']
if box_mode != Box3DMode.DEPTH:
points = Coord3DMode.convert(points, box_mode, Coord3DMode.DEPTH)
show_bboxes = Box3DMode.convert(pred_bboxes, box_mode, Box3DMode.DEPTH)
else:
show_bboxes = deepcopy(pred_bboxes)
show_result(
points,
None,
show_bboxes,
out_dir,
file_name,
show=show,
snapshot=snapshot)
return file_name
def show_seg_result_meshlab(data,
result,
out_dir,
palette,
show=False,
snapshot=False):
"""Show 3D segmentation result by meshlab."""
points = data['points'][0][0].cpu().numpy()
pts_filename = data['img_metas'][0][0]['pts_filename']
file_name = osp.split(pts_filename)[-1].split('.')[0]
pred_seg = result[0]['semantic_mask'].numpy()
if palette is None:
# generate random color map
max_idx = pred_seg.max()
palette = np.random.randint(0, 256, size=(max_idx + 1, 3))
palette = np.array(palette).astype(int)
show_seg_result(
points,
None,
pred_seg,
out_dir,
file_name,
palette=palette,
show=show,
snapshot=snapshot)
return file_name
def show_proj_det_result_meshlab(data,
result,
out_dir,
score_thr=0.0,
show=False,
snapshot=False):
"""Show result of projecting 3D bbox to 2D image by meshlab."""
assert 'img' in data.keys(), 'image data is not provided for visualization'
img_filename = data['img_metas'][0][0]['filename']
file_name = osp.split(img_filename)[-1].split('.')[0]
# read from file because img in data_dict has undergone pipeline transform
img = mmcv.imread(img_filename)
if 'pts_bbox' in result[0].keys():
result[0] = result[0]['pts_bbox']
elif 'img_bbox' in result[0].keys():
result[0] = result[0]['img_bbox']
pred_bboxes = result[0]['boxes_3d'].tensor.numpy()
pred_scores = result[0]['scores_3d'].numpy()
# filter out low score bboxes for visualization
if score_thr > 0:
inds = pred_scores > score_thr
pred_bboxes = pred_bboxes[inds]
box_mode = data['img_metas'][0][0]['box_mode_3d']
if box_mode == Box3DMode.LIDAR:
if 'lidar2img' not in data['img_metas'][0][0]:
raise NotImplementedError(
'LiDAR to image transformation matrix is not provided')
show_bboxes = LiDARInstance3DBoxes(pred_bboxes, origin=(0.5, 0.5, 0))
show_multi_modality_result(
img,
None,
show_bboxes,
data['img_metas'][0][0]['lidar2img'],
out_dir,
file_name,
box_mode='lidar',
show=show)
elif box_mode == Box3DMode.DEPTH:
show_bboxes = DepthInstance3DBoxes(pred_bboxes, origin=(0.5, 0.5, 0))
show_multi_modality_result(
img,
None,
show_bboxes,
None,
out_dir,
file_name,
box_mode='depth',
img_metas=data['img_metas'][0][0],
show=show)
elif box_mode == Box3DMode.CAM:
if 'cam2img' not in data['img_metas'][0][0]:
raise NotImplementedError(
'camera intrinsic matrix is not provided')
show_bboxes = CameraInstance3DBoxes(
pred_bboxes, box_dim=pred_bboxes.shape[-1], origin=(0.5, 1.0, 0.5))
show_multi_modality_result(
img,
None,
show_bboxes,
data['img_metas'][0][0]['cam2img'],
out_dir,
file_name,
box_mode='camera',
show=show)
else:
raise NotImplementedError(
f'visualization of {box_mode} bbox is not supported')
return file_name
def show_result_meshlab(data,
result,
out_dir,
score_thr=0.0,
show=False,
snapshot=False,
task='det',
palette=None):
"""Show result by meshlab.
Args:
data (dict): Contain data from pipeline.
result (dict): Predicted result from model.
out_dir (str): Directory to save visualized result.
score_thr (float, optional): Minimum score of bboxes to be shown.
Default: 0.0
show (bool, optional): Visualize the results online. Defaults to False.
snapshot (bool, optional): Whether to save the online results.
Defaults to False.
task (str, optional): Distinguish which task result to visualize.
Currently we support 3D detection, multi-modality detection and
3D segmentation. Defaults to 'det'.
palette (list[list[int]]] | np.ndarray, optional): The palette
of segmentation map. If None is given, random palette will be
generated. Defaults to None.
"""
assert task in ['det', 'multi_modality-det', 'seg', 'mono-det'], \
f'unsupported visualization task {task}'
assert out_dir is not None, 'Expect out_dir, got none.'
if task in ['det', 'multi_modality-det']:
file_name = show_det_result_meshlab(data, result, out_dir, score_thr,
show, snapshot)
if task in ['seg']:
file_name = show_seg_result_meshlab(data, result, out_dir, palette,
show, snapshot)
if task in ['multi_modality-det', 'mono-det']:
file_name = show_proj_det_result_meshlab(data, result, out_dir,
score_thr, show, snapshot)
return out_dir, file_name
@@ -6,5 +6,5 @@ from .evaluation import * # noqa: F401, F403
from .points import * # noqa: F401, F403
from .post_processing import * # noqa: F401, F403
from .utils import * # noqa: F401, F403
from .visualizer import * # noqa: F401, F403
from .visualization import * # noqa: F401, F403
from .voxel import * # noqa: F401, F403
# Copyright (c) OpenMMLab. All rights reserved.
from .local_visualizer import Det3DLocalVisualizer
from .vis_utils import (proj_camera_bbox3d_to_img, proj_depth_bbox3d_to_img,
proj_lidar_bbox3d_to_img, write_obj,
write_oriented_bbox)
__all__ = [
'Det3DLocalVisualizer', 'proj_depth_bbox3d_to_img',
'proj_camera_bbox3d_to_img', 'proj_lidar_bbox3d_to_img',
'write_oriented_bbox', 'write_obj'
]
(collapsed file diff omitted)
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import numpy as np
import torch
import trimesh
from mmdet3d.core.bbox import Box3DMode, Coord3DMode
from mmdet3d.core.bbox.structures.cam_box3d import CameraInstance3DBoxes
from mmdet3d.core.bbox.structures.depth_box3d import DepthInstance3DBoxes
from mmdet3d.core.bbox.structures.lidar_box3d import LiDARInstance3DBoxes
def write_obj(points, out_filename):
"""Write points into ``obj`` format for meshlab visualization.
Args:
points (np.ndarray): Points in shape (N, dim).
out_filename (str): Filename to be saved.
"""
N = points.shape[0]
fout = open(out_filename, 'w')
for i in range(N):
if points.shape[1] == 6:
c = points[i, 3:].astype(int)
fout.write(
'v %f %f %f %d %d %d\n' %
(points[i, 0], points[i, 1], points[i, 2], c[0], c[1], c[2]))
else:
fout.write('v %f %f %f\n' %
(points[i, 0], points[i, 1], points[i, 2]))
fout.close()
def write_oriented_bbox(scene_bbox, out_filename):
"""Export oriented (around Z axis) scene bbox to meshes.
Args:
scene_bbox(list[ndarray] or ndarray): xyz pos of center and
3 lengths (x_size, y_size, z_size) and heading angle around Z axis.
Y forward, X right, Z upward. heading angle of positive X is 0,
heading angle of positive Y is 90 degrees.
out_filename(str): Filename.
"""
def heading2rotmat(heading_angle):
rotmat = np.zeros((3, 3))
rotmat[2, 2] = 1
cosval = np.cos(heading_angle)
sinval = np.sin(heading_angle)
rotmat[0:2, 0:2] = np.array([[cosval, -sinval], [sinval, cosval]])
return rotmat
def convert_oriented_box_to_trimesh_fmt(box):
ctr = box[:3]
lengths = box[3:6]
trns = np.eye(4)
trns[0:3, 3] = ctr
trns[3, 3] = 1.0
trns[0:3, 0:3] = heading2rotmat(box[6])
box_trimesh_fmt = trimesh.creation.box(lengths, trns)
return box_trimesh_fmt
if len(scene_bbox) == 0:
scene_bbox = np.zeros((1, 7))
scene = trimesh.scene.Scene()
for box in scene_bbox:
scene.add_geometry(convert_oriented_box_to_trimesh_fmt(box))
mesh_list = trimesh.util.concatenate(scene.dump())
# save to obj file
trimesh.io.export.export_mesh(mesh_list, out_filename, file_type='obj')
return
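# Usage sketch (illustrative, not part of this commit): export a single
# unit box (x, y, z, x_size, y_size, z_size, yaw) centered at the origin
# to an ``.obj`` mesh viewable in meshlab. The output path is a
# placeholder.
import numpy as np

box = np.array([[0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 0.0]])
write_oriented_bbox(box, '/tmp/unit_box.obj')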
def to_depth_mode(points, bboxes):
"""Convert points and bboxes to Depth Coord and Depth Box mode."""
if points is not None:
points = Coord3DMode.convert_point(points.copy(), Coord3DMode.LIDAR,
Coord3DMode.DEPTH)
if bboxes is not None:
bboxes = Box3DMode.convert(bboxes.clone(), Box3DMode.LIDAR,
Box3DMode.DEPTH)
return points, bboxes
# TODO: refactor lidar2img to img_meta
def proj_lidar_bbox3d_to_img(bboxes_3d: LiDARInstance3DBoxes,
input_meta: dict) -> np.ndarray:
"""Project the 3D bbox on 2D plane.
Args:
bboxes_3d (:obj:`LiDARInstance3DBoxes`):
3d bbox in lidar coordinate system to visualize.
input_meta (dict): Meta information that contains the 4x4
``lidar2img`` projection matrix.
"""
corners_3d = bboxes_3d.corners
num_bbox = corners_3d.shape[0]
pts_4d = np.concatenate(
[corners_3d.reshape(-1, 3),
np.ones((num_bbox * 8, 1))], axis=-1)
lidar2img = copy.deepcopy(input_meta['lidar2img']).reshape(4, 4)
if isinstance(lidar2img, torch.Tensor):
lidar2img = lidar2img.cpu().numpy()
pts_2d = pts_4d @ lidar2img.T
pts_2d[:, 2] = np.clip(pts_2d[:, 2], a_min=1e-5, a_max=1e5)
pts_2d[:, 0] /= pts_2d[:, 2]
pts_2d[:, 1] /= pts_2d[:, 2]
imgfov_pts_2d = pts_2d[..., :2].reshape(num_bbox, 8, 2)
return imgfov_pts_2d
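# Sanity-check sketch (illustrative, not part of this commit): project
# one LiDAR box with an identity ``lidar2img`` stand-in; the output has
# shape (num_boxes, 8, 2). The box values and projection matrix are
# arbitrary assumptions.
import numpy as np
from mmdet3d.core.bbox.structures.lidar_box3d import LiDARInstance3DBoxes

boxes = LiDARInstance3DBoxes(
    np.array([[10.0, 0.0, -1.0, 4.0, 2.0, 1.5, 0.0]]))
corners_2d = proj_lidar_bbox3d_to_img(boxes, dict(lidar2img=np.eye(4)))
print(corners_2d.shape)  # (1, 8, 2)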
# TODO: remove third parameter in all functions here in favour of img_metas
def proj_depth_bbox3d_to_img(bboxes_3d: DepthInstance3DBoxes,
input_meta: dict) -> np.ndarray:
"""Project the 3D bbox on 2D plane and draw on input image.
Args:
bboxes_3d (:obj:`DepthInstance3DBoxes`, shape=[M, 7]):
3d bbox in depth coordinate system to visualize.
input_meta (dict): Used in coordinate transformation.
"""
from mmdet3d.core.bbox import points_cam2img
from mmdet3d.models import apply_3d_transformation
input_meta = copy.deepcopy(input_meta)
corners_3d = bboxes_3d.corners
num_bbox = corners_3d.shape[0]
points_3d = corners_3d.reshape(-1, 3)
# first reverse the data transformations
xyz_depth = apply_3d_transformation(
points_3d, 'DEPTH', input_meta, reverse=True)
# project to 2d to get image coords (uv)
uv_origin = points_cam2img(xyz_depth,
xyz_depth.new_tensor(input_meta['depth2img']))
uv_origin = (uv_origin - 1).round()
imgfov_pts_2d = uv_origin[..., :2].reshape(num_bbox, 8, 2).numpy()
return imgfov_pts_2d
# project the camera bboxes 3d to image
def proj_camera_bbox3d_to_img(bboxes_3d: CameraInstance3DBoxes,
input_meta: dict) -> np.ndarray:
"""Project the 3D bbox on 2D plane and draw on input image.
Args:
bboxes_3d (:obj:`CameraInstance3DBoxes`, shape=[M, 7]):
3d bbox in camera coordinate system to visualize.
input_meta (dict): Meta information that contains the camera
intrinsic matrix ``cam2img``.
"""
from mmdet3d.core.bbox import points_cam2img
cam2img = copy.deepcopy(input_meta['cam2img'])
corners_3d = bboxes_3d.corners
num_bbox = corners_3d.shape[0]
points_3d = corners_3d.reshape(-1, 3)
if not isinstance(cam2img, torch.Tensor):
cam2img = torch.from_numpy(np.array(cam2img))
assert (cam2img.shape == torch.Size([3, 3])
or cam2img.shape == torch.Size([4, 4]))
cam2img = cam2img.float().cpu()
# project to 2d to get image coords (uv)
uv_origin = points_cam2img(points_3d, cam2img)
uv_origin = (uv_origin - 1).round()
imgfov_pts_2d = uv_origin[..., :2].reshape(num_bbox, 8, 2).numpy()
return imgfov_pts_2d
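# Sanity-check sketch (illustrative, not part of this commit): the
# camera variant accepts a 3x3 or 4x4 intrinsic under the ``cam2img``
# key; the intrinsic values here are made up.
import numpy as np
from mmdet3d.core.bbox.structures.cam_box3d import CameraInstance3DBoxes

boxes = CameraInstance3DBoxes(
    np.array([[0.0, 1.0, 10.0, 2.0, 1.5, 4.0, 0.0]]))
K = np.array([[721.5, 0.0, 609.6],
              [0.0, 721.5, 172.9],
              [0.0, 0.0, 1.0]])
corners_2d = proj_camera_bbox3d_to_img(boxes, dict(cam2img=K))
print(corners_2d.shape)  # (1, 8, 2)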
# Copyright (c) OpenMMLab. All rights reserved.
from .show_result import (show_multi_modality_result, show_result,
show_seg_result)
__all__ = ['show_result', 'show_seg_result', 'show_multi_modality_result']
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import cv2
import numpy as np
import torch
from matplotlib import pyplot as plt
def project_pts_on_img(points,
raw_img,
lidar2img_rt,
max_distance=70,
thickness=-1):
"""Project the 3D points cloud on 2D image.
Args:
points (numpy.array): 3D points cloud (x, y, z) to visualize.
raw_img (numpy.array): The numpy array of image.
lidar2img_rt (numpy.array, shape=[4, 4]): The projection matrix
according to the camera intrinsic parameters.
max_distance (float, optional): the max distance of the points cloud.
Default: 70.
thickness (int, optional): The thickness of 2D points. Default: -1.
"""
img = raw_img.copy()
num_points = points.shape[0]
pts_4d = np.concatenate([points[:, :3], np.ones((num_points, 1))], axis=-1)
pts_2d = pts_4d @ lidar2img_rt.T
# pts_2d holds homogeneous image coordinates (u*d, v*d, d, 1)
# normalize by depth to obtain pixel coordinates
pts_2d[:, 2] = np.clip(pts_2d[:, 2], a_min=1e-5, a_max=99999)
pts_2d[:, 0] /= pts_2d[:, 2]
pts_2d[:, 1] /= pts_2d[:, 2]
fov_inds = ((pts_2d[:, 0] < img.shape[1])
& (pts_2d[:, 0] >= 0)
& (pts_2d[:, 1] < img.shape[0])
& (pts_2d[:, 1] >= 0))
imgfov_pts_2d = pts_2d[fov_inds, :3] # u, v, d
cmap = plt.cm.get_cmap('hsv', 256)
cmap = np.array([cmap(i) for i in range(256)])[:, :3] * 255
for i in range(imgfov_pts_2d.shape[0]):
depth = imgfov_pts_2d[i, 2]
color = cmap[np.clip(int(max_distance * 10 / depth), 0, 255), :]
cv2.circle(
img,
center=(int(np.round(imgfov_pts_2d[i, 0])),
int(np.round(imgfov_pts_2d[i, 1]))),
radius=1,
color=tuple(color),
thickness=thickness,
)
cv2.imshow('project_pts_img', img.astype(np.uint8))
cv2.waitKey(100)
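# Usage sketch (illustrative, not part of this commit): color synthetic
# points by depth on a blank canvas, using an identity matrix as a
# stand-in for a real ``lidar2img_rt``. A display is required since the
# function calls ``cv2.imshow`` internally.
import numpy as np

canvas = np.zeros((375, 1242, 3), dtype=np.uint8)
pts = np.random.rand(1000, 3) * 50.0 + 1.0  # keep depth positive
project_pts_on_img(pts, canvas, np.eye(4))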
def plot_rect3d_on_img(img,
num_rects,
rect_corners,
color=(0, 255, 0),
thickness=1):
"""Plot the boundary lines of 3D rectangular on 2D images.
Args:
img (numpy.array): The numpy array of image.
num_rects (int): Number of 3D rectangulars.
rect_corners (numpy.array): Coordinates of the corners of 3D
rectangulars. Should be in the shape of [num_rect, 8, 2].
color (tuple[int], optional): The color to draw bboxes.
Default: (0, 255, 0).
thickness (int, optional): The thickness of bboxes. Default: 1.
"""
line_indices = ((0, 1), (0, 3), (0, 4), (1, 2), (1, 5), (3, 2), (3, 7),
(4, 5), (4, 7), (2, 6), (5, 6), (6, 7))
for i in range(num_rects):
corners = rect_corners[i].astype(int)
for start, end in line_indices:
cv2.line(img, (corners[start, 0], corners[start, 1]),
(corners[end, 0], corners[end, 1]), color, thickness,
cv2.LINE_AA)
return img.astype(np.uint8)
def draw_lidar_bbox3d_on_img(bboxes3d,
raw_img,
lidar2img_rt,
img_metas,
color=(0, 255, 0),
thickness=1):
"""Project the 3D bbox on 2D plane and draw on input image.
Args:
bboxes3d (:obj:`LiDARInstance3DBoxes`):
3d bbox in lidar coordinate system to visualize.
raw_img (numpy.array): The numpy array of image.
lidar2img_rt (numpy.array, shape=[4, 4]): The projection matrix
according to the camera intrinsic parameters.
img_metas (dict): Unused here; kept for a consistent interface.
color (tuple[int], optional): The color to draw bboxes.
Default: (0, 255, 0).
thickness (int, optional): The thickness of bboxes. Default: 1.
"""
img = raw_img.copy()
corners_3d = bboxes3d.corners
num_bbox = corners_3d.shape[0]
pts_4d = np.concatenate(
[corners_3d.reshape(-1, 3),
np.ones((num_bbox * 8, 1))], axis=-1)
lidar2img_rt = copy.deepcopy(lidar2img_rt).reshape(4, 4)
if isinstance(lidar2img_rt, torch.Tensor):
lidar2img_rt = lidar2img_rt.cpu().numpy()
pts_2d = pts_4d @ lidar2img_rt.T
pts_2d[:, 2] = np.clip(pts_2d[:, 2], a_min=1e-5, a_max=1e5)
pts_2d[:, 0] /= pts_2d[:, 2]
pts_2d[:, 1] /= pts_2d[:, 2]
imgfov_pts_2d = pts_2d[..., :2].reshape(num_bbox, 8, 2)
return plot_rect3d_on_img(img, num_bbox, imgfov_pts_2d, color, thickness)
# TODO: remove third parameter in all functions here in favour of img_metas
def draw_depth_bbox3d_on_img(bboxes3d,
raw_img,
calibs,
img_metas,
color=(0, 255, 0),
thickness=1):
"""Project the 3D bbox on 2D plane and draw on input image.
Args:
bboxes3d (:obj:`DepthInstance3DBoxes`, shape=[M, 7]):
3d bbox in depth coordinate system to visualize.
raw_img (numpy.array): The numpy array of image.
calibs (dict): Camera calibration information, Rt and K.
img_metas (dict): Used in coordinate transformation.
color (tuple[int], optional): The color to draw bboxes.
Default: (0, 255, 0).
thickness (int, optional): The thickness of bboxes. Default: 1.
"""
from mmdet3d.core.bbox import points_cam2img
from mmdet3d.models import apply_3d_transformation
img = raw_img.copy()
img_metas = copy.deepcopy(img_metas)
corners_3d = bboxes3d.corners
num_bbox = corners_3d.shape[0]
points_3d = corners_3d.reshape(-1, 3)
# first reverse the data transformations
xyz_depth = apply_3d_transformation(
points_3d, 'DEPTH', img_metas, reverse=True)
# project to 2d to get image coords (uv)
uv_origin = points_cam2img(xyz_depth,
xyz_depth.new_tensor(img_metas['depth2img']))
uv_origin = (uv_origin - 1).round()
imgfov_pts_2d = uv_origin[..., :2].reshape(num_bbox, 8, 2).numpy()
return plot_rect3d_on_img(img, num_bbox, imgfov_pts_2d, color, thickness)
def draw_camera_bbox3d_on_img(bboxes3d,
raw_img,
cam2img,
img_metas,
color=(0, 255, 0),
thickness=1):
"""Project the 3D bbox on 2D plane and draw on input image.
Args:
bboxes3d (:obj:`CameraInstance3DBoxes`, shape=[M, 7]):
3d bbox in camera coordinate system to visualize.
raw_img (numpy.array): The numpy array of image.
cam2img (numpy.array | torch.Tensor): Camera intrinsic matrix,
denoted as `K` in depth bbox coordinate system.
img_metas (dict): Unused here; kept for a consistent interface.
color (tuple[int], optional): The color to draw bboxes.
Default: (0, 255, 0).
thickness (int, optional): The thickness of bboxes. Default: 1.
"""
from mmdet3d.core.bbox import points_cam2img
img = raw_img.copy()
cam2img = copy.deepcopy(cam2img)
corners_3d = bboxes3d.corners
num_bbox = corners_3d.shape[0]
points_3d = corners_3d.reshape(-1, 3)
if not isinstance(cam2img, torch.Tensor):
cam2img = torch.from_numpy(np.array(cam2img))
assert (cam2img.shape == torch.Size([3, 3])
or cam2img.shape == torch.Size([4, 4]))
cam2img = cam2img.float().cpu()
# project to 2d to get image coords (uv)
uv_origin = points_cam2img(points_3d, cam2img)
uv_origin = (uv_origin - 1).round()
imgfov_pts_2d = uv_origin[..., :2].reshape(num_bbox, 8, 2).numpy()
return plot_rect3d_on_img(img, num_bbox, imgfov_pts_2d, color, thickness)
# Copyright (c) OpenMMLab. All rights reserved.
import copy
import numpy as np
import torch
try:
import open3d as o3d
from open3d import geometry
except ImportError:
raise ImportError(
'Please run "pip install open3d" to install open3d first.')
def _draw_points(points,
vis,
points_size=2,
point_color=(0.5, 0.5, 0.5),
mode='xyz'):
"""Draw points on visualizer.
Args:
points (numpy.array | torch.tensor, shape=[N, 3+C]):
points to visualize.
vis (:obj:`open3d.visualization.Visualizer`): open3d visualizer.
points_size (int, optional): the size of points to show on visualizer.
Default: 2.
point_color (tuple[float], optional): the color of points.
Default: (0.5, 0.5, 0.5).
mode (str, optional): indicate type of the input points,
available mode ['xyz', 'xyzrgb']. Default: 'xyz'.
Returns:
tuple: points, color of each point.
"""
vis.get_render_option().point_size = points_size # set points size
if isinstance(points, torch.Tensor):
points = points.cpu().numpy()
points = points.copy()
pcd = geometry.PointCloud()
if mode == 'xyz':
pcd.points = o3d.utility.Vector3dVector(points[:, :3])
points_colors = np.tile(np.array(point_color), (points.shape[0], 1))
elif mode == 'xyzrgb':
pcd.points = o3d.utility.Vector3dVector(points[:, :3])
points_colors = points[:, 3:6]
# normalize to [0, 1] for open3d drawing
if not ((points_colors >= 0.0) & (points_colors <= 1.0)).all():
points_colors /= 255.0
else:
raise NotImplementedError
pcd.colors = o3d.utility.Vector3dVector(points_colors)
vis.add_geometry(pcd)
return pcd, points_colors
def _draw_bboxes(bbox3d,
vis,
points_colors,
pcd=None,
bbox_color=(0, 1, 0),
points_in_box_color=(1, 0, 0),
rot_axis=2,
center_mode='lidar_bottom',
mode='xyz'):
"""Draw bbox on visualizer and change the color of points inside bbox3d.
Args:
bbox3d (numpy.array | torch.tensor, shape=[M, 7]):
3d bbox (x, y, z, x_size, y_size, z_size, yaw) to visualize.
vis (:obj:`open3d.visualization.Visualizer`): open3d visualizer.
points_colors (numpy.array): color of each points.
pcd (:obj:`open3d.geometry.PointCloud`, optional): point cloud.
Default: None.
bbox_color (tuple[float], optional): the color of bbox.
Default: (0, 1, 0).
points_in_box_color (tuple[float], optional):
the color of points inside bbox3d. Default: (1, 0, 0).
rot_axis (int, optional): rotation axis of bbox. Default: 2.
center_mode (str, optional): indicate the center of bbox is
bottom center or gravity center. available mode
['lidar_bottom', 'camera_bottom']. Default: 'lidar_bottom'.
mode (str, optional): indicate type of the input points,
available mode ['xyz', 'xyzrgb']. Default: 'xyz'.
"""
if isinstance(bbox3d, torch.Tensor):
bbox3d = bbox3d.cpu().numpy()
bbox3d = bbox3d.copy()
in_box_color = np.array(points_in_box_color)
for i in range(len(bbox3d)):
center = bbox3d[i, 0:3]
dim = bbox3d[i, 3:6]
yaw = np.zeros(3)
yaw[rot_axis] = bbox3d[i, 6]
rot_mat = geometry.get_rotation_matrix_from_xyz(yaw)
if center_mode == 'lidar_bottom':
center[rot_axis] += dim[
rot_axis] / 2 # bottom center to gravity center
elif center_mode == 'camera_bottom':
center[rot_axis] -= dim[
rot_axis] / 2 # bottom center to gravity center
box3d = geometry.OrientedBoundingBox(center, rot_mat, dim)
line_set = geometry.LineSet.create_from_oriented_bounding_box(box3d)
line_set.paint_uniform_color(bbox_color)
# draw bboxes on visualizer
vis.add_geometry(line_set)
# change the color of points which are in box
if pcd is not None and mode == 'xyz':
indices = box3d.get_point_indices_within_bounding_box(pcd.points)
points_colors[indices] = in_box_color
# update points colors
if pcd is not None:
pcd.colors = o3d.utility.Vector3dVector(points_colors)
vis.update_geometry(pcd)
def show_pts_boxes(points,
bbox3d=None,
show=True,
save_path=None,
points_size=2,
point_color=(0.5, 0.5, 0.5),
bbox_color=(0, 1, 0),
points_in_box_color=(1, 0, 0),
rot_axis=2,
center_mode='lidar_bottom',
mode='xyz'):
"""Draw bbox and points on visualizer.
Args:
points (numpy.array | torch.tensor, shape=[N, 3+C]):
points to visualize.
bbox3d (numpy.array | torch.tensor, shape=[M, 7], optional):
3D bbox (x, y, z, x_size, y_size, z_size, yaw) to visualize.
Defaults to None.
show (bool, optional): whether to show the visualization results.
Default: True.
save_path (str, optional): path to save visualized results.
Default: None.
points_size (int, optional): the size of points to show on visualizer.
Default: 2.
point_color (tuple[float], optional): the color of points.
Default: (0.5, 0.5, 0.5).
bbox_color (tuple[float], optional): the color of bbox.
Default: (0, 1, 0).
points_in_box_color (tuple[float], optional):
the color of points which are in bbox3d. Default: (1, 0, 0).
rot_axis (int, optional): rotation axis of bbox. Default: 2.
center_mode (str, optional): indicate the center of bbox is bottom
center or gravity center. available mode
['lidar_bottom', 'camera_bottom']. Default: 'lidar_bottom'.
mode (str, optional): indicate type of the input points, available
mode ['xyz', 'xyzrgb']. Default: 'xyz'.
"""
# TODO: support score and class info
assert 0 <= rot_axis <= 2
# init visualizer
vis = o3d.visualization.Visualizer()
vis.create_window()
mesh_frame = geometry.TriangleMesh.create_coordinate_frame(
size=1, origin=[0, 0, 0]) # create coordinate frame
vis.add_geometry(mesh_frame)
# draw points
pcd, points_colors = _draw_points(points, vis, points_size, point_color,
mode)
# draw boxes
if bbox3d is not None:
_draw_bboxes(bbox3d, vis, points_colors, pcd, bbox_color,
points_in_box_color, rot_axis, center_mode, mode)
if show:
vis.run()
if save_path is not None:
vis.capture_screen_image(save_path)
vis.destroy_window()
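# Usage sketch (illustrative, not part of this commit): synthetic
# depth-mode points with one bottom-centered box (the default
# ``center_mode='lidar_bottom'`` shifts it to the gravity center
# internally). A display is required for the Open3D window.
import numpy as np

pts = np.random.rand(2000, 3) * 10.0
box = np.array([[5.0, 5.0, 1.0, 2.0, 2.0, 1.0, 0.3]])
show_pts_boxes(pts, bbox3d=box, show=True)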
def _draw_bboxes_ind(bbox3d,
vis,
indices,
points_colors,
pcd=None,
bbox_color=(0, 1, 0),
points_in_box_color=(1, 0, 0),
rot_axis=2,
center_mode='lidar_bottom',
mode='xyz'):
"""Draw bbox on visualizer and change the color or points inside bbox3d
with indices.
Args:
bbox3d (numpy.array | torch.tensor, shape=[M, 7]):
3d bbox (x, y, z, x_size, y_size, z_size, yaw) to visualize.
vis (:obj:`open3d.visualization.Visualizer`): open3d visualizer.
indices (numpy.array | torch.tensor, shape=[N, M]):
indicate which bbox3d each point lies in.
points_colors (numpy.array): color of each points.
pcd (:obj:`open3d.geometry.PointCloud`, optional): point cloud.
Default: None.
bbox_color (tuple[float], optional): the color of bbox.
Default: (0, 1, 0).
points_in_box_color (tuple[float], optional):
the color of points which are in bbox3d. Default: (1, 0, 0).
rot_axis (int, optional): rotation axis of bbox. Default: 2.
center_mode (str, optional): indicate the center of bbox is
bottom center or gravity center. available mode
['lidar_bottom', 'camera_bottom']. Default: 'lidar_bottom'.
mode (str, optional): indicate type of the input points,
available mode ['xyz', 'xyzrgb']. Default: 'xyz'.
"""
if isinstance(bbox3d, torch.Tensor):
bbox3d = bbox3d.cpu().numpy()
if isinstance(indices, torch.Tensor):
indices = indices.cpu().numpy()
bbox3d = bbox3d.copy()
in_box_color = np.array(points_in_box_color)
for i in range(len(bbox3d)):
center = bbox3d[i, 0:3]
dim = bbox3d[i, 3:6]
yaw = np.zeros(3)
# TODO: fix problem of current coordinate system
# dim[0], dim[1] = dim[1], dim[0] # for current coordinate
# yaw[rot_axis] = -(bbox3d[i, 6] - 0.5 * np.pi)
yaw[rot_axis] = -bbox3d[i, 6]
rot_mat = geometry.get_rotation_matrix_from_xyz(yaw)
if center_mode == 'lidar_bottom':
center[rot_axis] += dim[
rot_axis] / 2 # bottom center to gravity center
elif center_mode == 'camera_bottom':
center[rot_axis] -= dim[
rot_axis] / 2 # bottom center to gravity center
box3d = geometry.OrientedBoundingBox(center, rot_mat, dim)
line_set = geometry.LineSet.create_from_oriented_bounding_box(box3d)
line_set.paint_uniform_color(bbox_color)
# draw bboxes on visualizer
vis.add_geometry(line_set)
# change the color of points which are in box
if pcd is not None and mode == 'xyz':
points_colors[indices[:, i].astype(bool)] = in_box_color
# update points colors
if pcd is not None:
pcd.colors = o3d.utility.Vector3dVector(points_colors)
vis.update_geometry(pcd)
def show_pts_index_boxes(points,
bbox3d=None,
show=True,
indices=None,
save_path=None,
points_size=2,
point_color=(0.5, 0.5, 0.5),
bbox_color=(0, 1, 0),
points_in_box_color=(1, 0, 0),
rot_axis=2,
center_mode='lidar_bottom',
mode='xyz'):
"""Draw bbox and points on visualizer with indices that indicate which
bbox3d each point lies in.
Args:
points (numpy.array | torch.tensor, shape=[N, 3+C]):
points to visualize.
bbox3d (numpy.array | torch.tensor, shape=[M, 7]):
3D bbox (x, y, z, x_size, y_size, z_size, yaw) to visualize.
Defaults to None.
show (bool, optional): whether to show the visualization results.
Default: True.
indices (numpy.array | torch.tensor, shape=[N, M], optional):
indicate which bbox3d each point lies in. Default: None.
save_path (str, optional): path to save visualized results.
Default: None.
points_size (int, optional): the size of points to show on visualizer.
Default: 2.
point_color (tuple[float], optional): the color of points.
Default: (0.5, 0.5, 0.5).
bbox_color (tuple[float], optional): the color of bbox.
Default: (0, 1, 0).
points_in_box_color (tuple[float], optional):
the color of points which are in bbox3d. Default: (1, 0, 0).
rot_axis (int, optional): rotation axis of bbox. Default: 2.
center_mode (str, optional): indicate the center of bbox is
bottom center or gravity center. available mode
['lidar_bottom', 'camera_bottom']. Default: 'lidar_bottom'.
mode (str, optional): indicate type of the input points,
available mode ['xyz', 'xyzrgb']. Default: 'xyz'.
"""
# TODO: support score and class info
assert 0 <= rot_axis <= 2
# init visualizer
vis = o3d.visualization.Visualizer()
vis.create_window()
mesh_frame = geometry.TriangleMesh.create_coordinate_frame(
size=1, origin=[0, 0, 0]) # create coordinate frame
vis.add_geometry(mesh_frame)
# draw points
pcd, points_colors = _draw_points(points, vis, points_size, point_color,
mode)
# draw boxes
if bbox3d is not None:
_draw_bboxes_ind(bbox3d, vis, indices, points_colors, pcd, bbox_color,
points_in_box_color, rot_axis, center_mode, mode)
if show:
vis.run()
if save_path is not None:
vis.capture_screen_image(save_path)
vis.destroy_window()
class Visualizer(object):
r"""Online visualizer implemented with Open3d.
Args:
points (numpy.array, shape=[N, 3+C]): Points to visualize. The point
cloud is in mode of Coord3DMode.DEPTH (please refer to
core.structures.coord_3d_mode).
bbox3d (numpy.array, shape=[M, 7], optional): 3D bbox
(x, y, z, x_size, y_size, z_size, yaw) to visualize.
The 3D bbox is in mode of Box3DMode.DEPTH with
gravity_center (please refer to core.structures.box_3d_mode).
Default: None.
save_path (str, optional): path to save visualized results.
Default: None.
points_size (int, optional): the size of points to show on visualizer.
Default: 2.
point_color (tuple[float], optional): the color of points.
Default: (0.5, 0.5, 0.5).
bbox_color (tuple[float], optional): the color of bbox.
Default: (0, 1, 0).
points_in_box_color (tuple[float], optional):
the color of points which are in bbox3d. Default: (1, 0, 0).
rot_axis (int, optional): rotation axis of bbox. Default: 2.
center_mode (str, optional): indicate the center of bbox is
bottom center or gravity center. available mode
['lidar_bottom', 'camera_bottom']. Default: 'lidar_bottom'.
mode (str, optional): indicate type of the input points,
available mode ['xyz', 'xyzrgb']. Default: 'xyz'.
"""
def __init__(self,
points,
bbox3d=None,
save_path=None,
points_size=2,
point_color=(0.5, 0.5, 0.5),
bbox_color=(0, 1, 0),
points_in_box_color=(1, 0, 0),
rot_axis=2,
center_mode='lidar_bottom',
mode='xyz'):
super(Visualizer, self).__init__()
assert 0 <= rot_axis <= 2
# init visualizer
self.o3d_visualizer = o3d.visualization.Visualizer()
self.o3d_visualizer.create_window()
mesh_frame = geometry.TriangleMesh.create_coordinate_frame(
size=1, origin=[0, 0, 0]) # create coordinate frame
self.o3d_visualizer.add_geometry(mesh_frame)
self.points_size = points_size
self.point_color = point_color
self.bbox_color = bbox_color
self.points_in_box_color = points_in_box_color
self.rot_axis = rot_axis
self.center_mode = center_mode
self.mode = mode
self.seg_num = 0
# draw points
if points is not None:
self.pcd, self.points_colors = _draw_points(
points, self.o3d_visualizer, points_size, point_color, mode)
# draw boxes
if bbox3d is not None:
_draw_bboxes(bbox3d, self.o3d_visualizer, self.points_colors,
self.pcd, bbox_color, points_in_box_color, rot_axis,
center_mode, mode)
def add_bboxes(self, bbox3d, bbox_color=None, points_in_box_color=None):
"""Add bounding box to visualizer.
Args:
bbox3d (numpy.array, shape=[M, 7]):
3D bbox (x, y, z, x_size, y_size, z_size, yaw)
to be visualized. The 3d bbox is in mode of
Box3DMode.DEPTH with gravity_center (please refer to
core.structures.box_3d_mode).
bbox_color (tuple[float]): the color of bbox. Default: None.
points_in_box_color (tuple[float]): the color of points which
are in bbox3d. Default: None.
"""
if bbox_color is None:
bbox_color = self.bbox_color
if points_in_box_color is None:
points_in_box_color = self.points_in_box_color
_draw_bboxes(bbox3d, self.o3d_visualizer, self.points_colors, self.pcd,
bbox_color, points_in_box_color, self.rot_axis,
self.center_mode, self.mode)
def add_seg_mask(self, seg_mask_colors):
"""Add segmentation mask to visualizer via per-point colorization.
Args:
seg_mask_colors (numpy.array, shape=[N, 6]):
The segmentation mask whose first 3 dims are point coordinates
and last 3 dims are converted colors.
"""
# we can't draw the colors on existing points
# in case gt and pred mask would overlap
# instead we set a large offset along x-axis for each seg mask
self.seg_num += 1
offset = (np.array(self.pcd.points).max(0) -
np.array(self.pcd.points).min(0))[0] * 1.2 * self.seg_num
mesh_frame = geometry.TriangleMesh.create_coordinate_frame(
size=1, origin=[offset, 0, 0]) # create coordinate frame for seg
self.o3d_visualizer.add_geometry(mesh_frame)
seg_points = copy.deepcopy(seg_mask_colors)
seg_points[:, 0] += offset
_draw_points(
seg_points, self.o3d_visualizer, self.points_size, mode='xyzrgb')
def show(self, save_path=None):
"""Visualize the points cloud.
Args:
save_path (str, optional): path to save image. Default: None.
"""
self.o3d_visualizer.run()
if save_path is not None:
self.o3d_visualizer.capture_screen_image(save_path)
self.o3d_visualizer.destroy_window()
return
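# Usage sketch (illustrative, not part of this commit) for the
# Open3D-backed Visualizer above: draw random points, overlay one
# predicted box, then render. All values are arbitrary and a display
# is required.
import numpy as np

points = np.random.rand(1024, 3) * 8.0
vis = Visualizer(points)
vis.add_bboxes(np.array([[4.0, 4.0, 1.0, 1.0, 1.0, 1.0, 0.0]]))
vis.show()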
# Copyright (c) OpenMMLab. All rights reserved.
from os import path as osp
import mmcv
import numpy as np
import trimesh
from .image_vis import (draw_camera_bbox3d_on_img, draw_depth_bbox3d_on_img,
draw_lidar_bbox3d_on_img)
def _write_obj(points, out_filename):
"""Write points into ``obj`` format for meshlab visualization.
Args:
points (np.ndarray): Points in shape (N, dim).
out_filename (str): Filename to be saved.
"""
N = points.shape[0]
fout = open(out_filename, 'w')
for i in range(N):
if points.shape[1] == 6:
c = points[i, 3:].astype(int)
fout.write(
'v %f %f %f %d %d %d\n' %
(points[i, 0], points[i, 1], points[i, 2], c[0], c[1], c[2]))
else:
fout.write('v %f %f %f\n' %
(points[i, 0], points[i, 1], points[i, 2]))
fout.close()
def _write_oriented_bbox(scene_bbox, out_filename):
"""Export oriented (around Z axis) scene bbox to meshes.
Args:
scene_bbox(list[ndarray] or ndarray): xyz pos of center and
3 lengths (x_size, y_size, z_size) and heading angle around Z axis.
Y forward, X right, Z upward. heading angle of positive X is 0,
heading angle of positive Y is 90 degrees.
out_filename(str): Filename.
"""
def heading2rotmat(heading_angle):
rotmat = np.zeros((3, 3))
rotmat[2, 2] = 1
cosval = np.cos(heading_angle)
sinval = np.sin(heading_angle)
rotmat[0:2, 0:2] = np.array([[cosval, -sinval], [sinval, cosval]])
return rotmat
def convert_oriented_box_to_trimesh_fmt(box):
ctr = box[:3]
lengths = box[3:6]
trns = np.eye(4)
trns[0:3, 3] = ctr
trns[3, 3] = 1.0
trns[0:3, 0:3] = heading2rotmat(box[6])
box_trimesh_fmt = trimesh.creation.box(lengths, trns)
return box_trimesh_fmt
if len(scene_bbox) == 0:
scene_bbox = np.zeros((1, 7))
scene = trimesh.scene.Scene()
for box in scene_bbox:
scene.add_geometry(convert_oriented_box_to_trimesh_fmt(box))
mesh_list = trimesh.util.concatenate(scene.dump())
# save to obj file
trimesh.io.export.export_mesh(mesh_list, out_filename, file_type='obj')
return
def show_result(points,
gt_bboxes,
pred_bboxes,
out_dir,
filename,
show=False,
snapshot=False,
pred_labels=None):
"""Convert results into format that is directly readable for meshlab.
Args:
points (np.ndarray): Points.
gt_bboxes (np.ndarray): Ground truth boxes.
pred_bboxes (np.ndarray): Predicted boxes.
out_dir (str): Path of output directory
filename (str): Filename of the current frame.
show (bool, optional): Visualize the results online. Defaults to False.
snapshot (bool, optional): Whether to save the online results.
Defaults to False.
pred_labels (np.ndarray, optional): Predicted labels of boxes.
Defaults to None.
"""
result_path = osp.join(out_dir, filename)
mmcv.mkdir_or_exist(result_path)
if show:
from .open3d_vis import Visualizer
vis = Visualizer(points)
if pred_bboxes is not None:
if pred_labels is None:
vis.add_bboxes(bbox3d=pred_bboxes)
else:
palette = np.random.randint(
0, 255, size=(pred_labels.max() + 1, 3)) / 256
label_dict = {}
for j in range(len(pred_labels)):
i = int(pred_labels[j].numpy())
label_dict.setdefault(i, []).append(pred_bboxes[j])
for i in label_dict:
vis.add_bboxes(
bbox3d=np.array(label_dict[i]),
bbox_color=palette[i],
points_in_box_color=palette[i])
if gt_bboxes is not None:
vis.add_bboxes(bbox3d=gt_bboxes, bbox_color=(0, 0, 1))
show_path = osp.join(result_path,
f'{filename}_online.png') if snapshot else None
vis.show(show_path)
if points is not None:
_write_obj(points, osp.join(result_path, f'{filename}_points.obj'))
if gt_bboxes is not None:
# bottom center to gravity center
gt_bboxes[..., 2] += gt_bboxes[..., 5] / 2
_write_oriented_bbox(gt_bboxes,
osp.join(result_path, f'{filename}_gt.obj'))
if pred_bboxes is not None:
# bottom center to gravity center
pred_bboxes[..., 2] += pred_bboxes[..., 5] / 2
_write_oriented_bbox(pred_bboxes,
osp.join(result_path, f'{filename}_pred.obj'))
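# Usage sketch (illustrative, not part of this commit): dump synthetic
# points and one ground-truth box to meshlab-readable .obj files; the
# output directory is a placeholder.
import numpy as np

pts = np.random.rand(500, 3).astype(np.float32)
gt = np.array([[0.5, 0.5, 0.5, 0.4, 0.4, 0.4, 0.0]])
show_result(pts, gt, None, '/tmp/vis', 'demo_frame', show=False)
# -> /tmp/vis/demo_frame/demo_frame_points.obj and demo_frame_gt.obj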
def show_seg_result(points,
gt_seg,
pred_seg,
out_dir,
filename,
palette,
ignore_index=None,
show=False,
snapshot=False):
"""Convert results into format that is directly readable for meshlab.
Args:
points (np.ndarray): Points.
gt_seg (np.ndarray): Ground truth segmentation mask.
pred_seg (np.ndarray): Predicted segmentation mask.
out_dir (str): Path of output directory
filename (str): Filename of the current frame.
palette (np.ndarray): Mapping between class labels and colors.
ignore_index (int, optional): The label index to be ignored, e.g.
unannotated points. Defaults to None.
show (bool, optional): Visualize the results online. Defaults to False.
snapshot (bool, optional): Whether to save the online results.
Defaults to False.
"""
# we need 3D coordinates to visualize segmentation mask
if gt_seg is not None or pred_seg is not None:
assert points is not None, \
'3D coordinates are required for segmentation visualization'
# filter out ignored points
if gt_seg is not None and ignore_index is not None:
if points is not None:
points = points[gt_seg != ignore_index]
if pred_seg is not None:
pred_seg = pred_seg[gt_seg != ignore_index]
gt_seg = gt_seg[gt_seg != ignore_index]
if gt_seg is not None:
gt_seg_color = palette[gt_seg]
gt_seg_color = np.concatenate([points[:, :3], gt_seg_color], axis=1)
if pred_seg is not None:
pred_seg_color = palette[pred_seg]
pred_seg_color = np.concatenate([points[:, :3], pred_seg_color],
axis=1)
result_path = osp.join(out_dir, filename)
mmcv.mkdir_or_exist(result_path)
# online visualization of segmentation mask
# we show three masks in a row, scene_points, gt_mask, pred_mask
if show:
from .open3d_vis import Visualizer
mode = 'xyzrgb' if points.shape[1] == 6 else 'xyz'
vis = Visualizer(points, mode=mode)
if gt_seg is not None:
vis.add_seg_mask(gt_seg_color)
if pred_seg is not None:
vis.add_seg_mask(pred_seg_color)
show_path = osp.join(result_path,
f'{filename}_online.png') if snapshot else None
vis.show(show_path)
if points is not None:
_write_obj(points, osp.join(result_path, f'{filename}_points.obj'))
if gt_seg is not None:
_write_obj(gt_seg_color, osp.join(result_path, f'{filename}_gt.obj'))
if pred_seg is not None:
_write_obj(pred_seg_color, osp.join(result_path,
f'{filename}_pred.obj'))
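# Usage sketch (illustrative, not part of this commit): colorize a
# synthetic 3-class prediction with a fixed palette and export the
# colored cloud; paths are placeholders.
import numpy as np

pts = np.random.rand(300, 3).astype(np.float32)
pred = np.random.randint(0, 3, size=300)
palette = np.array([[255, 0, 0], [0, 255, 0], [0, 0, 255]])
show_seg_result(pts, None, pred, '/tmp/vis', 'seg_frame', palette,
                show=False)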
def show_multi_modality_result(img,
gt_bboxes,
pred_bboxes,
proj_mat,
out_dir,
filename,
box_mode='lidar',
img_metas=None,
show=False,
gt_bbox_color=(61, 102, 255),
pred_bbox_color=(241, 101, 72)):
"""Convert multi-modality detection results into 2D results.
Project the predicted 3D bbox to 2D image plane and visualize them.
Args:
img (np.ndarray): The numpy array of image in cv2 fashion.
gt_bboxes (:obj:`BaseInstance3DBoxes`): Ground truth boxes.
pred_bboxes (:obj:`BaseInstance3DBoxes`): Predicted boxes.
proj_mat (numpy.array, shape=[4, 4]): The projection matrix
according to the camera intrinsic parameters.
out_dir (str): Path of output directory.
filename (str): Filename of the current frame.
box_mode (str, optional): Coordinate system the boxes are in.
Should be one of 'depth', 'lidar' and 'camera'.
Defaults to 'lidar'.
img_metas (dict, optional): Used in projecting depth bbox.
Defaults to None.
show (bool, optional): Visualize the results online. Defaults to False.
gt_bbox_color (str or tuple(int), optional): Color of gt bbox lines.
The tuple of color should be in BGR order. Default: (61, 102, 255).
pred_bbox_color (str or tuple(int), optional): Color of pred bbox lines.
The tuple of color should be in BGR order. Default: (241, 101, 72).
"""
if box_mode == 'depth':
draw_bbox = draw_depth_bbox3d_on_img
elif box_mode == 'lidar':
draw_bbox = draw_lidar_bbox3d_on_img
elif box_mode == 'camera':
draw_bbox = draw_camera_bbox3d_on_img
else:
raise NotImplementedError(f'unsupported box mode {box_mode}')
result_path = osp.join(out_dir, filename)
mmcv.mkdir_or_exist(result_path)
if show:
show_img = img.copy()
if gt_bboxes is not None:
show_img = draw_bbox(
gt_bboxes, show_img, proj_mat, img_metas, color=gt_bbox_color)
if pred_bboxes is not None:
show_img = draw_bbox(
pred_bboxes,
show_img,
proj_mat,
img_metas,
color=pred_bbox_color)
mmcv.imshow(show_img, win_name='project_bbox3d_img', wait_time=0)
if img is not None:
mmcv.imwrite(img, osp.join(result_path, f'{filename}_img.png'))
if gt_bboxes is not None:
gt_img = draw_bbox(
gt_bboxes, img, proj_mat, img_metas, color=gt_bbox_color)
mmcv.imwrite(gt_img, osp.join(result_path, f'{filename}_gt.png'))
if pred_bboxes is not None:
pred_img = draw_bbox(
pred_bboxes, img, proj_mat, img_metas, color=pred_bbox_color)
mmcv.imwrite(pred_img, osp.join(result_path, f'{filename}_pred.png'))
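# Usage sketch (illustrative, not part of this commit): project one
# camera-frame box onto a blank image and write the rendering; the
# intrinsic values are made up.
import numpy as np
from mmdet3d.core.bbox import CameraInstance3DBoxes

img = np.zeros((375, 1242, 3), dtype=np.uint8)
boxes = CameraInstance3DBoxes(
    np.array([[0.0, 1.0, 10.0, 2.0, 1.5, 4.0, 0.0]]))
K = np.array([[721.5, 0.0, 609.6],
              [0.0, 721.5, 172.9],
              [0.0, 0.0, 1.0]])
show_multi_modality_result(img, None, boxes, K, '/tmp/vis', 'cam_frame',
                           box_mode='camera', show=False)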
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import warnings
from os import path as osp
from pathlib import Path
import mmcv
import numpy as np
from mmcv import Config, DictAction, mkdir_or_exist
from mmdet3d.core.bbox import (Box3DMode, CameraInstance3DBoxes, Coord3DMode,
DepthInstance3DBoxes, LiDARInstance3DBoxes)
from mmdet3d.core.visualizer import (show_multi_modality_result, show_result,
show_seg_result)
from mmdet3d.datasets import build_dataset
from mmdet3d.registry import VISUALIZERS
from mmdet3d.utils import register_all_modules
def parse_args():
@@ -80,8 +75,8 @@ def build_data_cfg(config_path, skip_type, aug, cfg_options):
if cfg.train_pipeline[i]['type'] == 'LoadAnnotations3D':
show_pipeline.insert(i, cfg.train_pipeline[i])
# Collect points as well as labels
if cfg.train_pipeline[i]['type'] == 'Collect3D':
if show_pipeline[-1]['type'] == 'Collect3D':
if cfg.train_pipeline[i]['type'] == 'Pack3DInputs':
if show_pipeline[-1]['type'] == 'Pack3DInputs':
show_pipeline[-1] = cfg.train_pipeline[i]
else:
show_pipeline.append(cfg.train_pipeline[i])
@@ -93,105 +88,6 @@ def build_data_cfg(config_path, skip_type, aug, cfg_options):
return cfg
def to_depth_mode(points, bboxes):
"""Convert points and bboxes to Depth Coord and Depth Box mode."""
if points is not None:
points = Coord3DMode.convert_point(points.copy(), Coord3DMode.LIDAR,
Coord3DMode.DEPTH)
if bboxes is not None:
bboxes = Box3DMode.convert(bboxes.clone(), Box3DMode.LIDAR,
Box3DMode.DEPTH)
return points, bboxes
def show_det_data(input, out_dir, show=False):
"""Visualize 3D point cloud and 3D bboxes."""
img_metas = input['img_metas']._data
points = input['points']._data.numpy()
gt_bboxes = input['gt_bboxes_3d']._data.tensor
if img_metas['box_mode_3d'] != Box3DMode.DEPTH:
points, gt_bboxes = to_depth_mode(points, gt_bboxes)
filename = osp.splitext(osp.basename(img_metas['pts_filename']))[0]
show_result(
points,
gt_bboxes.clone(),
None,
out_dir,
filename,
show=show,
snapshot=True)
def show_seg_data(input, out_dir, show=False):
"""Visualize 3D point cloud and segmentation mask."""
img_metas = input['img_metas']._data
points = input['points']._data.numpy()
gt_seg = input['pts_semantic_mask']._data.numpy()
filename = osp.splitext(osp.basename(img_metas['pts_filename']))[0]
show_seg_result(
points,
gt_seg.copy(),
None,
out_dir,
filename,
np.array(img_metas['PALETTE']),
img_metas['ignore_index'],
show=show,
snapshot=True)
def show_proj_bbox_img(input, out_dir, show=False, is_nus_mono=False):
"""Visualize 3D bboxes on 2D image by projection."""
gt_bboxes = input['gt_bboxes_3d']._data
img_metas = input['img_metas']._data
img = input['img']._data.numpy()
# need to transpose channel to first dim
img = img.transpose(1, 2, 0)
# no 3D gt bboxes, just show img
if gt_bboxes.tensor.shape[0] == 0:
gt_bboxes = None
filename = Path(img_metas['filename']).name
if isinstance(gt_bboxes, DepthInstance3DBoxes):
show_multi_modality_result(
img,
gt_bboxes,
None,
None,
out_dir,
filename,
box_mode='depth',
img_metas=img_metas,
show=show)
elif isinstance(gt_bboxes, LiDARInstance3DBoxes):
show_multi_modality_result(
img,
gt_bboxes,
None,
img_metas['lidar2img'],
out_dir,
filename,
box_mode='lidar',
img_metas=img_metas,
show=show)
elif isinstance(gt_bboxes, CameraInstance3DBoxes):
show_multi_modality_result(
img,
gt_bboxes,
None,
img_metas['cam2img'],
out_dir,
filename,
box_mode='camera',
img_metas=img_metas,
show=show)
else:
# can't project, just show img
warnings.warn(
f'unrecognized gt box type {type(gt_bboxes)}, only show image')
show_multi_modality_result(
img, None, None, None, out_dir, filename, show=show)
def main():
args = parse_args()
@@ -200,31 +96,42 @@ def main():
cfg = build_data_cfg(args.config, args.skip_type, args.aug,
args.cfg_options)
# register all modules in mmdet3d into the registries
register_all_modules()
try:
dataset = build_dataset(
cfg.data.train, default_args=dict(filter_empty_gt=False))
cfg.train_dataloader.dataset,
default_args=dict(filter_empty_gt=False))
except TypeError: # seg dataset doesn't have `filter_empty_gt` key
dataset = build_dataset(cfg.data.train)
dataset = build_dataset(cfg.train_dataloader.dataset)
dataset_type = cfg.dataset_type
# configure visualization mode
vis_task = args.task # 'det', 'seg', 'multi_modality-det', 'mono-det'
visualizer = VISUALIZERS.build(cfg.visualizer)
visualizer.dataset_meta = dataset.metainfo
progress_bar = mmcv.ProgressBar(len(dataset))
for input in dataset:
if vis_task in ['det', 'multi_modality-det']:
# show 3D bboxes on 3D point clouds
show_det_data(input, args.output_dir, show=args.online)
if vis_task in ['multi_modality-det', 'mono-det']:
# project 3D bboxes to 2D image
show_proj_bbox_img(
input,
args.output_dir,
show=args.online,
is_nus_mono=(dataset_type == 'NuScenesMonoDataset'))
elif vis_task in ['seg']:
# show 3D segmentation mask on 3D point clouds
show_seg_data(input, args.output_dir, show=args.online)
for item in dataset:
# the 3D boxes in the input could be in any of the three coordinate systems
data_input = item['inputs']
data_sample = item['data_sample'].numpy()
out_file = args.output_dir if args.output_dir is not None else None
visualizer.add_datasample(
'3d visualizer',
data_input,
data_sample,
show=not args.not_show,
wait_time=args.show_interval,
out_file=out_file,
vis_task=vis_task)
progress_bar.update()